Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
      3  * Copyright (C) 2010 Patrick Gansterer <paroga (at) paroga.com>
      4  *
      5  * This library is free software; you can redistribute it and/or
      6  * modify it under the terms of the GNU Library General Public
      7  * License as published by the Free Software Foundation; either
      8  * version 2 of the License, or (at your option) any later version.
      9  *
     10  * This library is distributed in the hope that it will be useful,
     11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * Library General Public License for more details.
     14  *
     15  * You should have received a copy of the GNU Library General Public License
     16  * along with this library; see the file COPYING.LIB.  If not, write to
     17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18  * Boston, MA 02110-1301, USA.
     19  *
     20  */
     21 
     22 #include "config.h"
     23 
     24 #include "AtomicString.h"
     25 
     26 #include "StringHash.h"
     27 #include <wtf/HashSet.h>
     28 #include <wtf/Threading.h>
     29 #include <wtf/WTFThreadData.h>
     30 #include <wtf/unicode/UTF8.h>
     31 
     32 namespace WTF {
     33 
     34 using namespace Unicode;
     35 
     36 COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
     37 
     38 class AtomicStringTable {
     39 public:
     40     static AtomicStringTable* create()
     41     {
     42         AtomicStringTable* table = new AtomicStringTable;
     43 
     44         WTFThreadData& data = wtfThreadData();
     45         data.m_atomicStringTable = table;
     46         data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
     47 
     48         return table;
     49     }
     50 
     51     HashSet<StringImpl*>& table()
     52     {
     53         return m_table;
     54     }
     55 
     56 private:
     57     static void destroy(AtomicStringTable* table)
     58     {
     59         HashSet<StringImpl*>::iterator end = table->m_table.end();
     60         for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
     61             (*iter)->setIsAtomic(false);
     62         delete table;
     63     }
     64 
     65     HashSet<StringImpl*> m_table;
     66 };
     67 
     68 static inline HashSet<StringImpl*>& stringTable()
     69 {
     70     // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
     71     AtomicStringTable* table = wtfThreadData().atomicStringTable();
     72     if (UNLIKELY(!table))
     73         table = AtomicStringTable::create();
     74     return table->table();
     75 }
     76 
     77 template<typename T, typename HashTranslator>
     78 static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
     79 {
     80     pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<T, HashTranslator>(value);
     81 
     82     // If the string is newly-translated, then we need to adopt it.
     83     // The boolean in the pair tells us if that is so.
     84     return addResult.second ? adoptRef(*addResult.first) : *addResult.first;
     85 }
     86 
     87 struct CStringTranslator {
     88     static unsigned hash(const char* c)
     89     {
     90         return StringHasher::computeHash(c);
     91     }
     92 
     93     static bool equal(StringImpl* r, const char* s)
     94     {
     95         int length = r->length();
     96         const UChar* d = r->characters();
     97         for (int i = 0; i != length; ++i) {
     98             unsigned char c = s[i];
     99             if (d[i] != c)
    100                 return false;
    101         }
    102         return !s[length];
    103     }
    104 
    105     static void translate(StringImpl*& location, const char* const& c, unsigned hash)
    106     {
    107         location = StringImpl::create(c).leakRef();
    108         location->setHash(hash);
    109         location->setIsAtomic(true);
    110     }
    111 };
    112 
    113 bool operator==(const AtomicString& a, const char* b)
    114 {
    115     StringImpl* impl = a.impl();
    116     if ((!impl || !impl->characters()) && !b)
    117         return true;
    118     if ((!impl || !impl->characters()) || !b)
    119         return false;
    120     return CStringTranslator::equal(impl, b);
    121 }
    122 
    123 PassRefPtr<StringImpl> AtomicString::add(const char* c)
    124 {
    125     if (!c)
    126         return 0;
    127     if (!*c)
    128         return StringImpl::empty();
    129 
    130     return addToStringTable<const char*, CStringTranslator>(c);
    131 }
    132 
    133 struct UCharBuffer {
    134     const UChar* s;
    135     unsigned length;
    136 };
    137 
    138 static inline bool equal(StringImpl* string, const UChar* characters, unsigned length)
    139 {
    140     if (string->length() != length)
    141         return false;
    142 
    143     // FIXME: perhaps we should have a more abstract macro that indicates when
    144     // going 4 bytes at a time is unsafe
    145 #if CPU(ARM) || CPU(SH4) || CPU(MIPS) || CPU(SPARC)
    146     const UChar* stringCharacters = string->characters();
    147     for (unsigned i = 0; i != length; ++i) {
    148         if (*stringCharacters++ != *characters++)
    149             return false;
    150     }
    151     return true;
    152 #else
    153     /* Do it 4-bytes-at-a-time on architectures where it's safe */
    154 
    155     const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters());
    156     const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters);
    157 
    158     unsigned halfLength = length >> 1;
    159     for (unsigned i = 0; i != halfLength; ++i) {
    160         if (*stringCharacters++ != *bufferCharacters++)
    161             return false;
    162     }
    163 
    164     if (length & 1 &&  *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters))
    165         return false;
    166 
    167     return true;
    168 #endif
    169 }
    170 
    171 bool operator==(const AtomicString& string, const Vector<UChar>& vector)
    172 {
    173     return string.impl() && equal(string.impl(), vector.data(), vector.size());
    174 }
    175 
    176 struct UCharBufferTranslator {
    177     static unsigned hash(const UCharBuffer& buf)
    178     {
    179         return StringHasher::computeHash(buf.s, buf.length);
    180     }
    181 
    182     static bool equal(StringImpl* const& str, const UCharBuffer& buf)
    183     {
    184         return WTF::equal(str, buf.s, buf.length);
    185     }
    186 
    187     static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
    188     {
    189         location = StringImpl::create(buf.s, buf.length).leakRef();
    190         location->setHash(hash);
    191         location->setIsAtomic(true);
    192     }
    193 };
    194 
    195 struct HashAndCharacters {
    196     unsigned hash;
    197     const UChar* characters;
    198     unsigned length;
    199 };
    200 
    201 struct HashAndCharactersTranslator {
    202     static unsigned hash(const HashAndCharacters& buffer)
    203     {
    204         ASSERT(buffer.hash == StringHasher::computeHash(buffer.characters, buffer.length));
    205         return buffer.hash;
    206     }
    207 
    208     static bool equal(StringImpl* const& string, const HashAndCharacters& buffer)
    209     {
    210         return WTF::equal(string, buffer.characters, buffer.length);
    211     }
    212 
    213     static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash)
    214     {
    215         location = StringImpl::create(buffer.characters, buffer.length).leakRef();
    216         location->setHash(hash);
    217         location->setIsAtomic(true);
    218     }
    219 };
    220 
    221 struct HashAndUTF8Characters {
    222     unsigned hash;
    223     const char* characters;
    224     unsigned length;
    225     unsigned utf16Length;
    226 };
    227 
    228 struct HashAndUTF8CharactersTranslator {
    229     static unsigned hash(const HashAndUTF8Characters& buffer)
    230     {
    231         return buffer.hash;
    232     }
    233 
    234     static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
    235     {
    236         if (buffer.utf16Length != string->length())
    237             return false;
    238 
    239         const UChar* stringCharacters = string->characters();
    240 
    241         // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
    242         if (buffer.utf16Length != buffer.length)
    243             return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length);
    244 
    245         for (unsigned i = 0; i < buffer.length; ++i) {
    246             ASSERT(isASCII(buffer.characters[i]));
    247             if (stringCharacters[i] != buffer.characters[i])
    248                 return false;
    249         }
    250 
    251         return true;
    252     }
    253 
    254     static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
    255     {
    256         UChar* target;
    257         location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef();
    258 
    259         const char* source = buffer.characters;
    260         if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK)
    261             ASSERT_NOT_REACHED();
    262 
    263         location->setHash(hash);
    264         location->setIsAtomic(true);
    265     }
    266 };
    267 
    268 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
    269 {
    270     if (!s)
    271         return 0;
    272 
    273     if (!length)
    274         return StringImpl::empty();
    275 
    276     UCharBuffer buffer = { s, length };
    277     return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
    278 }
    279 
    280 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
    281 {
    282     ASSERT(s);
    283     ASSERT(existingHash);
    284 
    285     if (!length)
    286         return StringImpl::empty();
    287 
    288     HashAndCharacters buffer = { existingHash, s, length };
    289     return addToStringTable<HashAndCharacters, HashAndCharactersTranslator>(buffer);
    290 }
    291 
    292 PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
    293 {
    294     if (!s)
    295         return 0;
    296 
    297     int length = 0;
    298     while (s[length] != UChar(0))
    299         length++;
    300 
    301     if (!length)
    302         return StringImpl::empty();
    303 
    304     UCharBuffer buffer = { s, length };
    305     return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
    306 }
    307 
    308 PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
    309 {
    310     if (!r || r->isAtomic())
    311         return r;
    312 
    313     if (!r->length())
    314         return StringImpl::empty();
    315 
    316     StringImpl* result = *stringTable().add(r).first;
    317     if (result == r)
    318         r->setIsAtomic(true);
    319     return result;
    320 }
    321 
    322 AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash)
    323 {
    324     ASSERT(s);
    325     ASSERT(existingHash);
    326 
    327     if (!length)
    328         return static_cast<AtomicStringImpl*>(StringImpl::empty());
    329 
    330     HashAndCharacters buffer = { existingHash, s, length };
    331     HashSet<StringImpl*>::iterator iterator = stringTable().find<HashAndCharacters, HashAndCharactersTranslator>(buffer);
    332     if (iterator == stringTable().end())
    333         return 0;
    334     return static_cast<AtomicStringImpl*>(*iterator);
    335 }
    336 
    337 void AtomicString::remove(StringImpl* r)
    338 {
    339     stringTable().remove(r);
    340 }
    341 
    342 AtomicString AtomicString::lower() const
    343 {
    344     // Note: This is a hot function in the Dromaeo benchmark.
    345     StringImpl* impl = this->impl();
    346     if (UNLIKELY(!impl))
    347         return *this;
    348     RefPtr<StringImpl> newImpl = impl->lower();
    349     if (LIKELY(newImpl == impl))
    350         return *this;
    351     return AtomicString(newImpl);
    352 }
    353 
    354 AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
    355 {
    356     HashAndUTF8Characters buffer;
    357     buffer.characters = charactersStart;
    358     buffer.hash = calculateStringHashAndLengthFromUTF8(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
    359 
    360     if (!buffer.hash)
    361         return nullAtom;
    362 
    363     AtomicString atomicString;
    364     atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
    365     return atomicString;
    366 }
    367 
    368 } // namespace WTF
    369