Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved.
      3  * Copyright (C) 2010 Patrick Gansterer <paroga (at) paroga.com>
      4  * Copyright (C) 2012 Google Inc. All rights reserved.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #include "config.h"
     24 #include "AtomicString.h"
     25 
     26 #include "StringHash.h"
     27 #include "wtf/HashSet.h"
     28 #include "wtf/WTFThreadData.h"
     29 #include "wtf/dtoa.h"
     30 #include "wtf/text/IntegerToStringConversion.h"
     31 #include "wtf/unicode/UTF8.h"
     32 
     33 namespace WTF {
     34 
     35 using namespace Unicode;
     36 
     37 COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
     38 
     39 class AtomicStringTable {
     40     WTF_MAKE_NONCOPYABLE(AtomicStringTable);
     41 public:
     42     static AtomicStringTable* create(WTFThreadData& data)
     43     {
     44         data.m_atomicStringTable = new AtomicStringTable;
     45         data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
     46         data.m_atomicStringTable->addStaticStrings();
     47         return data.m_atomicStringTable;
     48     }
     49 
     50     StringImpl* addStringImpl(StringImpl* string)
     51     {
     52         if (!string->length())
     53             return StringImpl::empty();
     54 
     55         StringImpl* result = *m_table.add(string).storedValue;
     56 
     57         if (!result->isAtomic())
     58             result->setIsAtomic(true);
     59 
     60         ASSERT(!string->isStatic() || result->isStatic());
     61         return result;
     62     }
     63 
     64     HashSet<StringImpl*>& table()
     65     {
     66         return m_table;
     67     }
     68 
     69 private:
     70     AtomicStringTable() { }
     71 
     72     void addStaticStrings()
     73     {
     74         const StaticStringsTable& staticStrings = StringImpl::allStaticStrings();
     75 
     76         StaticStringsTable::const_iterator it = staticStrings.begin();
     77         for (; it != staticStrings.end(); ++it) {
     78             addStringImpl(it->value);
     79         }
     80     }
     81 
     82     static void destroy(AtomicStringTable* table)
     83     {
     84         HashSet<StringImpl*>::iterator end = table->m_table.end();
     85         for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter) {
     86             StringImpl* string = *iter;
     87             if (!string->isStatic()) {
     88                 ASSERT(string->isAtomic());
     89                 string->setIsAtomic(false);
     90             }
     91         }
     92         delete table;
     93     }
     94 
     95     HashSet<StringImpl*> m_table;
     96 };
     97 
     98 static inline AtomicStringTable& atomicStringTable()
     99 {
    100     // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
    101     WTFThreadData& data = wtfThreadData();
    102     AtomicStringTable* table = data.atomicStringTable();
    103     if (UNLIKELY(!table))
    104         table = AtomicStringTable::create(data);
    105     return *table;
    106 }
    107 
    108 static inline HashSet<StringImpl*>& atomicStrings()
    109 {
    110     return atomicStringTable().table();
    111 }
    112 
    113 template<typename T, typename HashTranslator>
    114 static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
    115 {
    116     HashSet<StringImpl*>::AddResult addResult = atomicStrings().add<HashTranslator>(value);
    117 
    118     // If the string is newly-translated, then we need to adopt it.
    119     // The boolean in the pair tells us if that is so.
    120     return addResult.isNewEntry ? adoptRef(*addResult.storedValue) : *addResult.storedValue;
    121 }
    122 
    123 PassRefPtr<StringImpl> AtomicString::add(const LChar* c)
    124 {
    125     if (!c)
    126         return nullptr;
    127     if (!*c)
    128         return StringImpl::empty();
    129 
    130     return add(c, strlen(reinterpret_cast<const char*>(c)));
    131 }
    132 
    133 template<typename CharacterType>
    134 struct HashTranslatorCharBuffer {
    135     const CharacterType* s;
    136     unsigned length;
    137 };
    138 
    139 typedef HashTranslatorCharBuffer<UChar> UCharBuffer;
    140 struct UCharBufferTranslator {
    141     static unsigned hash(const UCharBuffer& buf)
    142     {
    143         return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
    144     }
    145 
    146     static bool equal(StringImpl* const& str, const UCharBuffer& buf)
    147     {
    148         return WTF::equal(str, buf.s, buf.length);
    149     }
    150 
    151     static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
    152     {
    153         location = StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef();
    154         location->setHash(hash);
    155         location->setIsAtomic(true);
    156     }
    157 };
    158 
    159 template<typename CharacterType>
    160 struct HashAndCharacters {
    161     unsigned hash;
    162     const CharacterType* characters;
    163     unsigned length;
    164 };
    165 
    166 template<typename CharacterType>
    167 struct HashAndCharactersTranslator {
    168     static unsigned hash(const HashAndCharacters<CharacterType>& buffer)
    169     {
    170         ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length));
    171         return buffer.hash;
    172     }
    173 
    174     static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer)
    175     {
    176         return WTF::equal(string, buffer.characters, buffer.length);
    177     }
    178 
    179     static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash)
    180     {
    181         location = StringImpl::create(buffer.characters, buffer.length).leakRef();
    182         location->setHash(hash);
    183         location->setIsAtomic(true);
    184     }
    185 };
    186 
    187 struct HashAndUTF8Characters {
    188     unsigned hash;
    189     const char* characters;
    190     unsigned length;
    191     unsigned utf16Length;
    192 };
    193 
    194 struct HashAndUTF8CharactersTranslator {
    195     static unsigned hash(const HashAndUTF8Characters& buffer)
    196     {
    197         return buffer.hash;
    198     }
    199 
    200     static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
    201     {
    202         if (buffer.utf16Length != string->length())
    203             return false;
    204 
    205         // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
    206         if (buffer.utf16Length != buffer.length) {
    207             if (string->is8Bit()) {
    208                 const LChar* characters8 = string->characters8();
    209                 return equalLatin1WithUTF8(characters8, characters8 + string->length(), buffer.characters, buffer.characters + buffer.length);
    210             }
    211             const UChar* characters16 = string->characters16();
    212             return equalUTF16WithUTF8(characters16, characters16 + string->length(), buffer.characters, buffer.characters + buffer.length);
    213         }
    214 
    215         if (string->is8Bit()) {
    216             const LChar* stringCharacters = string->characters8();
    217 
    218             for (unsigned i = 0; i < buffer.length; ++i) {
    219                 ASSERT(isASCII(buffer.characters[i]));
    220                 if (stringCharacters[i] != buffer.characters[i])
    221                     return false;
    222             }
    223 
    224             return true;
    225         }
    226 
    227         const UChar* stringCharacters = string->characters16();
    228 
    229         for (unsigned i = 0; i < buffer.length; ++i) {
    230             ASSERT(isASCII(buffer.characters[i]));
    231             if (stringCharacters[i] != buffer.characters[i])
    232                 return false;
    233         }
    234 
    235         return true;
    236     }
    237 
    238     static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
    239     {
    240         UChar* target;
    241         RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target);
    242 
    243         bool isAllASCII;
    244         const char* source = buffer.characters;
    245         if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK)
    246             ASSERT_NOT_REACHED();
    247 
    248         if (isAllASCII)
    249             newString = StringImpl::create(buffer.characters, buffer.length);
    250 
    251         location = newString.release().leakRef();
    252         location->setHash(hash);
    253         location->setIsAtomic(true);
    254     }
    255 };
    256 
    257 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
    258 {
    259     if (!s)
    260         return nullptr;
    261 
    262     if (!length)
    263         return StringImpl::empty();
    264 
    265     UCharBuffer buffer = { s, length };
    266     return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
    267 }
    268 
    269 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
    270 {
    271     ASSERT(s);
    272     ASSERT(existingHash);
    273 
    274     if (!length)
    275         return StringImpl::empty();
    276 
    277     HashAndCharacters<UChar> buffer = { existingHash, s, length };
    278     return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar> >(buffer);
    279 }
    280 
    281 PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
    282 {
    283     if (!s)
    284         return nullptr;
    285 
    286     unsigned length = 0;
    287     while (s[length] != UChar(0))
    288         ++length;
    289 
    290     if (!length)
    291         return StringImpl::empty();
    292 
    293     UCharBuffer buffer = { s, length };
    294     return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
    295 }
    296 
    297 struct SubstringLocation {
    298     StringImpl* baseString;
    299     unsigned start;
    300     unsigned length;
    301 };
    302 
    303 struct SubstringTranslator {
    304     static unsigned hash(const SubstringLocation& buffer)
    305     {
    306         if (buffer.baseString->is8Bit())
    307             return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length);
    308         return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length);
    309     }
    310 
    311     static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
    312     {
    313         if (buffer.baseString->is8Bit())
    314             return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length);
    315         return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length);
    316     }
    317 
    318     static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash)
    319     {
    320         location = buffer.baseString->substring(buffer.start, buffer.length).leakRef();
    321         location->setHash(hash);
    322         location->setIsAtomic(true);
    323     }
    324 };
    325 
    326 PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length)
    327 {
    328     if (!baseString)
    329         return nullptr;
    330 
    331     if (!length || start >= baseString->length())
    332         return StringImpl::empty();
    333 
    334     unsigned maxLength = baseString->length() - start;
    335     if (length >= maxLength) {
    336         if (!start)
    337             return add(baseString);
    338         length = maxLength;
    339     }
    340 
    341     SubstringLocation buffer = { baseString, start, length };
    342     return addToStringTable<SubstringLocation, SubstringTranslator>(buffer);
    343 }
    344 
    345 typedef HashTranslatorCharBuffer<LChar> LCharBuffer;
    346 struct LCharBufferTranslator {
    347     static unsigned hash(const LCharBuffer& buf)
    348     {
    349         return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
    350     }
    351 
    352     static bool equal(StringImpl* const& str, const LCharBuffer& buf)
    353     {
    354         return WTF::equal(str, buf.s, buf.length);
    355     }
    356 
    357     static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash)
    358     {
    359         location = StringImpl::create(buf.s, buf.length).leakRef();
    360         location->setHash(hash);
    361         location->setIsAtomic(true);
    362     }
    363 };
    364 
    365 typedef HashTranslatorCharBuffer<char> CharBuffer;
    366 struct CharBufferFromLiteralDataTranslator {
    367     static unsigned hash(const CharBuffer& buf)
    368     {
    369         return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length);
    370     }
    371 
    372     static bool equal(StringImpl* const& str, const CharBuffer& buf)
    373     {
    374         return WTF::equal(str, buf.s, buf.length);
    375     }
    376 
    377     static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash)
    378     {
    379         location = StringImpl::create(buf.s, buf.length).leakRef();
    380         location->setHash(hash);
    381         location->setIsAtomic(true);
    382     }
    383 };
    384 
    385 PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length)
    386 {
    387     if (!s)
    388         return nullptr;
    389 
    390     if (!length)
    391         return StringImpl::empty();
    392 
    393     LCharBuffer buffer = { s, length };
    394     return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
    395 }
    396 
    397 PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length)
    398 {
    399     ASSERT(characters);
    400     ASSERT(length);
    401 
    402     CharBuffer buffer = { characters, length };
    403     return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer);
    404 }
    405 
    406 PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* string)
    407 {
    408     return atomicStringTable().addStringImpl(string);
    409 }
    410 
    411 template<typename CharacterType>
    412 static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl)
    413 {
    414     HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() };
    415     return atomicStrings().find<HashAndCharactersTranslator<CharacterType> >(buffer);
    416 }
    417 
    418 StringImpl* AtomicString::find(const StringImpl* stringImpl)
    419 {
    420     ASSERT(stringImpl);
    421     ASSERT(stringImpl->existingHash());
    422 
    423     if (!stringImpl->length())
    424         return StringImpl::empty();
    425 
    426     HashSet<StringImpl*>::iterator iterator;
    427     if (stringImpl->is8Bit())
    428         iterator = findString<LChar>(stringImpl);
    429     else
    430         iterator = findString<UChar>(stringImpl);
    431     if (iterator == atomicStrings().end())
    432         return 0;
    433     return *iterator;
    434 }
    435 
    436 void AtomicString::remove(StringImpl* r)
    437 {
    438     HashSet<StringImpl*>::iterator iterator;
    439     if (r->is8Bit())
    440         iterator = findString<LChar>(r);
    441     else
    442         iterator = findString<UChar>(r);
    443     RELEASE_ASSERT(iterator != atomicStrings().end());
    444     atomicStrings().remove(iterator);
    445 }
    446 
    447 AtomicString AtomicString::lower() const
    448 {
    449     // Note: This is a hot function in the Dromaeo benchmark.
    450     StringImpl* impl = this->impl();
    451     if (UNLIKELY(!impl))
    452         return *this;
    453     RefPtr<StringImpl> newImpl = impl->lower();
    454     if (LIKELY(newImpl == impl))
    455         return *this;
    456     return AtomicString(newImpl.release());
    457 }
    458 
    459 AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
    460 {
    461     HashAndUTF8Characters buffer;
    462     buffer.characters = charactersStart;
    463     buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
    464 
    465     if (!buffer.hash)
    466         return nullAtom;
    467 
    468     AtomicString atomicString;
    469     atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
    470     return atomicString;
    471 }
    472 
    473 AtomicString AtomicString::number(int number)
    474 {
    475     return numberToStringSigned<AtomicString>(number);
    476 }
    477 
    478 AtomicString AtomicString::number(unsigned number)
    479 {
    480     return numberToStringUnsigned<AtomicString>(number);
    481 }
    482 
    483 AtomicString AtomicString::number(long number)
    484 {
    485     return numberToStringSigned<AtomicString>(number);
    486 }
    487 
    488 AtomicString AtomicString::number(unsigned long number)
    489 {
    490     return numberToStringUnsigned<AtomicString>(number);
    491 }
    492 
    493 AtomicString AtomicString::number(long long number)
    494 {
    495     return numberToStringSigned<AtomicString>(number);
    496 }
    497 
    498 AtomicString AtomicString::number(unsigned long long number)
    499 {
    500     return numberToStringUnsigned<AtomicString>(number);
    501 }
    502 
    503 AtomicString AtomicString::number(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy)
    504 {
    505     NumberToStringBuffer buffer;
    506     return AtomicString(numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros));
    507 }
    508 
    509 #ifndef NDEBUG
    510 void AtomicString::show() const
    511 {
    512     m_string.show();
    513 }
    514 #endif
    515 
    516 } // namespace WTF
    517