Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved.
      3  * Copyright (C) 2010 Patrick Gansterer <paroga (at) paroga.com>
      4  * Copyright (C) 2012 Google Inc. All rights reserved.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #include "config.h"
     24 #include "AtomicString.h"
     25 
     26 #include "StringHash.h"
     27 #include "wtf/HashSet.h"
     28 #include "wtf/WTFThreadData.h"
     29 #include "wtf/dtoa.h"
     30 #include "wtf/text/IntegerToStringConversion.h"
     31 #include "wtf/unicode/UTF8.h"
     32 
     33 namespace WTF {
     34 
     35 using namespace Unicode;
     36 
     37 COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
     38 
     39 class AtomicStringTable {
     40     WTF_MAKE_NONCOPYABLE(AtomicStringTable);
     41 public:
     42     static AtomicStringTable* create(WTFThreadData& data)
     43     {
     44         data.m_atomicStringTable = new AtomicStringTable;
     45         data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
     46         data.m_atomicStringTable->addStaticStrings();
     47         return data.m_atomicStringTable;
     48     }
     49 
     50     StringImpl* addStringImpl(StringImpl* string)
     51     {
     52         if (!string->length())
     53             return StringImpl::empty();
     54 
     55         StringImpl* result = *m_table.add(string).iterator;
     56 
     57         if (!result->isAtomic())
     58             result->setIsAtomic(true);
     59 
     60         ASSERT(!string->isStatic() || result->isStatic());
     61         return result;
     62     }
     63 
     64     HashSet<StringImpl*>& table()
     65     {
     66         return m_table;
     67     }
     68 
     69 private:
     70     AtomicStringTable() { }
     71 
     72     void addStaticStrings()
     73     {
     74         const StaticStringsTable& staticStrings = StringImpl::allStaticStrings();
     75 
     76         StaticStringsTable::const_iterator it = staticStrings.begin();
     77         for (; it != staticStrings.end(); ++it) {
     78             addStringImpl(it->value);
     79         }
     80     }
     81 
     82     static void destroy(AtomicStringTable* table)
     83     {
     84         HashSet<StringImpl*>::iterator end = table->m_table.end();
     85         for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter) {
     86             StringImpl* string = *iter;
     87             if (!string->isStatic()) {
     88                 ASSERT(string->isAtomic());
     89                 string->setIsAtomic(false);
     90             }
     91         }
     92         delete table;
     93     }
     94 
     95     HashSet<StringImpl*> m_table;
     96 };
     97 
     98 static inline AtomicStringTable& atomicStringTable()
     99 {
    100     // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
    101     WTFThreadData& data = wtfThreadData();
    102     AtomicStringTable* table = data.atomicStringTable();
    103     if (UNLIKELY(!table))
    104         table = AtomicStringTable::create(data);
    105     return *table;
    106 }
    107 
    108 static inline HashSet<StringImpl*>& atomicStrings()
    109 {
    110     return atomicStringTable().table();
    111 }
    112 
    113 template<typename T, typename HashTranslator>
    114 static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
    115 {
    116     HashSet<StringImpl*>::AddResult addResult = atomicStrings().add<HashTranslator>(value);
    117 
    118     // If the string is newly-translated, then we need to adopt it.
    119     // The boolean in the pair tells us if that is so.
    120     return addResult.isNewEntry ? adoptRef(*addResult.iterator) : *addResult.iterator;
    121 }
    122 
    123 struct CStringTranslator {
    124     static unsigned hash(const LChar* c)
    125     {
    126         return StringHasher::computeHashAndMaskTop8Bits(c);
    127     }
    128 
    129     static inline bool equal(StringImpl* r, const LChar* s)
    130     {
    131         return WTF::equal(r, s);
    132     }
    133 
    134     static void translate(StringImpl*& location, const LChar* const& c, unsigned hash)
    135     {
    136         location = StringImpl::create(c).leakRef();
    137         location->setHash(hash);
    138         location->setIsAtomic(true);
    139     }
    140 };
    141 
    142 PassRefPtr<StringImpl> AtomicString::add(const LChar* c)
    143 {
    144     if (!c)
    145         return 0;
    146     if (!*c)
    147         return StringImpl::empty();
    148 
    149     return addToStringTable<const LChar*, CStringTranslator>(c);
    150 }
    151 
    152 template<typename CharacterType>
    153 struct HashTranslatorCharBuffer {
    154     const CharacterType* s;
    155     unsigned length;
    156 };
    157 
    158 typedef HashTranslatorCharBuffer<UChar> UCharBuffer;
    159 struct UCharBufferTranslator {
    160     static unsigned hash(const UCharBuffer& buf)
    161     {
    162         return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
    163     }
    164 
    165     static bool equal(StringImpl* const& str, const UCharBuffer& buf)
    166     {
    167         return WTF::equal(str, buf.s, buf.length);
    168     }
    169 
    170     static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
    171     {
    172         location = StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef();
    173         location->setHash(hash);
    174         location->setIsAtomic(true);
    175     }
    176 };
    177 
    178 template<typename CharacterType>
    179 struct HashAndCharacters {
    180     unsigned hash;
    181     const CharacterType* characters;
    182     unsigned length;
    183 };
    184 
    185 template<typename CharacterType>
    186 struct HashAndCharactersTranslator {
    187     static unsigned hash(const HashAndCharacters<CharacterType>& buffer)
    188     {
    189         ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length));
    190         return buffer.hash;
    191     }
    192 
    193     static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer)
    194     {
    195         return WTF::equal(string, buffer.characters, buffer.length);
    196     }
    197 
    198     static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash)
    199     {
    200         location = StringImpl::create(buffer.characters, buffer.length).leakRef();
    201         location->setHash(hash);
    202         location->setIsAtomic(true);
    203     }
    204 };
    205 
    206 struct HashAndUTF8Characters {
    207     unsigned hash;
    208     const char* characters;
    209     unsigned length;
    210     unsigned utf16Length;
    211 };
    212 
    213 struct HashAndUTF8CharactersTranslator {
    214     static unsigned hash(const HashAndUTF8Characters& buffer)
    215     {
    216         return buffer.hash;
    217     }
    218 
    219     static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
    220     {
    221         if (buffer.utf16Length != string->length())
    222             return false;
    223 
    224         // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
    225         if (buffer.utf16Length != buffer.length) {
    226             if (string->is8Bit()) {
    227                 const LChar* characters8 = string->characters8();
    228                 return equalLatin1WithUTF8(characters8, characters8 + string->length(), buffer.characters, buffer.characters + buffer.length);
    229             }
    230             const UChar* characters16 = string->characters16();
    231             return equalUTF16WithUTF8(characters16, characters16 + string->length(), buffer.characters, buffer.characters + buffer.length);
    232         }
    233 
    234         if (string->is8Bit()) {
    235             const LChar* stringCharacters = string->characters8();
    236 
    237             for (unsigned i = 0; i < buffer.length; ++i) {
    238                 ASSERT(isASCII(buffer.characters[i]));
    239                 if (stringCharacters[i] != buffer.characters[i])
    240                     return false;
    241             }
    242 
    243             return true;
    244         }
    245 
    246         const UChar* stringCharacters = string->characters16();
    247 
    248         for (unsigned i = 0; i < buffer.length; ++i) {
    249             ASSERT(isASCII(buffer.characters[i]));
    250             if (stringCharacters[i] != buffer.characters[i])
    251                 return false;
    252         }
    253 
    254         return true;
    255     }
    256 
    257     static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
    258     {
    259         UChar* target;
    260         RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target);
    261 
    262         bool isAllASCII;
    263         const char* source = buffer.characters;
    264         if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK)
    265             ASSERT_NOT_REACHED();
    266 
    267         if (isAllASCII)
    268             newString = StringImpl::create(buffer.characters, buffer.length);
    269 
    270         location = newString.release().leakRef();
    271         location->setHash(hash);
    272         location->setIsAtomic(true);
    273     }
    274 };
    275 
    276 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
    277 {
    278     if (!s)
    279         return 0;
    280 
    281     if (!length)
    282         return StringImpl::empty();
    283 
    284     UCharBuffer buffer = { s, length };
    285     return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
    286 }
    287 
    288 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
    289 {
    290     ASSERT(s);
    291     ASSERT(existingHash);
    292 
    293     if (!length)
    294         return StringImpl::empty();
    295 
    296     HashAndCharacters<UChar> buffer = { existingHash, s, length };
    297     return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar> >(buffer);
    298 }
    299 
    300 PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
    301 {
    302     if (!s)
    303         return 0;
    304 
    305     unsigned length = 0;
    306     while (s[length] != UChar(0))
    307         ++length;
    308 
    309     if (!length)
    310         return StringImpl::empty();
    311 
    312     UCharBuffer buffer = { s, length };
    313     return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
    314 }
    315 
    316 struct SubstringLocation {
    317     StringImpl* baseString;
    318     unsigned start;
    319     unsigned length;
    320 };
    321 
    322 struct SubstringTranslator {
    323     static unsigned hash(const SubstringLocation& buffer)
    324     {
    325         if (buffer.baseString->is8Bit())
    326             return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length);
    327         return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length);
    328     }
    329 
    330     static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
    331     {
    332         if (buffer.baseString->is8Bit())
    333             return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length);
    334         return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length);
    335     }
    336 
    337     static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash)
    338     {
    339         location = buffer.baseString->substring(buffer.start, buffer.length).leakRef();
    340         location->setHash(hash);
    341         location->setIsAtomic(true);
    342     }
    343 };
    344 
    345 PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length)
    346 {
    347     if (!baseString)
    348         return 0;
    349 
    350     if (!length || start >= baseString->length())
    351         return StringImpl::empty();
    352 
    353     unsigned maxLength = baseString->length() - start;
    354     if (length >= maxLength) {
    355         if (!start)
    356             return add(baseString);
    357         length = maxLength;
    358     }
    359 
    360     SubstringLocation buffer = { baseString, start, length };
    361     return addToStringTable<SubstringLocation, SubstringTranslator>(buffer);
    362 }
    363 
    364 typedef HashTranslatorCharBuffer<LChar> LCharBuffer;
    365 struct LCharBufferTranslator {
    366     static unsigned hash(const LCharBuffer& buf)
    367     {
    368         return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
    369     }
    370 
    371     static bool equal(StringImpl* const& str, const LCharBuffer& buf)
    372     {
    373         return WTF::equal(str, buf.s, buf.length);
    374     }
    375 
    376     static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash)
    377     {
    378         location = StringImpl::create(buf.s, buf.length).leakRef();
    379         location->setHash(hash);
    380         location->setIsAtomic(true);
    381     }
    382 };
    383 
    384 typedef HashTranslatorCharBuffer<char> CharBuffer;
    385 struct CharBufferFromLiteralDataTranslator {
    386     static unsigned hash(const CharBuffer& buf)
    387     {
    388         return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length);
    389     }
    390 
    391     static bool equal(StringImpl* const& str, const CharBuffer& buf)
    392     {
    393         return WTF::equal(str, buf.s, buf.length);
    394     }
    395 
    396     static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash)
    397     {
    398         location = StringImpl::create(buf.s, buf.length).leakRef();
    399         location->setHash(hash);
    400         location->setIsAtomic(true);
    401     }
    402 };
    403 
    404 PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length)
    405 {
    406     if (!s)
    407         return 0;
    408 
    409     if (!length)
    410         return StringImpl::empty();
    411 
    412     LCharBuffer buffer = { s, length };
    413     return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
    414 }
    415 
    416 PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length)
    417 {
    418     ASSERT(characters);
    419     ASSERT(length);
    420 
    421     CharBuffer buffer = { characters, length };
    422     return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer);
    423 }
    424 
    425 PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* string)
    426 {
    427     return atomicStringTable().addStringImpl(string);
    428 }
    429 
    430 template<typename CharacterType>
    431 static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl)
    432 {
    433     HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() };
    434     return atomicStrings().find<HashAndCharactersTranslator<CharacterType> >(buffer);
    435 }
    436 
    437 StringImpl* AtomicString::find(const StringImpl* stringImpl)
    438 {
    439     ASSERT(stringImpl);
    440     ASSERT(stringImpl->existingHash());
    441 
    442     if (!stringImpl->length())
    443         return StringImpl::empty();
    444 
    445     HashSet<StringImpl*>::iterator iterator;
    446     if (stringImpl->is8Bit())
    447         iterator = findString<LChar>(stringImpl);
    448     else
    449         iterator = findString<UChar>(stringImpl);
    450     if (iterator == atomicStrings().end())
    451         return 0;
    452     return *iterator;
    453 }
    454 
    455 void AtomicString::remove(StringImpl* r)
    456 {
    457     HashSet<StringImpl*>::iterator iterator;
    458     if (r->is8Bit())
    459         iterator = findString<LChar>(r);
    460     else
    461         iterator = findString<UChar>(r);
    462     RELEASE_ASSERT(iterator != atomicStrings().end());
    463     atomicStrings().remove(iterator);
    464 }
    465 
    466 AtomicString AtomicString::lower() const
    467 {
    468     // Note: This is a hot function in the Dromaeo benchmark.
    469     StringImpl* impl = this->impl();
    470     if (UNLIKELY(!impl))
    471         return *this;
    472     RefPtr<StringImpl> newImpl = impl->lower();
    473     if (LIKELY(newImpl == impl))
    474         return *this;
    475     return AtomicString(newImpl.release());
    476 }
    477 
    478 AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
    479 {
    480     HashAndUTF8Characters buffer;
    481     buffer.characters = charactersStart;
    482     buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
    483 
    484     if (!buffer.hash)
    485         return nullAtom;
    486 
    487     AtomicString atomicString;
    488     atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
    489     return atomicString;
    490 }
    491 
    492 AtomicString AtomicString::number(int number)
    493 {
    494     return numberToStringSigned<AtomicString>(number);
    495 }
    496 
    497 AtomicString AtomicString::number(unsigned number)
    498 {
    499     return numberToStringUnsigned<AtomicString>(number);
    500 }
    501 
    502 AtomicString AtomicString::number(long number)
    503 {
    504     return numberToStringSigned<AtomicString>(number);
    505 }
    506 
    507 AtomicString AtomicString::number(unsigned long number)
    508 {
    509     return numberToStringUnsigned<AtomicString>(number);
    510 }
    511 
    512 AtomicString AtomicString::number(long long number)
    513 {
    514     return numberToStringSigned<AtomicString>(number);
    515 }
    516 
    517 AtomicString AtomicString::number(unsigned long long number)
    518 {
    519     return numberToStringUnsigned<AtomicString>(number);
    520 }
    521 
    522 AtomicString AtomicString::number(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy)
    523 {
    524     NumberToStringBuffer buffer;
    525     return AtomicString(numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros));
    526 }
    527 
    528 #ifndef NDEBUG
    529 void AtomicString::show() const
    530 {
    531     m_string.show();
    532 }
    533 #endif
    534 
    535 } // namespace WTF
    536