1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga (at) paroga.com> 4 * Copyright (C) 2012 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #include "config.h" 24 #include "AtomicString.h" 25 26 #include "StringHash.h" 27 #include "wtf/HashSet.h" 28 #include "wtf/WTFThreadData.h" 29 #include "wtf/dtoa.h" 30 #include "wtf/text/IntegerToStringConversion.h" 31 #include "wtf/unicode/UTF8.h" 32 33 namespace WTF { 34 35 using namespace Unicode; 36 37 COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size); 38 39 class AtomicStringTable { 40 WTF_MAKE_NONCOPYABLE(AtomicStringTable); 41 public: 42 static AtomicStringTable* create(WTFThreadData& data) 43 { 44 data.m_atomicStringTable = new AtomicStringTable; 45 data.m_atomicStringTableDestructor = AtomicStringTable::destroy; 46 data.m_atomicStringTable->addStaticStrings(); 47 return data.m_atomicStringTable; 48 } 49 50 StringImpl* addStringImpl(StringImpl* string) 51 { 52 if (!string->length()) 53 return StringImpl::empty(); 54 55 StringImpl* result = *m_table.add(string).iterator; 56 57 if (!result->isAtomic()) 58 result->setIsAtomic(true); 59 60 ASSERT(!string->isStatic() || result->isStatic()); 61 return result; 62 } 63 64 HashSet<StringImpl*>& table() 65 { 66 return m_table; 67 } 68 69 private: 70 AtomicStringTable() { } 71 72 void addStaticStrings() 73 { 74 const StaticStringsTable& staticStrings = StringImpl::allStaticStrings(); 75 76 StaticStringsTable::const_iterator it = staticStrings.begin(); 77 for (; it != staticStrings.end(); ++it) { 78 addStringImpl(it->value); 79 } 80 } 81 82 static void destroy(AtomicStringTable* table) 83 { 84 HashSet<StringImpl*>::iterator end = table->m_table.end(); 85 for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter) { 86 StringImpl* string = *iter; 87 if (!string->isStatic()) { 88 ASSERT(string->isAtomic()); 89 string->setIsAtomic(false); 90 } 91 } 92 delete table; 93 } 94 95 HashSet<StringImpl*> m_table; 96 }; 97 98 static inline AtomicStringTable& atomicStringTable() 99 { 100 // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor). 101 WTFThreadData& data = wtfThreadData(); 102 AtomicStringTable* table = data.atomicStringTable(); 103 if (UNLIKELY(!table)) 104 table = AtomicStringTable::create(data); 105 return *table; 106 } 107 108 static inline HashSet<StringImpl*>& atomicStrings() 109 { 110 return atomicStringTable().table(); 111 } 112 113 template<typename T, typename HashTranslator> 114 static inline PassRefPtr<StringImpl> addToStringTable(const T& value) 115 { 116 HashSet<StringImpl*>::AddResult addResult = atomicStrings().add<HashTranslator>(value); 117 118 // If the string is newly-translated, then we need to adopt it. 119 // The boolean in the pair tells us if that is so. 120 return addResult.isNewEntry ? adoptRef(*addResult.iterator) : *addResult.iterator; 121 } 122 123 struct CStringTranslator { 124 static unsigned hash(const LChar* c) 125 { 126 return StringHasher::computeHashAndMaskTop8Bits(c); 127 } 128 129 static inline bool equal(StringImpl* r, const LChar* s) 130 { 131 return WTF::equal(r, s); 132 } 133 134 static void translate(StringImpl*& location, const LChar* const& c, unsigned hash) 135 { 136 location = StringImpl::create(c).leakRef(); 137 location->setHash(hash); 138 location->setIsAtomic(true); 139 } 140 }; 141 142 PassRefPtr<StringImpl> AtomicString::add(const LChar* c) 143 { 144 if (!c) 145 return 0; 146 if (!*c) 147 return StringImpl::empty(); 148 149 return addToStringTable<const LChar*, CStringTranslator>(c); 150 } 151 152 template<typename CharacterType> 153 struct HashTranslatorCharBuffer { 154 const CharacterType* s; 155 unsigned length; 156 }; 157 158 typedef HashTranslatorCharBuffer<UChar> UCharBuffer; 159 struct UCharBufferTranslator { 160 static unsigned hash(const UCharBuffer& buf) 161 { 162 return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); 163 } 164 165 static bool equal(StringImpl* const& str, const UCharBuffer& buf) 166 { 167 return WTF::equal(str, buf.s, buf.length); 168 } 169 170 static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) 171 { 172 location = StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef(); 173 location->setHash(hash); 174 location->setIsAtomic(true); 175 } 176 }; 177 178 template<typename CharacterType> 179 struct HashAndCharacters { 180 unsigned hash; 181 const CharacterType* characters; 182 unsigned length; 183 }; 184 185 template<typename CharacterType> 186 struct HashAndCharactersTranslator { 187 static unsigned hash(const HashAndCharacters<CharacterType>& buffer) 188 { 189 ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length)); 190 return buffer.hash; 191 } 192 193 static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer) 194 { 195 return WTF::equal(string, buffer.characters, buffer.length); 196 } 197 198 static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash) 199 { 200 location = StringImpl::create(buffer.characters, buffer.length).leakRef(); 201 location->setHash(hash); 202 location->setIsAtomic(true); 203 } 204 }; 205 206 struct HashAndUTF8Characters { 207 unsigned hash; 208 const char* characters; 209 unsigned length; 210 unsigned utf16Length; 211 }; 212 213 struct HashAndUTF8CharactersTranslator { 214 static unsigned hash(const HashAndUTF8Characters& buffer) 215 { 216 return buffer.hash; 217 } 218 219 static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) 220 { 221 if (buffer.utf16Length != string->length()) 222 return false; 223 224 // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same. 225 if (buffer.utf16Length != buffer.length) { 226 if (string->is8Bit()) { 227 const LChar* characters8 = string->characters8(); 228 return equalLatin1WithUTF8(characters8, characters8 + string->length(), buffer.characters, buffer.characters + buffer.length); 229 } 230 const UChar* characters16 = string->characters16(); 231 return equalUTF16WithUTF8(characters16, characters16 + string->length(), buffer.characters, buffer.characters + buffer.length); 232 } 233 234 if (string->is8Bit()) { 235 const LChar* stringCharacters = string->characters8(); 236 237 for (unsigned i = 0; i < buffer.length; ++i) { 238 ASSERT(isASCII(buffer.characters[i])); 239 if (stringCharacters[i] != buffer.characters[i]) 240 return false; 241 } 242 243 return true; 244 } 245 246 const UChar* stringCharacters = string->characters16(); 247 248 for (unsigned i = 0; i < buffer.length; ++i) { 249 ASSERT(isASCII(buffer.characters[i])); 250 if (stringCharacters[i] != buffer.characters[i]) 251 return false; 252 } 253 254 return true; 255 } 256 257 static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) 258 { 259 UChar* target; 260 RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target); 261 262 bool isAllASCII; 263 const char* source = buffer.characters; 264 if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK) 265 ASSERT_NOT_REACHED(); 266 267 if (isAllASCII) 268 newString = StringImpl::create(buffer.characters, buffer.length); 269 270 location = newString.release().leakRef(); 271 location->setHash(hash); 272 location->setIsAtomic(true); 273 } 274 }; 275 276 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length) 277 { 278 if (!s) 279 return 0; 280 281 if (!length) 282 return StringImpl::empty(); 283 284 UCharBuffer buffer = { s, length }; 285 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); 286 } 287 288 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash) 289 { 290 ASSERT(s); 291 ASSERT(existingHash); 292 293 if (!length) 294 return StringImpl::empty(); 295 296 HashAndCharacters<UChar> buffer = { existingHash, s, length }; 297 return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar> >(buffer); 298 } 299 300 PassRefPtr<StringImpl> AtomicString::add(const UChar* s) 301 { 302 if (!s) 303 return 0; 304 305 unsigned length = 0; 306 while (s[length] != UChar(0)) 307 ++length; 308 309 if (!length) 310 return StringImpl::empty(); 311 312 UCharBuffer buffer = { s, length }; 313 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); 314 } 315 316 struct SubstringLocation { 317 StringImpl* baseString; 318 unsigned start; 319 unsigned length; 320 }; 321 322 struct SubstringTranslator { 323 static unsigned hash(const SubstringLocation& buffer) 324 { 325 if (buffer.baseString->is8Bit()) 326 return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length); 327 return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length); 328 } 329 330 static bool equal(StringImpl* const& string, const SubstringLocation& buffer) 331 { 332 if (buffer.baseString->is8Bit()) 333 return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length); 334 return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length); 335 } 336 337 static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash) 338 { 339 location = buffer.baseString->substring(buffer.start, buffer.length).leakRef(); 340 location->setHash(hash); 341 location->setIsAtomic(true); 342 } 343 }; 344 345 PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length) 346 { 347 if (!baseString) 348 return 0; 349 350 if (!length || start >= baseString->length()) 351 return StringImpl::empty(); 352 353 unsigned maxLength = baseString->length() - start; 354 if (length >= maxLength) { 355 if (!start) 356 return add(baseString); 357 length = maxLength; 358 } 359 360 SubstringLocation buffer = { baseString, start, length }; 361 return addToStringTable<SubstringLocation, SubstringTranslator>(buffer); 362 } 363 364 typedef HashTranslatorCharBuffer<LChar> LCharBuffer; 365 struct LCharBufferTranslator { 366 static unsigned hash(const LCharBuffer& buf) 367 { 368 return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); 369 } 370 371 static bool equal(StringImpl* const& str, const LCharBuffer& buf) 372 { 373 return WTF::equal(str, buf.s, buf.length); 374 } 375 376 static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash) 377 { 378 location = StringImpl::create(buf.s, buf.length).leakRef(); 379 location->setHash(hash); 380 location->setIsAtomic(true); 381 } 382 }; 383 384 typedef HashTranslatorCharBuffer<char> CharBuffer; 385 struct CharBufferFromLiteralDataTranslator { 386 static unsigned hash(const CharBuffer& buf) 387 { 388 return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length); 389 } 390 391 static bool equal(StringImpl* const& str, const CharBuffer& buf) 392 { 393 return WTF::equal(str, buf.s, buf.length); 394 } 395 396 static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash) 397 { 398 location = StringImpl::create(buf.s, buf.length).leakRef(); 399 location->setHash(hash); 400 location->setIsAtomic(true); 401 } 402 }; 403 404 PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length) 405 { 406 if (!s) 407 return 0; 408 409 if (!length) 410 return StringImpl::empty(); 411 412 LCharBuffer buffer = { s, length }; 413 return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer); 414 } 415 416 PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length) 417 { 418 ASSERT(characters); 419 ASSERT(length); 420 421 CharBuffer buffer = { characters, length }; 422 return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer); 423 } 424 425 PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* string) 426 { 427 return atomicStringTable().addStringImpl(string); 428 } 429 430 template<typename CharacterType> 431 static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl) 432 { 433 HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() }; 434 return atomicStrings().find<HashAndCharactersTranslator<CharacterType> >(buffer); 435 } 436 437 StringImpl* AtomicString::find(const StringImpl* stringImpl) 438 { 439 ASSERT(stringImpl); 440 ASSERT(stringImpl->existingHash()); 441 442 if (!stringImpl->length()) 443 return StringImpl::empty(); 444 445 HashSet<StringImpl*>::iterator iterator; 446 if (stringImpl->is8Bit()) 447 iterator = findString<LChar>(stringImpl); 448 else 449 iterator = findString<UChar>(stringImpl); 450 if (iterator == atomicStrings().end()) 451 return 0; 452 return *iterator; 453 } 454 455 void AtomicString::remove(StringImpl* r) 456 { 457 HashSet<StringImpl*>::iterator iterator; 458 if (r->is8Bit()) 459 iterator = findString<LChar>(r); 460 else 461 iterator = findString<UChar>(r); 462 RELEASE_ASSERT(iterator != atomicStrings().end()); 463 atomicStrings().remove(iterator); 464 } 465 466 AtomicString AtomicString::lower() const 467 { 468 // Note: This is a hot function in the Dromaeo benchmark. 469 StringImpl* impl = this->impl(); 470 if (UNLIKELY(!impl)) 471 return *this; 472 RefPtr<StringImpl> newImpl = impl->lower(); 473 if (LIKELY(newImpl == impl)) 474 return *this; 475 return AtomicString(newImpl.release()); 476 } 477 478 AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd) 479 { 480 HashAndUTF8Characters buffer; 481 buffer.characters = charactersStart; 482 buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length); 483 484 if (!buffer.hash) 485 return nullAtom; 486 487 AtomicString atomicString; 488 atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); 489 return atomicString; 490 } 491 492 AtomicString AtomicString::number(int number) 493 { 494 return numberToStringSigned<AtomicString>(number); 495 } 496 497 AtomicString AtomicString::number(unsigned number) 498 { 499 return numberToStringUnsigned<AtomicString>(number); 500 } 501 502 AtomicString AtomicString::number(long number) 503 { 504 return numberToStringSigned<AtomicString>(number); 505 } 506 507 AtomicString AtomicString::number(unsigned long number) 508 { 509 return numberToStringUnsigned<AtomicString>(number); 510 } 511 512 AtomicString AtomicString::number(long long number) 513 { 514 return numberToStringSigned<AtomicString>(number); 515 } 516 517 AtomicString AtomicString::number(unsigned long long number) 518 { 519 return numberToStringUnsigned<AtomicString>(number); 520 } 521 522 AtomicString AtomicString::number(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy) 523 { 524 NumberToStringBuffer buffer; 525 return AtomicString(numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros)); 526 } 527 528 #ifndef NDEBUG 529 void AtomicString::show() const 530 { 531 m_string.show(); 532 } 533 #endif 534 535 } // namespace WTF 536