1 /* 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "core/html/parser/HTMLIdentifier.h" 28 29 #include "HTMLNames.h" 30 #include "wtf/HashMap.h" 31 #include "wtf/MainThread.h" 32 #include "wtf/text/StringHash.h" 33 34 namespace WebCore { 35 36 using namespace HTMLNames; 37 38 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> IdentifierTable; 39 40 unsigned HTMLIdentifier::maxNameLength = 0; 41 42 static IdentifierTable& identifierTable() 43 { 44 DEFINE_STATIC_LOCAL(IdentifierTable, table, ()); 45 ASSERT(isMainThread() || !table.isEmpty()); 46 return table; 47 } 48 49 #ifndef NDEBUG 50 bool HTMLIdentifier::isKnown(const StringImpl* string) 51 { 52 const IdentifierTable& table = identifierTable(); 53 return table.contains(string->hash()); 54 } 55 #endif 56 57 StringImpl* HTMLIdentifier::findIfKnown(const UChar* characters, unsigned length) 58 { 59 // We don't need to try hashing if we know the string is too long. 60 if (length > maxNameLength) 61 return 0; 62 // computeHashAndMaskTop8Bits is the function StringImpl::hash() uses. 63 unsigned hash = StringHasher::computeHashAndMaskTop8Bits(characters, length); 64 const IdentifierTable& table = identifierTable(); 65 ASSERT(!table.isEmpty()); 66 67 IdentifierTable::const_iterator it = table.find(hash); 68 if (it == table.end()) 69 return 0; 70 // It's possible to have hash collisions between arbitrary strings and 71 // known identifiers (e.g. "bvvfg" collides with "script"). 72 // However ASSERTs in addNames() guard against there ever being collisions 73 // between known identifiers. 74 if (!equal(it->value, characters, length)) 75 return 0; 76 return it->value; 77 } 78 79 const unsigned kHTMLNamesIndexOffset = 0; 80 const unsigned kHTMLAttrsIndexOffset = 1000; 81 COMPILE_ASSERT(kHTMLAttrsIndexOffset > HTMLTagsCount, kHTMLAttrsIndexOffset_should_be_larger_than_HTMLTagsCount); 82 83 const String& HTMLIdentifier::asString() const 84 { 85 ASSERT(isMainThread()); 86 return m_string; 87 } 88 89 const StringImpl* HTMLIdentifier::asStringImpl() const 90 { 91 return m_string.impl(); 92 } 93 94 void HTMLIdentifier::addNames(QualifiedName** names, unsigned namesCount, unsigned indexOffset) 95 { 96 IdentifierTable& table = identifierTable(); 97 for (unsigned i = 0; i < namesCount; ++i) { 98 StringImpl* name = names[i]->localName().impl(); 99 unsigned hash = name->hash(); 100 IdentifierTable::AddResult addResult = table.add(hash, name); 101 maxNameLength = std::max(maxNameLength, name->length()); 102 // Ensure we're using the same hashing algorithm to get and set. 103 ASSERT_UNUSED(addResult, !addResult.isNewEntry || HTMLIdentifier::findIfKnown(String(name).charactersWithNullTermination().data(), name->length()) == name); 104 // We expect some hash collisions, but only for identical strings. 105 // Since all of these names are AtomicStrings pointers should be equal. 106 // Note: If you hit this ASSERT, then we had a hash collision among 107 // HTMLNames strings, and we need to re-design how we use this hash! 108 ASSERT_UNUSED(addResult, !addResult.isNewEntry || name == addResult.iterator->value); 109 } 110 } 111 112 void HTMLIdentifier::init() 113 { 114 ASSERT(isMainThread()); // Not technically necessary, but this is our current expected usage. 115 static bool isInitialized = false; 116 if (isInitialized) 117 return; 118 isInitialized = true; 119 120 // FIXME: We should atomize small whitespace (\n, \n\n, etc.) 121 addNames(getHTMLTags(), HTMLTagsCount, kHTMLNamesIndexOffset); 122 addNames(getHTMLAttrs(), HTMLAttrsCount, kHTMLAttrsIndexOffset); 123 } 124 125 } 126