Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2013 Google, Inc. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #include "config.h"
     27 #include "core/html/parser/HTMLIdentifier.h"
     28 
     29 #include "HTMLNames.h"
     30 #include "wtf/HashMap.h"
     31 #include "wtf/MainThread.h"
     32 #include "wtf/text/StringHash.h"
     33 
     34 namespace WebCore {
     35 
     36 using namespace HTMLNames;
     37 
     38 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> IdentifierTable;
     39 
     40 unsigned HTMLIdentifier::maxNameLength = 0;
     41 
     42 static IdentifierTable& identifierTable()
     43 {
     44     DEFINE_STATIC_LOCAL(IdentifierTable, table, ());
     45     ASSERT(isMainThread() || !table.isEmpty());
     46     return table;
     47 }
     48 
     49 #ifndef NDEBUG
     50 bool HTMLIdentifier::isKnown(const StringImpl* string)
     51 {
     52     const IdentifierTable& table = identifierTable();
     53     return table.contains(string->hash());
     54 }
     55 #endif
     56 
     57 StringImpl* HTMLIdentifier::findIfKnown(const UChar* characters, unsigned length)
     58 {
     59     // We don't need to try hashing if we know the string is too long.
     60     if (length > maxNameLength)
     61         return 0;
     62     // computeHashAndMaskTop8Bits is the function StringImpl::hash() uses.
     63     unsigned hash = StringHasher::computeHashAndMaskTop8Bits(characters, length);
     64     const IdentifierTable& table = identifierTable();
     65     ASSERT(!table.isEmpty());
     66 
     67     IdentifierTable::const_iterator it = table.find(hash);
     68     if (it == table.end())
     69         return 0;
     70     // It's possible to have hash collisions between arbitrary strings and
     71     // known identifiers (e.g. "bvvfg" collides with "script").
     72     // However ASSERTs in addNames() guard against there ever being collisions
     73     // between known identifiers.
     74     if (!equal(it->value, characters, length))
     75         return 0;
     76     return it->value;
     77 }
     78 
     79 const unsigned kHTMLNamesIndexOffset = 0;
     80 const unsigned kHTMLAttrsIndexOffset = 1000;
     81 COMPILE_ASSERT(kHTMLAttrsIndexOffset > HTMLTagsCount, kHTMLAttrsIndexOffset_should_be_larger_than_HTMLTagsCount);
     82 
     83 const String& HTMLIdentifier::asString() const
     84 {
     85     ASSERT(isMainThread());
     86     return m_string;
     87 }
     88 
     89 const StringImpl* HTMLIdentifier::asStringImpl() const
     90 {
     91     return m_string.impl();
     92 }
     93 
     94 void HTMLIdentifier::addNames(QualifiedName** names, unsigned namesCount, unsigned indexOffset)
     95 {
     96     IdentifierTable& table = identifierTable();
     97     for (unsigned i = 0; i < namesCount; ++i) {
     98         StringImpl* name = names[i]->localName().impl();
     99         unsigned hash = name->hash();
    100         IdentifierTable::AddResult addResult = table.add(hash, name);
    101         maxNameLength = std::max(maxNameLength, name->length());
    102         // Ensure we're using the same hashing algorithm to get and set.
    103         ASSERT_UNUSED(addResult, !addResult.isNewEntry || HTMLIdentifier::findIfKnown(String(name).charactersWithNullTermination().data(), name->length()) == name);
    104         // We expect some hash collisions, but only for identical strings.
    105         // Since all of these names are AtomicStrings pointers should be equal.
    106         // Note: If you hit this ASSERT, then we had a hash collision among
    107         // HTMLNames strings, and we need to re-design how we use this hash!
    108         ASSERT_UNUSED(addResult, !addResult.isNewEntry || name == addResult.iterator->value);
    109     }
    110 }
    111 
    112 void HTMLIdentifier::init()
    113 {
    114     ASSERT(isMainThread()); // Not technically necessary, but this is our current expected usage.
    115     static bool isInitialized = false;
    116     if (isInitialized)
    117         return;
    118     isInitialized = true;
    119 
    120     // FIXME: We should atomize small whitespace (\n, \n\n, etc.)
    121     addNames(getHTMLTags(), HTMLTagsCount, kHTMLNamesIndexOffset);
    122     addNames(getHTMLAttrs(), HTMLAttrsCount, kHTMLAttrsIndexOffset);
    123 }
    124 
    125 }
    126