Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2013 Google, Inc. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef AtomicHTMLToken_h
     27 #define AtomicHTMLToken_h
     28 
     29 #include "HTMLElementLookupTrie.h"
     30 #include "core/dom/Attribute.h"
     31 #include "core/html/parser/CompactHTMLToken.h"
     32 #include "core/html/parser/HTMLToken.h"
     33 #include "wtf/RefCounted.h"
     34 #include "wtf/RefPtr.h"
     35 
     36 namespace WebCore {
     37 
     38 class AtomicHTMLToken {
     39     WTF_MAKE_NONCOPYABLE(AtomicHTMLToken);
     40 public:
     41 
     42     bool forceQuirks() const
     43     {
     44         ASSERT(m_type == HTMLToken::DOCTYPE);
     45         return m_doctypeData->m_forceQuirks;
     46     }
     47 
     48     HTMLToken::Type type() const { return m_type; }
     49 
     50     const AtomicString& name() const
     51     {
     52         ASSERT(usesName());
     53         return m_name;
     54     }
     55 
     56     void setName(const AtomicString& name)
     57     {
     58         ASSERT(usesName());
     59         m_name = name;
     60     }
     61 
     62     bool selfClosing() const
     63     {
     64         ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag);
     65         return m_selfClosing;
     66     }
     67 
     68     Attribute* getAttributeItem(const QualifiedName& attributeName)
     69     {
     70         ASSERT(usesAttributes());
     71         return findAttributeInVector(m_attributes, attributeName);
     72     }
     73 
     74     Vector<Attribute>& attributes()
     75     {
     76         ASSERT(usesAttributes());
     77         return m_attributes;
     78     }
     79 
     80     const Vector<Attribute>& attributes() const
     81     {
     82         ASSERT(usesAttributes());
     83         return m_attributes;
     84     }
     85 
     86     const String& characters() const
     87     {
     88         ASSERT(m_type == HTMLToken::Character);
     89         return m_data;
     90     }
     91 
     92     const String& comment() const
     93     {
     94         ASSERT(m_type == HTMLToken::Comment);
     95         return m_data;
     96     }
     97 
     98     // FIXME: Distinguish between a missing public identifer and an empty one.
     99     Vector<UChar>& publicIdentifier() const
    100     {
    101         ASSERT(m_type == HTMLToken::DOCTYPE);
    102         return m_doctypeData->m_publicIdentifier;
    103     }
    104 
    105     // FIXME: Distinguish between a missing system identifer and an empty one.
    106     Vector<UChar>& systemIdentifier() const
    107     {
    108         ASSERT(m_type == HTMLToken::DOCTYPE);
    109         return m_doctypeData->m_systemIdentifier;
    110     }
    111 
    112     explicit AtomicHTMLToken(HTMLToken& token)
    113         : m_type(token.type())
    114     {
    115         switch (m_type) {
    116         case HTMLToken::Uninitialized:
    117             ASSERT_NOT_REACHED();
    118             break;
    119         case HTMLToken::DOCTYPE:
    120             m_name = AtomicString(token.name());
    121             m_doctypeData = token.releaseDoctypeData();
    122             break;
    123         case HTMLToken::EndOfFile:
    124             break;
    125         case HTMLToken::StartTag:
    126         case HTMLToken::EndTag: {
    127             m_selfClosing = token.selfClosing();
    128             if (StringImpl* tagName = lookupHTMLTag(token.name().data(), token.name().size()))
    129                 m_name = AtomicString(tagName);
    130             else
    131                 m_name = AtomicString(token.name());
    132             initializeAttributes(token.attributes());
    133             break;
    134         }
    135         case HTMLToken::Character:
    136         case HTMLToken::Comment:
    137             if (token.isAll8BitData())
    138                 m_data = String::make8BitFrom16BitSource(token.data());
    139             else
    140                 m_data = String(token.data());
    141             break;
    142         }
    143     }
    144 
    145     explicit AtomicHTMLToken(const CompactHTMLToken& token)
    146         : m_type(token.type())
    147     {
    148         switch (m_type) {
    149         case HTMLToken::Uninitialized:
    150             ASSERT_NOT_REACHED();
    151             break;
    152         case HTMLToken::DOCTYPE:
    153             m_name = token.data();
    154             m_doctypeData = adoptPtr(new DoctypeData());
    155             m_doctypeData->m_hasPublicIdentifier = true;
    156             append(m_doctypeData->m_publicIdentifier, token.publicIdentifier());
    157             m_doctypeData->m_hasSystemIdentifier = true;
    158             append(m_doctypeData->m_systemIdentifier, token.systemIdentifier());
    159             m_doctypeData->m_forceQuirks = token.doctypeForcesQuirks();
    160             break;
    161         case HTMLToken::EndOfFile:
    162             break;
    163         case HTMLToken::StartTag:
    164             m_attributes.reserveInitialCapacity(token.attributes().size());
    165             for (Vector<CompactHTMLToken::Attribute>::const_iterator it = token.attributes().begin(); it != token.attributes().end(); ++it) {
    166                 QualifiedName name(nullAtom, it->name, nullAtom);
    167                 // FIXME: This is N^2 for the number of attributes.
    168                 if (!findAttributeInVector(m_attributes, name))
    169                     m_attributes.append(Attribute(name, it->value));
    170             }
    171             // Fall through!
    172         case HTMLToken::EndTag:
    173             m_selfClosing = token.selfClosing();
    174             m_name = token.data();
    175             break;
    176         case HTMLToken::Character:
    177         case HTMLToken::Comment:
    178             m_data = token.data();
    179             break;
    180         }
    181     }
    182 
    183     explicit AtomicHTMLToken(HTMLToken::Type type)
    184         : m_type(type)
    185         , m_selfClosing(false)
    186     {
    187     }
    188 
    189     AtomicHTMLToken(HTMLToken::Type type, const AtomicString& name, const Vector<Attribute>& attributes = Vector<Attribute>())
    190         : m_type(type)
    191         , m_name(name)
    192         , m_selfClosing(false)
    193         , m_attributes(attributes)
    194     {
    195         ASSERT(usesName());
    196     }
    197 
    198 private:
    199     HTMLToken::Type m_type;
    200 
    201     void initializeAttributes(const HTMLToken::AttributeList& attributes);
    202     QualifiedName nameForAttribute(const HTMLToken::Attribute&) const;
    203 
    204     bool usesName() const;
    205 
    206     bool usesAttributes() const;
    207 
    208     // "name" for DOCTYPE, StartTag, and EndTag
    209     AtomicString m_name;
    210 
    211     // "data" for Comment, "characters" for Character
    212     String m_data;
    213 
    214     // For DOCTYPE
    215     OwnPtr<DoctypeData> m_doctypeData;
    216 
    217     // For StartTag and EndTag
    218     bool m_selfClosing;
    219 
    220     Vector<Attribute> m_attributes;
    221 };
    222 
    223 inline void AtomicHTMLToken::initializeAttributes(const HTMLToken::AttributeList& attributes)
    224 {
    225     size_t size = attributes.size();
    226     if (!size)
    227         return;
    228 
    229     m_attributes.clear();
    230     m_attributes.reserveInitialCapacity(size);
    231     for (size_t i = 0; i < size; ++i) {
    232         const HTMLToken::Attribute& attribute = attributes[i];
    233         if (attribute.name.isEmpty())
    234             continue;
    235 
    236         // FIXME: We should be able to add the following ASSERT once we fix
    237         // https://bugs.webkit.org/show_bug.cgi?id=62971
    238         //   ASSERT(attribute.nameRange.start);
    239         ASSERT(attribute.nameRange.end);
    240         ASSERT(attribute.valueRange.start);
    241         ASSERT(attribute.valueRange.end);
    242 
    243         AtomicString value(attribute.value);
    244         const QualifiedName& name = nameForAttribute(attribute);
    245         // FIXME: This is N^2 for the number of attributes.
    246         if (!findAttributeInVector(m_attributes, name))
    247             m_attributes.append(Attribute(name, value));
    248     }
    249 }
    250 
    251 }
    252 
    253 #endif
    254