/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 * Copyright (C) 2011 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
     26 
     27 #ifndef HTMLTreeBuilder_h
     28 #define HTMLTreeBuilder_h
     29 
     30 #include "core/html/parser/HTMLConstructionSite.h"
     31 #include "core/html/parser/HTMLElementStack.h"
     32 #include "core/html/parser/HTMLParserOptions.h"
     33 #include "wtf/Noncopyable.h"
     34 #include "wtf/PassOwnPtr.h"
     35 #include "wtf/PassRefPtr.h"
     36 #include "wtf/RefPtr.h"
     37 #include "wtf/Vector.h"
     38 #include "wtf/text/StringBuilder.h"
     39 #include "wtf/text/TextPosition.h"
     40 
     41 namespace WebCore {
     42 
     43 class AtomicHTMLToken;
     44 class Document;
     45 class DocumentFragment;
     46 class Element;
     47 class Frame;
     48 class HTMLToken;
     49 class HTMLDocument;
     50 class Node;
     51 class HTMLDocumentParser;
     52 
     53 class HTMLTreeBuilder {
     54     WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED;
     55 public:
     56     static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, Document* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options)
     57     {
     58         return adoptPtr(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options));
     59     }
     60     static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
     61     {
     62         return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options));
     63     }
     64     ~HTMLTreeBuilder();
     65 
     66     const HTMLElementStack* openElements() const { return m_tree.openElements(); }
     67 
     68     bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
     69     bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); }
     70     bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); }
     71 
     72     void detach();
     73 
     74     void constructTree(AtomicHTMLToken*);
     75 
     76     bool hasParserBlockingScript() const { return !!m_scriptToProcess; }
     77     // Must be called to take the parser-blocking script before calling the parser again.
     78     PassRefPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition);
     79 
     80     // Done, close any open tags, etc.
     81     void finished();
     82 
     83     void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; }
     84 
     85 private:
     86     class CharacterTokenBuffer;
     87     // Represents HTML5 "insertion mode"
     88     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
     89     enum InsertionMode {
     90         InitialMode,
     91         BeforeHTMLMode,
     92         BeforeHeadMode,
     93         InHeadMode,
     94         InHeadNoscriptMode,
     95         AfterHeadMode,
     96         TemplateContentsMode,
     97         InBodyMode,
     98         TextMode,
     99         InTableMode,
    100         InTableTextMode,
    101         InCaptionMode,
    102         InColumnGroupMode,
    103         InTableBodyMode,
    104         InRowMode,
    105         InCellMode,
    106         InSelectMode,
    107         InSelectInTableMode,
    108         AfterBodyMode,
    109         InFramesetMode,
    110         AfterFramesetMode,
    111         AfterAfterBodyMode,
    112         AfterAfterFramesetMode,
    113     };
    114 
    115     HTMLTreeBuilder(HTMLDocumentParser*, Document*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&);
    116     HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&);
    117 
    118     void processToken(AtomicHTMLToken*);
    119 
    120     void processDoctypeToken(AtomicHTMLToken*);
    121     void processStartTag(AtomicHTMLToken*);
    122     void processEndTag(AtomicHTMLToken*);
    123     void processComment(AtomicHTMLToken*);
    124     void processCharacter(AtomicHTMLToken*);
    125     void processEndOfFile(AtomicHTMLToken*);
    126 
    127     bool processStartTagForInHead(AtomicHTMLToken*);
    128     void processStartTagForInBody(AtomicHTMLToken*);
    129     void processStartTagForInTable(AtomicHTMLToken*);
    130     void processEndTagForInBody(AtomicHTMLToken*);
    131     void processEndTagForInTable(AtomicHTMLToken*);
    132     void processEndTagForInTableBody(AtomicHTMLToken*);
    133     void processEndTagForInRow(AtomicHTMLToken*);
    134     void processEndTagForInCell(AtomicHTMLToken*);
    135 
    136     void processIsindexStartTagForInBody(AtomicHTMLToken*);
    137     void processHtmlStartTagForInBody(AtomicHTMLToken*);
    138     bool processBodyEndTagForInBody(AtomicHTMLToken*);
    139     bool processTableEndTagForInTable();
    140     bool processCaptionEndTagForInCaption();
    141     bool processColgroupEndTagForInColumnGroup();
    142     bool processTrEndTagForInRow();
    143     // FIXME: This function should be inlined into its one call site or it
    144     // needs to assert which tokens it can be called with.
    145     void processAnyOtherEndTagForInBody(AtomicHTMLToken*);
    146 
    147     void processCharacterBuffer(CharacterTokenBuffer&);
    148     inline void processCharacterBufferForInBody(CharacterTokenBuffer&);
    149 
    150     void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>());
    151     void processFakeEndTag(const QualifiedName&);
    152     void processFakeEndTag(const AtomicString&);
    153     void processFakeCharacters(const String&);
    154     void processFakePEndTagIfPInButtonScope();
    155 
    156     void processGenericRCDATAStartTag(AtomicHTMLToken*);
    157     void processGenericRawTextStartTag(AtomicHTMLToken*);
    158     void processScriptStartTag(AtomicHTMLToken*);
    159 
    160     // Default processing for the different insertion modes.
    161     void defaultForInitial();
    162     void defaultForBeforeHTML();
    163     void defaultForBeforeHead();
    164     void defaultForInHead();
    165     void defaultForInHeadNoscript();
    166     void defaultForAfterHead();
    167     void defaultForInTableText();
    168 
    169     inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*);
    170     void processTokenInForeignContent(AtomicHTMLToken*);
    171 
    172     Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*);
    173 
    174     void callTheAdoptionAgency(AtomicHTMLToken*);
    175 
    176     void closeTheCell();
    177 
    178     template <bool shouldClose(const HTMLStackItem*)>
    179     void processCloseWhenNestedTag(AtomicHTMLToken*);
    180 
    181     void parseError(AtomicHTMLToken*);
    182 
    183     InsertionMode insertionMode() const { return m_insertionMode; }
    184     void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; }
    185 
    186     void resetInsertionModeAppropriately();
    187 
    188     void processTemplateStartTag(AtomicHTMLToken*);
    189     bool processTemplateEndTag(AtomicHTMLToken*);
    190     bool processEndOfFileForInTemplateContents(AtomicHTMLToken*);
    191 
    192     class FragmentParsingContext {
    193         WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
    194     public:
    195         FragmentParsingContext();
    196         FragmentParsingContext(DocumentFragment*, Element* contextElement);
    197         ~FragmentParsingContext();
    198 
    199         DocumentFragment* fragment() const { return m_fragment; }
    200         Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; }
    201 
    202     private:
    203         DocumentFragment* m_fragment;
    204         Element* m_contextElement;
    205     };
    206 
    207     bool m_framesetOk;
    208 #ifndef NDEBUG
    209     bool m_isAttached;
    210 #endif
    211     FragmentParsingContext m_fragmentContext;
    212     HTMLConstructionSite m_tree;
    213 
    214     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
    215     InsertionMode m_insertionMode;
    216 
    217     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode
    218     InsertionMode m_originalInsertionMode;
    219 
    220     Vector<InsertionMode> m_templateInsertionModes;
    221 
    222     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
    223     StringBuilder m_pendingTableCharacters;
    224 
    225     bool m_shouldSkipLeadingNewline;
    226 
    227     // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
    228     // from within parser actions. We also need it to track the current position.
    229     HTMLDocumentParser* m_parser;
    230 
    231     RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
    232     TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
    233 
    234     HTMLParserOptions m_options;
    235 };
    236 
    237 }
    238 
    239 #endif
    240