Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #ifndef HTMLTreeBuilder_h
     28 #define HTMLTreeBuilder_h
     29 
     30 #include "core/html/parser/HTMLConstructionSite.h"
     31 #include "core/html/parser/HTMLElementStack.h"
     32 #include "core/html/parser/HTMLParserOptions.h"
     33 #include "wtf/Noncopyable.h"
     34 #include "wtf/PassOwnPtr.h"
     35 #include "wtf/PassRefPtr.h"
     36 #include "wtf/RefPtr.h"
     37 #include "wtf/Vector.h"
     38 #include "wtf/text/StringBuilder.h"
     39 #include "wtf/text/TextPosition.h"
     40 
     41 namespace WebCore {
     42 
     43 class AtomicHTMLToken; // Forward declarations (L43-51 of the original): these types are declared, not defined, in this header.
     44 class Document; // NOTE(review): not referenced by any declaration visible in this header; confirm before removing.
     45 class DocumentFragment;
     46 class Element;
     47 class Frame; // NOTE(review): not referenced by any declaration visible in this header; confirm before removing.
     48 class HTMLToken; // NOTE(review): only AtomicHTMLToken is used below; confirm before removing.
     49 class HTMLDocument;
     50 class Node; // NOTE(review): not referenced by any declaration visible in this header; confirm before removing.
     51 class HTMLDocumentParser;
     52 
     53 class HTMLTreeBuilder {
            // Tree-construction side of the HTML parser as declared in this header:
            // callers hand AtomicHTMLTokens to constructTree(), and the builder keeps an
            // HTML5 "insertion mode" (see InsertionMode below) together with an
            // HTMLConstructionSite (m_tree). The constructors are private, so instances
            // are obtained through the create() factories, which allocate with new and
            // hand ownership to the caller. The class is declared non-copyable.
     54     WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED;
     55 public:
            // Factory for parsing into a whole HTMLDocument. Forwards every argument to
            // the private document constructor and wraps the result with adoptPtr().
     56     static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options)
     57     {
     58         return adoptPtr(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options));
     59     }
            // Factory for parsing into a DocumentFragment with the given context
            // element. Unlike the document overload it takes no reportErrors argument.
     60     static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
     61     {
     62         return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options));
     63     }
     64     ~HTMLTreeBuilder();
     65 
            // Read-only view of the open-element stack owned by m_tree.
     66     const HTMLElementStack* openElements() const { return m_tree.openElements(); }
     67 
            // True when the fragment context holds a non-null DocumentFragment.
     68     bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
            // True when the open-element stack reports a template in HTML scope.
     69     bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); }
     70     bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); }
     71 
            // Defined out of line.
            // NOTE(review): presumably pairs with the debug-only m_isAttached flag
            // below; confirm in the implementation file.
     72     void detach();
     73 
            // Public entry point for feeding one token to the builder (defined out of line).
     74     void constructTree(AtomicHTMLToken*);
     75 
            // True while a script element is being held in m_scriptToProcess.
     76     bool hasParserBlockingScript() const { return !!m_scriptToProcess; }
     77     // Must be called to take the parser-blocking script before calling the parser again.
            // NOTE(review): scriptStartPosition is a non-const reference, presumably an
            // out-parameter filled from m_scriptToProcessStartPosition; confirm.
     78     PassRefPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition);
     79 
     80     // Done, close any open tags, etc.
     81     void finished();
     82 
     83     // Synchronously empty any queues, possibly creating more DOM nodes.
            // Simply forwards to m_tree.flush().
     84     void flush() { m_tree.flush(); }
     85 
            // Stores the flag in m_shouldSkipLeadingNewline; where it is consumed is
            // not visible in this header.
     86     void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; }
     87 
     88 private:
            // Nested helper type; only forward-declared here, defined elsewhere.
     89     class CharacterTokenBuffer;
     90     // Represents HTML5 "insertion mode"
     91     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
            // Unscoped enum: enumerators take consecutive values starting at 0 in the
            // order listed, so reordering them changes their numeric values.
     92     enum InsertionMode {
     93         InitialMode,
     94         BeforeHTMLMode,
     95         BeforeHeadMode,
     96         InHeadMode,
     97         InHeadNoscriptMode,
     98         AfterHeadMode,
     99         TemplateContentsMode,
    100         InBodyMode,
    101         TextMode,
    102         InTableMode,
    103         InTableTextMode,
    104         InCaptionMode,
    105         InColumnGroupMode,
    106         InTableBodyMode,
    107         InRowMode,
    108         InCellMode,
    109         InSelectMode,
    110         InSelectInTableMode,
    111         AfterBodyMode,
    112         InFramesetMode,
    113         AfterFramesetMode,
    114         AfterAfterBodyMode,
    115         AfterAfterFramesetMode,
    116     };
    117 
            // Private constructors: one for whole-document parsing, one for fragment
            // parsing. Outside code reaches them through the create() factories above.
    118     HTMLTreeBuilder(HTMLDocumentParser*, HTMLDocument*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&);
    119     HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&);
    120 
            // Internal token handler (defined out of line).
    121     void processToken(AtomicHTMLToken*);
    122 
            // One handler per token kind.
    123     void processDoctypeToken(AtomicHTMLToken*);
    124     void processStartTag(AtomicHTMLToken*);
    125     void processEndTag(AtomicHTMLToken*);
    126     void processComment(AtomicHTMLToken*);
    127     void processCharacter(AtomicHTMLToken*);
    128     void processEndOfFile(AtomicHTMLToken*);
    129 
            // Start/end tag handlers named after the insertion mode they serve.
            // NOTE(review): the meaning of the bool results in this section is not
            // documented in this header; presumably "token was handled" - confirm
            // in the implementation file.
    130     bool processStartTagForInHead(AtomicHTMLToken*);
    131     void processStartTagForInBody(AtomicHTMLToken*);
    132     void processStartTagForInTable(AtomicHTMLToken*);
    133     void processEndTagForInBody(AtomicHTMLToken*);
    134     void processEndTagForInTable(AtomicHTMLToken*);
    135     void processEndTagForInTableBody(AtomicHTMLToken*);
    136     void processEndTagForInRow(AtomicHTMLToken*);
    137     void processEndTagForInCell(AtomicHTMLToken*);
    138 
            // Handlers for specific tags within a given insertion mode.
    139     void processIsindexStartTagForInBody(AtomicHTMLToken*);
    140     void processHtmlStartTagForInBody(AtomicHTMLToken*);
    141     bool processBodyEndTagForInBody(AtomicHTMLToken*);
    142     bool processTableEndTagForInTable();
    143     bool processCaptionEndTagForInCaption();
    144     bool processColgroupEndTagForInColumnGroup();
    145     bool processTrEndTagForInRow();
    146     // FIXME: This function should be inlined into its one call site or it
    147     // needs to assert which tokens it can be called with.
    148     void processAnyOtherEndTagForInBody(AtomicHTMLToken*);
    149 
            // Character-data handling on a CharacterTokenBuffer (type defined elsewhere).
    150     void processCharacterBuffer(CharacterTokenBuffer&);
    151     inline void processCharacterBufferForInBody(CharacterTokenBuffer&);
    152 
            // "Fake" token helpers: they take a name or string rather than an
            // AtomicHTMLToken. processFakeStartTag's attributes default to an empty Vector.
    153     void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>());
    154     void processFakeEndTag(const QualifiedName&);
    155     void processFakeEndTag(const AtomicString&);
    156     void processFakeCharacters(const String&);
    157     void processFakePEndTagIfPInButtonScope();
    158 
    159     void processGenericRCDATAStartTag(AtomicHTMLToken*);
    160     void processGenericRawTextStartTag(AtomicHTMLToken*);
    161     void processScriptStartTag(AtomicHTMLToken*);
    162 
    163     // Default processing for the different insertion modes.
    164     void defaultForInitial();
    165     void defaultForBeforeHTML();
    166     void defaultForBeforeHead();
    167     void defaultForInHead();
    168     void defaultForInHeadNoscript();
    169     void defaultForAfterHead();
    170     void defaultForInTableText();
    171 
            // Foreign-content handling (all defined out of line).
    172     inline HTMLStackItem* adjustedCurrentStackItem() const;
    173     inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*);
    174     void processTokenInForeignContent(AtomicHTMLToken*);
    175 
    176     Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*);
    177 
    178     void callTheAdoptionAgency(AtomicHTMLToken*);
    179 
    180     void closeTheCell();
    181 
            // The template argument is a predicate function over a stack item, fixed
            // at compile time per instantiation.
    182     template <bool shouldClose(const HTMLStackItem*)>
    183     void processCloseWhenNestedTag(AtomicHTMLToken*);
    184 
    185     void parseError(AtomicHTMLToken*);
    186 
            // Plain accessors for m_insertionMode.
    187     InsertionMode insertionMode() const { return m_insertionMode; }
    188     void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; }
    189 
    190     void resetInsertionModeAppropriately();
    191 
            // <template> support (defined out of line).
    192     void processTemplateStartTag(AtomicHTMLToken*);
    193     bool processTemplateEndTag(AtomicHTMLToken*);
    194     bool processEndOfFileForInTemplateContents(AtomicHTMLToken*);
    195 
            // Holds the target fragment and the stack item wrapping the context element
            // for fragment parsing. fragment() may be called at any time, while
            // contextElement() and contextElementStackItem() assert that a fragment is
            // set before touching m_contextElementStackItem.
    196     class FragmentParsingContext {
    197         WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
    198     public:
                // NOTE(review): the default constructor presumably leaves m_fragment
                // null (isParsingFragment() relies on a null check) - confirm in
                // the implementation file.
    199         FragmentParsingContext();
    200         FragmentParsingContext(DocumentFragment*, Element* contextElement);
    201         ~FragmentParsingContext();
    202 
    203         DocumentFragment* fragment() const { return m_fragment; }
    204         Element* contextElement() const { ASSERT(m_fragment); return m_contextElementStackItem->element(); }
    205         HTMLStackItem* contextElementStackItem() const { ASSERT(m_fragment); return m_contextElementStackItem.get(); }
    206 
    207     private:
                // Raw (non-ref-counted) pointer to the fragment.
    208         DocumentFragment* m_fragment;
    209         RefPtr<HTMLStackItem> m_contextElementStackItem;
    210     };
    211 
            // NOTE(review): presumably the HTML5 "frameset-ok flag" - confirm.
    212     bool m_framesetOk;
            // m_isAttached exists only when NDEBUG is not defined, so the object
            // layout differs between builds with and without NDEBUG.
    213 #ifndef NDEBUG
    214     bool m_isAttached;
    215 #endif
    216     FragmentParsingContext m_fragmentContext;
    217     HTMLConstructionSite m_tree;
    218 
    219     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
    220     InsertionMode m_insertionMode;
    221 
    222     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode
    223     InsertionMode m_originalInsertionMode;
    224 
            // NOTE(review): presumably the HTML5 "stack of template insertion modes" - confirm.
    225     Vector<InsertionMode> m_templateInsertionModes;
    226 
    227     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
    228     StringBuilder m_pendingTableCharacters;
    229 
            // Written by setShouldSkipLeadingNewline().
    230     bool m_shouldSkipLeadingNewline;
    231 
    232     // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
    233     // from within parser actions. We also need it to track the current position.
            // Raw pointer, not a ref-counted or owning smart pointer.
    234     HTMLDocumentParser* m_parser;
    235 
    236     RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
    237     TextPosition m_scriptToProcessStartPosition; // Start position of the script tag needing processing.
    238 
            // Held by value, not by reference.
    239     HTMLParserOptions m_options;
    240 };
    241 
    242 }
    243 
    244 #endif
    245