Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #ifndef HTMLTreeBuilder_h
     28 #define HTMLTreeBuilder_h
     29 
     30 #include "core/html/parser/HTMLConstructionSite.h"
     31 #include "core/html/parser/HTMLElementStack.h"
     32 #include "core/html/parser/HTMLParserOptions.h"
     33 #include "platform/heap/Handle.h"
     34 #include "wtf/Noncopyable.h"
     35 #include "wtf/PassOwnPtr.h"
     36 #include "wtf/PassRefPtr.h"
     37 #include "wtf/RefPtr.h"
     38 #include "wtf/Vector.h"
     39 #include "wtf/text/StringBuilder.h"
     40 #include "wtf/text/TextPosition.h"
     41 
     42 namespace blink {
     43 
     44 class AtomicHTMLToken; // Token type accepted by constructTree() and the process*() handlers below.
     45 class Document; // NOTE(review): not referenced by any declaration visible in this header.
     46 class DocumentFragment; // Taken by the fragment-parsing create() overload and FragmentParsingContext.
     47 class Element; // Context element for fragment parsing; also the type of m_scriptToProcess.
     48 class LocalFrame; // NOTE(review): not referenced by any declaration visible in this header.
     49 class HTMLToken; // NOTE(review): not referenced by any declaration visible in this header.
     50 class HTMLDocument; // Taken by the document-parsing create() overload.
     51 class Node; // NOTE(review): not referenced by any declaration visible in this header.
     52 class HTMLDocumentParser; // Passed to both create() overloads; type of m_parser.
     53 
     54 class HTMLTreeBuilder FINAL : public NoBaseWillBeGarbageCollectedFinalized<HTMLTreeBuilder> {
            // Tree-construction stage of the HTML parser: constructTree() accepts
            // AtomicHTMLTokens, and the builder holds an HTMLConstructionSite (m_tree)
            // together with the HTML5 insertion-mode state declared below.
            // Only declarations and a few inline accessors live in this header; all other
            // member functions are defined out of line.
     55     WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED;
     56 public:
            // Factory for building into an HTMLDocument. The constructors are private, so
            // outside code creates instances through these create() overloads.
     57     static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options)
     58     {
     59         return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options));
     60     }
            // Factory for building into a DocumentFragment with a context element.
            // Unlike the overload above, it takes no reportErrors flag.
     61     static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
     62     {
     63         return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options));
     64     }
     65     ~HTMLTreeBuilder();
            // Tracing hook taking a Visitor; defined out of line.
            // NOTE(review): presumably for the garbage collector, given the WillBe* transition types - confirm.
     66     void trace(Visitor*);
     67 
            // Read-only access to the construction site's stack of open elements.
     68     const HTMLElementStack* openElements() const { return m_tree.openElements(); }
     69 
            // True when m_fragmentContext holds a non-null fragment.
     70     bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
            // True when the open-element stack reports a template in HTML scope.
     71     bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); }
     72     bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); }
     73 
            // Defined out of line.
            // NOTE(review): presumably pairs with the ASSERT-only m_isAttached flag below - confirm in the .cpp.
     74     void detach();
     75 
            // Hands one token to the builder; defined out of line.
     76     void constructTree(AtomicHTMLToken*);
     77 
            // True while m_scriptToProcess is non-null.
     78     bool hasParserBlockingScript() const { return !!m_scriptToProcess; }
     79     // Must be called to take the parser-blocking script before calling the parser again.
            // NOTE(review): |scriptStartPosition| is a non-const reference, presumably an
            // out-parameter receiving the script's start position - confirm in the .cpp.
     80     PassRefPtrWillBeRawPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition);
     81 
     82     // Done, close any open tags, etc.
     83     void finished();
     84 
     85     // Synchronously flush pending text and queued tasks, possibly creating more DOM nodes.
     86     // Flushing pending text depends on |mode|. Forwards directly to m_tree.flush().
     87     void flush(FlushMode mode) { m_tree.flush(mode); }
     88 
            // Plain setter for m_shouldSkipLeadingNewline; where the flag is consumed is not visible in this header.
     89     void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; }
     90 
     91 private:
            // Declared here, defined out of line; passed by reference to the
            // processCharacterBuffer*() helpers below.
     92     class CharacterTokenBuffer;
     93     // Represents HTML5 "insertion mode"
     94     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
            // Enumerators have no explicit values, so their numeric values follow declaration order.
     95     enum InsertionMode {
     96         InitialMode,
     97         BeforeHTMLMode,
     98         BeforeHeadMode,
     99         InHeadMode,
    100         InHeadNoscriptMode,
    101         AfterHeadMode,
    102         TemplateContentsMode,
    103         InBodyMode,
    104         TextMode,
    105         InTableMode,
    106         InTableTextMode,
    107         InCaptionMode,
    108         InColumnGroupMode,
    109         InTableBodyMode,
    110         InRowMode,
    111         InCellMode,
    112         InSelectMode,
    113         InSelectInTableMode,
    114         AfterBodyMode,
    115         InFramesetMode,
    116         AfterFramesetMode,
    117         AfterAfterBodyMode,
    118         AfterAfterFramesetMode,
    119     };
    120 
            // Private constructors mirroring the two create() overloads above.
    121     HTMLTreeBuilder(HTMLDocumentParser*, HTMLDocument*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&);
    122     HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&);
    123 
            // Token handlers, all defined out of line.
            // NOTE(review): the names suggest processToken() dispatches to the
            // per-token-type handlers that follow - confirm in the .cpp.
    124     void processToken(AtomicHTMLToken*);
    125 
    126     void processDoctypeToken(AtomicHTMLToken*);
    127     void processStartTag(AtomicHTMLToken*);
    128     void processEndTag(AtomicHTMLToken*);
    129     void processComment(AtomicHTMLToken*);
    130     void processCharacter(AtomicHTMLToken*);
    131     void processEndOfFile(AtomicHTMLToken*);
    132 
            // Start/end tag helpers named after an insertion mode. processStartTagForInHead()
            // is the only one in this group that returns bool; the meaning of that result is
            // defined in the .cpp and is not visible here.
    133     bool processStartTagForInHead(AtomicHTMLToken*);
    134     void processStartTagForInBody(AtomicHTMLToken*);
    135     void processStartTagForInTable(AtomicHTMLToken*);
    136     void processEndTagForInBody(AtomicHTMLToken*);
    137     void processEndTagForInTable(AtomicHTMLToken*);
    138     void processEndTagForInTableBody(AtomicHTMLToken*);
    139     void processEndTagForInRow(AtomicHTMLToken*);
    140     void processEndTagForInCell(AtomicHTMLToken*);
    141 
            // Helpers named after one specific tag in one specific mode. Several take no
            // token argument. NOTE(review): the meaning of the bool results is not visible here.
    142     void processIsindexStartTagForInBody(AtomicHTMLToken*);
    143     void processHtmlStartTagForInBody(AtomicHTMLToken*);
    144     bool processBodyEndTagForInBody(AtomicHTMLToken*);
    145     bool processTableEndTagForInTable();
    146     bool processCaptionEndTagForInCaption();
    147     bool processColgroupEndTagForInColumnGroup();
    148     bool processTrEndTagForInRow();
    149     // FIXME: This function should be inlined into its one call site or it
    150     // needs to assert which tokens it can be called with.
    151     void processAnyOtherEndTagForInBody(AtomicHTMLToken*);
    152 
            // Character handling on a CharacterTokenBuffer (the class forward-declared above).
    153     void processCharacterBuffer(CharacterTokenBuffer&);
    154     inline void processCharacterBufferForInBody(CharacterTokenBuffer&);
    155 
            // "Fake" tag helpers take a tag name instead of a token. For the start tag,
            // |attributes| defaults to an empty Vector.
    156     void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>());
    157     void processFakeEndTag(const QualifiedName&);
    158     void processFakeEndTag(const AtomicString&);
    159     void processFakePEndTagIfPInButtonScope();
    160 
    161     void processGenericRCDATAStartTag(AtomicHTMLToken*);
    162     void processGenericRawTextStartTag(AtomicHTMLToken*);
    163     void processScriptStartTag(AtomicHTMLToken*);
    164 
    165     // Default processing for the different insertion modes.
    166     void defaultForInitial();
    167     void defaultForBeforeHTML();
    168     void defaultForBeforeHead();
    169     void defaultForInHead();
    170     void defaultForInHeadNoscript();
    171     void defaultForAfterHead();
    172     void defaultForInTableText();
    173 
            // Foreign-content support; all three are defined out of line.
    174     inline HTMLStackItem* adjustedCurrentStackItem() const;
    175     inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*);
    176     void processTokenInForeignContent(AtomicHTMLToken*);
    177 
            // Returns the attribute list by value.
    178     Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*);
    179 
    180     void callTheAdoptionAgency(AtomicHTMLToken*);
    181 
    182     void closeTheCell();
    183 
            // |shouldClose| is a compile-time template argument: a function taking a
            // const HTMLStackItem* and returning bool.
    184     template <bool shouldClose(const HTMLStackItem*)>
    185     void processCloseWhenNestedTag(AtomicHTMLToken*);
    186 
    187     void parseError(AtomicHTMLToken*);
    188 
            // Trivial accessors for m_insertionMode.
    189     InsertionMode insertionMode() const { return m_insertionMode; }
    190     void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; }
    191 
    192     void resetInsertionModeAppropriately();
    193 
            // <template> handling; defined out of line.
    194     void processTemplateStartTag(AtomicHTMLToken*);
    195     bool processTemplateEndTag(AtomicHTMLToken*);
    196     bool processEndOfFileForInTemplateContents(AtomicHTMLToken*);
    197 
            // Pairs the DocumentFragment being parsed into with the stack item of its
            // context element. Noncopyable and marked DISALLOW_ALLOCATION(); the builder
            // holds one by value as m_fragmentContext.
    198     class FragmentParsingContext {
    199         WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
    200         DISALLOW_ALLOCATION();
    201     public:
    202         FragmentParsingContext();
    203         FragmentParsingContext(DocumentFragment*, Element* contextElement);
    204         ~FragmentParsingContext();
    205 
                // Callers must handle a null result: isParsingFragment() null-tests this value.
    206         DocumentFragment* fragment() const { return m_fragment; }
                // Both accessors below assert that a fragment is set before
                // touching the context element's stack item.
    207         Element* contextElement() const { ASSERT(m_fragment); return m_contextElementStackItem->element(); }
    208         HTMLStackItem* contextElementStackItem() const { ASSERT(m_fragment); return m_contextElementStackItem.get(); }
    209 
    210         void trace(Visitor*);
    211 
    212     private:
    213         RawPtrWillBeMember<DocumentFragment> m_fragment;
    214         RefPtrWillBeMember<HTMLStackItem> m_contextElementStackItem;
    215     };
    216 
            // NOTE(review): presumably the HTML5 "frameset-ok flag" - confirm against the .cpp.
    217     bool m_framesetOk;
            // Present only in ENABLE(ASSERT) builds.
    218 #if ENABLE(ASSERT)
    219     bool m_isAttached;
    220 #endif
    221     FragmentParsingContext m_fragmentContext;
    222     HTMLConstructionSite m_tree;
    223 
    224     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
    225     InsertionMode m_insertionMode;
    226 
    227     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode
    228     InsertionMode m_originalInsertionMode;
    229 
            // NOTE(review): presumably the spec's stack of template insertion modes - confirm.
    230     Vector<InsertionMode> m_templateInsertionModes;
    231 
    232     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
    233     StringBuilder m_pendingTableCharacters;
    234 
            // Written by setShouldSkipLeadingNewline().
    235     bool m_shouldSkipLeadingNewline;
    236 
    237     // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
    238     // from within parser actions. We also need it to track the current position.
    239     RawPtrWillBeMember<HTMLDocumentParser> m_parser;
    240 
    241     RefPtrWillBeMember<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
    242     TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
    243 
    244     HTMLParserOptions m_options;
    245 };
    246 
    247 }
    248 
    249 #endif
    250