1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2011 Apple Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #ifndef HTMLTreeBuilder_h 28 #define HTMLTreeBuilder_h 29 30 #include "core/html/parser/HTMLConstructionSite.h" 31 #include "core/html/parser/HTMLElementStack.h" 32 #include "core/html/parser/HTMLParserOptions.h" 33 #include "wtf/Noncopyable.h" 34 #include "wtf/PassOwnPtr.h" 35 #include "wtf/PassRefPtr.h" 36 #include "wtf/RefPtr.h" 37 #include "wtf/Vector.h" 38 #include "wtf/text/StringBuilder.h" 39 #include "wtf/text/TextPosition.h" 40 41 namespace WebCore { 42 43 class AtomicHTMLToken; 44 class Document; 45 class DocumentFragment; 46 class Element; 47 class Frame; 48 class HTMLToken; 49 class HTMLDocument; 50 class Node; 51 class HTMLDocumentParser; 52 53 class HTMLTreeBuilder { 54 WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED; 55 public: 56 static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, Document* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options) 57 { 58 return adoptPtr(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options)); 59 } 60 static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options) 61 { 62 return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options)); 63 } 64 ~HTMLTreeBuilder(); 65 66 const HTMLElementStack* openElements() const { return m_tree.openElements(); } 67 68 bool isParsingFragment() const { return !!m_fragmentContext.fragment(); } 69 bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); } 70 bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); } 71 72 void detach(); 73 74 void constructTree(AtomicHTMLToken*); 75 76 bool hasParserBlockingScript() const { return !!m_scriptToProcess; } 77 // Must be called to take the parser-blocking script before calling the parser again. 78 PassRefPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition); 79 80 // Done, close any open tags, etc. 81 void finished(); 82 83 void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; } 84 85 private: 86 class CharacterTokenBuffer; 87 // Represents HTML5 "insertion mode" 88 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 89 enum InsertionMode { 90 InitialMode, 91 BeforeHTMLMode, 92 BeforeHeadMode, 93 InHeadMode, 94 InHeadNoscriptMode, 95 AfterHeadMode, 96 TemplateContentsMode, 97 InBodyMode, 98 TextMode, 99 InTableMode, 100 InTableTextMode, 101 InCaptionMode, 102 InColumnGroupMode, 103 InTableBodyMode, 104 InRowMode, 105 InCellMode, 106 InSelectMode, 107 InSelectInTableMode, 108 AfterBodyMode, 109 InFramesetMode, 110 AfterFramesetMode, 111 AfterAfterBodyMode, 112 AfterAfterFramesetMode, 113 }; 114 115 HTMLTreeBuilder(HTMLDocumentParser*, Document*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&); 116 HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&); 117 118 void processToken(AtomicHTMLToken*); 119 120 void processDoctypeToken(AtomicHTMLToken*); 121 void processStartTag(AtomicHTMLToken*); 122 void processEndTag(AtomicHTMLToken*); 123 void processComment(AtomicHTMLToken*); 124 void processCharacter(AtomicHTMLToken*); 125 void processEndOfFile(AtomicHTMLToken*); 126 127 bool processStartTagForInHead(AtomicHTMLToken*); 128 void processStartTagForInBody(AtomicHTMLToken*); 129 void processStartTagForInTable(AtomicHTMLToken*); 130 void processEndTagForInBody(AtomicHTMLToken*); 131 void processEndTagForInTable(AtomicHTMLToken*); 132 void processEndTagForInTableBody(AtomicHTMLToken*); 133 void processEndTagForInRow(AtomicHTMLToken*); 134 void processEndTagForInCell(AtomicHTMLToken*); 135 136 void processIsindexStartTagForInBody(AtomicHTMLToken*); 137 void processHtmlStartTagForInBody(AtomicHTMLToken*); 138 bool processBodyEndTagForInBody(AtomicHTMLToken*); 139 bool processTableEndTagForInTable(); 140 bool processCaptionEndTagForInCaption(); 141 bool processColgroupEndTagForInColumnGroup(); 142 bool processTrEndTagForInRow(); 143 // FIXME: This function should be inlined into its one call site or it 144 // needs to assert which tokens it can be called with. 145 void processAnyOtherEndTagForInBody(AtomicHTMLToken*); 146 147 void processCharacterBuffer(CharacterTokenBuffer&); 148 inline void processCharacterBufferForInBody(CharacterTokenBuffer&); 149 150 void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>()); 151 void processFakeEndTag(const QualifiedName&); 152 void processFakeEndTag(const AtomicString&); 153 void processFakeCharacters(const String&); 154 void processFakePEndTagIfPInButtonScope(); 155 156 void processGenericRCDATAStartTag(AtomicHTMLToken*); 157 void processGenericRawTextStartTag(AtomicHTMLToken*); 158 void processScriptStartTag(AtomicHTMLToken*); 159 160 // Default processing for the different insertion modes. 161 void defaultForInitial(); 162 void defaultForBeforeHTML(); 163 void defaultForBeforeHead(); 164 void defaultForInHead(); 165 void defaultForInHeadNoscript(); 166 void defaultForAfterHead(); 167 void defaultForInTableText(); 168 169 inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*); 170 void processTokenInForeignContent(AtomicHTMLToken*); 171 172 Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*); 173 174 void callTheAdoptionAgency(AtomicHTMLToken*); 175 176 void closeTheCell(); 177 178 template <bool shouldClose(const HTMLStackItem*)> 179 void processCloseWhenNestedTag(AtomicHTMLToken*); 180 181 void parseError(AtomicHTMLToken*); 182 183 InsertionMode insertionMode() const { return m_insertionMode; } 184 void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; } 185 186 void resetInsertionModeAppropriately(); 187 188 void processTemplateStartTag(AtomicHTMLToken*); 189 bool processTemplateEndTag(AtomicHTMLToken*); 190 bool processEndOfFileForInTemplateContents(AtomicHTMLToken*); 191 192 class FragmentParsingContext { 193 WTF_MAKE_NONCOPYABLE(FragmentParsingContext); 194 public: 195 FragmentParsingContext(); 196 FragmentParsingContext(DocumentFragment*, Element* contextElement); 197 ~FragmentParsingContext(); 198 199 DocumentFragment* fragment() const { return m_fragment; } 200 Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; } 201 202 private: 203 DocumentFragment* m_fragment; 204 Element* m_contextElement; 205 }; 206 207 bool m_framesetOk; 208 #ifndef NDEBUG 209 bool m_isAttached; 210 #endif 211 FragmentParsingContext m_fragmentContext; 212 HTMLConstructionSite m_tree; 213 214 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 215 InsertionMode m_insertionMode; 216 217 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode 218 InsertionMode m_originalInsertionMode; 219 220 Vector<InsertionMode> m_templateInsertionModes; 221 222 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens 223 StringBuilder m_pendingTableCharacters; 224 225 bool m_shouldSkipLeadingNewline; 226 227 // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer 228 // from within parser actions. We also need it to track the current position. 229 HTMLDocumentParser* m_parser; 230 231 RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser. 232 TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing. 233 234 HTMLParserOptions m_options; 235 }; 236 237 } 238 239 #endif 240