1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2011 Apple Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #ifndef HTMLTreeBuilder_h 28 #define HTMLTreeBuilder_h 29 30 #include "core/html/parser/HTMLConstructionSite.h" 31 #include "core/html/parser/HTMLElementStack.h" 32 #include "core/html/parser/HTMLParserOptions.h" 33 #include "wtf/Noncopyable.h" 34 #include "wtf/PassOwnPtr.h" 35 #include "wtf/PassRefPtr.h" 36 #include "wtf/RefPtr.h" 37 #include "wtf/Vector.h" 38 #include "wtf/text/StringBuilder.h" 39 #include "wtf/text/TextPosition.h" 40 41 namespace WebCore { 42 43 class AtomicHTMLToken; 44 class Document; 45 class DocumentFragment; 46 class Element; 47 class Frame; 48 class HTMLToken; 49 class HTMLDocument; 50 class Node; 51 class HTMLDocumentParser; 52 53 class HTMLTreeBuilder { 54 WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED; 55 public: 56 static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options) 57 { 58 return adoptPtr(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options)); 59 } 60 static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options) 61 { 62 return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options)); 63 } 64 ~HTMLTreeBuilder(); 65 66 const HTMLElementStack* openElements() const { return m_tree.openElements(); } 67 68 bool isParsingFragment() const { return !!m_fragmentContext.fragment(); } 69 bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); } 70 bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); } 71 72 void detach(); 73 74 void constructTree(AtomicHTMLToken*); 75 76 bool hasParserBlockingScript() const { return !!m_scriptToProcess; } 77 // Must be called to take the parser-blocking script before calling the parser again. 78 PassRefPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition); 79 80 // Done, close any open tags, etc. 81 void finished(); 82 83 // Synchronously empty any queues, possibly creating more DOM nodes. 84 void flush() { m_tree.flush(); } 85 86 void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; } 87 88 private: 89 class CharacterTokenBuffer; 90 // Represents HTML5 "insertion mode" 91 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 92 enum InsertionMode { 93 InitialMode, 94 BeforeHTMLMode, 95 BeforeHeadMode, 96 InHeadMode, 97 InHeadNoscriptMode, 98 AfterHeadMode, 99 TemplateContentsMode, 100 InBodyMode, 101 TextMode, 102 InTableMode, 103 InTableTextMode, 104 InCaptionMode, 105 InColumnGroupMode, 106 InTableBodyMode, 107 InRowMode, 108 InCellMode, 109 InSelectMode, 110 InSelectInTableMode, 111 AfterBodyMode, 112 InFramesetMode, 113 AfterFramesetMode, 114 AfterAfterBodyMode, 115 AfterAfterFramesetMode, 116 }; 117 118 HTMLTreeBuilder(HTMLDocumentParser*, HTMLDocument*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&); 119 HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&); 120 121 void processToken(AtomicHTMLToken*); 122 123 void processDoctypeToken(AtomicHTMLToken*); 124 void processStartTag(AtomicHTMLToken*); 125 void processEndTag(AtomicHTMLToken*); 126 void processComment(AtomicHTMLToken*); 127 void processCharacter(AtomicHTMLToken*); 128 void processEndOfFile(AtomicHTMLToken*); 129 130 bool processStartTagForInHead(AtomicHTMLToken*); 131 void processStartTagForInBody(AtomicHTMLToken*); 132 void processStartTagForInTable(AtomicHTMLToken*); 133 void processEndTagForInBody(AtomicHTMLToken*); 134 void processEndTagForInTable(AtomicHTMLToken*); 135 void processEndTagForInTableBody(AtomicHTMLToken*); 136 void processEndTagForInRow(AtomicHTMLToken*); 137 void processEndTagForInCell(AtomicHTMLToken*); 138 139 void processIsindexStartTagForInBody(AtomicHTMLToken*); 140 void processHtmlStartTagForInBody(AtomicHTMLToken*); 141 bool processBodyEndTagForInBody(AtomicHTMLToken*); 142 bool processTableEndTagForInTable(); 143 bool processCaptionEndTagForInCaption(); 144 bool processColgroupEndTagForInColumnGroup(); 145 bool processTrEndTagForInRow(); 146 // FIXME: This function should be inlined into its one call site or it 147 // needs to assert which tokens it can be called with. 148 void processAnyOtherEndTagForInBody(AtomicHTMLToken*); 149 150 void processCharacterBuffer(CharacterTokenBuffer&); 151 inline void processCharacterBufferForInBody(CharacterTokenBuffer&); 152 153 void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>()); 154 void processFakeEndTag(const QualifiedName&); 155 void processFakeEndTag(const AtomicString&); 156 void processFakeCharacters(const String&); 157 void processFakePEndTagIfPInButtonScope(); 158 159 void processGenericRCDATAStartTag(AtomicHTMLToken*); 160 void processGenericRawTextStartTag(AtomicHTMLToken*); 161 void processScriptStartTag(AtomicHTMLToken*); 162 163 // Default processing for the different insertion modes. 164 void defaultForInitial(); 165 void defaultForBeforeHTML(); 166 void defaultForBeforeHead(); 167 void defaultForInHead(); 168 void defaultForInHeadNoscript(); 169 void defaultForAfterHead(); 170 void defaultForInTableText(); 171 172 inline HTMLStackItem* adjustedCurrentStackItem() const; 173 inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*); 174 void processTokenInForeignContent(AtomicHTMLToken*); 175 176 Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*); 177 178 void callTheAdoptionAgency(AtomicHTMLToken*); 179 180 void closeTheCell(); 181 182 template <bool shouldClose(const HTMLStackItem*)> 183 void processCloseWhenNestedTag(AtomicHTMLToken*); 184 185 void parseError(AtomicHTMLToken*); 186 187 InsertionMode insertionMode() const { return m_insertionMode; } 188 void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; } 189 190 void resetInsertionModeAppropriately(); 191 192 void processTemplateStartTag(AtomicHTMLToken*); 193 bool processTemplateEndTag(AtomicHTMLToken*); 194 bool processEndOfFileForInTemplateContents(AtomicHTMLToken*); 195 196 class FragmentParsingContext { 197 WTF_MAKE_NONCOPYABLE(FragmentParsingContext); 198 public: 199 FragmentParsingContext(); 200 FragmentParsingContext(DocumentFragment*, Element* contextElement); 201 ~FragmentParsingContext(); 202 203 DocumentFragment* fragment() const { return m_fragment; } 204 Element* contextElement() const { ASSERT(m_fragment); return m_contextElementStackItem->element(); } 205 HTMLStackItem* contextElementStackItem() const { ASSERT(m_fragment); return m_contextElementStackItem.get(); } 206 207 private: 208 DocumentFragment* m_fragment; 209 RefPtr<HTMLStackItem> m_contextElementStackItem; 210 }; 211 212 bool m_framesetOk; 213 #ifndef NDEBUG 214 bool m_isAttached; 215 #endif 216 FragmentParsingContext m_fragmentContext; 217 HTMLConstructionSite m_tree; 218 219 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 220 InsertionMode m_insertionMode; 221 222 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode 223 InsertionMode m_originalInsertionMode; 224 225 Vector<InsertionMode> m_templateInsertionModes; 226 227 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens 228 StringBuilder m_pendingTableCharacters; 229 230 bool m_shouldSkipLeadingNewline; 231 232 // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer 233 // from within parser actions. We also need it to track the current position. 234 HTMLDocumentParser* m_parser; 235 236 RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser. 237 TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing. 238 239 HTMLParserOptions m_options; 240 }; 241 242 } 243 244 #endif 245