/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 * Copyright (C) 2011 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef HTMLTreeBuilder_h
#define HTMLTreeBuilder_h

#include "core/html/parser/HTMLConstructionSite.h"
#include "core/html/parser/HTMLElementStack.h"
#include "core/html/parser/HTMLParserOptions.h"
#include "platform/heap/Handle.h"
#include "wtf/Noncopyable.h"
#include "wtf/PassOwnPtr.h"
#include "wtf/PassRefPtr.h"
#include "wtf/RefPtr.h"
#include "wtf/Vector.h"
#include "wtf/text/StringBuilder.h"
#include "wtf/text/TextPosition.h"

namespace blink {

class AtomicHTMLToken;
class Document;
class DocumentFragment;
class Element;
class LocalFrame;
class HTMLToken;
class HTMLDocument;
class Node;
class HTMLDocumentParser;

// Tree-construction stage of the HTML parser.
//
// Tokens are handed in one at a time through constructTree(). The builder
// keeps the HTML5 "insertion mode" state (see InsertionMode below) and owns an
// HTMLConstructionSite (m_tree), to which openElements() and flush() forward.
//
// An instance is created either for a whole HTMLDocument or for a
// DocumentFragment with a context element; isParsingFragment() tells the two
// apart. Instances are non-copyable and can only be obtained via create().
class HTMLTreeBuilder FINAL : public NoBaseWillBeGarbageCollectedFinalized<HTMLTreeBuilder> {
    WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED;
public:
    // Creates a builder for a full document parse. All arguments are passed
    // straight through to the private document constructor.
    static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, ParserContentPolicy parserContentPolicy, bool reportErrors, const HTMLParserOptions& options)
    {
        return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, document, parserContentPolicy, reportErrors, options));
    }
    // Creates a builder for a fragment parse into |fragment| with
    // |contextElement| as the context element. All arguments are passed
    // straight through to the private fragment constructor. Unlike the document
    // overload there is no reportErrors argument.
    static PassOwnPtrWillBeRawPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
    {
        return adoptPtrWillBeNoop(new HTMLTreeBuilder(parser, fragment, contextElement, parserContentPolicy, options));
    }
    ~HTMLTreeBuilder();
    // Defined out of line.
    // NOTE(review): presumably the tracing hook for the garbage-collected heap
    // (platform/heap/Handle.h) - confirm against the implementation.
    void trace(Visitor*);

    // The stack of open elements, as held by the construction site.
    const HTMLElementStack* openElements() const { return m_tree.openElements(); }

    // True when this builder was given a DocumentFragment, i.e. the fragment
    // context holds a non-null fragment.
    bool isParsingFragment() const { return !!m_fragmentContext.fragment(); }
    // True when the open-element stack reports a template in HTML scope.
    bool isParsingTemplateContents() const { return m_tree.openElements()->hasTemplateInHTMLScope(); }
    // True when either of the two conditions above holds.
    bool isParsingFragmentOrTemplateContents() const { return isParsingFragment() || isParsingTemplateContents(); }

    // Defined out of line.
    // NOTE(review): presumably severs the builder from its parser/document and
    // is what m_isAttached guards in assert-enabled builds - confirm.
    void detach();

    // Entry point for feeding one token to the tree builder. Defined out of line.
    void constructTree(AtomicHTMLToken*);

    // True while a script element is stored in m_scriptToProcess.
    bool hasParserBlockingScript() const { return !!m_scriptToProcess; }
    // Must be called to take the parser-blocking script before calling the parser again.
    // The script's start position is passed back through |scriptStartPosition|.
    PassRefPtrWillBeRawPtr<Element> takeScriptToProcess(TextPosition& scriptStartPosition);

    // Done, close any open tags, etc.
    void finished();

    // Synchronously flush pending text and queued tasks, possibly creating more DOM nodes.
    // Flushing pending text depends on |mode|.
    // This simply forwards to the construction site.
    void flush(FlushMode mode) { m_tree.flush(mode); }

    // Stores |shouldSkip| in m_shouldSkipLeadingNewline; how the flag is
    // consumed is decided in the out-of-line implementation.
    void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; }

private:
    // Declared here, defined in the implementation file.
    class CharacterTokenBuffer;
    // Represents HTML5 "insertion mode"
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
    enum InsertionMode {
        InitialMode,
        BeforeHTMLMode,
        BeforeHeadMode,
        InHeadMode,
        InHeadNoscriptMode,
        AfterHeadMode,
        TemplateContentsMode,
        InBodyMode,
        TextMode,
        InTableMode,
        InTableTextMode,
        InCaptionMode,
        InColumnGroupMode,
        InTableBodyMode,
        InRowMode,
        InCellMode,
        InSelectMode,
        InSelectInTableMode,
        AfterBodyMode,
        InFramesetMode,
        AfterFramesetMode,
        AfterAfterBodyMode,
        AfterAfterFramesetMode,
    };

    // Constructors are private; use the create() factories above.
    HTMLTreeBuilder(HTMLDocumentParser*, HTMLDocument*, ParserContentPolicy, bool reportErrors, const HTMLParserOptions&);
    HTMLTreeBuilder(HTMLDocumentParser*, DocumentFragment*, Element* contextElement, ParserContentPolicy, const HTMLParserOptions&);

    // Token processing. All of the process* functions below are defined out of
    // line; the names indicate the token type and/or insertion mode each one
    // is meant for.
    void processToken(AtomicHTMLToken*);

    // One handler per token type.
    void processDoctypeToken(AtomicHTMLToken*);
    void processStartTag(AtomicHTMLToken*);
    void processEndTag(AtomicHTMLToken*);
    void processComment(AtomicHTMLToken*);
    void processCharacter(AtomicHTMLToken*);
    void processEndOfFile(AtomicHTMLToken*);

    // Start/end tag handlers for specific insertion modes.
    // NOTE(review): the meaning of the bool results in this section is not
    // visible from this header - check the implementation before relying on it.
    bool processStartTagForInHead(AtomicHTMLToken*);
    void processStartTagForInBody(AtomicHTMLToken*);
    void processStartTagForInTable(AtomicHTMLToken*);
    void processEndTagForInBody(AtomicHTMLToken*);
    void processEndTagForInTable(AtomicHTMLToken*);
    void processEndTagForInTableBody(AtomicHTMLToken*);
    void processEndTagForInRow(AtomicHTMLToken*);
    void processEndTagForInCell(AtomicHTMLToken*);

    // Handlers for individual tags within a given insertion mode.
    void processIsindexStartTagForInBody(AtomicHTMLToken*);
    void processHtmlStartTagForInBody(AtomicHTMLToken*);
    bool processBodyEndTagForInBody(AtomicHTMLToken*);
    bool processTableEndTagForInTable();
    bool processCaptionEndTagForInCaption();
    bool processColgroupEndTagForInColumnGroup();
    bool processTrEndTagForInRow();
    // FIXME: This function should be inlined into its one call site or it
    // needs to assert which tokens it can be called with.
    void processAnyOtherEndTagForInBody(AtomicHTMLToken*);

    // Character-data handling on a CharacterTokenBuffer.
    void processCharacterBuffer(CharacterTokenBuffer&);
    inline void processCharacterBufferForInBody(CharacterTokenBuffer&);

    // Handlers for tags that are identified by name rather than by a token
    // argument. processFakeStartTag() takes an optional attribute list that
    // defaults to empty.
    void processFakeStartTag(const QualifiedName&, const Vector<Attribute>& attributes = Vector<Attribute>());
    void processFakeEndTag(const QualifiedName&);
    void processFakeEndTag(const AtomicString&);
    void processFakePEndTagIfPInButtonScope();

    // Start-tag handlers for RCDATA, raw text and script elements.
    void processGenericRCDATAStartTag(AtomicHTMLToken*);
    void processGenericRawTextStartTag(AtomicHTMLToken*);
    void processScriptStartTag(AtomicHTMLToken*);

    // Default processing for the different insertion modes.
    void defaultForInitial();
    void defaultForBeforeHTML();
    void defaultForBeforeHead();
    void defaultForInHead();
    void defaultForInHeadNoscript();
    void defaultForAfterHead();
    void defaultForInTableText();

    // Foreign-content support.
    // NOTE(review): "foreign content" presumably refers to the HTML5 term for
    // SVG/MathML subtrees - confirm against the implementation.
    inline HTMLStackItem* adjustedCurrentStackItem() const;
    inline bool shouldProcessTokenInForeignContent(AtomicHTMLToken*);
    void processTokenInForeignContent(AtomicHTMLToken*);

    // Builds an attribute list for the <input> associated with an isindex
    // token (per the function name; defined out of line).
    Vector<Attribute> attributesForIsindexInput(AtomicHTMLToken*);

    // NOTE(review): presumably implements the HTML5 "adoption agency
    // algorithm" - confirm against the implementation.
    void callTheAdoptionAgency(AtomicHTMLToken*);

    void closeTheCell();

    // Parameterized at compile time on a predicate over HTMLStackItem, so each
    // instantiation is specialised for one |shouldClose| test.
    template <bool shouldClose(const HTMLStackItem*)>
    void processCloseWhenNestedTag(AtomicHTMLToken*);

    void parseError(AtomicHTMLToken*);

    // Accessors for the current insertion mode.
    InsertionMode insertionMode() const { return m_insertionMode; }
    void setInsertionMode(InsertionMode mode) { m_insertionMode = mode; }

    void resetInsertionModeAppropriately();

    // <template> handling.
    void processTemplateStartTag(AtomicHTMLToken*);
    bool processTemplateEndTag(AtomicHTMLToken*);
    bool processEndOfFileForInTemplateContents(AtomicHTMLToken*);

    // Holds the state that only exists for a fragment parse: the target
    // DocumentFragment and a stack item for the context element. A
    // default-constructed context is used for full-document parses, which is
    // why isParsingFragment() tests fragment() for null.
    class FragmentParsingContext {
        WTF_MAKE_NONCOPYABLE(FragmentParsingContext);
        DISALLOW_ALLOCATION();
    public:
        FragmentParsingContext();
        FragmentParsingContext(DocumentFragment*, Element* contextElement);
        ~FragmentParsingContext();

        // May be null; the two accessors below assert that it is not.
        DocumentFragment* fragment() const { return m_fragment; }
        // Both accessors dereference/return the context element stack item and
        // must only be called when a fragment is set (checked by ASSERT).
        Element* contextElement() const { ASSERT(m_fragment); return m_contextElementStackItem->element(); }
        HTMLStackItem* contextElementStackItem() const { ASSERT(m_fragment); return m_contextElementStackItem.get(); }

        void trace(Visitor*);

    private:
        RawPtrWillBeMember<DocumentFragment> m_fragment;
        RefPtrWillBeMember<HTMLStackItem> m_contextElementStackItem;
    };

    // NOTE(review): presumably the HTML5 "frameset-ok flag" - confirm.
    bool m_framesetOk;
#if ENABLE(ASSERT)
    // Only exists in assert-enabled builds.
    // NOTE(review): presumably cleared by detach() - confirm.
    bool m_isAttached;
#endif
    FragmentParsingContext m_fragmentContext;
    // The construction site that openElements() and flush() forward to.
    HTMLConstructionSite m_tree;

    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode
    InsertionMode m_insertionMode;

    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#original-insertion-mode
    InsertionMode m_originalInsertionMode;

    // NOTE(review): presumably the HTML5 "stack of template insertion modes" - confirm.
    Vector<InsertionMode> m_templateInsertionModes;

    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
    StringBuilder m_pendingTableCharacters;

    // Written by setShouldSkipLeadingNewline().
    bool m_shouldSkipLeadingNewline;

    // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
    // from within parser actions. We also need it to track the current position.
    RawPtrWillBeMember<HTMLDocumentParser> m_parser;

    RefPtrWillBeMember<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
    TextPosition m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.

    HTMLParserOptions m_options;
};

} // namespace blink

#endif // HTMLTreeBuilder_h