1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2011 Apple Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #ifndef HTMLTreeBuilder_h 28 #define HTMLTreeBuilder_h 29 30 #include "Element.h" 31 #include "FragmentScriptingPermission.h" 32 #include "HTMLConstructionSite.h" 33 #include "HTMLElementStack.h" 34 #include "HTMLFormattingElementList.h" 35 #include "HTMLTokenizer.h" 36 #include <wtf/text/TextPosition.h> 37 #include <wtf/Noncopyable.h> 38 #include <wtf/OwnPtr.h> 39 #include <wtf/PassOwnPtr.h> 40 #include <wtf/PassRefPtr.h> 41 #include <wtf/RefPtr.h> 42 #include <wtf/unicode/Unicode.h> 43 44 namespace WebCore { 45 46 class AtomicHTMLToken; 47 class Document; 48 class DocumentFragment; 49 class Frame; 50 class HTMLToken; 51 class HTMLDocument; 52 class Node; 53 class HTMLDocumentParser; 54 55 class HTMLTreeBuilder { 56 WTF_MAKE_NONCOPYABLE(HTMLTreeBuilder); WTF_MAKE_FAST_ALLOCATED; 57 public: 58 static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks) 59 { 60 return adoptPtr(new HTMLTreeBuilder(parser, document, reportErrors, usePreHTML5ParserQuirks)); 61 } 62 static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks) 63 { 64 return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks)); 65 } 66 ~HTMLTreeBuilder(); 67 68 bool isParsingFragment() const { return !!m_fragmentContext.fragment(); } 69 70 void detach(); 71 72 void setPaused(bool paused) { m_isPaused = paused; } 73 bool isPaused() const { return m_isPaused; } 74 75 // The token really should be passed as a const& since it's never modified. 76 void constructTreeFromToken(HTMLToken&); 77 void constructTreeFromAtomicToken(AtomicHTMLToken&); 78 79 // Must be called when parser is paused before calling the parser again. 80 PassRefPtr<Element> takeScriptToProcess(TextPosition1& scriptStartPosition); 81 82 // Done, close any open tags, etc. 83 void finished(); 84 85 static bool scriptEnabled(Frame*); 86 static bool pluginsEnabled(Frame*); 87 88 private: 89 class FakeInsertionMode; 90 class ExternalCharacterTokenBuffer; 91 // Represents HTML5 "insertion mode" 92 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#insertion-mode 93 enum InsertionMode { 94 InitialMode, 95 BeforeHTMLMode, 96 BeforeHeadMode, 97 InHeadMode, 98 InHeadNoscriptMode, 99 AfterHeadMode, 100 InBodyMode, 101 TextMode, 102 InTableMode, 103 InTableTextMode, 104 InCaptionMode, 105 InColumnGroupMode, 106 InTableBodyMode, 107 InRowMode, 108 InCellMode, 109 InSelectMode, 110 InSelectInTableMode, 111 InForeignContentMode, 112 AfterBodyMode, 113 InFramesetMode, 114 AfterFramesetMode, 115 AfterAfterBodyMode, 116 AfterAfterFramesetMode, 117 }; 118 119 HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument*, bool reportErrors, bool usePreHTML5ParserQuirks); 120 HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment*, Element* contextElement, FragmentScriptingPermission, bool usePreHTML5ParserQuirks); 121 122 void processToken(AtomicHTMLToken&); 123 124 void processDoctypeToken(AtomicHTMLToken&); 125 void processStartTag(AtomicHTMLToken&); 126 void processEndTag(AtomicHTMLToken&); 127 void processComment(AtomicHTMLToken&); 128 void processCharacter(AtomicHTMLToken&); 129 void processEndOfFile(AtomicHTMLToken&); 130 131 bool processStartTagForInHead(AtomicHTMLToken&); 132 void processStartTagForInBody(AtomicHTMLToken&); 133 void processStartTagForInTable(AtomicHTMLToken&); 134 void processEndTagForInBody(AtomicHTMLToken&); 135 void processEndTagForInTable(AtomicHTMLToken&); 136 void processEndTagForInTableBody(AtomicHTMLToken&); 137 void processEndTagForInRow(AtomicHTMLToken&); 138 void processEndTagForInCell(AtomicHTMLToken&); 139 140 void processIsindexStartTagForInBody(AtomicHTMLToken&); 141 bool processBodyEndTagForInBody(AtomicHTMLToken&); 142 bool processTableEndTagForInTable(); 143 bool processCaptionEndTagForInCaption(); 144 bool processColgroupEndTagForInColumnGroup(); 145 bool processTrEndTagForInRow(); 146 // FIXME: This function should be inlined into its one call site or it 147 // needs to assert which tokens it can be called with. 148 void processAnyOtherEndTagForInBody(AtomicHTMLToken&); 149 150 void processCharacterBuffer(ExternalCharacterTokenBuffer&); 151 152 void processFakeStartTag(const QualifiedName&, PassRefPtr<NamedNodeMap> attributes = 0); 153 void processFakeEndTag(const QualifiedName&); 154 void processFakeCharacters(const String&); 155 void processFakePEndTagIfPInButtonScope(); 156 157 void processGenericRCDATAStartTag(AtomicHTMLToken&); 158 void processGenericRawTextStartTag(AtomicHTMLToken&); 159 void processScriptStartTag(AtomicHTMLToken&); 160 161 // Default processing for the different insertion modes. 162 void defaultForInitial(); 163 void defaultForBeforeHTML(); 164 void defaultForBeforeHead(); 165 void defaultForInHead(); 166 void defaultForInHeadNoscript(); 167 void defaultForAfterHead(); 168 void defaultForInTableText(); 169 170 void prepareToReprocessToken(); 171 172 void reprocessStartTag(AtomicHTMLToken&); 173 void reprocessEndTag(AtomicHTMLToken&); 174 175 PassRefPtr<NamedNodeMap> attributesForIsindexInput(AtomicHTMLToken&); 176 177 HTMLElementStack::ElementRecord* furthestBlockForFormattingElement(Element*); 178 void callTheAdoptionAgency(AtomicHTMLToken&); 179 180 void closeTheCell(); 181 182 template <bool shouldClose(const ContainerNode*)> 183 void processCloseWhenNestedTag(AtomicHTMLToken&); 184 185 bool m_framesetOk; 186 187 void parseError(AtomicHTMLToken&); 188 189 InsertionMode insertionMode() const { return m_insertionMode; } 190 void setInsertionMode(InsertionMode mode) 191 { 192 m_insertionMode = mode; 193 m_isFakeInsertionMode = false; 194 } 195 196 bool isFakeInsertionMode() { return m_isFakeInsertionMode; } 197 void setFakeInsertionMode(InsertionMode mode) 198 { 199 m_insertionMode = mode; 200 m_isFakeInsertionMode = true; 201 } 202 203 void resetInsertionModeAppropriately(); 204 205 void processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token); 206 void resetForeignInsertionMode(); 207 208 class FragmentParsingContext { 209 WTF_MAKE_NONCOPYABLE(FragmentParsingContext); 210 public: 211 FragmentParsingContext(); 212 FragmentParsingContext(DocumentFragment*, Element* contextElement, FragmentScriptingPermission); 213 ~FragmentParsingContext(); 214 215 DocumentFragment* fragment() const { return m_fragment; } 216 Element* contextElement() const { ASSERT(m_fragment); return m_contextElement; } 217 FragmentScriptingPermission scriptingPermission() const { ASSERT(m_fragment); return m_scriptingPermission; } 218 219 private: 220 DocumentFragment* m_fragment; 221 Element* m_contextElement; 222 223 // FragmentScriptingNotAllowed causes the Parser to remove children 224 // from <script> tags (so javascript doesn't show up in pastes). 225 FragmentScriptingPermission m_scriptingPermission; 226 }; 227 228 FragmentParsingContext m_fragmentContext; 229 230 Document* m_document; 231 HTMLConstructionSite m_tree; 232 233 bool m_reportErrors; 234 bool m_isPaused; 235 bool m_isFakeInsertionMode; 236 237 // FIXME: InsertionModes should be a separate object to prevent direct 238 // manipulation of these variables. For now, be careful to always use 239 // setInsertionMode and never set m_insertionMode directly. 240 InsertionMode m_insertionMode; 241 InsertionMode m_originalInsertionMode; 242 243 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens 244 Vector<UChar> m_pendingTableCharacters; 245 246 // We access parser because HTML5 spec requires that we be able to change the state of the tokenizer 247 // from within parser actions. We also need it to track the current position. 248 HTMLDocumentParser* m_parser; 249 250 RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser. 251 TextPosition1 m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing. 252 253 // FIXME: We probably want to remove this member. Originally, it was 254 // created to service the legacy tree builder, but it seems to be used for 255 // some other things now. 256 TextPosition0 m_lastScriptElementStartPosition; 257 258 bool m_usePreHTML5ParserQuirks; 259 260 bool m_hasPendingForeignInsertionModeSteps; 261 }; 262 263 } 264 265 #endif 266