1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef HTMLDocumentParser_h 27 #define HTMLDocumentParser_h 28 29 #include "core/dom/ParserContentPolicy.h" 30 #include "core/dom/ScriptableDocumentParser.h" 31 #include "core/fetch/ResourceClient.h" 32 #include "core/frame/UseCounter.h" 33 #include "core/html/parser/BackgroundHTMLInputStream.h" 34 #include "core/html/parser/CompactHTMLToken.h" 35 #include "core/html/parser/HTMLInputStream.h" 36 #include "core/html/parser/HTMLParserOptions.h" 37 #include "core/html/parser/HTMLPreloadScanner.h" 38 #include "core/html/parser/HTMLScriptRunnerHost.h" 39 #include "core/html/parser/HTMLSourceTracker.h" 40 #include "core/html/parser/HTMLToken.h" 41 #include "core/html/parser/HTMLTokenizer.h" 42 #include "core/html/parser/HTMLTreeBuilderSimulator.h" 43 #include "core/html/parser/XSSAuditor.h" 44 #include "core/html/parser/XSSAuditorDelegate.h" 45 #include "platform/text/SegmentedString.h" 46 #include "wtf/Deque.h" 47 #include "wtf/OwnPtr.h" 48 #include "wtf/WeakPtr.h" 49 #include "wtf/text/TextPosition.h" 50 51 namespace WebCore { 52 53 class BackgroundHTMLParser; 54 class CompactHTMLToken; 55 class Document; 56 class DocumentFragment; 57 class HTMLDocument; 58 class HTMLParserScheduler; 59 class HTMLScriptRunner; 60 class HTMLTreeBuilder; 61 class HTMLResourcePreloader; 62 class ScriptController; 63 class ScriptSourceCode; 64 65 class PumpSession; 66 67 class HTMLDocumentParser : public ScriptableDocumentParser, HTMLScriptRunnerHost, ResourceClient { 68 WTF_MAKE_FAST_ALLOCATED; 69 public: 70 static PassRefPtr<HTMLDocumentParser> create(HTMLDocument* document, bool reportErrors) 71 { 72 return adoptRef(new HTMLDocumentParser(document, reportErrors)); 73 } 74 virtual ~HTMLDocumentParser(); 75 76 // Exposed for HTMLParserScheduler 77 void resumeParsingAfterYield(); 78 79 static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, ParserContentPolicy = AllowScriptingContent); 80 81 HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); } 82 83 virtual TextPosition textPosition() const; 84 virtual OrdinalNumber lineNumber() const; 85 86 virtual void suspendScheduledTasks(); 87 virtual void resumeScheduledTasks(); 88 89 struct ParsedChunk { 90 OwnPtr<CompactHTMLTokenStream> tokens; 91 PreloadRequestStream preloads; 92 XSSInfoStream xssInfos; 93 HTMLTokenizer::State tokenizerState; 94 HTMLTreeBuilderSimulator::State treeBuilderState; 95 HTMLInputCheckpoint inputCheckpoint; 96 TokenPreloadScannerCheckpoint preloadScannerCheckpoint; 97 }; 98 void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>); 99 100 UseCounter* useCounter() { return UseCounter::getFrom(contextForParsingSession()); } 101 102 protected: 103 virtual void insert(const SegmentedString&) OVERRIDE; 104 virtual void append(PassRefPtr<StringImpl>) OVERRIDE; 105 virtual void finish() OVERRIDE; 106 107 HTMLDocumentParser(HTMLDocument*, bool reportErrors); 108 HTMLDocumentParser(DocumentFragment*, Element* contextElement, ParserContentPolicy); 109 110 HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); } 111 112 void forcePlaintextForTextDocument(); 113 114 private: 115 static PassRefPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 116 { 117 return adoptRef(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy)); 118 } 119 120 // DocumentParser 121 virtual void pinToMainThread() OVERRIDE; 122 virtual void detach() OVERRIDE; 123 virtual bool hasInsertionPoint() OVERRIDE; 124 virtual bool processingData() const OVERRIDE; 125 virtual void prepareToStopParsing() OVERRIDE; 126 virtual void stopParsing() OVERRIDE; 127 virtual bool isWaitingForScripts() const OVERRIDE; 128 virtual bool isExecutingScript() const OVERRIDE; 129 virtual void executeScriptsWaitingForResources() OVERRIDE; 130 131 // HTMLScriptRunnerHost 132 virtual void watchForLoad(Resource*) OVERRIDE; 133 virtual void stopWatchingForLoad(Resource*) OVERRIDE; 134 virtual HTMLInputStream& inputStream() { return m_input; } 135 virtual bool hasPreloadScanner() const { return m_preloadScanner.get() && !shouldUseThreading(); } 136 virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE; 137 138 // ResourceClient 139 virtual void notifyFinished(Resource*); 140 141 void startBackgroundParser(); 142 void stopBackgroundParser(); 143 void validateSpeculations(PassOwnPtr<ParsedChunk> lastChunk); 144 void discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunk, PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>); 145 void processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>); 146 void pumpPendingSpeculations(); 147 148 Document* contextForParsingSession(); 149 150 enum SynchronousMode { 151 AllowYield, 152 ForceSynchronous, 153 }; 154 bool canTakeNextToken(SynchronousMode, PumpSession&); 155 void pumpTokenizer(SynchronousMode); 156 void pumpTokenizerIfPossible(SynchronousMode); 157 void constructTreeFromHTMLToken(HTMLToken&); 158 void constructTreeFromCompactHTMLToken(const CompactHTMLToken&); 159 160 void runScriptsForPausedTreeBuilder(); 161 void resumeParsingAfterScriptExecution(); 162 163 void attemptToEnd(); 164 void endIfDelayed(); 165 void attemptToRunDeferredScriptsAndEnd(); 166 void end(); 167 168 bool shouldUseThreading() const { return m_options.useThreading && !m_isPinnedToMainThread; } 169 170 bool isParsingFragment() const; 171 bool isScheduledForResume() const; 172 bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; } 173 bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); } 174 175 HTMLToken& token() { return *m_token; } 176 177 HTMLParserOptions m_options; 178 HTMLInputStream m_input; 179 180 OwnPtr<HTMLToken> m_token; 181 OwnPtr<HTMLTokenizer> m_tokenizer; 182 OwnPtr<HTMLScriptRunner> m_scriptRunner; 183 OwnPtr<HTMLTreeBuilder> m_treeBuilder; 184 OwnPtr<HTMLPreloadScanner> m_preloadScanner; 185 OwnPtr<HTMLPreloadScanner> m_insertionPreloadScanner; 186 OwnPtr<HTMLParserScheduler> m_parserScheduler; 187 HTMLSourceTracker m_sourceTracker; 188 TextPosition m_textPosition; 189 XSSAuditor m_xssAuditor; 190 XSSAuditorDelegate m_xssAuditorDelegate; 191 192 // FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be combined into a single state object 193 // so they can be set and cleared together and passed between threads together. 194 OwnPtr<ParsedChunk> m_lastChunkBeforeScript; 195 Deque<OwnPtr<ParsedChunk> > m_speculations; 196 WeakPtrFactory<HTMLDocumentParser> m_weakFactory; 197 WeakPtr<BackgroundHTMLParser> m_backgroundParser; 198 OwnPtr<HTMLResourcePreloader> m_preloader; 199 200 bool m_isPinnedToMainThread; 201 bool m_endWasDelayed; 202 bool m_haveBackgroundParser; 203 unsigned m_pumpSessionNestingLevel; 204 }; 205 206 } 207 208 #endif 209