1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef HTMLDocumentParser_h 27 #define HTMLDocumentParser_h 28 29 #include "core/dom/ParserContentPolicy.h" 30 #include "core/dom/ScriptableDocumentParser.h" 31 #include "core/html/parser/BackgroundHTMLInputStream.h" 32 #include "core/html/parser/CompactHTMLToken.h" 33 #include "core/html/parser/HTMLInputStream.h" 34 #include "core/html/parser/HTMLParserOptions.h" 35 #include "core/html/parser/HTMLPreloadScanner.h" 36 #include "core/html/parser/HTMLScriptRunnerHost.h" 37 #include "core/html/parser/HTMLSourceTracker.h" 38 #include "core/html/parser/HTMLToken.h" 39 #include "core/html/parser/HTMLTokenizer.h" 40 #include "core/html/parser/HTMLTreeBuilderSimulator.h" 41 #include "core/html/parser/XSSAuditor.h" 42 #include "core/html/parser/XSSAuditorDelegate.h" 43 #include "core/loader/cache/ResourceClient.h" 44 #include "core/platform/text/SegmentedString.h" 45 #include "wtf/Deque.h" 46 #include "wtf/OwnPtr.h" 47 #include "wtf/WeakPtr.h" 48 #include "wtf/text/TextPosition.h" 49 50 namespace WebCore { 51 52 class BackgroundHTMLParser; 53 class CompactHTMLToken; 54 class Document; 55 class DocumentFragment; 56 class HTMLDocument; 57 class HTMLParserScheduler; 58 class HTMLScriptRunner; 59 class HTMLTreeBuilder; 60 class HTMLResourcePreloader; 61 class ScriptController; 62 class ScriptSourceCode; 63 64 class PumpSession; 65 66 class HTMLDocumentParser : public ScriptableDocumentParser, HTMLScriptRunnerHost, ResourceClient { 67 WTF_MAKE_FAST_ALLOCATED; 68 public: 69 static PassRefPtr<HTMLDocumentParser> create(Document* document, bool reportErrors) 70 { 71 return adoptRef(new HTMLDocumentParser(document, reportErrors)); 72 } 73 virtual ~HTMLDocumentParser(); 74 75 // Exposed for HTMLParserScheduler 76 void resumeParsingAfterYield(); 77 78 static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, ParserContentPolicy = AllowScriptingContent); 79 80 HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); } 81 82 virtual TextPosition textPosition() const; 83 virtual OrdinalNumber lineNumber() const; 84 85 virtual void suspendScheduledTasks(); 86 virtual void resumeScheduledTasks(); 87 88 struct ParsedChunk { 89 OwnPtr<CompactHTMLTokenStream> tokens; 90 PreloadRequestStream preloads; 91 XSSInfoStream xssInfos; 92 HTMLTokenizer::State tokenizerState; 93 HTMLTreeBuilderSimulator::State treeBuilderState; 94 HTMLInputCheckpoint inputCheckpoint; 95 TokenPreloadScannerCheckpoint preloadScannerCheckpoint; 96 }; 97 void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>); 98 99 protected: 100 virtual void insert(const SegmentedString&) OVERRIDE; 101 virtual void append(PassRefPtr<StringImpl>) OVERRIDE; 102 virtual void finish() OVERRIDE; 103 104 HTMLDocumentParser(Document*, bool reportErrors); 105 HTMLDocumentParser(DocumentFragment*, Element* contextElement, ParserContentPolicy); 106 107 HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); } 108 109 void forcePlaintextForTextDocument(); 110 111 private: 112 static PassRefPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 113 { 114 return adoptRef(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy)); 115 } 116 117 // DocumentParser 118 virtual void pinToMainThread() OVERRIDE; 119 virtual void detach() OVERRIDE; 120 virtual bool hasInsertionPoint() OVERRIDE; 121 virtual bool processingData() const OVERRIDE; 122 virtual void prepareToStopParsing() OVERRIDE; 123 virtual void stopParsing() OVERRIDE; 124 virtual bool isWaitingForScripts() const OVERRIDE; 125 virtual bool isExecutingScript() const OVERRIDE; 126 virtual void executeScriptsWaitingForResources() OVERRIDE; 127 128 // HTMLScriptRunnerHost 129 virtual void watchForLoad(Resource*) OVERRIDE; 130 virtual void stopWatchingForLoad(Resource*) OVERRIDE; 131 virtual HTMLInputStream& inputStream() { return m_input; } 132 virtual bool hasPreloadScanner() const { return m_preloadScanner.get() && !shouldUseThreading(); } 133 virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE; 134 135 // ResourceClient 136 virtual void notifyFinished(Resource*); 137 138 void startBackgroundParser(); 139 void stopBackgroundParser(); 140 void validateSpeculations(PassOwnPtr<ParsedChunk> lastChunk); 141 void discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunk, PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>); 142 void processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>); 143 void pumpPendingSpeculations(); 144 145 Document* contextForParsingSession(); 146 147 enum SynchronousMode { 148 AllowYield, 149 ForceSynchronous, 150 }; 151 bool canTakeNextToken(SynchronousMode, PumpSession&); 152 void pumpTokenizer(SynchronousMode); 153 void pumpTokenizerIfPossible(SynchronousMode); 154 void constructTreeFromHTMLToken(HTMLToken&); 155 void constructTreeFromCompactHTMLToken(const CompactHTMLToken&); 156 157 void runScriptsForPausedTreeBuilder(); 158 void resumeParsingAfterScriptExecution(); 159 160 void attemptToEnd(); 161 void endIfDelayed(); 162 void attemptToRunDeferredScriptsAndEnd(); 163 void end(); 164 165 bool shouldUseThreading() const { return m_options.useThreading && !m_isPinnedToMainThread; } 166 167 bool isParsingFragment() const; 168 bool isScheduledForResume() const; 169 bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; } 170 bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); } 171 172 HTMLToken& token() { return *m_token; } 173 174 HTMLParserOptions m_options; 175 HTMLInputStream m_input; 176 177 OwnPtr<HTMLToken> m_token; 178 OwnPtr<HTMLTokenizer> m_tokenizer; 179 OwnPtr<HTMLScriptRunner> m_scriptRunner; 180 OwnPtr<HTMLTreeBuilder> m_treeBuilder; 181 OwnPtr<HTMLPreloadScanner> m_preloadScanner; 182 OwnPtr<HTMLPreloadScanner> m_insertionPreloadScanner; 183 OwnPtr<HTMLParserScheduler> m_parserScheduler; 184 HTMLSourceTracker m_sourceTracker; 185 TextPosition m_textPosition; 186 XSSAuditor m_xssAuditor; 187 XSSAuditorDelegate m_xssAuditorDelegate; 188 189 // FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be combined into a single state object 190 // so they can be set and cleared together and passed between threads together. 191 OwnPtr<ParsedChunk> m_lastChunkBeforeScript; 192 Deque<OwnPtr<ParsedChunk> > m_speculations; 193 WeakPtrFactory<HTMLDocumentParser> m_weakFactory; 194 WeakPtr<BackgroundHTMLParser> m_backgroundParser; 195 OwnPtr<HTMLResourcePreloader> m_preloader; 196 197 bool m_isPinnedToMainThread; 198 bool m_endWasDelayed; 199 bool m_haveBackgroundParser; 200 unsigned m_pumpSessionNestingLevel; 201 }; 202 203 } 204 205 #endif 206