1 /* 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "core/html/parser/BackgroundHTMLParser.h" 28 29 #include "core/html/parser/HTMLDocumentParser.h" 30 #include "core/html/parser/HTMLParserThread.h" 31 #include "core/html/parser/XSSAuditor.h" 32 #include "wtf/MainThread.h" 33 #include "wtf/text/TextPosition.h" 34 35 namespace WebCore { 36 37 // On a network with high latency and high bandwidth, using a device 38 // with a fast CPU, we could end up speculatively tokenizing 39 // the whole document, well ahead of when the main-thread actually needs it. 40 // This is a waste of memory (and potentially time if the speculation fails). 41 // So we limit our outstanding tokens arbitrarily to 10,000. 42 // Our maximal memory spent speculating will be approximately: 43 // (outstandingTokenLimit + pendingTokenLimit) * sizeof(CompactToken) 44 // We use a separate low and high water mark to avoid constantly topping 45 // off the main thread's token buffer. 46 // At time of writing, this is (10000 + 1000) * 28 bytes = ~308kb of memory. 47 // These numbers have not been tuned. 48 static const size_t outstandingTokenLimit = 10000; 49 50 // We limit our chucks to 1000 tokens, to make sure the main 51 // thread is never waiting on the parser thread for tokens. 52 // This was tuned in https://bugs.webkit.org/show_bug.cgi?id=110408. 53 static const size_t pendingTokenLimit = 1000; 54 55 using namespace HTMLNames; 56 57 #ifndef NDEBUG 58 59 static void checkThatTokensAreSafeToSendToAnotherThread(const CompactHTMLTokenStream* tokens) 60 { 61 for (size_t i = 0; i < tokens->size(); ++i) 62 ASSERT(tokens->at(i).isSafeToSendToAnotherThread()); 63 } 64 65 static void checkThatPreloadsAreSafeToSendToAnotherThread(const PreloadRequestStream& preloads) 66 { 67 for (size_t i = 0; i < preloads.size(); ++i) 68 ASSERT(preloads[i]->isSafeToSendToAnotherThread()); 69 } 70 71 static void checkThatXSSInfosAreSafeToSendToAnotherThread(const XSSInfoStream& infos) 72 { 73 for (size_t i = 0; i < infos.size(); ++i) 74 ASSERT(infos[i]->isSafeToSendToAnotherThread()); 75 } 76 77 #endif 78 79 BackgroundHTMLParser::BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> > reference, PassOwnPtr<Configuration> config) 80 : m_weakFactory(reference, this) 81 , m_token(adoptPtr(new HTMLToken)) 82 , m_tokenizer(HTMLTokenizer::create(config->options)) 83 , m_treeBuilderSimulator(config->options) 84 , m_options(config->options) 85 , m_parser(config->parser) 86 , m_pendingTokens(adoptPtr(new CompactHTMLTokenStream)) 87 , m_xssAuditor(config->xssAuditor.release()) 88 , m_preloadScanner(config->preloadScanner.release()) 89 { 90 } 91 92 void BackgroundHTMLParser::append(const String& input) 93 { 94 ASSERT(!m_input.current().isClosed()); 95 m_input.append(input); 96 pumpTokenizer(); 97 } 98 99 void BackgroundHTMLParser::resumeFrom(PassOwnPtr<Checkpoint> checkpoint) 100 { 101 m_parser = checkpoint->parser; 102 m_token = checkpoint->token.release(); 103 m_tokenizer = checkpoint->tokenizer.release(); 104 m_treeBuilderSimulator.setState(checkpoint->treeBuilderState); 105 m_input.rewindTo(checkpoint->inputCheckpoint, checkpoint->unparsedInput); 106 m_preloadScanner->rewindTo(checkpoint->preloadScannerCheckpoint); 107 pumpTokenizer(); 108 } 109 110 void BackgroundHTMLParser::startedChunkWithCheckpoint(HTMLInputCheckpoint inputCheckpoint) 111 { 112 // Note, we should not have to worry about the index being invalid 113 // as messages from the main thread will be processed in FIFO order. 114 m_input.invalidateCheckpointsBefore(inputCheckpoint); 115 pumpTokenizer(); 116 } 117 118 void BackgroundHTMLParser::finish() 119 { 120 markEndOfFile(); 121 pumpTokenizer(); 122 } 123 124 void BackgroundHTMLParser::stop() 125 { 126 delete this; 127 } 128 129 void BackgroundHTMLParser::forcePlaintextForTextDocument() 130 { 131 // This is only used by the TextDocumentParser (a subclass of HTMLDocumentParser) 132 // to force us into the PLAINTEXT state w/o using a <plaintext> tag. 133 // The TextDocumentParser uses a <pre> tag for historical/compatibility reasons. 134 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); 135 } 136 137 void BackgroundHTMLParser::markEndOfFile() 138 { 139 ASSERT(!m_input.current().isClosed()); 140 m_input.append(String(&kEndOfFileMarker, 1)); 141 m_input.close(); 142 } 143 144 void BackgroundHTMLParser::pumpTokenizer() 145 { 146 // No need to start speculating until the main thread has almost caught up. 147 if (m_input.totalCheckpointTokenCount() > outstandingTokenLimit) 148 return; 149 150 while (true) { 151 m_sourceTracker.start(m_input.current(), m_tokenizer.get(), *m_token); 152 if (!m_tokenizer->nextToken(m_input.current(), *m_token)) { 153 // We've reached the end of our current input. 154 sendTokensToMainThread(); 155 break; 156 } 157 m_sourceTracker.end(m_input.current(), m_tokenizer.get(), *m_token); 158 159 { 160 TextPosition position = TextPosition(m_input.current().currentLine(), m_input.current().currentColumn()); 161 162 if (OwnPtr<XSSInfo> xssInfo = m_xssAuditor->filterToken(FilterTokenRequest(*m_token, m_sourceTracker, m_tokenizer->shouldAllowCDATA()))) { 163 xssInfo->m_textPosition = position; 164 m_pendingXSSInfos.append(xssInfo.release()); 165 } 166 167 CompactHTMLToken token(m_token.get(), TextPosition(m_input.current().currentLine(), m_input.current().currentColumn())); 168 169 m_preloadScanner->scan(token, m_input.current(), m_pendingPreloads); 170 171 m_pendingTokens->append(token); 172 } 173 174 m_token->clear(); 175 176 if (!m_treeBuilderSimulator.simulate(m_pendingTokens->last(), m_tokenizer.get()) || m_pendingTokens->size() >= pendingTokenLimit) { 177 sendTokensToMainThread(); 178 // If we're far ahead of the main thread, yield for a bit to avoid consuming too much memory. 179 if (m_input.totalCheckpointTokenCount() > outstandingTokenLimit) 180 break; 181 } 182 } 183 } 184 185 void BackgroundHTMLParser::sendTokensToMainThread() 186 { 187 if (m_pendingTokens->isEmpty()) 188 return; 189 190 #ifndef NDEBUG 191 checkThatTokensAreSafeToSendToAnotherThread(m_pendingTokens.get()); 192 checkThatPreloadsAreSafeToSendToAnotherThread(m_pendingPreloads); 193 checkThatXSSInfosAreSafeToSendToAnotherThread(m_pendingXSSInfos); 194 #endif 195 196 OwnPtr<HTMLDocumentParser::ParsedChunk> chunk = adoptPtr(new HTMLDocumentParser::ParsedChunk); 197 chunk->preloads.swap(m_pendingPreloads); 198 chunk->xssInfos.swap(m_pendingXSSInfos); 199 chunk->tokenizerState = m_tokenizer->state(); 200 chunk->treeBuilderState = m_treeBuilderSimulator.state(); 201 chunk->inputCheckpoint = m_input.createCheckpoint(m_pendingTokens->size()); 202 chunk->preloadScannerCheckpoint = m_preloadScanner->createCheckpoint(); 203 chunk->tokens = m_pendingTokens.release(); 204 callOnMainThread(bind(&HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser, m_parser, chunk.release())); 205 206 m_pendingTokens = adoptPtr(new CompactHTMLTokenStream); 207 } 208 209 } 210