/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "core/html/parser/HTMLDocumentParser.h"

#include "HTMLNames.h"
#include "core/dom/DocumentFragment.h"
#include "core/dom/Element.h"
#include "core/html/parser/AtomicHTMLToken.h"
#include "core/html/parser/BackgroundHTMLParser.h"
#include "core/html/parser/CompactHTMLToken.h"
#include "core/html/parser/HTMLIdentifier.h"
#include "core/html/parser/HTMLParserScheduler.h"
#include "core/html/parser/HTMLParserThread.h"
#include "core/html/parser/HTMLPreloadScanner.h"
#include "core/html/parser/HTMLScriptRunner.h"
#include "core/html/parser/HTMLTokenizer.h"
#include "core/html/parser/HTMLTreeBuilder.h"
#include "core/inspector/InspectorInstrumentation.h"
#include "core/page/Frame.h"
#include "core/platform/chromium/TraceEvent.h"
#include "wtf/Functional.h"

namespace WebCore {

using namespace HTMLNames;

// This is a direct transcription of step 4 from:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
//
// Returns the initial tokenizer state for fragment parsing inside
// contextElement. A null contextElement yields DataState. When reportErrors
// is false, the raw-text and script contexts fall back to PLAINTEXTState
// instead of RAWTEXTState / ScriptDataState.
static HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors, const HTMLParserOptions& options)
{
    if (!contextElement)
        return HTMLTokenizer::DataState;

    const QualifiedName& contextTag = contextElement->tagQName();

    if (contextTag.matches(titleTag) || contextTag.matches(textareaTag))
        return HTMLTokenizer::RCDATAState;
    if (contextTag.matches(styleTag)
        || contextTag.matches(xmpTag)
        || contextTag.matches(iframeTag)
        || (contextTag.matches(noembedTag) && options.pluginsEnabled)
        || (contextTag.matches(noscriptTag) && options.scriptEnabled)
        || contextTag.matches(noframesTag))
        return reportErrors ? HTMLTokenizer::RAWTEXTState : HTMLTokenizer::PLAINTEXTState;
    if (contextTag.matches(scriptTag))
        return reportErrors ? HTMLTokenizer::ScriptDataState : HTMLTokenizer::PLAINTEXTState;
    if (contextTag.matches(plaintextTag))
        return HTMLTokenizer::PLAINTEXTState;
    return HTMLTokenizer::DataState;
}

// Constructs a parser for a whole Document. When m_options.useThreading is
// set, the main-thread token and tokenizer start out null; pinToMainThread(),
// insert() and finish() create them on demand.
HTMLDocumentParser::HTMLDocumentParser(Document* document, bool reportErrors)
    : ScriptableDocumentParser(document)
    , m_options(document)
    , m_token(m_options.useThreading ? nullptr : adoptPtr(new HTMLToken))
    , m_tokenizer(m_options.useThreading ? nullptr : HTMLTokenizer::create(m_options))
    , m_scriptRunner(HTMLScriptRunner::create(document, this))
    , m_treeBuilder(HTMLTreeBuilder::create(this, document, parserContentPolicy(), reportErrors, m_options))
    , m_parserScheduler(HTMLParserScheduler::create(this))
    , m_xssAuditorDelegate(document)
    , m_weakFactory(this)
    , m_preloader(adoptPtr(new HTMLResourcePreloader(document)))
    , m_isPinnedToMainThread(false)
    , m_endWasDelayed(false)
    , m_haveBackgroundParser(false)
    , m_pumpSessionNestingLevel(0)
{
    ASSERT(shouldUseThreading() || (m_token && m_tokenizer));
}

// FIXME: Member variables should be grouped into self-initializing structs to
// minimize code duplication between these constructors.
//
// Constructs a parser for a DocumentFragment. Unlike the Document constructor,
// this one always creates the main-thread token and tokenizer, pins the parser
// to the main thread, and does not create a script runner, parser scheduler or
// preloader. The tokenizer's initial state is derived from contextElement.
HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
    : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
    , m_options(fragment->document())
    , m_token(adoptPtr(new HTMLToken))
    , m_tokenizer(HTMLTokenizer::create(m_options))
    , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, this->parserContentPolicy(), m_options))
    , m_xssAuditorDelegate(fragment->document())
    , m_weakFactory(this)
    , m_isPinnedToMainThread(true)
    , m_endWasDelayed(false)
    , m_haveBackgroundParser(false)
    , m_pumpSessionNestingLevel(0)
{
    ASSERT(!shouldUseThreading());
    bool reportErrors = false; // For now document fragment parsing never reports errors.
    m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options));
    m_xssAuditor.initForFragment();
}

// The assertions below record what must already be torn down before
// destruction: the scheduler, both preload scanners, and the background
// parser (see detach() and stopParsing()), and no pump session in progress.
HTMLDocumentParser::~HTMLDocumentParser()
{
    ASSERT(!m_parserScheduler);
    ASSERT(!m_pumpSessionNestingLevel);
    ASSERT(!m_preloadScanner);
    ASSERT(!m_insertionPreloadScanner);
    ASSERT(!m_haveBackgroundParser);
    // FIXME: We should be able to ASSERT(m_speculations.isEmpty()),
    // but there are cases where that's not true currently. For example,
    // when we're told to stop parsing before we've consumed all the input.
}

// Marks this parser as pinned to the main thread and, if they do not exist
// yet, creates the main-thread token and tokenizer. Must be called before a
// background parser has been started and at most once (see the ASSERTs).
void HTMLDocumentParser::pinToMainThread()
{
    ASSERT(!m_haveBackgroundParser);
    ASSERT(!m_isPinnedToMainThread);
    m_isPinnedToMainThread = true;
    if (!m_tokenizer) {
        ASSERT(!m_token);
        m_token = adoptPtr(new HTMLToken);
        m_tokenizer = HTMLTokenizer::create(m_options);
    }
}

// Stops any background parser, detaches the base parser, the script runner
// (if any) and the tree builder, and drops the preload scanners and scheduler.
void HTMLDocumentParser::detach()
{
    if (m_haveBackgroundParser)
        stopBackgroundParser();
    DocumentParser::detach();
    if (m_scriptRunner)
        m_scriptRunner->detach();
    m_treeBuilder->detach();
    // FIXME: It seems wrong that we would have a preload scanner here.
    // Yet during fast/dom/HTMLScriptElement/script-load-events.html we do.
    m_preloadScanner.clear();
    m_insertionPreloadScanner.clear();
    m_parserScheduler.clear(); // Deleting the scheduler will clear any timers.
}

// Stops parsing: forwards to the base class, drops the scheduler and stops
// the background parser if one is running.
void HTMLDocumentParser::stopParsing()
{
    DocumentParser::stopParsing();
    m_parserScheduler.clear(); // Deleting the scheduler will clear any timers.
    if (m_haveBackgroundParser)
        stopBackgroundParser();
}

// This kicks off "Once the user agent stops parsing" as described by:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
void HTMLDocumentParser::prepareToStopParsing()
{
    // FIXME: It may not be correct to disable this for the background parser.
    // That means hasInsertionPoint() may not be correct in some cases.
    ASSERT(!hasInsertionPoint() || m_haveBackgroundParser);

    // pumpTokenizer can cause this parser to be detached from the Document,
    // but we need to ensure it isn't deleted yet.
    RefPtr<HTMLDocumentParser> protect(this);

    // NOTE: This pump should only ever emit buffered character tokens,
    // so ForceSynchronous vs. AllowYield should be meaningless.
    if (m_tokenizer) {
        ASSERT(!m_haveBackgroundParser);
        pumpTokenizerIfPossible(ForceSynchronous);
    }

    if (isStopped())
        return;

    DocumentParser::prepareToStopParsing();

    // We will not have a scriptRunner when parsing a DocumentFragment.
    if (m_scriptRunner)
        document()->setReadyState(Document::Interactive);

    // Setting the ready state above can fire mutation event and detach us
    // from underneath. In that case, just bail out.
    if (isDetached())
        return;

    attemptToRunDeferredScriptsAndEnd();
}

// Forwards to the tree builder, which knows whether it was created for a
// DocumentFragment.
bool HTMLDocumentParser::isParsingFragment() const
{
    return m_treeBuilder->isParsingFragment();
}

// True while a resume is scheduled, a pump session is in progress, or a
// background parser exists.
bool HTMLDocumentParser::processingData() const
{
    return isScheduledForResume() || inPumpSession() || m_haveBackgroundParser;
}

// Pumps the tokenizer unless the parser is stopped, is waiting for scripts,
// or already has a resume scheduled.
void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode)
{
    if (isStopped())
        return;
    if (isWaitingForScripts())
        return;

    // Once a resume is scheduled, HTMLParserScheduler controls when we next pump.
    if (isScheduledForResume()) {
        ASSERT(mode == AllowYield);
        return;
    }

    pumpTokenizer(mode);
}

// Safe to call without a scheduler (e.g. for fragment parsers, which never
// create one); returns false in that case.
bool HTMLDocumentParser::isScheduledForResume() const
{
    return m_parserScheduler && m_parserScheduler->isScheduledForResume();
}

// Used by HTMLParserScheduler
void HTMLDocumentParser::resumeParsingAfterYield()
{
    ASSERT(!m_isPinnedToMainThread);
    // pumpTokenizer can cause this parser to be detached from the Document,
    // but we need to ensure it isn't deleted yet.
    RefPtr<HTMLDocumentParser> protect(this);

    if (m_haveBackgroundParser) {
        pumpPendingSpeculations();
        return;
    }

    // We should never be here unless we can pump immediately. Call pumpTokenizer()
    // directly so that ASSERTS will fire if we're wrong.
    pumpTokenizer(AllowYield);
    endIfDelayed();
}

// Takes the pending script element from the tree builder and, when a script
// runner exists, hands it over for execution.
void HTMLDocumentParser::runScriptsForPausedTreeBuilder()
{
    ASSERT(scriptingContentIsAllowed(parserContentPolicy()));

    TextPosition scriptStartPosition = TextPosition::belowRangePosition();
    RefPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartPosition);
    // We will not have a scriptRunner when parsing a DocumentFragment.
    if (m_scriptRunner)
        m_scriptRunner->execute(scriptElement.release(), scriptStartPosition);
}

// Decides whether the pump loop may consume another token. Returns false when
// the parser is stopped, still blocked on a script, needs to yield before
// running a script, or a location change is pending on the frame. In
// AllowYield mode this also lets the scheduler update session.needsYield.
bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& session)
{
    if (isStopped())
        return false;

    ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous);

    if (isWaitingForScripts()) {
        if (mode == AllowYield)
            m_parserScheduler->checkForYieldBeforeScript(session);

        // If we don't run the script, we cannot allow the next token to be taken.
        if (session.needsYield)
            return false;

        // If we're paused waiting for a script, we try to execute scripts before continuing.
        runScriptsForPausedTreeBuilder();
        if (isStopped())
            return false;
        if (isWaitingForScripts())
            return false;
    }

    // FIXME: It's wrong for the HTMLDocumentParser to reach back to the
    // Frame, but this approach is how the old parser handled
    // stopping when the page assigns window.location. What really
    // should happen is that assigning window.location causes the
    // parser to stop parsing cleanly. The problem is we're not
    // prepared to do that at every point where we run JavaScript.
    if (!isParsingFragment()
        && document()->frame() && document()->frame()->navigationScheduler()->locationChangePending())
        return false;

    if (mode == AllowYield)
        m_parserScheduler->checkForYieldBeforeToken(session);

    return true;
}

// Entry point for chunks produced by the background parser. The chunk is
// either queued as a speculation (after issuing its preloads) or processed
// immediately via pumpPendingSpeculations().
void HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk> chunk)
{
    // alert(), runModalDialog, and the JavaScript Debugger all run nested event loops
    // which can cause this method to be re-entered. We detect re-entry using
    // document()->activeParserCount(), save the chunk as a speculation, and return.
    if (isWaitingForScripts() || !m_speculations.isEmpty() || document()->activeParserCount() > 0) {
        m_preloader->takeAndPreload(chunk->preloads);
        m_speculations.append(chunk);
        return;
    }

    // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document,
    // but we need to ensure it isn't deleted yet.
    RefPtr<HTMLDocumentParser> protect(this);

    ASSERT(m_speculations.isEmpty());
    chunk->preloads.clear(); // We don't need to preload because we're going to parse immediately.
    m_speculations.append(chunk);
    pumpPendingSpeculations();
}

// Checks, after a script has run, whether the queued speculations are still
// usable. Takes ownership of the main-thread token/tokenizer (leaving the
// members null); if their state invalidates the speculations, the background
// parser is restarted from this chunk's checkpoint.
void HTMLDocumentParser::validateSpeculations(PassOwnPtr<ParsedChunk> chunk)
{
    ASSERT(chunk);
    if (isWaitingForScripts()) {
        // We're waiting on a network script, just save the chunk, we'll get
        // a second validateSpeculations call after the script completes.
        // This call should have been made immediately after runScriptsForPausedTreeBuilder
        // which may have started a network load and left us waiting.
        ASSERT(!m_lastChunkBeforeScript);
        m_lastChunkBeforeScript = chunk;
        return;
    }

    ASSERT(!m_lastChunkBeforeScript);
    OwnPtr<HTMLTokenizer> tokenizer = m_tokenizer.release();
    OwnPtr<HTMLToken> token = m_token.release();

    if (!tokenizer) {
        // There must not have been any changes to the HTMLTokenizer state on
        // the main thread, which means the speculation buffer is correct.
        return;
    }

    // Currently we're only smart enough to reuse the speculation buffer if the tokenizer
    // both starts and ends in the DataState. That state is simplest because the HTMLToken
    // is always in the Uninitialized state. We should consider whether we can reuse the
    // speculation buffer in other states, but we'd likely need to do something more
    // sophisticated with the HTMLToken.
    if (chunk->tokenizerState == HTMLTokenizer::DataState
        && tokenizer->state() == HTMLTokenizer::DataState
        && m_input.current().isEmpty()
        && chunk->treeBuilderState == HTMLTreeBuilderSimulator::stateFor(m_treeBuilder.get())) {
        ASSERT(token->isUninitialized());
        return;
    }

    discardSpeculationsAndResumeFrom(chunk, token.release(), tokenizer.release());
}

// Throws away all queued speculations and asks the background parser to
// resume from a checkpoint built from the given token/tokenizer, the current
// tree builder state, lastChunkBeforeScript's input and preload-scanner
// checkpoints, and an isolated copy of the unparsed main-thread input.
// Outstanding weak pointers to this parser are revoked first.
void HTMLDocumentParser::discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunkBeforeScript, PassOwnPtr<HTMLToken> token, PassOwnPtr<HTMLTokenizer> tokenizer)
{
    m_weakFactory.revokeAll();
    m_speculations.clear();

    OwnPtr<BackgroundHTMLParser::Checkpoint> checkpoint = adoptPtr(new BackgroundHTMLParser::Checkpoint);
    checkpoint->parser = m_weakFactory.createWeakPtr();
    checkpoint->token = token;
    checkpoint->tokenizer = tokenizer;
    checkpoint->treeBuilderState = HTMLTreeBuilderSimulator::stateFor(m_treeBuilder.get());
    checkpoint->inputCheckpoint = lastChunkBeforeScript->inputCheckpoint;
    checkpoint->preloadScannerCheckpoint = lastChunkBeforeScript->preloadScannerCheckpoint;
    checkpoint->unparsedInput = m_input.current().toString().isolatedCopy();
    m_input.current().clear(); // FIXME: This should be passed in instead of cleared.

    ASSERT(checkpoint->unparsedInput.isSafeToSendToAnotherThread());
    HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::resumeFrom, m_backgroundParser, checkpoint.release()));
}

// Builds the tree from one chunk of background-parsed tokens. Reports any
// XSS infos carried by the chunk first, then feeds tokens to the tree builder
// until the chunk is exhausted, the parser stops, a script pauses the tree
// builder, EOF is reached, or a location change is pending.
void HTMLDocumentParser::processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk> popChunk)
{
    ASSERT_WITH_SECURITY_IMPLICATION(!document()->activeParserCount());
    ASSERT(!isParsingFragment());
    ASSERT(!isWaitingForScripts());
    ASSERT(!isStopped());
    // ASSERT that this object is both attached to the Document and protected.
    ASSERT(refCount() >= 2);
    ASSERT(shouldUseThreading());
    ASSERT(!m_tokenizer);
    ASSERT(!m_token);
    ASSERT(!m_lastChunkBeforeScript);

    ActiveParserSession session(contextForParsingSession());

    OwnPtr<ParsedChunk> chunk(popChunk);
    OwnPtr<CompactHTMLTokenStream> tokens = chunk->tokens.release();

    HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::startedChunkWithCheckpoint, m_backgroundParser, chunk->inputCheckpoint));

    for (XSSInfoStream::const_iterator it = chunk->xssInfos.begin(); it != chunk->xssInfos.end(); ++it) {
        m_textPosition = (*it)->m_textPosition;
        m_xssAuditorDelegate.didBlockScript(**it);
        if (isStopped())
            break;
    }

    for (Vector<CompactHTMLToken>::const_iterator it = tokens->begin(); it != tokens->end(); ++it) {
        ASSERT(!isWaitingForScripts());

        if (!isParsingFragment()
            && document()->frame() && document()->frame()->navigationScheduler()->locationChangePending()) {

            // To match main-thread parser behavior (which never checks locationChangePending on the EOF path)
            // we peek to see if this chunk has an EOF and process it anyway.
            if (tokens->last().type() == HTMLToken::EndOfFile) {
                ASSERT(m_speculations.isEmpty()); // There should never be any chunks after the EOF.
                prepareToStopParsing();
            }
            break;
        }

        m_textPosition = it->textPosition();

        constructTreeFromCompactHTMLToken(*it);

        if (isStopped())
            break;

        if (isWaitingForScripts()) {
            ASSERT(it + 1 == tokens->end()); // The </script> is assumed to be the last token of this bunch.
            runScriptsForPausedTreeBuilder();
            validateSpeculations(chunk.release());
            break;
        }

        if (it->type() == HTMLToken::EndOfFile) {
            ASSERT(it + 1 == tokens->end()); // The EOF is assumed to be the last token of this bunch.
            ASSERT(m_speculations.isEmpty()); // There should never be any chunks after the EOF.
            prepareToStopParsing();
            break;
        }

        ASSERT(!m_tokenizer);
        ASSERT(!m_token);
    }
}

// Processes queued speculation chunks in order until the queue is empty, the
// parser stops or starts waiting for scripts, or parserTimeLimit seconds have
// elapsed with chunks still pending (in which case a resume is scheduled).
void HTMLDocumentParser::pumpPendingSpeculations()
{
    // FIXME: Share this constant with the parser scheduler.
    const double parserTimeLimit = 0.500;

    // ASSERT that this object is both attached to the Document and protected.
    ASSERT(refCount() >= 2);
    // If this assert fails, you need to call validateSpeculations to make sure
    // m_tokenizer and m_token don't have state that invalidates m_speculations.
    ASSERT(!m_tokenizer);
    ASSERT(!m_token);
    ASSERT(!m_lastChunkBeforeScript);
    ASSERT(!isWaitingForScripts());
    ASSERT(!isStopped());

    // FIXME: Pass in current input length.
    InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), lineNumber().zeroBasedInt());

    double startTime = currentTime();

    while (!m_speculations.isEmpty()) {
        processParsedChunkFromBackgroundParser(m_speculations.takeFirst());

        // The order matters! If this isStopped(), isWaitingForScripts() can hit an ASSERT since
        // m_document can be null which is used to decide the readiness.
        if (isStopped())
            break;
        if (isWaitingForScripts())
            break;

        if (currentTime() - startTime > parserTimeLimit && !m_speculations.isEmpty()) {
            m_parserScheduler->scheduleForResume();
            break;
        }
    }

    InspectorInstrumentation::didWriteHTML(cookie, lineNumber().zeroBasedInt());
}

// Switches tokenization to PLAINTEXT. In threaded mode the request is posted
// to the background parser (starting it first if necessary); otherwise the
// main-thread tokenizer's state is set directly.
void HTMLDocumentParser::forcePlaintextForTextDocument()
{
    if (shouldUseThreading()) {
        // This method is called before any data is appended, so we have to start
        // the background parser ourselves.
        if (!m_haveBackgroundParser)
            startBackgroundParser();

        HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::forcePlaintextForTextDocument, m_backgroundParser));
    } else
        m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
}

// Returns the Document that parsing sessions should be registered with, or
// null when parsing a fragment.
Document* HTMLDocumentParser::contextForParsingSession()
{
    // The parsing session should interact with the document only when parsing
    // non-fragments. Otherwise, we might delay the load event mistakenly.
    if (isParsingFragment())
        return 0;
    return document();
}

// The main-thread pump loop: repeatedly tokenizes m_input.current() and feeds
// each token to the tree builder while canTakeNextToken() allows it and no
// yield is requested. For non-fragment parsing each token is also source
// tracked and passed through the XSS auditor. Afterwards a resume is scheduled
// if a yield was requested, and the preload scanner is run if the parser is
// now blocked on a script.
void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
{
    ASSERT(!isStopped());
    ASSERT(!isScheduledForResume());
    // ASSERT that this object is both attached to the Document and protected.
    ASSERT(refCount() >= 2);
    ASSERT(m_tokenizer);
    ASSERT(m_token);
    ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous);

    PumpSession session(m_pumpSessionNestingLevel, contextForParsingSession());

    // We tell the InspectorInstrumentation about every pump, even if we
    // end up pumping nothing. It can filter out empty pumps itself.
    // FIXME: m_input.current().length() is only accurate if we
    // end up parsing the whole buffer in this pump. We should pass how
    // much we parsed as part of didWriteHTML instead of willWriteHTML.
    InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().currentLine().zeroBasedInt());

    m_xssAuditor.init(document(), &m_xssAuditorDelegate);

    while (canTakeNextToken(mode, session) && !session.needsYield) {
        if (!isParsingFragment())
            m_sourceTracker.start(m_input.current(), m_tokenizer.get(), token());

        if (!m_tokenizer->nextToken(m_input.current(), token()))
            break;

        if (!isParsingFragment()) {
            m_sourceTracker.end(m_input.current(), m_tokenizer.get(), token());

            // We do not XSS filter innerHTML, which means we (intentionally) fail
            // http/tests/security/xssAuditor/dom-write-innerHTML.html
            if (OwnPtr<XSSInfo> xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(token(), m_sourceTracker, m_tokenizer->shouldAllowCDATA())))
                m_xssAuditorDelegate.didBlockScript(*xssInfo);
        }

        constructTreeFromHTMLToken(token());
        ASSERT(token().isUninitialized());
    }

    // Ensure we haven't been totally deref'ed after pumping. Any caller of this
    // function should be holding a RefPtr to this to ensure we weren't deleted.
    ASSERT(refCount() >= 1);

    if (isStopped())
        return;

    if (session.needsYield)
        m_parserScheduler->scheduleForResume();

    if (isWaitingForScripts()) {
        ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
        if (!m_preloadScanner) {
            m_preloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url()));
            m_preloadScanner->appendToEnd(m_input.current());
        }
        m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL());
    }

    InspectorInstrumentation::didWriteHTML(cookie, m_input.current().currentLine().zeroBasedInt());
}

// Converts rawToken to an AtomicHTMLToken and passes it to the tree builder.
// rawToken is always left cleared on return; see below for when the clear
// happens relative to tree construction.
void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLToken& rawToken)
{
    AtomicHTMLToken token(rawToken);

    // We clear the rawToken in case constructTreeFromAtomicToken
    // synchronously re-enters the parser. We don't clear the token immediately
    // for Character tokens because the AtomicHTMLToken avoids copying the
    // characters by keeping a pointer to the underlying buffer in the
    // HTMLToken. Fortunately, Character tokens can't cause us to re-enter
    // the parser.
    //
    // FIXME: Stop clearing the rawToken once we start running the parser off
    // the main thread or once we stop allowing synchronous JavaScript
    // execution from parseAttribute.
    if (rawToken.type() != HTMLToken::Character)
        rawToken.clear();

    m_treeBuilder->constructTree(&token);

    if (!rawToken.isUninitialized()) {
        ASSERT(rawToken.type() == HTMLToken::Character);
        rawToken.clear();
    }
}

// Background-parser counterpart of constructTreeFromHTMLToken(): builds an
// AtomicHTMLToken from a CompactHTMLToken and passes it to the tree builder.
void HTMLDocumentParser::constructTreeFromCompactHTMLToken(const CompactHTMLToken& compactToken)
{
    AtomicHTMLToken token(compactToken);
    m_treeBuilder->constructTree(&token);
}

bool HTMLDocumentParser::hasInsertionPoint()
{
    // FIXME: The wasCreatedByScript() branch here might not be fully correct.
    // Our model of the EOF character differs slightly from the one in
    // the spec because our treatment is uniform between network-sourced
    // and script-sourced input streams whereas the spec treats them
    // differently.
    return m_input.hasInsertionPoint() || (wasCreatedByScript() && !m_input.haveSeenEndOfFile());
}

// Inserts source at the current insertion point and pumps synchronously.
// Creates the main-thread token/tokenizer on demand. If the parser ends up
// blocked on a script, the inserted text is also run through a dedicated
// insertion preload scanner.
void HTMLDocumentParser::insert(const SegmentedString& source)
{
    if (isStopped())
        return;

    // pumpTokenizer can cause this parser to be detached from the Document,
    // but we need to ensure it isn't deleted yet.
    RefPtr<HTMLDocumentParser> protect(this);

    if (!m_tokenizer) {
        ASSERT(!inPumpSession());
        ASSERT(m_haveBackgroundParser || wasCreatedByScript());
        m_token = adoptPtr(new HTMLToken);
        m_tokenizer = HTMLTokenizer::create(m_options);
    }

    SegmentedString excludedLineNumberSource(source);
    excludedLineNumberSource.setExcludeLineNumbers();
    m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
    pumpTokenizerIfPossible(ForceSynchronous);

    if (isWaitingForScripts()) {
        // Check the document.write() output with a separate preload scanner as
        // the main scanner can't deal with insertions.
        if (!m_insertionPreloadScanner)
            m_insertionPreloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url()));
        m_insertionPreloadScanner->appendToEnd(source);
        m_insertionPreloadScanner->scan(m_preloader.get(), document()->baseElementURL());
    }

    endIfDelayed();
}

// Creates the BackgroundHTMLParser on the parser thread. A weak reference is
// created unbound here and handed to BackgroundHTMLParser::create together
// with a configuration holding the options, a weak pointer back to this
// parser, an XSSAuditor and a TokenPreloadScanner.
void HTMLDocumentParser::startBackgroundParser()
{
    ASSERT(shouldUseThreading());
    ASSERT(!m_haveBackgroundParser);
    m_haveBackgroundParser = true;

    HTMLIdentifier::init();

    RefPtr<WeakReference<BackgroundHTMLParser> > reference = WeakReference<BackgroundHTMLParser>::createUnbound();
    m_backgroundParser = WeakPtr<BackgroundHTMLParser>(reference);

    OwnPtr<BackgroundHTMLParser::Configuration> config = adoptPtr(new BackgroundHTMLParser::Configuration);
    config->options = m_options;
    config->parser = m_weakFactory.createWeakPtr();
    config->xssAuditor = adoptPtr(new XSSAuditor);
    config->xssAuditor->init(document(), &m_xssAuditorDelegate);
    config->preloadScanner = adoptPtr(new TokenPreloadScanner(document()->url().copy()));

    ASSERT(config->xssAuditor->isSafeToSendToAnotherThread());
    ASSERT(config->preloadScanner->isSafeToSendToAnotherThread());
    HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::create, reference.release(), config.release()));
}

// Posts a stop request to the background parser and revokes all weak pointers
// to this parser that were handed out via m_weakFactory.
void HTMLDocumentParser::stopBackgroundParser()
{
    ASSERT(shouldUseThreading());
    ASSERT(m_haveBackgroundParser);
    m_haveBackgroundParser = false;

    HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::stop, m_backgroundParser));
    m_weakFactory.revokeAll();
}

// Appends network data. In threaded mode the data is forwarded to the
// background parser (started on first use). Otherwise it is appended to the
// main-thread input (and the preload scanner, if one is active) and the
// tokenizer is pumped, unless we are inside a nested pump session.
void HTMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
{
    if (isStopped())
        return;

    if (shouldUseThreading()) {
        if (!m_haveBackgroundParser)
            startBackgroundParser();

        ASSERT(inputSource->hasOneRef());
        TRACE_EVENT1("net", "HTMLDocumentParser::append", "size", inputSource->length());
        // NOTE: Important that the String temporary is destroyed before we post the task
        // otherwise the String could call deref() on a StringImpl now owned by the background parser.
        // We would like to ASSERT(closure.arg3()->hasOneRef()) but sadly the args are private.
        Closure closure = bind(&BackgroundHTMLParser::append, m_backgroundParser, String(inputSource));
        HTMLParserThread::shared()->postTask(closure);
        return;
    }

    // pumpTokenizer can cause this parser to be detached from the Document,
    // but we need to ensure it isn't deleted yet.
    RefPtr<HTMLDocumentParser> protect(this);
    TRACE_EVENT1("net", "HTMLDocumentParser::append", "size", inputSource->length());
    String source(inputSource);

    if (m_preloadScanner) {
        if (m_input.current().isEmpty() && !isWaitingForScripts()) {
            // We have parsed until the end of the current input and so are now moving ahead of the preload scanner.
            // Clear the scanner so we know to scan starting from the current input point if we block again.
            m_preloadScanner.clear();
        } else {
            m_preloadScanner->appendToEnd(source);
            if (isWaitingForScripts())
                m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL());
        }
    }

    m_input.appendToEnd(source);

    if (inPumpSession()) {
        // We've gotten data off the network in a nested write.
        // We don't want to consume any more of the input stream now. Do
        // not worry. We'll consume this data in a less-nested write().
        return;
    }

    // A couple pinToMainThread() callers require synchronous parsing, but can't
    // easily use the insert() method, so we hack append() for them to be synchronous.
    // javascript: url handling is one such caller.
    // FIXME: This is gross, and we should separate the concept of synchronous parsing
    // from insert() so that only document.write() uses insert.
    if (m_isPinnedToMainThread)
        pumpTokenizerIfPossible(ForceSynchronous);
    else
        pumpTokenizerIfPossible(AllowYield);

    endIfDelayed();
}

// Final step of parsing: stops the background parser if one is running and
// tells the tree builder that parsing has finished.
void HTMLDocumentParser::end()
{
    ASSERT(!isDetached());
    ASSERT(!isScheduledForResume());

    if (m_haveBackgroundParser)
        stopBackgroundParser();

    // Informs the rest of WebCore that parsing is really finished (and deletes this).
    m_treeBuilder->finished();
}

// Calls end() unless a script runner exists and its
// executeScriptsWaitingForParsing() returns false.
void HTMLDocumentParser::attemptToRunDeferredScriptsAndEnd()
{
    ASSERT(isStopping());
    // FIXME: It may not be correct to disable this for the background parser.
    // That means hasInsertionPoint() may not be correct in some cases.
    ASSERT(!hasInsertionPoint() || m_haveBackgroundParser);
    if (m_scriptRunner && !m_scriptRunner->executeScriptsWaitingForParsing())
        return;
    end();
}

// Begins stopping the parser, or records that the end was delayed so that
// endIfDelayed() can retry later.
void HTMLDocumentParser::attemptToEnd()
{
    // finish() indicates we will not receive any more data. If we are waiting on
    // an external script to load, we can't finish parsing quite yet.

    if (shouldDelayEnd()) {
        m_endWasDelayed = true;
        return;
    }
    prepareToStopParsing();
}

// Retries a previously delayed end once shouldDelayEnd() no longer holds.
void HTMLDocumentParser::endIfDelayed()
{
    // If we've already been detached, don't bother ending.
    if (isDetached())
        return;

    if (!m_endWasDelayed || shouldDelayEnd())
        return;

    m_endWasDelayed = false;
    prepareToStopParsing();
}

// Signals that no more data will arrive. With a background parser the finish
// request is forwarded to it; otherwise the input stream is marked as ended
// and the parser attempts to end on the main thread.
void HTMLDocumentParser::finish()
{
    // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
    // make sense to call any methods on DocumentParser once it's been stopped.
    // However, FrameLoader::stop calls DocumentParser::finish unconditionally.

    // Empty documents never got an append() call, and thus have never started
    // a background parser. In those cases, we ignore shouldUseThreading()
    // and fall through to the non-threading case.
    if (m_haveBackgroundParser) {
        if (!m_input.haveSeenEndOfFile())
            m_input.closeWithoutMarkingEndOfFile();
        HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::finish, m_backgroundParser));
        return;
    }

    if (!m_tokenizer) {
        ASSERT(!m_token);
        // We're finishing before receiving any data. Rather than booting up
        // the background parser just to spin it down, we finish parsing
        // synchronously.
        m_token = adoptPtr(new HTMLToken);
        m_tokenizer = HTMLTokenizer::create(m_options);
    }

    // We're not going to get any more data off the network, so we tell the
    // input stream we've reached the end of file. finish() can be called more
    // than once, if the first time does not call end().
    if (!m_input.haveSeenEndOfFile())
        m_input.markEndOfFile();

    attemptToEnd();
}

// Returns false when there is no script runner; otherwise defers to the
// script runner.
bool HTMLDocumentParser::isExecutingScript() const
{
    if (!m_scriptRunner)
        return false;
    return m_scriptRunner->isExecutingScript();
}

// With a background parser the line comes from m_textPosition (set while
// processing chunks); otherwise it is read from the current input stream.
OrdinalNumber HTMLDocumentParser::lineNumber() const
{
    if (m_haveBackgroundParser)
        return m_textPosition.m_line;

    return m_input.current().currentLine();
}

// Same source selection as lineNumber(), but returns line and column.
TextPosition HTMLDocumentParser::textPosition() const
{
    if (m_haveBackgroundParser)
        return m_textPosition;

    const SegmentedString& currentString = m_input.current();
    OrdinalNumber line = currentString.currentLine();
    OrdinalNumber column = currentString.currentColumn();

    return TextPosition(line, column);
}

bool HTMLDocumentParser::isWaitingForScripts() const
{
    // When the TreeBuilder encounters a </script> tag, it returns to the HTMLDocumentParser
    // where the script is transferred from the treebuilder to the script runner.
    // The script runner will hold the script until it's loaded and run. During
    // any of this time, we want to count ourselves as "waiting for a script" and thus
    // run the preload scanner, as well as delay completion of parsing.
    bool treeBuilderHasBlockingScript = m_treeBuilder->hasParserBlockingScript();
    bool scriptRunnerHasBlockingScript = m_scriptRunner && m_scriptRunner->hasParserBlockingScript();
    // Since the parser is paused while a script runner has a blocking script, it should
    // never be possible to end up with both objects holding a blocking script.
    ASSERT(!(treeBuilderHasBlockingScript && scriptRunnerHasBlockingScript));
    // If either object has a blocking script, the parser should be paused.
    return treeBuilderHasBlockingScript || scriptRunnerHasBlockingScript;
}

// Resumes parsing once no script is executing or blocking. With a background
// parser, the chunk saved before the script is validated and pending
// speculations are pumped; otherwise the main-thread tokenizer is pumped.
void HTMLDocumentParser::resumeParsingAfterScriptExecution()
{
    ASSERT(!isExecutingScript());
    ASSERT(!isWaitingForScripts());

    if (m_haveBackgroundParser) {
        validateSpeculations(m_lastChunkBeforeScript.release());
        ASSERT(!m_lastChunkBeforeScript);
        // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document,
        // but we need to ensure it isn't deleted yet.
        RefPtr<HTMLDocumentParser> protect(this);
        pumpPendingSpeculations();
        return;
    }

    m_insertionPreloadScanner.clear();
    pumpTokenizerIfPossible(AllowYield);
    endIfDelayed();
}

// Registers this parser as a client of a not-yet-loaded resource.
void HTMLDocumentParser::watchForLoad(Resource* resource)
{
    ASSERT(!resource->isLoaded());
    // addClient would call notifyFinished if the load were complete.
    // Callers do not expect to be re-entered from this call, so they should
    // not pass an already-loaded Resource.
    resource->addClient(this);
}

// Unregisters this parser as a client of the resource.
void HTMLDocumentParser::stopWatchingForLoad(Resource* resource)
{
    resource->removeClient(this);
}

// Feeds the current input stream to the existing preload scanner and scans it.
// Requires that a preload scanner already exists.
void HTMLDocumentParser::appendCurrentInputStreamToPreloadScannerAndScan()
{
    ASSERT(m_preloadScanner);
    m_preloadScanner->appendToEnd(m_input.current());
    m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL());
}

// Resource-load callback. While stopping, it retries the deferred scripts and
// end; otherwise it lets the script runner execute scripts that were waiting
// on this resource and resumes parsing if nothing is blocking any more.
void HTMLDocumentParser::notifyFinished(Resource* cachedResource)
{
    // pumpTokenizer can cause this parser to be detached from the Document,
    // but we need to ensure it isn't deleted yet.
    RefPtr<HTMLDocumentParser> protect(this);

    ASSERT(m_scriptRunner);
    ASSERT(!isExecutingScript());
    if (isStopping()) {
        attemptToRunDeferredScriptsAndEnd();
        return;
    }

    m_scriptRunner->executeScriptsWaitingForLoad(cachedResource);
    if (!isWaitingForScripts())
        resumeParsingAfterScriptExecution();
}

void HTMLDocumentParser::executeScriptsWaitingForResources()
{
    // Document only calls this when the Document owns the DocumentParser
    // so this will not be called in the DocumentFragment case.
    ASSERT(m_scriptRunner);
    // Ignore calls unless we have a script blocking the parser waiting on a
    // stylesheet load. Otherwise we are currently parsing and this
    // is a re-entrant call from encountering a </ style> tag.
    if (!m_scriptRunner->hasScriptsWaitingForResources())
        return;

    // pumpTokenizer can cause this parser to be detached from the Document,
    // but we need to ensure it isn't deleted yet.
933 RefPtr<HTMLDocumentParser> protect(this); 934 m_scriptRunner->executeScriptsWaitingForResources(); 935 if (!isWaitingForScripts()) 936 resumeParsingAfterScriptExecution(); 937 } 938 939 void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 940 { 941 RefPtr<HTMLDocumentParser> parser = HTMLDocumentParser::create(fragment, contextElement, parserContentPolicy); 942 parser->insert(source); // Use insert() so that the parser will not yield. 943 parser->finish(); 944 ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151> 945 parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction. 946 } 947 948 void HTMLDocumentParser::suspendScheduledTasks() 949 { 950 if (m_parserScheduler) 951 m_parserScheduler->suspend(); 952 } 953 954 void HTMLDocumentParser::resumeScheduledTasks() 955 { 956 if (m_parserScheduler) 957 m_parserScheduler->resume(); 958 } 959 960 } 961