1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "core/html/parser/HTMLDocumentParser.h" 28 29 #include "HTMLNames.h" 30 #include "core/dom/DocumentFragment.h" 31 #include "core/dom/Element.h" 32 #include "core/html/HTMLDocument.h" 33 #include "core/html/parser/AtomicHTMLToken.h" 34 #include "core/html/parser/BackgroundHTMLParser.h" 35 #include "core/html/parser/HTMLParserScheduler.h" 36 #include "core/html/parser/HTMLParserThread.h" 37 #include "core/html/parser/HTMLScriptRunner.h" 38 #include "core/html/parser/HTMLTreeBuilder.h" 39 #include "core/inspector/InspectorInstrumentation.h" 40 #include "core/frame/Frame.h" 41 #include "platform/TraceEvent.h" 42 #include "wtf/Functional.h" 43 44 namespace WebCore { 45 46 using namespace HTMLNames; 47 48 // This is a direct transcription of step 4 from: 49 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case 50 static HTMLTokenizer::State tokenizerStateForContextElement(Element* contextElement, bool reportErrors, const HTMLParserOptions& options) 51 { 52 if (!contextElement) 53 return HTMLTokenizer::DataState; 54 55 const QualifiedName& contextTag = contextElement->tagQName(); 56 57 if (contextTag.matches(titleTag) || contextTag.matches(textareaTag)) 58 return HTMLTokenizer::RCDATAState; 59 if (contextTag.matches(styleTag) 60 || contextTag.matches(xmpTag) 61 || contextTag.matches(iframeTag) 62 || (contextTag.matches(noembedTag) && options.pluginsEnabled) 63 || (contextTag.matches(noscriptTag) && options.scriptEnabled) 64 || contextTag.matches(noframesTag)) 65 return reportErrors ? HTMLTokenizer::RAWTEXTState : HTMLTokenizer::PLAINTEXTState; 66 if (contextTag.matches(scriptTag)) 67 return reportErrors ? HTMLTokenizer::ScriptDataState : HTMLTokenizer::PLAINTEXTState; 68 if (contextTag.matches(plaintextTag)) 69 return HTMLTokenizer::PLAINTEXTState; 70 return HTMLTokenizer::DataState; 71 } 72 73 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors) 74 : ScriptableDocumentParser(document) 75 , m_options(document) 76 , m_token(m_options.useThreading ? nullptr : adoptPtr(new HTMLToken)) 77 , m_tokenizer(m_options.useThreading ? nullptr : HTMLTokenizer::create(m_options)) 78 , m_scriptRunner(HTMLScriptRunner::create(document, this)) 79 , m_treeBuilder(HTMLTreeBuilder::create(this, document, parserContentPolicy(), reportErrors, m_options)) 80 , m_parserScheduler(HTMLParserScheduler::create(this)) 81 , m_xssAuditorDelegate(document) 82 , m_weakFactory(this) 83 , m_preloader(adoptPtr(new HTMLResourcePreloader(document))) 84 , m_isPinnedToMainThread(false) 85 , m_endWasDelayed(false) 86 , m_haveBackgroundParser(false) 87 , m_pumpSessionNestingLevel(0) 88 { 89 ASSERT(shouldUseThreading() || (m_token && m_tokenizer)); 90 } 91 92 // FIXME: Member variables should be grouped into self-initializing structs to 93 // minimize code duplication between these constructors. 94 HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 95 : ScriptableDocumentParser(&fragment->document(), parserContentPolicy) 96 , m_options(&fragment->document()) 97 , m_token(adoptPtr(new HTMLToken)) 98 , m_tokenizer(HTMLTokenizer::create(m_options)) 99 , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, this->parserContentPolicy(), m_options)) 100 , m_xssAuditorDelegate(&fragment->document()) 101 , m_weakFactory(this) 102 , m_isPinnedToMainThread(true) 103 , m_endWasDelayed(false) 104 , m_haveBackgroundParser(false) 105 , m_pumpSessionNestingLevel(0) 106 { 107 ASSERT(!shouldUseThreading()); 108 bool reportErrors = false; // For now document fragment parsing never reports errors. 109 m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options)); 110 m_xssAuditor.initForFragment(); 111 } 112 113 HTMLDocumentParser::~HTMLDocumentParser() 114 { 115 ASSERT(!m_parserScheduler); 116 ASSERT(!m_pumpSessionNestingLevel); 117 ASSERT(!m_preloadScanner); 118 ASSERT(!m_insertionPreloadScanner); 119 ASSERT(!m_haveBackgroundParser); 120 // FIXME: We should be able to ASSERT(m_speculations.isEmpty()), 121 // but there are cases where that's not true currently. For example, 122 // we we're told to stop parsing before we've consumed all the input. 123 } 124 125 void HTMLDocumentParser::pinToMainThread() 126 { 127 ASSERT(!m_haveBackgroundParser); 128 ASSERT(!m_isPinnedToMainThread); 129 m_isPinnedToMainThread = true; 130 if (!m_tokenizer) { 131 ASSERT(!m_token); 132 m_token = adoptPtr(new HTMLToken); 133 m_tokenizer = HTMLTokenizer::create(m_options); 134 } 135 } 136 137 void HTMLDocumentParser::detach() 138 { 139 if (m_haveBackgroundParser) 140 stopBackgroundParser(); 141 DocumentParser::detach(); 142 if (m_scriptRunner) 143 m_scriptRunner->detach(); 144 m_treeBuilder->detach(); 145 // FIXME: It seems wrong that we would have a preload scanner here. 146 // Yet during fast/dom/HTMLScriptElement/script-load-events.html we do. 147 m_preloadScanner.clear(); 148 m_insertionPreloadScanner.clear(); 149 m_parserScheduler.clear(); // Deleting the scheduler will clear any timers. 150 } 151 152 void HTMLDocumentParser::stopParsing() 153 { 154 DocumentParser::stopParsing(); 155 m_parserScheduler.clear(); // Deleting the scheduler will clear any timers. 156 if (m_haveBackgroundParser) 157 stopBackgroundParser(); 158 } 159 160 // This kicks off "Once the user agent stops parsing" as described by: 161 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end 162 void HTMLDocumentParser::prepareToStopParsing() 163 { 164 // FIXME: It may not be correct to disable this for the background parser. 165 // That means hasInsertionPoint() may not be correct in some cases. 166 ASSERT(!hasInsertionPoint() || m_haveBackgroundParser); 167 168 // pumpTokenizer can cause this parser to be detached from the Document, 169 // but we need to ensure it isn't deleted yet. 170 RefPtr<HTMLDocumentParser> protect(this); 171 172 // NOTE: This pump should only ever emit buffered character tokens, 173 // so ForceSynchronous vs. AllowYield should be meaningless. 174 if (m_tokenizer) { 175 ASSERT(!m_haveBackgroundParser); 176 pumpTokenizerIfPossible(ForceSynchronous); 177 } 178 179 if (isStopped()) 180 return; 181 182 DocumentParser::prepareToStopParsing(); 183 184 // We will not have a scriptRunner when parsing a DocumentFragment. 185 if (m_scriptRunner) 186 document()->setReadyState(Document::Interactive); 187 188 // Setting the ready state above can fire mutation event and detach us 189 // from underneath. In that case, just bail out. 190 if (isDetached()) 191 return; 192 193 attemptToRunDeferredScriptsAndEnd(); 194 } 195 196 bool HTMLDocumentParser::isParsingFragment() const 197 { 198 return m_treeBuilder->isParsingFragment(); 199 } 200 201 bool HTMLDocumentParser::processingData() const 202 { 203 return isScheduledForResume() || inPumpSession() || m_haveBackgroundParser; 204 } 205 206 void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode) 207 { 208 if (isStopped()) 209 return; 210 if (isWaitingForScripts()) 211 return; 212 213 // Once a resume is scheduled, HTMLParserScheduler controls when we next pump. 214 if (isScheduledForResume()) { 215 ASSERT(mode == AllowYield); 216 return; 217 } 218 219 pumpTokenizer(mode); 220 } 221 222 bool HTMLDocumentParser::isScheduledForResume() const 223 { 224 return m_parserScheduler && m_parserScheduler->isScheduledForResume(); 225 } 226 227 // Used by HTMLParserScheduler 228 void HTMLDocumentParser::resumeParsingAfterYield() 229 { 230 ASSERT(!m_isPinnedToMainThread); 231 // pumpTokenizer can cause this parser to be detached from the Document, 232 // but we need to ensure it isn't deleted yet. 233 RefPtr<HTMLDocumentParser> protect(this); 234 235 if (m_haveBackgroundParser) { 236 pumpPendingSpeculations(); 237 return; 238 } 239 240 // We should never be here unless we can pump immediately. Call pumpTokenizer() 241 // directly so that ASSERTS will fire if we're wrong. 242 pumpTokenizer(AllowYield); 243 endIfDelayed(); 244 } 245 246 void HTMLDocumentParser::runScriptsForPausedTreeBuilder() 247 { 248 ASSERT(scriptingContentIsAllowed(parserContentPolicy())); 249 250 TextPosition scriptStartPosition = TextPosition::belowRangePosition(); 251 RefPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartPosition); 252 // We will not have a scriptRunner when parsing a DocumentFragment. 253 if (m_scriptRunner) 254 m_scriptRunner->execute(scriptElement.release(), scriptStartPosition); 255 } 256 257 bool HTMLDocumentParser::canTakeNextToken(SynchronousMode mode, PumpSession& session) 258 { 259 if (isStopped()) 260 return false; 261 262 ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous); 263 264 if (isWaitingForScripts()) { 265 if (mode == AllowYield) 266 m_parserScheduler->checkForYieldBeforeScript(session); 267 268 // If we don't run the script, we cannot allow the next token to be taken. 269 if (session.needsYield) 270 return false; 271 272 // If we're paused waiting for a script, we try to execute scripts before continuing. 273 runScriptsForPausedTreeBuilder(); 274 if (isStopped()) 275 return false; 276 if (isWaitingForScripts()) 277 return false; 278 } 279 280 // FIXME: It's wrong for the HTMLDocumentParser to reach back to the 281 // Frame, but this approach is how the old parser handled 282 // stopping when the page assigns window.location. What really 283 // should happen is that assigning window.location causes the 284 // parser to stop parsing cleanly. The problem is we're not 285 // perpared to do that at every point where we run JavaScript. 286 if (!isParsingFragment() 287 && document()->frame() && document()->frame()->navigationScheduler().locationChangePending()) 288 return false; 289 290 if (mode == AllowYield) 291 m_parserScheduler->checkForYieldBeforeToken(session); 292 293 return true; 294 } 295 296 void HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk> chunk) 297 { 298 TRACE_EVENT0("webkit", "HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser"); 299 300 // alert(), runModalDialog, and the JavaScript Debugger all run nested event loops 301 // which can cause this method to be re-entered. We detect re-entry using 302 // hasActiveParser(), save the chunk as a speculation, and return. 303 if (isWaitingForScripts() || !m_speculations.isEmpty() || document()->activeParserCount() > 0) { 304 m_preloader->takeAndPreload(chunk->preloads); 305 m_speculations.append(chunk); 306 return; 307 } 308 309 // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document, 310 // but we need to ensure it isn't deleted yet. 311 RefPtr<HTMLDocumentParser> protect(this); 312 313 ASSERT(m_speculations.isEmpty()); 314 chunk->preloads.clear(); // We don't need to preload because we're going to parse immediately. 315 m_speculations.append(chunk); 316 pumpPendingSpeculations(); 317 } 318 319 void HTMLDocumentParser::validateSpeculations(PassOwnPtr<ParsedChunk> chunk) 320 { 321 ASSERT(chunk); 322 if (isWaitingForScripts()) { 323 // We're waiting on a network script, just save the chunk, we'll get 324 // a second validateSpeculations call after the script completes. 325 // This call should have been made immediately after runScriptsForPausedTreeBuilder 326 // which may have started a network load and left us waiting. 327 ASSERT(!m_lastChunkBeforeScript); 328 m_lastChunkBeforeScript = chunk; 329 return; 330 } 331 332 ASSERT(!m_lastChunkBeforeScript); 333 OwnPtr<HTMLTokenizer> tokenizer = m_tokenizer.release(); 334 OwnPtr<HTMLToken> token = m_token.release(); 335 336 if (!tokenizer) { 337 // There must not have been any changes to the HTMLTokenizer state on 338 // the main thread, which means the speculation buffer is correct. 339 return; 340 } 341 342 // Currently we're only smart enough to reuse the speculation buffer if the tokenizer 343 // both starts and ends in the DataState. That state is simplest because the HTMLToken 344 // is always in the Uninitialized state. We should consider whether we can reuse the 345 // speculation buffer in other states, but we'd likely need to do something more 346 // sophisticated with the HTMLToken. 347 if (chunk->tokenizerState == HTMLTokenizer::DataState 348 && tokenizer->state() == HTMLTokenizer::DataState 349 && m_input.current().isEmpty() 350 && chunk->treeBuilderState == HTMLTreeBuilderSimulator::stateFor(m_treeBuilder.get())) { 351 ASSERT(token->isUninitialized()); 352 return; 353 } 354 355 discardSpeculationsAndResumeFrom(chunk, token.release(), tokenizer.release()); 356 } 357 358 void HTMLDocumentParser::discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunkBeforeScript, PassOwnPtr<HTMLToken> token, PassOwnPtr<HTMLTokenizer> tokenizer) 359 { 360 m_weakFactory.revokeAll(); 361 m_speculations.clear(); 362 363 OwnPtr<BackgroundHTMLParser::Checkpoint> checkpoint = adoptPtr(new BackgroundHTMLParser::Checkpoint); 364 checkpoint->parser = m_weakFactory.createWeakPtr(); 365 checkpoint->token = token; 366 checkpoint->tokenizer = tokenizer; 367 checkpoint->treeBuilderState = HTMLTreeBuilderSimulator::stateFor(m_treeBuilder.get()); 368 checkpoint->inputCheckpoint = lastChunkBeforeScript->inputCheckpoint; 369 checkpoint->preloadScannerCheckpoint = lastChunkBeforeScript->preloadScannerCheckpoint; 370 checkpoint->unparsedInput = m_input.current().toString().isolatedCopy(); 371 m_input.current().clear(); // FIXME: This should be passed in instead of cleared. 372 373 ASSERT(checkpoint->unparsedInput.isSafeToSendToAnotherThread()); 374 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::resumeFrom, m_backgroundParser, checkpoint.release())); 375 } 376 377 void HTMLDocumentParser::processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk> popChunk) 378 { 379 TRACE_EVENT0("webkit", "HTMLDocumentParser::processParsedChunkFromBackgroundParser"); 380 381 ASSERT_WITH_SECURITY_IMPLICATION(!document()->activeParserCount()); 382 ASSERT(!isParsingFragment()); 383 ASSERT(!isWaitingForScripts()); 384 ASSERT(!isStopped()); 385 // ASSERT that this object is both attached to the Document and protected. 386 ASSERT(refCount() >= 2); 387 ASSERT(shouldUseThreading()); 388 ASSERT(!m_tokenizer); 389 ASSERT(!m_token); 390 ASSERT(!m_lastChunkBeforeScript); 391 392 ActiveParserSession session(contextForParsingSession()); 393 394 OwnPtr<ParsedChunk> chunk(popChunk); 395 OwnPtr<CompactHTMLTokenStream> tokens = chunk->tokens.release(); 396 397 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::startedChunkWithCheckpoint, m_backgroundParser, chunk->inputCheckpoint)); 398 399 for (XSSInfoStream::const_iterator it = chunk->xssInfos.begin(); it != chunk->xssInfos.end(); ++it) { 400 m_textPosition = (*it)->m_textPosition; 401 m_xssAuditorDelegate.didBlockScript(**it); 402 if (isStopped()) 403 break; 404 } 405 406 for (Vector<CompactHTMLToken>::const_iterator it = tokens->begin(); it != tokens->end(); ++it) { 407 ASSERT(!isWaitingForScripts()); 408 409 if (!isParsingFragment() 410 && document()->frame() && document()->frame()->navigationScheduler().locationChangePending()) { 411 412 // To match main-thread parser behavior (which never checks locationChangePending on the EOF path) 413 // we peek to see if this chunk has an EOF and process it anyway. 414 if (tokens->last().type() == HTMLToken::EndOfFile) { 415 ASSERT(m_speculations.isEmpty()); // There should never be any chunks after the EOF. 416 prepareToStopParsing(); 417 } 418 break; 419 } 420 421 m_textPosition = it->textPosition(); 422 423 constructTreeFromCompactHTMLToken(*it); 424 425 if (isStopped()) 426 break; 427 428 if (isWaitingForScripts()) { 429 ASSERT(it + 1 == tokens->end()); // The </script> is assumed to be the last token of this bunch. 430 runScriptsForPausedTreeBuilder(); 431 validateSpeculations(chunk.release()); 432 break; 433 } 434 435 if (it->type() == HTMLToken::EndOfFile) { 436 ASSERT(it + 1 == tokens->end()); // The EOF is assumed to be the last token of this bunch. 437 ASSERT(m_speculations.isEmpty()); // There should never be any chunks after the EOF. 438 prepareToStopParsing(); 439 break; 440 } 441 442 ASSERT(!m_tokenizer); 443 ASSERT(!m_token); 444 } 445 } 446 447 void HTMLDocumentParser::pumpPendingSpeculations() 448 { 449 // FIXME: Share this constant with the parser scheduler. 450 const double parserTimeLimit = 0.500; 451 452 // ASSERT that this object is both attached to the Document and protected. 453 ASSERT(refCount() >= 2); 454 // If this assert fails, you need to call validateSpeculations to make sure 455 // m_tokenizer and m_token don't have state that invalidates m_speculations. 456 ASSERT(!m_tokenizer); 457 ASSERT(!m_token); 458 ASSERT(!m_lastChunkBeforeScript); 459 ASSERT(!isWaitingForScripts()); 460 ASSERT(!isStopped()); 461 462 // FIXME: Pass in current input length. 463 InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), lineNumber().zeroBasedInt()); 464 465 double startTime = currentTime(); 466 467 while (!m_speculations.isEmpty()) { 468 processParsedChunkFromBackgroundParser(m_speculations.takeFirst()); 469 470 // The order matters! If this isStopped(), isWaitingForScripts() can hit and ASSERT since 471 // m_document can be null which is used to decide the readiness. 472 if (isStopped()) 473 break; 474 if (isWaitingForScripts()) 475 break; 476 477 if (currentTime() - startTime > parserTimeLimit && !m_speculations.isEmpty()) { 478 m_parserScheduler->scheduleForResume(); 479 break; 480 } 481 } 482 483 InspectorInstrumentation::didWriteHTML(cookie, lineNumber().zeroBasedInt()); 484 } 485 486 void HTMLDocumentParser::forcePlaintextForTextDocument() 487 { 488 if (shouldUseThreading()) { 489 // This method is called before any data is appended, so we have to start 490 // the background parser ourselves. 491 if (!m_haveBackgroundParser) 492 startBackgroundParser(); 493 494 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::forcePlaintextForTextDocument, m_backgroundParser)); 495 } else 496 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); 497 } 498 499 Document* HTMLDocumentParser::contextForParsingSession() 500 { 501 // The parsing session should interact with the document only when parsing 502 // non-fragments. Otherwise, we might delay the load event mistakenly. 503 if (isParsingFragment()) 504 return 0; 505 return document(); 506 } 507 508 void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode) 509 { 510 ASSERT(!isStopped()); 511 ASSERT(!isScheduledForResume()); 512 // ASSERT that this object is both attached to the Document and protected. 513 ASSERT(refCount() >= 2); 514 ASSERT(m_tokenizer); 515 ASSERT(m_token); 516 ASSERT(!m_haveBackgroundParser || mode == ForceSynchronous); 517 518 PumpSession session(m_pumpSessionNestingLevel, contextForParsingSession()); 519 520 // We tell the InspectorInstrumentation about every pump, even if we 521 // end up pumping nothing. It can filter out empty pumps itself. 522 // FIXME: m_input.current().length() is only accurate if we 523 // end up parsing the whole buffer in this pump. We should pass how 524 // much we parsed as part of didWriteHTML instead of willWriteHTML. 525 InspectorInstrumentationCookie cookie = InspectorInstrumentation::willWriteHTML(document(), m_input.current().currentLine().zeroBasedInt()); 526 527 m_xssAuditor.init(document(), &m_xssAuditorDelegate); 528 529 while (canTakeNextToken(mode, session) && !session.needsYield) { 530 if (!isParsingFragment()) 531 m_sourceTracker.start(m_input.current(), m_tokenizer.get(), token()); 532 533 if (!m_tokenizer->nextToken(m_input.current(), token())) 534 break; 535 536 if (!isParsingFragment()) { 537 m_sourceTracker.end(m_input.current(), m_tokenizer.get(), token()); 538 539 // We do not XSS filter innerHTML, which means we (intentionally) fail 540 // http/tests/security/xssAuditor/dom-write-innerHTML.html 541 if (OwnPtr<XSSInfo> xssInfo = m_xssAuditor.filterToken(FilterTokenRequest(token(), m_sourceTracker, m_tokenizer->shouldAllowCDATA()))) 542 m_xssAuditorDelegate.didBlockScript(*xssInfo); 543 } 544 545 constructTreeFromHTMLToken(token()); 546 ASSERT(token().isUninitialized()); 547 } 548 549 // Ensure we haven't been totally deref'ed after pumping. Any caller of this 550 // function should be holding a RefPtr to this to ensure we weren't deleted. 551 ASSERT(refCount() >= 1); 552 553 if (isStopped()) 554 return; 555 556 // There should only be PendingText left since the tree-builder always flushes 557 // the task queue before returning. In case that ever changes, crash. 558 if (mode == ForceSynchronous) 559 m_treeBuilder->flush(); 560 RELEASE_ASSERT(!isStopped()); 561 562 if (session.needsYield) 563 m_parserScheduler->scheduleForResume(); 564 565 if (isWaitingForScripts()) { 566 ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState); 567 if (!m_preloadScanner) { 568 m_preloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url(), document()->devicePixelRatio())); 569 m_preloadScanner->appendToEnd(m_input.current()); 570 } 571 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); 572 } 573 574 InspectorInstrumentation::didWriteHTML(cookie, m_input.current().currentLine().zeroBasedInt()); 575 } 576 577 void HTMLDocumentParser::constructTreeFromHTMLToken(HTMLToken& rawToken) 578 { 579 AtomicHTMLToken token(rawToken); 580 581 // We clear the rawToken in case constructTreeFromAtomicToken 582 // synchronously re-enters the parser. We don't clear the token immedately 583 // for Character tokens because the AtomicHTMLToken avoids copying the 584 // characters by keeping a pointer to the underlying buffer in the 585 // HTMLToken. Fortunately, Character tokens can't cause us to re-enter 586 // the parser. 587 // 588 // FIXME: Stop clearing the rawToken once we start running the parser off 589 // the main thread or once we stop allowing synchronous JavaScript 590 // execution from parseAttribute. 591 if (rawToken.type() != HTMLToken::Character) 592 rawToken.clear(); 593 594 m_treeBuilder->constructTree(&token); 595 596 if (!rawToken.isUninitialized()) { 597 ASSERT(rawToken.type() == HTMLToken::Character); 598 rawToken.clear(); 599 } 600 } 601 602 void HTMLDocumentParser::constructTreeFromCompactHTMLToken(const CompactHTMLToken& compactToken) 603 { 604 AtomicHTMLToken token(compactToken); 605 m_treeBuilder->constructTree(&token); 606 } 607 608 bool HTMLDocumentParser::hasInsertionPoint() 609 { 610 // FIXME: The wasCreatedByScript() branch here might not be fully correct. 611 // Our model of the EOF character differs slightly from the one in 612 // the spec because our treatment is uniform between network-sourced 613 // and script-sourced input streams whereas the spec treats them 614 // differently. 615 return m_input.hasInsertionPoint() || (wasCreatedByScript() && !m_input.haveSeenEndOfFile()); 616 } 617 618 void HTMLDocumentParser::insert(const SegmentedString& source) 619 { 620 if (isStopped()) 621 return; 622 623 TRACE_EVENT0("webkit", "HTMLDocumentParser::insert"); 624 625 // pumpTokenizer can cause this parser to be detached from the Document, 626 // but we need to ensure it isn't deleted yet. 627 RefPtr<HTMLDocumentParser> protect(this); 628 629 if (!m_tokenizer) { 630 ASSERT(!inPumpSession()); 631 ASSERT(m_haveBackgroundParser || wasCreatedByScript()); 632 m_token = adoptPtr(new HTMLToken); 633 m_tokenizer = HTMLTokenizer::create(m_options); 634 } 635 636 SegmentedString excludedLineNumberSource(source); 637 excludedLineNumberSource.setExcludeLineNumbers(); 638 m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource); 639 pumpTokenizerIfPossible(ForceSynchronous); 640 641 if (isWaitingForScripts()) { 642 // Check the document.write() output with a separate preload scanner as 643 // the main scanner can't deal with insertions. 644 if (!m_insertionPreloadScanner) 645 m_insertionPreloadScanner = adoptPtr(new HTMLPreloadScanner(m_options, document()->url(), document()->devicePixelRatio())); 646 647 m_insertionPreloadScanner->appendToEnd(source); 648 m_insertionPreloadScanner->scan(m_preloader.get(), document()->baseElementURL()); 649 } 650 651 endIfDelayed(); 652 } 653 654 void HTMLDocumentParser::startBackgroundParser() 655 { 656 ASSERT(shouldUseThreading()); 657 ASSERT(!m_haveBackgroundParser); 658 m_haveBackgroundParser = true; 659 660 RefPtr<WeakReference<BackgroundHTMLParser> > reference = WeakReference<BackgroundHTMLParser>::createUnbound(); 661 m_backgroundParser = WeakPtr<BackgroundHTMLParser>(reference); 662 663 OwnPtr<BackgroundHTMLParser::Configuration> config = adoptPtr(new BackgroundHTMLParser::Configuration); 664 config->options = m_options; 665 config->parser = m_weakFactory.createWeakPtr(); 666 config->xssAuditor = adoptPtr(new XSSAuditor); 667 config->xssAuditor->init(document(), &m_xssAuditorDelegate); 668 config->preloadScanner = adoptPtr(new TokenPreloadScanner(document()->url().copy(), document()->devicePixelRatio())); 669 670 ASSERT(config->xssAuditor->isSafeToSendToAnotherThread()); 671 ASSERT(config->preloadScanner->isSafeToSendToAnotherThread()); 672 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::create, reference.release(), config.release())); 673 } 674 675 void HTMLDocumentParser::stopBackgroundParser() 676 { 677 ASSERT(shouldUseThreading()); 678 ASSERT(m_haveBackgroundParser); 679 m_haveBackgroundParser = false; 680 681 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::stop, m_backgroundParser)); 682 m_weakFactory.revokeAll(); 683 } 684 685 void HTMLDocumentParser::append(PassRefPtr<StringImpl> inputSource) 686 { 687 if (isStopped()) 688 return; 689 690 if (shouldUseThreading()) { 691 if (!m_haveBackgroundParser) 692 startBackgroundParser(); 693 694 ASSERT(inputSource->hasOneRef()); 695 TRACE_EVENT1("net", "HTMLDocumentParser::append", "size", inputSource->length()); 696 // NOTE: Important that the String temporary is destroyed before we post the task 697 // otherwise the String could call deref() on a StringImpl now owned by the background parser. 698 // We would like to ASSERT(closure.arg3()->hasOneRef()) but sadly the args are private. 699 Closure closure = bind(&BackgroundHTMLParser::append, m_backgroundParser, String(inputSource)); 700 HTMLParserThread::shared()->postTask(closure); 701 return; 702 } 703 704 // pumpTokenizer can cause this parser to be detached from the Document, 705 // but we need to ensure it isn't deleted yet. 706 RefPtr<HTMLDocumentParser> protect(this); 707 TRACE_EVENT1("net", "HTMLDocumentParser::append", "size", inputSource->length()); 708 String source(inputSource); 709 710 if (m_preloadScanner) { 711 if (m_input.current().isEmpty() && !isWaitingForScripts()) { 712 // We have parsed until the end of the current input and so are now moving ahead of the preload scanner. 713 // Clear the scanner so we know to scan starting from the current input point if we block again. 714 m_preloadScanner.clear(); 715 } else { 716 m_preloadScanner->appendToEnd(source); 717 if (isWaitingForScripts()) 718 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); 719 } 720 } 721 722 m_input.appendToEnd(source); 723 724 if (inPumpSession()) { 725 // We've gotten data off the network in a nested write. 726 // We don't want to consume any more of the input stream now. Do 727 // not worry. We'll consume this data in a less-nested write(). 728 return; 729 } 730 731 // A couple pinToMainThread() callers require synchronous parsing, but can't 732 // easily use the insert() method, so we hack append() for them to be synchronous. 733 // javascript: url handling is one such caller. 734 // FIXME: This is gross, and we should separate the concept of synchronous parsing 735 // from insert() so that only document.write() uses insert. 736 if (m_isPinnedToMainThread) 737 pumpTokenizerIfPossible(ForceSynchronous); 738 else 739 pumpTokenizerIfPossible(AllowYield); 740 741 endIfDelayed(); 742 } 743 744 void HTMLDocumentParser::end() 745 { 746 ASSERT(!isDetached()); 747 ASSERT(!isScheduledForResume()); 748 749 if (m_haveBackgroundParser) 750 stopBackgroundParser(); 751 752 // Informs the the rest of WebCore that parsing is really finished (and deletes this). 753 m_treeBuilder->finished(); 754 } 755 756 void HTMLDocumentParser::attemptToRunDeferredScriptsAndEnd() 757 { 758 ASSERT(isStopping()); 759 // FIXME: It may not be correct to disable this for the background parser. 760 // That means hasInsertionPoint() may not be correct in some cases. 761 ASSERT(!hasInsertionPoint() || m_haveBackgroundParser); 762 if (m_scriptRunner && !m_scriptRunner->executeScriptsWaitingForParsing()) 763 return; 764 end(); 765 } 766 767 void HTMLDocumentParser::attemptToEnd() 768 { 769 // finish() indicates we will not receive any more data. If we are waiting on 770 // an external script to load, we can't finish parsing quite yet. 771 772 if (shouldDelayEnd()) { 773 m_endWasDelayed = true; 774 return; 775 } 776 prepareToStopParsing(); 777 } 778 779 void HTMLDocumentParser::endIfDelayed() 780 { 781 // If we've already been detached, don't bother ending. 782 if (isDetached()) 783 return; 784 785 if (!m_endWasDelayed || shouldDelayEnd()) 786 return; 787 788 m_endWasDelayed = false; 789 prepareToStopParsing(); 790 } 791 792 void HTMLDocumentParser::finish() 793 { 794 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not 795 // makes sense to call any methods on DocumentParser once it's been stopped. 796 // However, FrameLoader::stop calls DocumentParser::finish unconditionally. 797 798 // Empty documents never got an append() call, and thus have never started 799 // a background parser. In those cases, we ignore shouldUseThreading() 800 // and fall through to the non-threading case. 801 if (m_haveBackgroundParser) { 802 if (!m_input.haveSeenEndOfFile()) 803 m_input.closeWithoutMarkingEndOfFile(); 804 HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::finish, m_backgroundParser)); 805 return; 806 } 807 808 if (!m_tokenizer) { 809 ASSERT(!m_token); 810 // We're finishing before receiving any data. Rather than booting up 811 // the background parser just to spin it down, we finish parsing 812 // synchronously. 813 m_token = adoptPtr(new HTMLToken); 814 m_tokenizer = HTMLTokenizer::create(m_options); 815 } 816 817 // We're not going to get any more data off the network, so we tell the 818 // input stream we've reached the end of file. finish() can be called more 819 // than once, if the first time does not call end(). 820 if (!m_input.haveSeenEndOfFile()) 821 m_input.markEndOfFile(); 822 823 attemptToEnd(); 824 } 825 826 bool HTMLDocumentParser::isExecutingScript() const 827 { 828 if (!m_scriptRunner) 829 return false; 830 return m_scriptRunner->isExecutingScript(); 831 } 832 833 OrdinalNumber HTMLDocumentParser::lineNumber() const 834 { 835 if (m_haveBackgroundParser) 836 return m_textPosition.m_line; 837 838 return m_input.current().currentLine(); 839 } 840 841 TextPosition HTMLDocumentParser::textPosition() const 842 { 843 if (m_haveBackgroundParser) 844 return m_textPosition; 845 846 const SegmentedString& currentString = m_input.current(); 847 OrdinalNumber line = currentString.currentLine(); 848 OrdinalNumber column = currentString.currentColumn(); 849 850 return TextPosition(line, column); 851 } 852 853 bool HTMLDocumentParser::isWaitingForScripts() const 854 { 855 // When the TreeBuilder encounters a </script> tag, it returns to the HTMLDocumentParser 856 // where the script is transfered from the treebuilder to the script runner. 857 // The script runner will hold the script until its loaded and run. During 858 // any of this time, we want to count ourselves as "waiting for a script" and thus 859 // run the preload scanner, as well as delay completion of parsing. 860 bool treeBuilderHasBlockingScript = m_treeBuilder->hasParserBlockingScript(); 861 bool scriptRunnerHasBlockingScript = m_scriptRunner && m_scriptRunner->hasParserBlockingScript(); 862 // Since the parser is paused while a script runner has a blocking script, it should 863 // never be possible to end up with both objects holding a blocking script. 864 ASSERT(!(treeBuilderHasBlockingScript && scriptRunnerHasBlockingScript)); 865 // If either object has a blocking script, the parser should be paused. 866 return treeBuilderHasBlockingScript || scriptRunnerHasBlockingScript; 867 } 868 869 void HTMLDocumentParser::resumeParsingAfterScriptExecution() 870 { 871 ASSERT(!isExecutingScript()); 872 ASSERT(!isWaitingForScripts()); 873 874 if (m_haveBackgroundParser) { 875 validateSpeculations(m_lastChunkBeforeScript.release()); 876 ASSERT(!m_lastChunkBeforeScript); 877 // processParsedChunkFromBackgroundParser can cause this parser to be detached from the Document, 878 // but we need to ensure it isn't deleted yet. 879 RefPtr<HTMLDocumentParser> protect(this); 880 pumpPendingSpeculations(); 881 return; 882 } 883 884 m_insertionPreloadScanner.clear(); 885 pumpTokenizerIfPossible(AllowYield); 886 endIfDelayed(); 887 } 888 889 void HTMLDocumentParser::watchForLoad(Resource* resource) 890 { 891 ASSERT(!resource->isLoaded()); 892 // addClient would call notifyFinished if the load were complete. 893 // Callers do not expect to be re-entered from this call, so they should 894 // not an already-loaded Resource. 895 resource->addClient(this); 896 } 897 898 void HTMLDocumentParser::stopWatchingForLoad(Resource* resource) 899 { 900 resource->removeClient(this); 901 } 902 903 void HTMLDocumentParser::appendCurrentInputStreamToPreloadScannerAndScan() 904 { 905 ASSERT(m_preloadScanner); 906 m_preloadScanner->appendToEnd(m_input.current()); 907 m_preloadScanner->scan(m_preloader.get(), document()->baseElementURL()); 908 } 909 910 void HTMLDocumentParser::notifyFinished(Resource* cachedResource) 911 { 912 // pumpTokenizer can cause this parser to be detached from the Document, 913 // but we need to ensure it isn't deleted yet. 914 RefPtr<HTMLDocumentParser> protect(this); 915 916 ASSERT(m_scriptRunner); 917 ASSERT(!isExecutingScript()); 918 if (isStopping()) { 919 attemptToRunDeferredScriptsAndEnd(); 920 return; 921 } 922 923 m_scriptRunner->executeScriptsWaitingForLoad(cachedResource); 924 if (!isWaitingForScripts()) 925 resumeParsingAfterScriptExecution(); 926 } 927 928 void HTMLDocumentParser::executeScriptsWaitingForResources() 929 { 930 // Document only calls this when the Document owns the DocumentParser 931 // so this will not be called in the DocumentFragment case. 932 ASSERT(m_scriptRunner); 933 // Ignore calls unless we have a script blocking the parser waiting on a 934 // stylesheet load. Otherwise we are currently parsing and this 935 // is a re-entrant call from encountering a </ style> tag. 936 if (!m_scriptRunner->hasScriptsWaitingForResources()) 937 return; 938 939 // pumpTokenizer can cause this parser to be detached from the Document, 940 // but we need to ensure it isn't deleted yet. 941 RefPtr<HTMLDocumentParser> protect(this); 942 m_scriptRunner->executeScriptsWaitingForResources(); 943 if (!isWaitingForScripts()) 944 resumeParsingAfterScriptExecution(); 945 } 946 947 void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 948 { 949 RefPtr<HTMLDocumentParser> parser = HTMLDocumentParser::create(fragment, contextElement, parserContentPolicy); 950 parser->insert(source); // Use insert() so that the parser will not yield. 951 parser->finish(); 952 ASSERT(!parser->processingData()); // Make sure we're done. <rdar://problem/3963151> 953 parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction. 954 } 955 956 void HTMLDocumentParser::suspendScheduledTasks() 957 { 958 if (m_parserScheduler) 959 m_parserScheduler->suspend(); 960 } 961 962 void HTMLDocumentParser::resumeScheduledTasks() 963 { 964 if (m_parserScheduler) 965 m_parserScheduler->resume(); 966 } 967 968 } 969