1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2011 Apple Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "core/html/parser/HTMLTreeBuilder.h" 29 30 #include "HTMLElementFactory.h" 31 #include "HTMLNames.h" 32 #include "core/dom/Comment.h" 33 #include "core/dom/DocumentFragment.h" 34 #include "core/dom/DocumentType.h" 35 #include "core/dom/Element.h" 36 #include "core/dom/ScriptLoader.h" 37 #include "core/dom/Text.h" 38 #include "core/html/HTMLFormElement.h" 39 #include "core/html/HTMLHtmlElement.h" 40 #include "core/html/HTMLOptGroupElement.h" 41 #include "core/html/HTMLScriptElement.h" 42 #include "core/html/HTMLTemplateElement.h" 43 #include "core/html/parser/AtomicHTMLToken.h" 44 #include "core/html/parser/HTMLParserIdioms.h" 45 #include "core/html/parser/HTMLStackItem.h" 46 #include "core/html/parser/HTMLToken.h" 47 #include "core/loader/FrameLoader.h" 48 #include "core/loader/FrameLoaderClient.h" 49 #include "core/frame/Frame.h" 50 #include "platform/NotImplemented.h" 51 #include "platform/text/TextBreakIterator.h" 52 #include <limits> 53 54 namespace WebCore { 55 56 using namespace HTMLNames; 57 58 static const unsigned maximumHTMLParserDOMTreeDepth = 512; 59 60 static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy) 61 { 62 if (!scriptingContentIsAllowed(parserContentPolicy)) 63 element->stripScriptingAttributes(token->attributes()); 64 element->parserSetAttributes(token->attributes()); 65 } 66 67 static bool hasImpliedEndTag(const HTMLStackItem* item) 68 { 69 return item->hasTagName(ddTag) 70 || item->hasTagName(dtTag) 71 || item->hasTagName(liTag) 72 || item->hasTagName(optionTag) 73 || isHTMLOptGroupElement(item->node()) 74 || item->hasTagName(pTag) 75 || item->hasTagName(rpTag) 76 || item->hasTagName(rtTag); 77 } 78 79 static bool shouldUseLengthLimit(const ContainerNode* node) 80 { 81 return !node->hasTagName(scriptTag) 82 && !node->hasTagName(styleTag) 83 && !node->hasTagName(SVGNames::scriptTag); 84 } 85 86 static unsigned textLengthLimitForContainer(const ContainerNode* node) 87 { 88 return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max(); 89 } 90 91 static inline bool isAllWhitespace(const String& string) 92 { 93 return string.isAllSpecialCharacters<isHTMLSpace<UChar> >(); 94 } 95 96 static inline void insert(HTMLConstructionSiteTask& task) 97 { 98 if (task.parent->hasTagName(templateTag)) 99 task.parent = toHTMLTemplateElement(task.parent.get())->content(); 100 101 if (ContainerNode* parent = task.child->parentNode()) 102 parent->parserRemoveChild(*task.child); 103 104 if (task.nextChild) 105 task.parent->parserInsertBefore(task.child.get(), *task.nextChild); 106 else 107 task.parent->parserAppendChild(task.child.get()); 108 } 109 110 static inline void executeInsertTask(HTMLConstructionSiteTask& task) 111 { 112 ASSERT(task.operation == HTMLConstructionSiteTask::Insert); 113 114 insert(task); 115 116 task.child->beginParsingChildren(); 117 118 if (task.selfClosing) 119 task.child->finishParsingChildren(); 120 } 121 122 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task) 123 { 124 ASSERT(task.operation == HTMLConstructionSiteTask::InsertText); 125 ASSERT(task.child->isTextNode()); 126 127 // Merge text nodes into previous ones if possible: 128 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character 129 Text* newText = toText(task.child.get()); 130 Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild(); 131 if (previousChild && previousChild->isTextNode()) { 132 Text* previousText = toText(previousChild); 133 unsigned lengthLimit = textLengthLimitForContainer(task.parent.get()); 134 if (previousText->length() + newText->length() < lengthLimit) { 135 previousText->parserAppendData(newText->data()); 136 return; 137 } 138 } 139 140 insert(task); 141 } 142 143 static inline void executeReparentTask(HTMLConstructionSiteTask& task) 144 { 145 ASSERT(task.operation == HTMLConstructionSiteTask::Reparent); 146 147 if (ContainerNode* parent = task.child->parentNode()) 148 parent->parserRemoveChild(*task.child); 149 150 task.parent->parserAppendChild(task.child); 151 } 152 153 static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task) 154 { 155 ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild); 156 157 insert(task); 158 } 159 160 static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task) 161 { 162 ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren); 163 164 task.parent->parserTakeAllChildrenFrom(*task.oldParent()); 165 } 166 167 void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task) 168 { 169 ASSERT(m_taskQueue.isEmpty()); 170 if (task.operation == HTMLConstructionSiteTask::Insert) 171 return executeInsertTask(task); 172 173 if (task.operation == HTMLConstructionSiteTask::InsertText) 174 return executeInsertTextTask(task); 175 176 // All the cases below this point are only used by the adoption agency. 177 178 if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild) 179 return executeInsertAlreadyParsedChildTask(task); 180 181 if (task.operation == HTMLConstructionSiteTask::Reparent) 182 return executeReparentTask(task); 183 184 if (task.operation == HTMLConstructionSiteTask::TakeAllChildren) 185 return executeTakeAllChildrenTask(task); 186 187 ASSERT_NOT_REACHED(); 188 } 189 190 // This is only needed for TextDocuments where we might have text nodes 191 // approaching the default length limit (~64k) and we don't want to 192 // break a text node in the middle of a combining character. 193 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex) 194 { 195 ASSERT(currentPosition < proposedBreakIndex); 196 ASSERT(proposedBreakIndex <= string.length()); 197 // The end of the string is always a valid break. 198 if (proposedBreakIndex == string.length()) 199 return proposedBreakIndex; 200 201 // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong. 202 if (string.is8Bit()) 203 return proposedBreakIndex; 204 205 const UChar* breakSearchCharacters = string.characters16() + currentPosition; 206 // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer! 207 unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition); 208 NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength); 209 210 if (it.isBreak(proposedBreakIndex - currentPosition)) 211 return proposedBreakIndex; 212 213 int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition); 214 if (adjustedBreakIndexInSubstring > 0) 215 return currentPosition + adjustedBreakIndexInSubstring; 216 // We failed to find a breakable point, let the caller figure out what to do. 217 return 0; 218 } 219 220 static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode) 221 { 222 // Strings composed entirely of whitespace are likely to be repeated. 223 // Turn them into AtomicString so we share a single string for each. 224 if (whitespaceMode == AllWhitespace || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(string))) 225 return AtomicString(string).string(); 226 return string; 227 } 228 229 void HTMLConstructionSite::flushPendingText() 230 { 231 if (m_pendingText.isEmpty()) 232 return; 233 234 PendingText pendingText; 235 // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely. 236 m_pendingText.swap(pendingText); 237 ASSERT(m_pendingText.isEmpty()); 238 239 // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary 240 // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898 241 unsigned lengthLimit = textLengthLimitForContainer(pendingText.parent.get()); 242 243 unsigned currentPosition = 0; 244 const StringBuilder& string = pendingText.stringBuilder; 245 while (currentPosition < string.length()) { 246 unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length()); 247 unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex); 248 ASSERT(breakIndex <= string.length()); 249 String substring = string.substring(currentPosition, breakIndex - currentPosition); 250 substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode); 251 252 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText); 253 task.parent = pendingText.parent; 254 task.nextChild = pendingText.nextChild; 255 task.child = Text::create(task.parent->document(), substring); 256 queueTask(task); 257 258 ASSERT(breakIndex > currentPosition); 259 ASSERT(breakIndex - currentPosition == substring.length()); 260 ASSERT(toText(task.child.get())->length() == substring.length()); 261 currentPosition = breakIndex; 262 } 263 } 264 265 void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task) 266 { 267 flushPendingText(); 268 ASSERT(m_pendingText.isEmpty()); 269 m_taskQueue.append(task); 270 } 271 272 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> prpChild, bool selfClosing) 273 { 274 ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get()))); 275 ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !prpChild->isPluginElement()); 276 277 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert); 278 task.parent = parent; 279 task.child = prpChild; 280 task.selfClosing = selfClosing; 281 282 if (shouldFosterParent()) { 283 fosterParent(task.child); 284 return; 285 } 286 287 // Add as a sibling of the parent if we have reached the maximum depth allowed. 288 if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode()) 289 task.parent = task.parent->parentNode(); 290 291 ASSERT(task.parent); 292 queueTask(task); 293 } 294 295 void HTMLConstructionSite::executeQueuedTasks() 296 { 297 // This has no affect on pendingText, and we may have pendingText 298 // remaining after executing all other queued tasks. 299 const size_t size = m_taskQueue.size(); 300 if (!size) 301 return; 302 303 // Copy the task queue into a local variable in case executeTask 304 // re-enters the parser. 305 TaskQueue queue; 306 queue.swap(m_taskQueue); 307 308 for (size_t i = 0; i < size; ++i) 309 executeTask(queue[i]); 310 311 // We might be detached now. 312 } 313 314 HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy) 315 : m_document(document) 316 , m_attachmentRoot(document) 317 , m_parserContentPolicy(parserContentPolicy) 318 , m_isParsingFragment(false) 319 , m_redirectAttachToFosterParent(false) 320 , m_inQuirksMode(document->inQuirksMode()) 321 { 322 ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument()); 323 } 324 325 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy) 326 : m_document(&fragment->document()) 327 , m_attachmentRoot(fragment) 328 , m_parserContentPolicy(parserContentPolicy) 329 , m_isParsingFragment(true) 330 , m_redirectAttachToFosterParent(false) 331 , m_inQuirksMode(fragment->document().inQuirksMode()) 332 { 333 ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument()); 334 } 335 336 HTMLConstructionSite::~HTMLConstructionSite() 337 { 338 // Depending on why we're being destroyed it might be OK 339 // to forget queued tasks, but currently we don't expect to. 340 ASSERT(m_taskQueue.isEmpty()); 341 // Currently we assume that text will never be the last token in the 342 // document and that we'll always queue some additional task to cause it to flush. 343 ASSERT(m_pendingText.isEmpty()); 344 } 345 346 void HTMLConstructionSite::detach() 347 { 348 // FIXME: We'd like to ASSERT here that we're canceling and not just discarding 349 // text that really should have made it into the DOM earlier, but there 350 // doesn't seem to be a nice way to do that. 351 m_pendingText.discard(); 352 m_document = 0; 353 m_attachmentRoot = 0; 354 } 355 356 void HTMLConstructionSite::setForm(HTMLFormElement* form) 357 { 358 // This method should only be needed for HTMLTreeBuilder in the fragment case. 359 ASSERT(!m_form); 360 m_form = form; 361 } 362 363 PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm() 364 { 365 return m_form.release(); 366 } 367 368 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded() 369 { 370 ASSERT(m_document); 371 if (m_document->frame() && !m_isParsingFragment) 372 m_document->frame()->loader().dispatchDocumentElementAvailable(); 373 } 374 375 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token) 376 { 377 ASSERT(m_document); 378 RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document); 379 setAttributes(element.get(), token, m_parserContentPolicy); 380 attachLater(m_attachmentRoot, element); 381 m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token)); 382 383 executeQueuedTasks(); 384 element->insertedByParser(); 385 dispatchDocumentElementAvailableIfNeeded(); 386 } 387 388 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element) 389 { 390 if (token->attributes().isEmpty()) 391 return; 392 393 for (unsigned i = 0; i < token->attributes().size(); ++i) { 394 const Attribute& tokenAttribute = token->attributes().at(i); 395 if (!element->elementData() || !element->getAttributeItem(tokenAttribute.name())) 396 element->setAttribute(tokenAttribute.name(), tokenAttribute.value()); 397 } 398 } 399 400 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token) 401 { 402 // Fragments do not have a root HTML element, so any additional HTML elements 403 // encountered during fragment parsing should be ignored. 404 if (m_isParsingFragment) 405 return; 406 407 mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement()); 408 } 409 410 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token) 411 { 412 mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement()); 413 } 414 415 void HTMLConstructionSite::setDefaultCompatibilityMode() 416 { 417 if (m_isParsingFragment) 418 return; 419 if (m_document->isSrcdocDocument()) 420 return; 421 setCompatibilityMode(Document::QuirksMode); 422 } 423 424 void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode) 425 { 426 m_inQuirksMode = (mode == Document::QuirksMode); 427 m_document->setCompatibilityMode(mode); 428 } 429 430 void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId) 431 { 432 // There are three possible compatibility modes: 433 // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can 434 // be omitted from numbers. 435 // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model. 436 // No Quirks - no quirks apply. Web pages will obey the specifications to the letter. 437 438 // Check for Quirks Mode. 439 if (name != "html" 440 || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false) 441 || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false) 442 || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false) 443 || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false) 444 || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false) 445 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false) 446 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false) 447 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false) 448 || publicId.startsWith("-//IETF//DTD HTML 2.0//", false) 449 || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false) 450 || publicId.startsWith("-//IETF//DTD HTML 3.0//", false) 451 || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false) 452 || publicId.startsWith("-//IETF//DTD HTML 3.2//", false) 453 || publicId.startsWith("-//IETF//DTD HTML 3//", false) 454 || publicId.startsWith("-//IETF//DTD HTML Level 0//", false) 455 || publicId.startsWith("-//IETF//DTD HTML Level 1//", false) 456 || publicId.startsWith("-//IETF//DTD HTML Level 2//", false) 457 || publicId.startsWith("-//IETF//DTD HTML Level 3//", false) 458 || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false) 459 || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false) 460 || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false) 461 || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false) 462 || publicId.startsWith("-//IETF//DTD HTML Strict//", false) 463 || publicId.startsWith("-//IETF//DTD HTML//", false) 464 || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false) 465 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false) 466 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false) 467 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false) 468 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false) 469 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false) 470 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false) 471 || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false) 472 || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false) 473 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false) 474 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false) 475 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false) 476 || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false) 477 || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false) 478 || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false) 479 || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false) 480 || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false) 481 || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false) 482 || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false) 483 || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false) 484 || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false) 485 || publicId.startsWith("-//W3C//DTD HTML 3.2//", false) 486 || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false) 487 || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false) 488 || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false) 489 || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false) 490 || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false) 491 || publicId.startsWith("-//W3C//DTD W3 HTML//", false) 492 || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false) 493 || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//") 494 || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false) 495 || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false) 496 || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN") 497 || equalIgnoringCase(publicId, "HTML") 498 || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") 499 || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false)) 500 || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) { 501 setCompatibilityMode(Document::QuirksMode); 502 return; 503 } 504 505 // Check for Limited Quirks Mode. 506 if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false) 507 || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false) 508 || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false)) 509 || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) { 510 setCompatibilityMode(Document::LimitedQuirksMode); 511 return; 512 } 513 514 // Otherwise we are No Quirks Mode. 515 setCompatibilityMode(Document::NoQuirksMode); 516 } 517 518 void HTMLConstructionSite::processEndOfFile() 519 { 520 ASSERT(currentNode()); 521 flush(); 522 openElements()->popAll(); 523 } 524 525 void HTMLConstructionSite::finishedParsing() 526 { 527 // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute. 528 ASSERT(m_taskQueue.isEmpty()); 529 flush(); 530 m_document->finishedParsing(); 531 } 532 533 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token) 534 { 535 ASSERT(token->type() == HTMLToken::DOCTYPE); 536 537 const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier()); 538 const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier()); 539 RefPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId); 540 attachLater(m_attachmentRoot, doctype.release()); 541 542 // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which 543 // never occurs. However, if we ever chose to support such, this code is subtly wrong, 544 // because context-less fragments can determine their own quirks mode, and thus change 545 // parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this code 546 // in a fragment, as changing the owning document's compatibility mode would be wrong. 547 ASSERT(!m_isParsingFragment); 548 if (m_isParsingFragment) 549 return; 550 551 if (token->forceQuirks()) 552 setCompatibilityMode(Document::QuirksMode); 553 else { 554 setCompatibilityModeFromDoctype(token->name(), publicId, systemId); 555 } 556 } 557 558 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token) 559 { 560 ASSERT(token->type() == HTMLToken::Comment); 561 attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment())); 562 } 563 564 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token) 565 { 566 ASSERT(token->type() == HTMLToken::Comment); 567 ASSERT(m_document); 568 attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment())); 569 } 570 571 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token) 572 { 573 ASSERT(token->type() == HTMLToken::Comment); 574 ContainerNode* parent = m_openElements.rootNode(); 575 attachLater(parent, Comment::create(parent->document(), token->comment())); 576 } 577 578 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token) 579 { 580 ASSERT(!shouldFosterParent()); 581 m_head = HTMLStackItem::create(createHTMLElement(token), token); 582 attachLater(currentNode(), m_head->element()); 583 m_openElements.pushHTMLHeadElement(m_head); 584 } 585 586 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token) 587 { 588 ASSERT(!shouldFosterParent()); 589 RefPtr<Element> body = createHTMLElement(token); 590 attachLater(currentNode(), body); 591 m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token)); 592 if (Frame* frame = m_document->frame()) 593 frame->loader().client()->dispatchWillInsertBody(); 594 } 595 596 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted) 597 { 598 RefPtr<Element> element = createHTMLElement(token); 599 ASSERT(element->hasTagName(formTag)); 600 m_form = static_pointer_cast<HTMLFormElement>(element.release()); 601 m_form->setDemoted(isDemoted); 602 attachLater(currentNode(), m_form); 603 m_openElements.push(HTMLStackItem::create(m_form, token)); 604 } 605 606 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token) 607 { 608 RefPtr<Element> element = createHTMLElement(token); 609 attachLater(currentNode(), element); 610 m_openElements.push(HTMLStackItem::create(element.release(), token)); 611 } 612 613 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token) 614 { 615 ASSERT(token->type() == HTMLToken::StartTag); 616 // Normally HTMLElementStack is responsible for calling finishParsingChildren, 617 // but self-closing elements are never in the element stack so the stack 618 // doesn't get a chance to tell them that we're done parsing their children. 619 attachLater(currentNode(), createHTMLElement(token), true); 620 // FIXME: Do we want to acknowledge the token's self-closing flag? 621 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag 622 } 623 624 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token) 625 { 626 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements 627 // Possible active formatting elements include: 628 // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u. 629 insertHTMLElement(token); 630 m_activeFormattingElements.append(currentElementRecord()->stackItem()); 631 } 632 633 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token) 634 { 635 // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started 636 // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment 637 // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them. 638 // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see 639 // those flags or effects thereof. 640 const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted; 641 const bool alreadyStarted = m_isParsingFragment && parserInserted; 642 RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted); 643 setAttributes(element.get(), token, m_parserContentPolicy); 644 if (scriptingContentIsAllowed(m_parserContentPolicy)) 645 attachLater(currentNode(), element); 646 m_openElements.push(HTMLStackItem::create(element.release(), token)); 647 } 648 649 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI) 650 { 651 ASSERT(token->type() == HTMLToken::StartTag); 652 notImplemented(); // parseError when xmlns or xmlns:xlink are wrong. 653 654 RefPtr<Element> element = createElement(token, namespaceURI); 655 if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get())) 656 attachLater(currentNode(), element, token->selfClosing()); 657 if (!token->selfClosing()) 658 m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI)); 659 } 660 661 void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode) 662 { 663 HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert); 664 dummyTask.parent = currentNode(); 665 666 if (shouldFosterParent()) 667 findFosterSite(dummyTask); 668 669 // FIXME: This probably doesn't need to be done both here and in insert(Task). 670 if (dummyTask.parent->hasTagName(templateTag)) 671 dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content(); 672 673 // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token. 674 // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b" 675 // In either case we have to flush the pending text into the task queue before making more. 676 if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent || m_pendingText.nextChild != dummyTask.nextChild)) 677 flushPendingText(); 678 m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode); 679 } 680 681 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child) 682 { 683 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent); 684 task.parent = newParent->node(); 685 task.child = child->node(); 686 queueTask(task); 687 } 688 689 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child) 690 { 691 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent); 692 task.parent = newParent->node(); 693 task.child = child->node(); 694 queueTask(task); 695 } 696 697 void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child) 698 { 699 if (newParent->causesFosterParenting()) { 700 fosterParent(child->node()); 701 return; 702 } 703 704 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild); 705 task.parent = newParent->node(); 706 task.child = child->node(); 707 queueTask(task); 708 } 709 710 void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent) 711 { 712 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren); 713 task.parent = newParent->node(); 714 task.child = oldParent->node(); 715 queueTask(task); 716 } 717 718 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI) 719 { 720 QualifiedName tagName(nullAtom, token->name(), namespaceURI); 721 RefPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true); 722 setAttributes(element.get(), token, m_parserContentPolicy); 723 return element.release(); 724 } 725 726 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode() 727 { 728 if (currentNode()->hasTagName(templateTag)) 729 return toHTMLTemplateElement(currentElement())->content()->document(); 730 return currentNode()->document(); 731 } 732 733 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token) 734 { 735 Document& document = ownerDocumentForCurrentNode(); 736 // Only associate the element with the current form if we're creating the new element 737 // in a document with a browsing context (rather than in <template> contents). 738 HTMLFormElement* form = document.frame() ? m_form.get() : 0; 739 // FIXME: This can't use HTMLConstructionSite::createElement because we 740 // have to pass the current form element. We should rework form association 741 // to occur after construction to allow better code sharing here. 742 RefPtr<Element> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true); 743 setAttributes(element.get(), token, m_parserContentPolicy); 744 ASSERT(element->isHTMLElement()); 745 return element.release(); 746 } 747 748 PassRefPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item) 749 { 750 RefPtr<Element> element; 751 // NOTE: Moving from item -> token -> item copies the Attribute vector twice! 752 AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes()); 753 if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI) 754 element = createHTMLElement(&fakeToken); 755 else 756 element = createElement(&fakeToken, item->namespaceURI()); 757 return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI()); 758 } 759 760 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const 761 { 762 if (m_activeFormattingElements.isEmpty()) 763 return false; 764 unsigned index = m_activeFormattingElements.size(); 765 do { 766 --index; 767 const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index); 768 if (entry.isMarker() || m_openElements.contains(entry.element())) { 769 firstUnopenElementIndex = index + 1; 770 return firstUnopenElementIndex < m_activeFormattingElements.size(); 771 } 772 } while (index); 773 firstUnopenElementIndex = index; 774 return true; 775 } 776 777 void HTMLConstructionSite::reconstructTheActiveFormattingElements() 778 { 779 unsigned firstUnopenElementIndex; 780 if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex)) 781 return; 782 783 unsigned unopenEntryIndex = firstUnopenElementIndex; 784 ASSERT(unopenEntryIndex < m_activeFormattingElements.size()); 785 for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) { 786 HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex); 787 RefPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get()); 788 attachLater(currentNode(), reconstructed->node()); 789 m_openElements.push(reconstructed); 790 unopenedEntry.replaceElement(reconstructed.release()); 791 } 792 } 793 794 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName) 795 { 796 while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName)) 797 m_openElements.pop(); 798 } 799 800 void HTMLConstructionSite::generateImpliedEndTags() 801 { 802 while (hasImpliedEndTag(currentStackItem())) 803 m_openElements.pop(); 804 } 805 806 bool HTMLConstructionSite::inQuirksMode() 807 { 808 return m_inQuirksMode; 809 } 810 811 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task) 812 { 813 // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!) 814 HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName()); 815 if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) { 816 task.parent = lastTemplateElement->element(); 817 return; 818 } 819 820 HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName()); 821 if (lastTableElementRecord) { 822 Element* lastTableElement = lastTableElementRecord->element(); 823 ContainerNode* parent; 824 if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag)) 825 parent = lastTableElementRecord->next()->element(); 826 else 827 parent = lastTableElement->parentNode(); 828 829 // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency, 830 // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here. 831 if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) { 832 task.parent = parent; 833 task.nextChild = lastTableElement; 834 return; 835 } 836 task.parent = lastTableElementRecord->next()->element(); 837 return; 838 } 839 // Fragment case 840 task.parent = m_openElements.rootNode(); // DocumentFragment 841 } 842 843 bool HTMLConstructionSite::shouldFosterParent() const 844 { 845 return m_redirectAttachToFosterParent 846 && currentStackItem()->isElementNode() 847 && currentStackItem()->causesFosterParenting(); 848 } 849 850 void HTMLConstructionSite::fosterParent(PassRefPtr<Node> node) 851 { 852 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert); 853 findFosterSite(task); 854 task.child = node; 855 ASSERT(task.parent); 856 queueTask(task); 857 } 858 859 } 860