1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2011 Apple Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "core/html/parser/HTMLConstructionSite.h" 29 30 #include "core/HTMLElementFactory.h" 31 #include "core/HTMLNames.h" 32 #include "core/dom/Comment.h" 33 #include "core/dom/DocumentFragment.h" 34 #include "core/dom/DocumentType.h" 35 #include "core/dom/Element.h" 36 #include "core/dom/ScriptLoader.h" 37 #include "core/dom/Text.h" 38 #include "core/frame/LocalFrame.h" 39 #include "core/html/HTMLFormElement.h" 40 #include "core/html/HTMLHtmlElement.h" 41 #include "core/html/HTMLPlugInElement.h" 42 #include "core/html/HTMLScriptElement.h" 43 #include "core/html/HTMLTemplateElement.h" 44 #include "core/html/parser/AtomicHTMLToken.h" 45 #include "core/html/parser/HTMLParserIdioms.h" 46 #include "core/html/parser/HTMLStackItem.h" 47 #include "core/html/parser/HTMLToken.h" 48 #include "core/loader/FrameLoader.h" 49 #include "core/loader/FrameLoaderClient.h" 50 #include "core/svg/SVGScriptElement.h" 51 #include "platform/NotImplemented.h" 52 #include "platform/text/TextBreakIterator.h" 53 #include <limits> 54 55 namespace WebCore { 56 57 using namespace HTMLNames; 58 59 static const unsigned maximumHTMLParserDOMTreeDepth = 512; 60 61 static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy) 62 { 63 if (!scriptingContentIsAllowed(parserContentPolicy)) 64 element->stripScriptingAttributes(token->attributes()); 65 element->parserSetAttributes(token->attributes()); 66 } 67 68 static bool hasImpliedEndTag(const HTMLStackItem* item) 69 { 70 return item->hasTagName(ddTag) 71 || item->hasTagName(dtTag) 72 || item->hasTagName(liTag) 73 || item->hasTagName(optionTag) 74 || item->hasTagName(optgroupTag) 75 || item->hasTagName(pTag) 76 || item->hasTagName(rpTag) 77 || item->hasTagName(rtTag); 78 } 79 80 static bool shouldUseLengthLimit(const ContainerNode& node) 81 { 82 return !isHTMLScriptElement(node) 83 && !isHTMLStyleElement(node) 84 && !isSVGScriptElement(node); 85 } 86 87 static unsigned textLengthLimitForContainer(const ContainerNode& node) 88 { 89 return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max(); 90 } 91 92 static inline bool isAllWhitespace(const String& string) 93 { 94 return string.isAllSpecialCharacters<isHTMLSpace<UChar> >(); 95 } 96 97 static inline void insert(HTMLConstructionSiteTask& task) 98 { 99 if (isHTMLTemplateElement(*task.parent)) 100 task.parent = toHTMLTemplateElement(task.parent.get())->content(); 101 102 if (ContainerNode* parent = task.child->parentNode()) 103 parent->parserRemoveChild(*task.child); 104 105 if (task.nextChild) 106 task.parent->parserInsertBefore(task.child.get(), *task.nextChild); 107 else 108 task.parent->parserAppendChild(task.child.get()); 109 } 110 111 static inline void executeInsertTask(HTMLConstructionSiteTask& task) 112 { 113 ASSERT(task.operation == HTMLConstructionSiteTask::Insert); 114 115 insert(task); 116 117 if (task.child->isElementNode()) { 118 Element& child = toElement(*task.child); 119 child.beginParsingChildren(); 120 if (task.selfClosing) 121 child.finishParsingChildren(); 122 } 123 } 124 125 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task) 126 { 127 ASSERT(task.operation == HTMLConstructionSiteTask::InsertText); 128 ASSERT(task.child->isTextNode()); 129 130 // Merge text nodes into previous ones if possible: 131 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character 132 Text* newText = toText(task.child.get()); 133 Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild(); 134 if (previousChild && previousChild->isTextNode()) { 135 Text* previousText = toText(previousChild); 136 unsigned lengthLimit = textLengthLimitForContainer(*task.parent); 137 if (previousText->length() + newText->length() < lengthLimit) { 138 previousText->parserAppendData(newText->data()); 139 return; 140 } 141 } 142 143 insert(task); 144 } 145 146 static inline void executeReparentTask(HTMLConstructionSiteTask& task) 147 { 148 ASSERT(task.operation == HTMLConstructionSiteTask::Reparent); 149 150 if (ContainerNode* parent = task.child->parentNode()) 151 parent->parserRemoveChild(*task.child); 152 153 task.parent->parserAppendChild(task.child); 154 } 155 156 static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task) 157 { 158 ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild); 159 160 insert(task); 161 } 162 163 static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task) 164 { 165 ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren); 166 167 task.parent->parserTakeAllChildrenFrom(*task.oldParent()); 168 } 169 170 void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task) 171 { 172 ASSERT(m_taskQueue.isEmpty()); 173 if (task.operation == HTMLConstructionSiteTask::Insert) 174 return executeInsertTask(task); 175 176 if (task.operation == HTMLConstructionSiteTask::InsertText) 177 return executeInsertTextTask(task); 178 179 // All the cases below this point are only used by the adoption agency. 180 181 if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild) 182 return executeInsertAlreadyParsedChildTask(task); 183 184 if (task.operation == HTMLConstructionSiteTask::Reparent) 185 return executeReparentTask(task); 186 187 if (task.operation == HTMLConstructionSiteTask::TakeAllChildren) 188 return executeTakeAllChildrenTask(task); 189 190 ASSERT_NOT_REACHED(); 191 } 192 193 // This is only needed for TextDocuments where we might have text nodes 194 // approaching the default length limit (~64k) and we don't want to 195 // break a text node in the middle of a combining character. 196 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex) 197 { 198 ASSERT(currentPosition < proposedBreakIndex); 199 ASSERT(proposedBreakIndex <= string.length()); 200 // The end of the string is always a valid break. 201 if (proposedBreakIndex == string.length()) 202 return proposedBreakIndex; 203 204 // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong. 205 if (string.is8Bit()) 206 return proposedBreakIndex; 207 208 const UChar* breakSearchCharacters = string.characters16() + currentPosition; 209 // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer! 210 unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition); 211 NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength); 212 213 if (it.isBreak(proposedBreakIndex - currentPosition)) 214 return proposedBreakIndex; 215 216 int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition); 217 if (adjustedBreakIndexInSubstring > 0) 218 return currentPosition + adjustedBreakIndexInSubstring; 219 // We failed to find a breakable point, let the caller figure out what to do. 220 return 0; 221 } 222 223 static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode) 224 { 225 // Strings composed entirely of whitespace are likely to be repeated. 226 // Turn them into AtomicString so we share a single string for each. 227 if (whitespaceMode == AllWhitespace || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(string))) 228 return AtomicString(string).string(); 229 return string; 230 } 231 232 void HTMLConstructionSite::flushPendingText() 233 { 234 if (m_pendingText.isEmpty()) 235 return; 236 237 PendingText pendingText; 238 // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely. 239 m_pendingText.swap(pendingText); 240 ASSERT(m_pendingText.isEmpty()); 241 242 // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary 243 // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898 244 unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent); 245 246 unsigned currentPosition = 0; 247 const StringBuilder& string = pendingText.stringBuilder; 248 while (currentPosition < string.length()) { 249 unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length()); 250 unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex); 251 ASSERT(breakIndex <= string.length()); 252 String substring = string.substring(currentPosition, breakIndex - currentPosition); 253 substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode); 254 255 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText); 256 task.parent = pendingText.parent; 257 task.nextChild = pendingText.nextChild; 258 task.child = Text::create(task.parent->document(), substring); 259 queueTask(task); 260 261 ASSERT(breakIndex > currentPosition); 262 ASSERT(breakIndex - currentPosition == substring.length()); 263 ASSERT(toText(task.child.get())->length() == substring.length()); 264 currentPosition = breakIndex; 265 } 266 } 267 268 void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task) 269 { 270 flushPendingText(); 271 ASSERT(m_pendingText.isEmpty()); 272 m_taskQueue.append(task); 273 } 274 275 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> prpChild, bool selfClosing) 276 { 277 ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get()))); 278 ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !isHTMLPlugInElement(prpChild)); 279 280 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert); 281 task.parent = parent; 282 task.child = prpChild; 283 task.selfClosing = selfClosing; 284 285 if (shouldFosterParent()) { 286 fosterParent(task.child); 287 return; 288 } 289 290 // Add as a sibling of the parent if we have reached the maximum depth allowed. 291 if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode()) 292 task.parent = task.parent->parentNode(); 293 294 ASSERT(task.parent); 295 queueTask(task); 296 } 297 298 void HTMLConstructionSite::executeQueuedTasks() 299 { 300 // This has no affect on pendingText, and we may have pendingText 301 // remaining after executing all other queued tasks. 302 const size_t size = m_taskQueue.size(); 303 if (!size) 304 return; 305 306 // Copy the task queue into a local variable in case executeTask 307 // re-enters the parser. 308 TaskQueue queue; 309 queue.swap(m_taskQueue); 310 311 for (size_t i = 0; i < size; ++i) 312 executeTask(queue[i]); 313 314 // We might be detached now. 315 } 316 317 HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy) 318 : m_document(document) 319 , m_attachmentRoot(document) 320 , m_parserContentPolicy(parserContentPolicy) 321 , m_isParsingFragment(false) 322 , m_redirectAttachToFosterParent(false) 323 , m_inQuirksMode(document->inQuirksMode()) 324 { 325 ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument()); 326 } 327 328 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy) 329 : m_document(&fragment->document()) 330 , m_attachmentRoot(fragment) 331 , m_parserContentPolicy(parserContentPolicy) 332 , m_isParsingFragment(true) 333 , m_redirectAttachToFosterParent(false) 334 , m_inQuirksMode(fragment->document().inQuirksMode()) 335 { 336 ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument()); 337 } 338 339 HTMLConstructionSite::~HTMLConstructionSite() 340 { 341 // Depending on why we're being destroyed it might be OK 342 // to forget queued tasks, but currently we don't expect to. 343 ASSERT(m_taskQueue.isEmpty()); 344 // Currently we assume that text will never be the last token in the 345 // document and that we'll always queue some additional task to cause it to flush. 346 ASSERT(m_pendingText.isEmpty()); 347 } 348 349 void HTMLConstructionSite::trace(Visitor* visitor) 350 { 351 visitor->trace(m_document); 352 visitor->trace(m_attachmentRoot); 353 visitor->trace(m_head); 354 visitor->trace(m_form); 355 visitor->trace(m_openElements); 356 visitor->trace(m_activeFormattingElements); 357 visitor->trace(m_taskQueue); 358 visitor->trace(m_pendingText); 359 } 360 361 void HTMLConstructionSite::detach() 362 { 363 // FIXME: We'd like to ASSERT here that we're canceling and not just discarding 364 // text that really should have made it into the DOM earlier, but there 365 // doesn't seem to be a nice way to do that. 366 m_pendingText.discard(); 367 m_document = nullptr; 368 m_attachmentRoot = nullptr; 369 } 370 371 void HTMLConstructionSite::setForm(HTMLFormElement* form) 372 { 373 // This method should only be needed for HTMLTreeBuilder in the fragment case. 374 ASSERT(!m_form); 375 m_form = form; 376 } 377 378 PassRefPtrWillBeRawPtr<HTMLFormElement> HTMLConstructionSite::takeForm() 379 { 380 return m_form.release(); 381 } 382 383 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded() 384 { 385 ASSERT(m_document); 386 if (m_document->frame() && !m_isParsingFragment) 387 m_document->frame()->loader().dispatchDocumentElementAvailable(); 388 } 389 390 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token) 391 { 392 ASSERT(m_document); 393 RefPtrWillBeRawPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document); 394 setAttributes(element.get(), token, m_parserContentPolicy); 395 attachLater(m_attachmentRoot, element); 396 m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token)); 397 398 executeQueuedTasks(); 399 element->insertedByParser(); 400 dispatchDocumentElementAvailableIfNeeded(); 401 } 402 403 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element) 404 { 405 if (token->attributes().isEmpty()) 406 return; 407 408 for (unsigned i = 0; i < token->attributes().size(); ++i) { 409 const Attribute& tokenAttribute = token->attributes().at(i); 410 if (!element->elementData() || !element->findAttributeByName(tokenAttribute.name())) 411 element->setAttribute(tokenAttribute.name(), tokenAttribute.value()); 412 } 413 } 414 415 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token) 416 { 417 // Fragments do not have a root HTML element, so any additional HTML elements 418 // encountered during fragment parsing should be ignored. 419 if (m_isParsingFragment) 420 return; 421 422 mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement()); 423 } 424 425 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token) 426 { 427 mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement()); 428 } 429 430 void HTMLConstructionSite::setDefaultCompatibilityMode() 431 { 432 if (m_isParsingFragment) 433 return; 434 setCompatibilityMode(Document::QuirksMode); 435 } 436 437 void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode) 438 { 439 m_inQuirksMode = (mode == Document::QuirksMode); 440 m_document->setCompatibilityMode(mode); 441 } 442 443 void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId) 444 { 445 // There are three possible compatibility modes: 446 // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can 447 // be omitted from numbers. 448 // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model. 449 // No Quirks - no quirks apply. Web pages will obey the specifications to the letter. 450 451 // Check for Quirks Mode. 452 if (name != "html" 453 || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false) 454 || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false) 455 || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false) 456 || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false) 457 || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false) 458 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false) 459 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false) 460 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false) 461 || publicId.startsWith("-//IETF//DTD HTML 2.0//", false) 462 || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false) 463 || publicId.startsWith("-//IETF//DTD HTML 3.0//", false) 464 || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false) 465 || publicId.startsWith("-//IETF//DTD HTML 3.2//", false) 466 || publicId.startsWith("-//IETF//DTD HTML 3//", false) 467 || publicId.startsWith("-//IETF//DTD HTML Level 0//", false) 468 || publicId.startsWith("-//IETF//DTD HTML Level 1//", false) 469 || publicId.startsWith("-//IETF//DTD HTML Level 2//", false) 470 || publicId.startsWith("-//IETF//DTD HTML Level 3//", false) 471 || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false) 472 || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false) 473 || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false) 474 || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false) 475 || publicId.startsWith("-//IETF//DTD HTML Strict//", false) 476 || publicId.startsWith("-//IETF//DTD HTML//", false) 477 || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false) 478 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false) 479 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false) 480 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false) 481 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false) 482 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false) 483 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false) 484 || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false) 485 || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false) 486 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false) 487 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false) 488 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false) 489 || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false) 490 || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false) 491 || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false) 492 || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false) 493 || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false) 494 || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false) 495 || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false) 496 || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false) 497 || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false) 498 || publicId.startsWith("-//W3C//DTD HTML 3.2//", false) 499 || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false) 500 || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false) 501 || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false) 502 || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false) 503 || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false) 504 || publicId.startsWith("-//W3C//DTD W3 HTML//", false) 505 || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false) 506 || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//") 507 || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false) 508 || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false) 509 || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN") 510 || equalIgnoringCase(publicId, "HTML") 511 || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") 512 || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false)) 513 || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) { 514 setCompatibilityMode(Document::QuirksMode); 515 return; 516 } 517 518 // Check for Limited Quirks Mode. 519 if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false) 520 || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false) 521 || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false)) 522 || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) { 523 setCompatibilityMode(Document::LimitedQuirksMode); 524 return; 525 } 526 527 // Otherwise we are No Quirks Mode. 528 setCompatibilityMode(Document::NoQuirksMode); 529 } 530 531 void HTMLConstructionSite::processEndOfFile() 532 { 533 ASSERT(currentNode()); 534 flush(); 535 openElements()->popAll(); 536 } 537 538 void HTMLConstructionSite::finishedParsing() 539 { 540 // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute. 541 ASSERT(m_taskQueue.isEmpty()); 542 flush(); 543 m_document->finishedParsing(); 544 } 545 546 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token) 547 { 548 ASSERT(token->type() == HTMLToken::DOCTYPE); 549 550 const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier()); 551 const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier()); 552 RefPtrWillBeRawPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId); 553 attachLater(m_attachmentRoot, doctype.release()); 554 555 // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which 556 // never occurs. However, if we ever chose to support such, this code is subtly wrong, 557 // because context-less fragments can determine their own quirks mode, and thus change 558 // parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this code 559 // in a fragment, as changing the owning document's compatibility mode would be wrong. 560 ASSERT(!m_isParsingFragment); 561 if (m_isParsingFragment) 562 return; 563 564 if (token->forceQuirks()) 565 setCompatibilityMode(Document::QuirksMode); 566 else { 567 setCompatibilityModeFromDoctype(token->name(), publicId, systemId); 568 } 569 } 570 571 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token) 572 { 573 ASSERT(token->type() == HTMLToken::Comment); 574 attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment())); 575 } 576 577 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token) 578 { 579 ASSERT(token->type() == HTMLToken::Comment); 580 ASSERT(m_document); 581 attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment())); 582 } 583 584 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token) 585 { 586 ASSERT(token->type() == HTMLToken::Comment); 587 ContainerNode* parent = m_openElements.rootNode(); 588 attachLater(parent, Comment::create(parent->document(), token->comment())); 589 } 590 591 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token) 592 { 593 ASSERT(!shouldFosterParent()); 594 m_head = HTMLStackItem::create(createHTMLElement(token), token); 595 attachLater(currentNode(), m_head->element()); 596 m_openElements.pushHTMLHeadElement(m_head); 597 } 598 599 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token) 600 { 601 ASSERT(!shouldFosterParent()); 602 RefPtrWillBeRawPtr<Element> body = createHTMLElement(token); 603 attachLater(currentNode(), body); 604 m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token)); 605 if (LocalFrame* frame = m_document->frame()) 606 frame->loader().client()->dispatchWillInsertBody(); 607 } 608 609 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted) 610 { 611 RefPtrWillBeRawPtr<Element> element = createHTMLElement(token); 612 ASSERT(isHTMLFormElement(element)); 613 m_form = static_pointer_cast<HTMLFormElement>(element.release()); 614 m_form->setDemoted(isDemoted); 615 attachLater(currentNode(), m_form.get()); 616 m_openElements.push(HTMLStackItem::create(m_form.get(), token)); 617 } 618 619 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token) 620 { 621 RefPtrWillBeRawPtr<Element> element = createHTMLElement(token); 622 attachLater(currentNode(), element); 623 m_openElements.push(HTMLStackItem::create(element.release(), token)); 624 } 625 626 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token) 627 { 628 ASSERT(token->type() == HTMLToken::StartTag); 629 // Normally HTMLElementStack is responsible for calling finishParsingChildren, 630 // but self-closing elements are never in the element stack so the stack 631 // doesn't get a chance to tell them that we're done parsing their children. 632 attachLater(currentNode(), createHTMLElement(token), true); 633 // FIXME: Do we want to acknowledge the token's self-closing flag? 634 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag 635 } 636 637 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token) 638 { 639 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements 640 // Possible active formatting elements include: 641 // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u. 642 insertHTMLElement(token); 643 m_activeFormattingElements.append(currentElementRecord()->stackItem()); 644 } 645 646 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token) 647 { 648 // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started 649 // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment 650 // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them. 651 // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see 652 // those flags or effects thereof. 653 const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted; 654 const bool alreadyStarted = m_isParsingFragment && parserInserted; 655 RefPtrWillBeRawPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted); 656 setAttributes(element.get(), token, m_parserContentPolicy); 657 if (scriptingContentIsAllowed(m_parserContentPolicy)) 658 attachLater(currentNode(), element); 659 m_openElements.push(HTMLStackItem::create(element.release(), token)); 660 } 661 662 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI) 663 { 664 ASSERT(token->type() == HTMLToken::StartTag); 665 notImplemented(); // parseError when xmlns or xmlns:xlink are wrong. 666 667 RefPtrWillBeRawPtr<Element> element = createElement(token, namespaceURI); 668 if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get())) 669 attachLater(currentNode(), element, token->selfClosing()); 670 if (!token->selfClosing()) 671 m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI)); 672 } 673 674 void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode) 675 { 676 HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert); 677 dummyTask.parent = currentNode(); 678 679 if (shouldFosterParent()) 680 findFosterSite(dummyTask); 681 682 // FIXME: This probably doesn't need to be done both here and in insert(Task). 683 if (isHTMLTemplateElement(*dummyTask.parent)) 684 dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content(); 685 686 // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token. 687 // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b" 688 // In either case we have to flush the pending text into the task queue before making more. 689 if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent || m_pendingText.nextChild != dummyTask.nextChild)) 690 flushPendingText(); 691 m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode); 692 } 693 694 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child) 695 { 696 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent); 697 task.parent = newParent->node(); 698 task.child = child->node(); 699 queueTask(task); 700 } 701 702 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child) 703 { 704 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent); 705 task.parent = newParent->node(); 706 task.child = child->node(); 707 queueTask(task); 708 } 709 710 void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child) 711 { 712 if (newParent->causesFosterParenting()) { 713 fosterParent(child->node()); 714 return; 715 } 716 717 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild); 718 task.parent = newParent->node(); 719 task.child = child->node(); 720 queueTask(task); 721 } 722 723 void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent) 724 { 725 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren); 726 task.parent = newParent->node(); 727 task.child = oldParent->node(); 728 queueTask(task); 729 } 730 731 PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI) 732 { 733 QualifiedName tagName(nullAtom, token->name(), namespaceURI); 734 RefPtrWillBeRawPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true); 735 setAttributes(element.get(), token, m_parserContentPolicy); 736 return element.release(); 737 } 738 739 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode() 740 { 741 if (isHTMLTemplateElement(*currentNode())) 742 return toHTMLTemplateElement(currentElement())->content()->document(); 743 return currentNode()->document(); 744 } 745 746 PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token) 747 { 748 Document& document = ownerDocumentForCurrentNode(); 749 // Only associate the element with the current form if we're creating the new element 750 // in a document with a browsing context (rather than in <template> contents). 751 HTMLFormElement* form = document.frame() ? m_form.get() : 0; 752 // FIXME: This can't use HTMLConstructionSite::createElement because we 753 // have to pass the current form element. We should rework form association 754 // to occur after construction to allow better code sharing here. 755 RefPtrWillBeRawPtr<Element> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true); 756 setAttributes(element.get(), token, m_parserContentPolicy); 757 ASSERT(element->isHTMLElement()); 758 return element.release(); 759 } 760 761 PassRefPtrWillBeRawPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item) 762 { 763 RefPtrWillBeRawPtr<Element> element; 764 // NOTE: Moving from item -> token -> item copies the Attribute vector twice! 765 AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes()); 766 if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI) 767 element = createHTMLElement(&fakeToken); 768 else 769 element = createElement(&fakeToken, item->namespaceURI()); 770 return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI()); 771 } 772 773 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const 774 { 775 if (m_activeFormattingElements.isEmpty()) 776 return false; 777 unsigned index = m_activeFormattingElements.size(); 778 do { 779 --index; 780 const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index); 781 if (entry.isMarker() || m_openElements.contains(entry.element())) { 782 firstUnopenElementIndex = index + 1; 783 return firstUnopenElementIndex < m_activeFormattingElements.size(); 784 } 785 } while (index); 786 firstUnopenElementIndex = index; 787 return true; 788 } 789 790 void HTMLConstructionSite::reconstructTheActiveFormattingElements() 791 { 792 unsigned firstUnopenElementIndex; 793 if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex)) 794 return; 795 796 unsigned unopenEntryIndex = firstUnopenElementIndex; 797 ASSERT(unopenEntryIndex < m_activeFormattingElements.size()); 798 for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) { 799 HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex); 800 RefPtrWillBeRawPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get()); 801 attachLater(currentNode(), reconstructed->node()); 802 m_openElements.push(reconstructed); 803 unopenedEntry.replaceElement(reconstructed.release()); 804 } 805 } 806 807 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName) 808 { 809 while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName)) 810 m_openElements.pop(); 811 } 812 813 void HTMLConstructionSite::generateImpliedEndTags() 814 { 815 while (hasImpliedEndTag(currentStackItem())) 816 m_openElements.pop(); 817 } 818 819 bool HTMLConstructionSite::inQuirksMode() 820 { 821 return m_inQuirksMode; 822 } 823 824 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task) 825 { 826 // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!) 827 HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName()); 828 if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) { 829 task.parent = lastTemplateElement->element(); 830 return; 831 } 832 833 HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName()); 834 if (lastTableElementRecord) { 835 Element* lastTableElement = lastTableElementRecord->element(); 836 ContainerNode* parent; 837 if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag)) 838 parent = lastTableElementRecord->next()->element(); 839 else 840 parent = lastTableElement->parentNode(); 841 842 // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency, 843 // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here. 844 if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) { 845 task.parent = parent; 846 task.nextChild = lastTableElement; 847 return; 848 } 849 task.parent = lastTableElementRecord->next()->element(); 850 return; 851 } 852 // Fragment case 853 task.parent = m_openElements.rootNode(); // DocumentFragment 854 } 855 856 bool HTMLConstructionSite::shouldFosterParent() const 857 { 858 return m_redirectAttachToFosterParent 859 && currentStackItem()->isElementNode() 860 && currentStackItem()->causesFosterParenting(); 861 } 862 863 void HTMLConstructionSite::fosterParent(PassRefPtrWillBeRawPtr<Node> node) 864 { 865 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert); 866 findFosterSite(task); 867 task.child = node; 868 ASSERT(task.parent); 869 queueTask(task); 870 } 871 872 void HTMLConstructionSite::PendingText::trace(Visitor* visitor) 873 { 874 visitor->trace(parent); 875 visitor->trace(nextChild); 876 } 877 878 879 } 880