1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2011 Apple Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 */ 26 27 #include "config.h" 28 #include "core/html/parser/HTMLConstructionSite.h" 29 30 #include "core/HTMLElementFactory.h" 31 #include "core/HTMLNames.h" 32 #include "core/dom/Comment.h" 33 #include "core/dom/DocumentFragment.h" 34 #include "core/dom/DocumentType.h" 35 #include "core/dom/Element.h" 36 #include "core/dom/ScriptLoader.h" 37 #include "core/dom/Text.h" 38 #include "core/frame/LocalFrame.h" 39 #include "core/html/HTMLFormElement.h" 40 #include "core/html/HTMLHtmlElement.h" 41 #include "core/html/HTMLPlugInElement.h" 42 #include "core/html/HTMLScriptElement.h" 43 #include "core/html/HTMLTemplateElement.h" 44 #include "core/html/parser/AtomicHTMLToken.h" 45 #include "core/html/parser/HTMLParserIdioms.h" 46 #include "core/html/parser/HTMLStackItem.h" 47 #include "core/html/parser/HTMLToken.h" 48 #include "core/loader/FrameLoader.h" 49 #include "core/loader/FrameLoaderClient.h" 50 #include "core/svg/SVGScriptElement.h" 51 #include "platform/NotImplemented.h" 52 #include "platform/text/TextBreakIterator.h" 53 #include <limits> 54 55 namespace blink { 56 57 using namespace HTMLNames; 58 59 static const unsigned maximumHTMLParserDOMTreeDepth = 512; 60 61 static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy) 62 { 63 if (!scriptingContentIsAllowed(parserContentPolicy)) 64 element->stripScriptingAttributes(token->attributes()); 65 element->parserSetAttributes(token->attributes()); 66 } 67 68 static bool hasImpliedEndTag(const HTMLStackItem* item) 69 { 70 return item->hasTagName(ddTag) 71 || item->hasTagName(dtTag) 72 || item->hasTagName(liTag) 73 || item->hasTagName(optionTag) 74 || item->hasTagName(optgroupTag) 75 || item->hasTagName(pTag) 76 || item->hasTagName(rbTag) 77 || item->hasTagName(rpTag) 78 || item->hasTagName(rtTag) 79 || item->hasTagName(rtcTag); 80 } 81 82 static bool shouldUseLengthLimit(const ContainerNode& node) 83 { 84 return !isHTMLScriptElement(node) 85 && 
!isHTMLStyleElement(node) 86 && !isSVGScriptElement(node); 87 } 88 89 static unsigned textLengthLimitForContainer(const ContainerNode& node) 90 { 91 return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max(); 92 } 93 94 static inline bool isAllWhitespace(const String& string) 95 { 96 return string.isAllSpecialCharacters<isHTMLSpace<UChar> >(); 97 } 98 99 static inline void insert(HTMLConstructionSiteTask& task) 100 { 101 if (isHTMLTemplateElement(*task.parent)) 102 task.parent = toHTMLTemplateElement(task.parent.get())->content(); 103 104 if (ContainerNode* parent = task.child->parentNode()) 105 parent->parserRemoveChild(*task.child); 106 107 if (task.nextChild) 108 task.parent->parserInsertBefore(task.child.get(), *task.nextChild); 109 else 110 task.parent->parserAppendChild(task.child.get()); 111 } 112 113 static inline void executeInsertTask(HTMLConstructionSiteTask& task) 114 { 115 ASSERT(task.operation == HTMLConstructionSiteTask::Insert); 116 117 insert(task); 118 119 if (task.child->isElementNode()) { 120 Element& child = toElement(*task.child); 121 child.beginParsingChildren(); 122 if (task.selfClosing) 123 child.finishParsingChildren(); 124 } 125 } 126 127 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task) 128 { 129 ASSERT(task.operation == HTMLConstructionSiteTask::InsertText); 130 ASSERT(task.child->isTextNode()); 131 132 // Merge text nodes into previous ones if possible: 133 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character 134 Text* newText = toText(task.child.get()); 135 Node* previousChild = task.nextChild ? 
task.nextChild->previousSibling() : task.parent->lastChild(); 136 if (previousChild && previousChild->isTextNode()) { 137 Text* previousText = toText(previousChild); 138 unsigned lengthLimit = textLengthLimitForContainer(*task.parent); 139 if (previousText->length() + newText->length() < lengthLimit) { 140 previousText->parserAppendData(newText->data()); 141 return; 142 } 143 } 144 145 insert(task); 146 } 147 148 static inline void executeReparentTask(HTMLConstructionSiteTask& task) 149 { 150 ASSERT(task.operation == HTMLConstructionSiteTask::Reparent); 151 152 if (ContainerNode* parent = task.child->parentNode()) 153 parent->parserRemoveChild(*task.child); 154 155 task.parent->parserAppendChild(task.child); 156 } 157 158 static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task) 159 { 160 ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild); 161 162 insert(task); 163 } 164 165 static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task) 166 { 167 ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren); 168 169 task.parent->parserTakeAllChildrenFrom(*task.oldParent()); 170 } 171 172 void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task) 173 { 174 ASSERT(m_taskQueue.isEmpty()); 175 if (task.operation == HTMLConstructionSiteTask::Insert) 176 return executeInsertTask(task); 177 178 if (task.operation == HTMLConstructionSiteTask::InsertText) 179 return executeInsertTextTask(task); 180 181 // All the cases below this point are only used by the adoption agency. 
182 183 if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild) 184 return executeInsertAlreadyParsedChildTask(task); 185 186 if (task.operation == HTMLConstructionSiteTask::Reparent) 187 return executeReparentTask(task); 188 189 if (task.operation == HTMLConstructionSiteTask::TakeAllChildren) 190 return executeTakeAllChildrenTask(task); 191 192 ASSERT_NOT_REACHED(); 193 } 194 195 // This is only needed for TextDocuments where we might have text nodes 196 // approaching the default length limit (~64k) and we don't want to 197 // break a text node in the middle of a combining character. 198 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex) 199 { 200 ASSERT(currentPosition < proposedBreakIndex); 201 ASSERT(proposedBreakIndex <= string.length()); 202 // The end of the string is always a valid break. 203 if (proposedBreakIndex == string.length()) 204 return proposedBreakIndex; 205 206 // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong. 207 if (string.is8Bit()) 208 return proposedBreakIndex; 209 210 const UChar* breakSearchCharacters = string.characters16() + currentPosition; 211 // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer! 212 unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition); 213 NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength); 214 215 if (it.isBreak(proposedBreakIndex - currentPosition)) 216 return proposedBreakIndex; 217 218 int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition); 219 if (adjustedBreakIndexInSubstring > 0) 220 return currentPosition + adjustedBreakIndexInSubstring; 221 // We failed to find a breakable point, let the caller figure out what to do. 
222 return 0; 223 } 224 225 static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode) 226 { 227 // Strings composed entirely of whitespace are likely to be repeated. 228 // Turn them into AtomicString so we share a single string for each. 229 if (whitespaceMode == AllWhitespace || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(string))) 230 return AtomicString(string).string(); 231 return string; 232 } 233 234 void HTMLConstructionSite::flushPendingText(FlushMode mode) 235 { 236 if (m_pendingText.isEmpty()) 237 return; 238 239 if (mode == FlushIfAtTextLimit 240 && !shouldUseLengthLimit(*m_pendingText.parent)) 241 return; 242 243 PendingText pendingText; 244 // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely. 245 m_pendingText.swap(pendingText); 246 ASSERT(m_pendingText.isEmpty()); 247 248 // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary 249 // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898 250 unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent); 251 252 unsigned currentPosition = 0; 253 const StringBuilder& string = pendingText.stringBuilder; 254 while (currentPosition < string.length()) { 255 unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length()); 256 unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex); 257 ASSERT(breakIndex <= string.length()); 258 String substring = string.substring(currentPosition, breakIndex - currentPosition); 259 substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode); 260 261 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText); 262 task.parent = pendingText.parent; 263 task.nextChild = pendingText.nextChild; 264 task.child = Text::create(task.parent->document(), substring); 265 queueTask(task); 266 267 ASSERT(breakIndex > currentPosition); 268 ASSERT(breakIndex - currentPosition 
== substring.length()); 269 ASSERT(toText(task.child.get())->length() == substring.length()); 270 currentPosition = breakIndex; 271 } 272 } 273 274 void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task) 275 { 276 flushPendingText(FlushAlways); 277 ASSERT(m_pendingText.isEmpty()); 278 m_taskQueue.append(task); 279 } 280 281 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> prpChild, bool selfClosing) 282 { 283 ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get()))); 284 ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !isHTMLPlugInElement(prpChild)); 285 286 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert); 287 task.parent = parent; 288 task.child = prpChild; 289 task.selfClosing = selfClosing; 290 291 if (shouldFosterParent()) { 292 fosterParent(task.child); 293 return; 294 } 295 296 // Add as a sibling of the parent if we have reached the maximum depth allowed. 297 if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode()) 298 task.parent = task.parent->parentNode(); 299 300 ASSERT(task.parent); 301 queueTask(task); 302 } 303 304 void HTMLConstructionSite::executeQueuedTasks() 305 { 306 // This has no affect on pendingText, and we may have pendingText 307 // remaining after executing all other queued tasks. 308 const size_t size = m_taskQueue.size(); 309 if (!size) 310 return; 311 312 // Copy the task queue into a local variable in case executeTask 313 // re-enters the parser. 314 TaskQueue queue; 315 queue.swap(m_taskQueue); 316 317 for (size_t i = 0; i < size; ++i) 318 executeTask(queue[i]); 319 320 // We might be detached now. 
}

// Construction site for parsing a whole document: nodes are attached to
// |document| itself and the quirks-mode flag is seeded from it.
HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
    : m_document(document)
    , m_attachmentRoot(document)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(false)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(document->inQuirksMode())
{
    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}

// Construction site for fragment parsing: nodes are attached to |fragment|,
// while the document and the quirks-mode flag come from the fragment's document.
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
    : m_document(&fragment->document())
    , m_attachmentRoot(fragment)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(true)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(fragment->document().inQuirksMode())
{
    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}

HTMLConstructionSite::~HTMLConstructionSite()
{
    // Depending on why we're being destroyed it might be OK
    // to forget queued tasks, but currently we don't expect to.
    ASSERT(m_taskQueue.isEmpty());
    // Currently we assume that text will never be the last token in the
    // document and that we'll always queue some additional task to cause it to flush.
    ASSERT(m_pendingText.isEmpty());
}

// Reports the members listed below to |visitor|.
void HTMLConstructionSite::trace(Visitor* visitor)
{
    visitor->trace(m_document);
    visitor->trace(m_attachmentRoot);
    visitor->trace(m_head);
    visitor->trace(m_form);
    visitor->trace(m_openElements);
    visitor->trace(m_activeFormattingElements);
    visitor->trace(m_taskQueue);
    visitor->trace(m_pendingText);
}

// Discards any pending text and drops the references to the document and the
// attachment root.
void HTMLConstructionSite::detach()
{
    // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
    // text that really should have made it into the DOM earlier, but there
    // doesn't seem to be a nice way to do that.
372 m_pendingText.discard(); 373 m_document = nullptr; 374 m_attachmentRoot = nullptr; 375 } 376 377 void HTMLConstructionSite::setForm(HTMLFormElement* form) 378 { 379 // This method should only be needed for HTMLTreeBuilder in the fragment case. 380 ASSERT(!m_form); 381 m_form = form; 382 } 383 384 PassRefPtrWillBeRawPtr<HTMLFormElement> HTMLConstructionSite::takeForm() 385 { 386 return m_form.release(); 387 } 388 389 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded() 390 { 391 ASSERT(m_document); 392 if (m_document->frame() && !m_isParsingFragment) 393 m_document->frame()->loader().dispatchDocumentElementAvailable(); 394 } 395 396 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token) 397 { 398 ASSERT(m_document); 399 RefPtrWillBeRawPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document); 400 setAttributes(element.get(), token, m_parserContentPolicy); 401 attachLater(m_attachmentRoot, element); 402 m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token)); 403 404 executeQueuedTasks(); 405 element->insertedByParser(); 406 dispatchDocumentElementAvailableIfNeeded(); 407 } 408 409 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element) 410 { 411 if (token->attributes().isEmpty()) 412 return; 413 414 for (unsigned i = 0; i < token->attributes().size(); ++i) { 415 const Attribute& tokenAttribute = token->attributes().at(i); 416 if (element->attributesWithoutUpdate().findIndex(tokenAttribute.name()) == kNotFound) 417 element->setAttribute(tokenAttribute.name(), tokenAttribute.value()); 418 } 419 } 420 421 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token) 422 { 423 // Fragments do not have a root HTML element, so any additional HTML elements 424 // encountered during fragment parsing should be ignored. 
425 if (m_isParsingFragment) 426 return; 427 428 mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement()); 429 } 430 431 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token) 432 { 433 mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement()); 434 } 435 436 void HTMLConstructionSite::setDefaultCompatibilityMode() 437 { 438 if (m_isParsingFragment) 439 return; 440 setCompatibilityMode(Document::QuirksMode); 441 } 442 443 void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode) 444 { 445 m_inQuirksMode = (mode == Document::QuirksMode); 446 m_document->setCompatibilityMode(mode); 447 } 448 449 void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId) 450 { 451 // There are three possible compatibility modes: 452 // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can 453 // be omitted from numbers. 454 // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model. 455 // No Quirks - no quirks apply. Web pages will obey the specifications to the letter. 456 457 // Check for Quirks Mode. 
458 if (name != "html" 459 || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false) 460 || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false) 461 || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false) 462 || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false) 463 || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false) 464 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false) 465 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false) 466 || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false) 467 || publicId.startsWith("-//IETF//DTD HTML 2.0//", false) 468 || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false) 469 || publicId.startsWith("-//IETF//DTD HTML 3.0//", false) 470 || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false) 471 || publicId.startsWith("-//IETF//DTD HTML 3.2//", false) 472 || publicId.startsWith("-//IETF//DTD HTML 3//", false) 473 || publicId.startsWith("-//IETF//DTD HTML Level 0//", false) 474 || publicId.startsWith("-//IETF//DTD HTML Level 1//", false) 475 || publicId.startsWith("-//IETF//DTD HTML Level 2//", false) 476 || publicId.startsWith("-//IETF//DTD HTML Level 3//", false) 477 || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false) 478 || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false) 479 || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false) 480 || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false) 481 || publicId.startsWith("-//IETF//DTD HTML Strict//", false) 482 || publicId.startsWith("-//IETF//DTD HTML//", false) 483 || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false) 484 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false) 485 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false) 486 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 
Tables//", false) 487 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false) 488 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false) 489 || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false) 490 || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false) 491 || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false) 492 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false) 493 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false) 494 || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false) 495 || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false) 496 || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false) 497 || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false) 498 || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false) 499 || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false) 500 || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false) 501 || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false) 502 || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false) 503 || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false) 504 || publicId.startsWith("-//W3C//DTD HTML 3.2//", false) 505 || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false) 506 || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false) 507 || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false) 508 || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false) 509 || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false) 510 || publicId.startsWith("-//W3C//DTD W3 HTML//", false) 511 || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false) 512 
|| equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//") 513 || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false) 514 || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false) 515 || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN") 516 || equalIgnoringCase(publicId, "HTML") 517 || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") 518 || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false)) 519 || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) { 520 setCompatibilityMode(Document::QuirksMode); 521 return; 522 } 523 524 // Check for Limited Quirks Mode. 525 if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false) 526 || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false) 527 || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false)) 528 || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) { 529 setCompatibilityMode(Document::LimitedQuirksMode); 530 return; 531 } 532 533 // Otherwise we are No Quirks Mode. 534 setCompatibilityMode(Document::NoQuirksMode); 535 } 536 537 void HTMLConstructionSite::processEndOfFile() 538 { 539 ASSERT(currentNode()); 540 flush(FlushAlways); 541 openElements()->popAll(); 542 } 543 544 void HTMLConstructionSite::finishedParsing() 545 { 546 // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute. 
547 ASSERT(m_taskQueue.isEmpty()); 548 flush(FlushAlways); 549 m_document->finishedParsing(); 550 } 551 552 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token) 553 { 554 ASSERT(token->type() == HTMLToken::DOCTYPE); 555 556 const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier()); 557 const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier()); 558 RefPtrWillBeRawPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId); 559 attachLater(m_attachmentRoot, doctype.release()); 560 561 // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which 562 // never occurs. However, if we ever chose to support such, this code is subtly wrong, 563 // because context-less fragments can determine their own quirks mode, and thus change 564 // parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this code 565 // in a fragment, as changing the owning document's compatibility mode would be wrong. 
566 ASSERT(!m_isParsingFragment); 567 if (m_isParsingFragment) 568 return; 569 570 if (token->forceQuirks()) 571 setCompatibilityMode(Document::QuirksMode); 572 else { 573 setCompatibilityModeFromDoctype(token->name(), publicId, systemId); 574 } 575 } 576 577 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token) 578 { 579 ASSERT(token->type() == HTMLToken::Comment); 580 attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment())); 581 } 582 583 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token) 584 { 585 ASSERT(token->type() == HTMLToken::Comment); 586 ASSERT(m_document); 587 attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment())); 588 } 589 590 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token) 591 { 592 ASSERT(token->type() == HTMLToken::Comment); 593 ContainerNode* parent = m_openElements.rootNode(); 594 attachLater(parent, Comment::create(parent->document(), token->comment())); 595 } 596 597 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token) 598 { 599 ASSERT(!shouldFosterParent()); 600 m_head = HTMLStackItem::create(createHTMLElement(token), token); 601 attachLater(currentNode(), m_head->element()); 602 m_openElements.pushHTMLHeadElement(m_head); 603 } 604 605 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token) 606 { 607 ASSERT(!shouldFosterParent()); 608 RefPtrWillBeRawPtr<HTMLElement> body = createHTMLElement(token); 609 attachLater(currentNode(), body); 610 m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token)); 611 if (LocalFrame* frame = m_document->frame()) 612 frame->loader().client()->dispatchWillInsertBody(); 613 } 614 615 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted) 616 { 617 RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token); 618 ASSERT(isHTMLFormElement(element)); 619 m_form = 
static_pointer_cast<HTMLFormElement>(element.release()); 620 m_form->setDemoted(isDemoted); 621 attachLater(currentNode(), m_form.get()); 622 m_openElements.push(HTMLStackItem::create(m_form.get(), token)); 623 } 624 625 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token) 626 { 627 RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token); 628 attachLater(currentNode(), element); 629 m_openElements.push(HTMLStackItem::create(element.release(), token)); 630 } 631 632 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token) 633 { 634 ASSERT(token->type() == HTMLToken::StartTag); 635 // Normally HTMLElementStack is responsible for calling finishParsingChildren, 636 // but self-closing elements are never in the element stack so the stack 637 // doesn't get a chance to tell them that we're done parsing their children. 638 attachLater(currentNode(), createHTMLElement(token), true); 639 // FIXME: Do we want to acknowledge the token's self-closing flag? 640 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag 641 } 642 643 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token) 644 { 645 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements 646 // Possible active formatting elements include: 647 // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u. 648 insertHTMLElement(token); 649 m_activeFormattingElements.append(currentElementRecord()->stackItem()); 650 } 651 652 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token) 653 { 654 // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started 655 // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment 656 // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them. 
657 // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see 658 // those flags or effects thereof. 659 const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted; 660 const bool alreadyStarted = m_isParsingFragment && parserInserted; 661 RefPtrWillBeRawPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted); 662 setAttributes(element.get(), token, m_parserContentPolicy); 663 if (scriptingContentIsAllowed(m_parserContentPolicy)) 664 attachLater(currentNode(), element); 665 m_openElements.push(HTMLStackItem::create(element.release(), token)); 666 } 667 668 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI) 669 { 670 ASSERT(token->type() == HTMLToken::StartTag); 671 notImplemented(); // parseError when xmlns or xmlns:xlink are wrong. 672 673 RefPtrWillBeRawPtr<Element> element = createElement(token, namespaceURI); 674 if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get())) 675 attachLater(currentNode(), element, token->selfClosing()); 676 if (!token->selfClosing()) 677 m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI)); 678 } 679 680 void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode) 681 { 682 HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert); 683 dummyTask.parent = currentNode(); 684 685 if (shouldFosterParent()) 686 findFosterSite(dummyTask); 687 688 // FIXME: This probably doesn't need to be done both here and in insert(Task). 689 if (isHTMLTemplateElement(*dummyTask.parent)) 690 dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content(); 691 692 // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token. 
// The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"
    // In either case we have to flush the pending text into the task queue before making more.
    if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent || m_pendingText.nextChild != dummyTask.nextChild))
        flushPendingText(FlushAlways);
    m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);
}

// Queues a Reparent task whose parent is |newParent|'s node and whose child
// is |child|'s node. Nothing is moved here; the work is deferred to whatever
// consumes the task queue.
void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    task.parent = newParent->node();
    task.child = child->node();
    queueTask(task);
}

// Overload of reparent() for a child that is described by an HTMLStackItem
// rather than by an element-stack record. The queued task is built the same way.
void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    task.parent = newParent->node();
    task.child = child->node();
    queueTask(task);
}

// Queues insertion of |child|'s node under |newParent|'s node.
// If |newParent| reports causesFosterParenting(), the node is handed to
// fosterParent() instead and no InsertAlreadyParsedChild task is queued.
void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
{
    if (newParent->causesFosterParenting()) {
        fosterParent(child->node());
        return;
    }

    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
    task.parent = newParent->node();
    task.child = child->node();
    queueTask(task);
}

// Queues a TakeAllChildren task with |newParent|'s node as the task parent.
// Note that the task's child slot carries |oldParent|'s node, i.e. presumably
// the node whose children are to be moved (the task executor is not in this
// part of the file -- confirm there).
void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
    task.parent = newParent->node();
    task.child = oldParent->node();
    queueTask(task);
}

// Creates an element for |token| in namespace |namespaceURI|.
// The qualified name is built with a null prefix and the token's name, the
// element is created by the document returned from
// ownerDocumentForCurrentNode(), and the token's attributes are applied via
// setAttributes() under the current parser content policy.
PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
{
    QualifiedName tagName(nullAtom, token->name(), namespaceURI);
    // NOTE(review): the trailing |true| is presumably the "created by parser" flag -- confirm against Document::createElement.
    RefPtrWillBeRawPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true);
    setAttributes(element.get(), token, m_parserContentPolicy);
    return element.release();
}

// Returns the document that new elements should be created in: when the
// current node is a <template> element this is the document of the template's
// content(), otherwise it is the current node's own document.
inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
{
    if (isHTMLTemplateElement(*currentNode()))
        return toHTMLTemplateElement(currentElement())->content()->document();
    return currentNode()->document();
}

// Creates an HTML element for |token| through HTMLElementFactory, passing the
// current form (m_form) only when the target document has a frame, and then
// applies the token's attributes via setAttributes().
PassRefPtrWillBeRawPtr<HTMLElement> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
{
    Document& document = ownerDocumentForCurrentNode();
    // Only associate the element with the current form if we're creating the new element
    // in a document with a browsing context (rather than in <template> contents).
    HTMLFormElement* form = document.frame() ? m_form.get() : 0;
    // FIXME: This can't use HTMLConstructionSite::createElement because we
    // have to pass the current form element. We should rework form association
    // to occur after construction to allow better code sharing here.
    RefPtrWillBeRawPtr<HTMLElement> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true);
    setAttributes(element.get(), token, m_parserContentPolicy);
    return element.release();
}

// Builds a brand new element (and a new HTMLStackItem wrapping it) from the
// local name, attributes and namespace saved in |item|.
// A temporary start-tag token is synthesized from |item|; items in the XHTML
// namespace go through createHTMLElement(), everything else through
// createElement() with the item's namespace.
PassRefPtrWillBeRawPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
{
    RefPtrWillBeRawPtr<Element> element;
    // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
    AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
    if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
        element = createHTMLElement(&fakeToken);
    else
        element = createElement(&fakeToken, item->namespaceURI());
    return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
}

// Finds the start of the trailing run of active formatting entries that are
// neither markers nor currently on the stack of open elements.
//
// The list is scanned from its last entry towards index 0. At the first entry
// that is a marker or whose element is in m_openElements,
// |firstUnopenElementIndex| is set to the index just after it and the result
// says whether that index is still inside the list. If the scan reaches index
// 0 without such an entry, |firstUnopenElementIndex| is 0 and the result is true.
//
// Returns false (leaving |firstUnopenElementIndex| untouched) when the list is empty.
bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
{
    if (m_activeFormattingElements.isEmpty())
        return false;
    unsigned index = m_activeFormattingElements.size();
    // |index| is unsigned, so decrement first and test afterwards; the list is
    // known to be non-empty here, which keeps the first --index from wrapping.
    do {
        --index;
        const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
        if (entry.isMarker() || m_openElements.contains(entry.element())) {
            firstUnopenElementIndex = index + 1;
            return firstUnopenElementIndex < m_activeFormattingElements.size();
        }
    } while (index);
    firstUnopenElementIndex = index;
    return true;
}

// Re-creates every active formatting element from the index reported by
// indexOfFirstUnopenFormattingElement() to the end of the list.
// For each such entry, in list order: a fresh element is created from the
// entry's saved stack item, queued for attachment under the current node,
// pushed onto the stack of open elements, and stored back into the entry.
// Does nothing when indexOfFirstUnopenFormattingElement() returns false.
void HTMLConstructionSite::reconstructTheActiveFormattingElements()
{
    unsigned firstUnopenElementIndex;
    if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
        return;

    unsigned unopenEntryIndex = firstUnopenElementIndex;
    ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
    for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
        HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
        RefPtrWillBeRawPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
        // currentNode() is evaluated before the push below, so each new
        // element is attached under the element reconstructed just before it.
        attachLater(currentNode(), reconstructed->node());
        m_openElements.push(reconstructed);
        unopenedEntry.replaceElement(reconstructed.release());
    }
}

// Pops the stack of open elements while the current stack item has an implied
// end tag (see hasImpliedEndTag()) and is not an HTML element named |tagName|.
void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
{
    while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
        m_openElements.pop();
}

// Pops the stack of open elements while the current stack item has an implied
// end tag (see hasImpliedEndTag()).
void HTMLConstructionSite::generateImpliedEndTags()
{
    while (hasImpliedEndTag(currentStackItem()))
        m_openElements.pop();
}

// Returns the stored quirks-mode flag (m_inQuirksMode).
bool HTMLConstructionSite::inQuirksMode()
{
    return m_inQuirksMode;
}

// Fills in the insertion point of |task| for a node that is to be foster
// parented: always sets task.parent, and sets task.nextChild only when the
// node has to go immediately before the last <table> element.
void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
{
    // When a node is to be foster parented, the last template element with no table element below it in the stack of open elements is the foster parent element (NOT the template's parent!)
    HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
    if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
        task.parent = lastTemplateElement->element();
        return;
    }

    HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
    if (lastTableElementRecord) {
        Element* lastTableElement = lastTableElementRecord->element();
        ContainerNode* parent;
        // NOTE(review): next() is presumably the record directly beneath the
        // table in the stack of open elements -- confirm against HTMLElementStack.
        if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag))
            parent = lastTableElementRecord->next()->element();
        else
            parent = lastTableElement->parentNode();

        // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
        // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
        if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) {
            task.parent = parent;
            task.nextChild = lastTableElement;
            return;
        }
        // No usable parent was found for the table: fall back to the element
        // of the record returned by next(), with no nextChild set.
        task.parent = lastTableElementRecord->next()->element();
        return;
    }
    // Fragment case
    task.parent = m_openElements.rootNode(); // DocumentFragment
}

// Returns true only when m_redirectAttachToFosterParent is set and the
// current stack item is an element node that reports causesFosterParenting().
bool HTMLConstructionSite::shouldFosterParent() const
{
    return m_redirectAttachToFosterParent
        && currentStackItem()->isElementNode()
        && currentStackItem()->causesFosterParenting();
}

// Queues an Insert task that places |node| at the location chosen by
// findFosterSite(). Asserts that a parent was found.
void HTMLConstructionSite::fosterParent(PassRefPtrWillBeRawPtr<Node> node)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    findFosterSite(task);
    task.child = node;
    ASSERT(task.parent);
    queueTask(task);
}

// Reports the node references held by the pending text (parent and nextChild)
// to |visitor|.
void HTMLConstructionSite::PendingText::trace(Visitor* visitor)
{
    visitor->trace(parent);
    visitor->trace(nextChild);
}


} // namespace blink