/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 * Copyright (C) 2011 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "core/html/parser/HTMLTreeBuilder.h"

#include "HTMLElementFactory.h"
#include "HTMLNames.h"
#include "core/dom/Comment.h"
#include "core/dom/DocumentFragment.h"
#include "core/dom/DocumentType.h"
#include "core/dom/Element.h"
#include "core/dom/ScriptLoader.h"
#include "core/dom/Text.h"
#include "core/html/HTMLFormElement.h"
#include "core/html/HTMLHtmlElement.h"
#include "core/html/HTMLOptGroupElement.h"
#include "core/html/HTMLScriptElement.h"
#include "core/html/HTMLTemplateElement.h"
#include "core/html/parser/AtomicHTMLToken.h"
#include "core/html/parser/HTMLParserIdioms.h"
#include "core/html/parser/HTMLStackItem.h"
#include "core/html/parser/HTMLToken.h"
#include "core/loader/FrameLoader.h"
#include "core/loader/FrameLoaderClient.h"
#include "core/page/Frame.h"
#include "core/platform/NotImplemented.h"
#include <limits>

namespace WebCore {

using namespace HTMLNames;

// Once the stack of open elements is deeper than this, attachLater() stops
// nesting: new nodes are queued under the intended parent's own parent
// instead (i.e. as a sibling of the intended parent).
static const unsigned maximumHTMLParserDOMTreeDepth = 512;

// Hands the token's attributes to |element| via parserSetAttributes().
// When |parserContentPolicy| does not allow scripting content,
// stripScriptingAttributes() is run over the token's attribute list first.
static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
{
    if (!scriptingContentIsAllowed(parserContentPolicy))
        element->stripScriptingAttributes(token->attributes());
    element->parserSetAttributes(token->attributes());
}

// Returns true when |item| is a dd, dt, li, option, optgroup, p, rp or rt
// element. generateImpliedEndTags() and generateImpliedEndTagsWithExclusion()
// pop the stack of open elements for as long as this holds for the current
// item.
static bool hasImpliedEndTag(const HTMLStackItem* item)
{
    return item->hasTagName(ddTag)
        || item->hasTagName(dtTag)
        || item->hasTagName(liTag)
        || item->hasTagName(optionTag)
        || isHTMLOptGroupElement(item->node())
        || item->hasTagName(pTag)
        || item->hasTagName(rpTag)
        || item->hasTagName(rtTag);
}

// Returns false for HTML <script>, HTML <style> and SVG <script> parents and
// true for everything else. insertTextNode() uses the answer to decide
// whether text is chunked at Text::defaultLengthLimit or left unbounded.
static bool shouldUseLengthLimit(const ContainerNode* node)
{
    return !node->hasTagName(scriptTag)
        && !node->hasTagName(styleTag)
        && !node->hasTagName(SVGNames::scriptTag);
}

// Returns true when every character of |string| satisfies isHTMLSpace.
static inline bool isAllWhitespace(const String& string)
{
    return string.isAllSpecialCharacters<isHTMLSpace>();
}

// Performs the DOM mutation described by |task|: detaches task.child from
// whatever parent it currently has and inserts it into task.parent, either
// before task.nextChild or, when there is no nextChild, at the end.
//
// The |lazyAttach| parameter to this function exists for historical reasons.
// There used to be two code paths, one that used lazyAttach and one that
// didn't. We should make the two code paths consistent and either use
// lazyAttach or non-lazyAttach, but we wanted to make that change separately.
static inline void insert(HTMLConstructionSiteTask& task, bool lazyAttach)
{
    // Children of a <template> element go into its content() rather than
    // into the element itself.
    if (task.parent->hasTagName(templateTag))
        task.parent = toHTMLTemplateElement(task.parent.get())->content();

    if (ContainerNode* parent = task.child->parentNode())
        parent->parserRemoveChild(task.child.get());

    if (task.nextChild)
        task.parent->parserInsertBefore(task.child.get(), task.nextChild.get());
    else
        task.parent->parserAppendChild(task.child.get());

    // JavaScript run from beforeload (or DOM Mutation or event handlers)
    // might have removed the child, in which case we should not attach it.

    if (task.child->parentNode() && task.parent->attached() && !task.child->attached()) {
        if (lazyAttach)
            task.child->lazyAttach();
        else
            task.child->attach();
    }
}

// Runs an Insert task: inserts the child (non-lazy attach), then calls
// beginParsingChildren() on it. For self-closing tasks
// finishParsingChildren() is called right away as well.
static inline void executeInsertTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::Insert);

    insert(task, false);

    task.child->beginParsingChildren();

    if (task.selfClosing)
        task.child->finishParsingChildren();
}

// Runs a Reparent task: removes the child from its current parent (if any),
// appends it to task.parent and lazily attaches it when its new parent
// element is attached and the child itself is not.
static inline void executeReparentTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);

    if (ContainerNode* parent = task.child->parentNode())
        parent->parserRemoveChild(task.child.get());

    task.parent->parserAppendChild(task.child);

    // NOTE(review): parentElement() is dereferenced without a null check,
    // unlike the parentNode() guard in insert(). This assumes the append above
    // always leaves the child under an Element - confirm.
    if (task.child->parentElement()->attached() && !task.child->attached())
        task.child->lazyAttach();
}

// Runs an InsertAlreadyParsedChild task: the same insertion as an Insert
// task but with lazy attach, and without the beginParsingChildren() /
// finishParsingChildren() notifications that executeInsertTask() sends.
static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);

    insert(task, true);
}

// Runs a TakeAllChildren task: task.parent takes all children from
// task.oldParent().
// NOTE(review): takeAllChildren() stores the old parent in task.child, so
// oldParent() presumably reads that field - confirm against the task
// declaration.
static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);

    task.parent->takeAllChildrenFrom(task.oldParent());
    // Notice that we don't need to manually attach the moved children
    // because takeAllChildrenFrom does that work for us.
}

// Dispatches |task| to the executor matching its operation. An operation
// that matches none of the four known kinds trips ASSERT_NOT_REACHED().
static inline void executeTask(HTMLConstructionSiteTask& task)
{
    if (task.operation == HTMLConstructionSiteTask::Insert)
        return executeInsertTask(task);

    // All the cases below this point are only used by the adoption agency.

    if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
        return executeInsertAlreadyParsedChildTask(task);

    if (task.operation == HTMLConstructionSiteTask::Reparent)
        return executeReparentTask(task);

    if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
        return executeTakeAllChildrenTask(task);

    ASSERT_NOT_REACHED();
}

// Queues an Insert task that will place |prpChild| under |parent|. Nothing
// touches the DOM here; the insertion happens when executeQueuedTasks() runs.
// When foster parenting applies, the child is queued through fosterParent()
// instead and |parent| is not used.
void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> prpChild, bool selfClosing)
{
    ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get())));
    ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !prpChild->isPluginElement());

    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    task.parent = parent;
    task.child = prpChild;
    task.selfClosing = selfClosing;

    // NOTE(review): fosterParent() builds its own task and does not set
    // selfClosing on it, so the flag assigned above is not carried over on
    // this path - confirm that is intended.
    if (shouldFosterParent()) {
        fosterParent(task.child);
        return;
    }

    // Add as a sibling of the parent if we have reached the maximum depth allowed.
    if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode())
        task.parent = task.parent->parentNode();

    ASSERT(task.parent);
    m_taskQueue.append(task);
}

// Executes, in order, every task that was queued when this function was
// entered. Tasks queued while it runs land in the (now empty) member queue
// and are left for a later call.
void HTMLConstructionSite::executeQueuedTasks()
{
    const size_t size = m_taskQueue.size();
    if (!size)
        return;

    // Copy the task queue into a local variable in case executeTask
    // re-enters the parser.
    TaskQueue queue;
    queue.swap(m_taskQueue);

    for (size_t i = 0; i < size; ++i)
        executeTask(queue[i]);

    // We might be detached now.
}

// Sets up a construction site that parses into |document| itself: the
// document is the attachment root and fragment parsing is off. The quirks
// flag is seeded from the document.
HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
    : m_document(document)
    , m_attachmentRoot(document)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(false)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(document->inQuirksMode())
{
    ASSERT(m_document->isHTMLDocument() || m_document->isSVGDocument() || m_document->isXHTMLDocument());
}

// Sets up a construction site for fragment parsing: |fragment| is the
// attachment root, and both the document and the quirks flag come from the
// fragment's owning document.
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
    : m_document(fragment->document())
    , m_attachmentRoot(fragment)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(true)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(fragment->document()->inQuirksMode())
{
    ASSERT(m_document->isHTMLDocument() || m_document->isSVGDocument() || m_document->isXHTMLDocument());
}

HTMLConstructionSite::~HTMLConstructionSite()
{
}

// Clears the document and attachment-root pointers.
void HTMLConstructionSite::detach()
{
    m_document = 0;
    m_attachmentRoot = 0;
}

// Records |form| as the current form element. Asserts that no form is
// currently set.
void HTMLConstructionSite::setForm(HTMLFormElement* form)
{
    // This method should only be needed for HTMLTreeBuilder in the fragment case.
    ASSERT(!m_form);
    m_form = form;
}

// Hands the current form element to the caller via m_form.release().
PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
{
    return m_form.release();
}

// Tells the frame's loader that the document element is available. Does
// nothing when the document has no frame or when parsing a fragment.
void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
{
    ASSERT(m_document);
    if (m_document->frame() && !m_isParsingFragment)
        m_document->frame()->loader()->dispatchDocumentElementAvailable();
}

// Creates the root <html> element from |token|, queues it under the
// attachment root and pushes it onto the stack of open elements. The queue
// is flushed immediately so the element is in the tree before
// insertedByParser() and the document-element-available notification.
void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
{
    RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
    setAttributes(element.get(), token, m_parserContentPolicy);
    attachLater(m_attachmentRoot, element);
    m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));

    executeQueuedTasks();
    element->insertedByParser();
    dispatchDocumentElementAvailableIfNeeded();
}

// Copies onto |element| each attribute of |token| that the element does not
// already have; attributes already present on the element keep their value.
void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
{
    if (token->attributes().isEmpty())
        return;

    for (unsigned i = 0; i < token->attributes().size(); ++i) {
        const Attribute& tokenAttribute = token->attributes().at(i);
        // An element without element data is treated as having no attributes.
        if (!element->elementData() || !element->getAttributeItem(tokenAttribute.name()))
            element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
    }
}

// Handles a stray <html> start tag by merging its attributes into the
// existing <html> element.
void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
{
    // Fragments do not have a root HTML element, so any additional HTML elements
    // encountered during fragment parsing should be ignored.
    if (m_isParsingFragment)
        return;

    mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
}

// Handles a stray <body> start tag by merging its attributes into the
// existing <body> element.
void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
{
    mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
}

// Switches to quirks mode, except when parsing a fragment or when the
// document is a srcdoc document, in which case nothing changes.
void HTMLConstructionSite::setDefaultCompatibilityMode()
{
    if (m_isParsingFragment)
        return;
    if (m_document->isSrcdocDocument())
        return;
    setCompatibilityMode(Document::QuirksMode);
}

// Applies |mode| to the document and updates the cached quirks flag that
// inQuirksMode() returns.
void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
{
    m_inQuirksMode = (mode == Document::QuirksMode);
    m_document->setCompatibilityMode(mode);
}

// Chooses the document's compatibility mode from the DOCTYPE's name, public
// identifier and system identifier. The startsWith() calls pass false as
// their second argument; given the equalIgnoringCase() calls alongside them
// this is presumably the case-insensitive form - confirm against String.
void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
{
    // There are three possible compatibility modes:
    // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
    // be omitted from numbers.
    // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
    // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.

    // Check for Quirks Mode.
    if (name != "html"
        || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
        || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
        || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
        || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
        || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
        || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
        || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
        || publicId.startsWith("-//IETF//DTD HTML 3//", false)
        || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
        || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
        || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
        || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
        || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
        || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
        || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
        || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
        || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
        || publicId.startsWith("-//IETF//DTD HTML//", false)
        || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
        || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
        || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
        || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
        || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
        || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
        || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
        || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
        || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
        || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
        || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
        || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
        || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
        || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
        || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
        || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
        || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
        || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
        || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
        || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
        || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
        || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
        || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
        || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
        || equalIgnoringCase(publicId, "HTML")
        || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
        || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
        || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
        setCompatibilityMode(Document::QuirksMode);
        return;
    }

    // Check for Limited Quirks Mode.
    // The two HTML 4.01 identifiers also appear in the quirks list above;
    // which list applies depends on whether a system identifier is present.
    if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
        || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
        || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
        || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
        setCompatibilityMode(Document::LimitedQuirksMode);
        return;
    }

    // Otherwise we are No Quirks Mode.
    setCompatibilityMode(Document::NoQuirksMode);
}

// Forwards the end-of-parsing notification to the document.
void HTMLConstructionSite::finishedParsing()
{
    m_document->finishedParsing();
}

// Creates a DocumentType node from a DOCTYPE token, queues it under the
// attachment root, and then sets the compatibility mode: quirks mode when
// the token forces quirks, otherwise whatever the doctype's identifiers
// select.
void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::DOCTYPE);

    const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
    const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
    RefPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
    attachLater(m_attachmentRoot, doctype.release());

    // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
    // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
    // because context-less fragments can determine their own quirks mode, and thus change
    // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
    // in a fragment, as changing the owning document's compatibility mode would be wrong.
    ASSERT(!m_isParsingFragment);
    if (m_isParsingFragment)
        return;

    if (token->forceQuirks())
        setCompatibilityMode(Document::QuirksMode);
    else {
        setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
    }
}

// Queues a Comment node, created in the current node's owner document, as a
// child of the current node.
void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::Comment);
    attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
}

// Queues a Comment node as a child of the attachment root.
void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::Comment);
    attachLater(m_attachmentRoot, Comment::create(m_document, token->comment()));
}

// Queues a Comment node as a child of the root node of the stack of open
// elements.
void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::Comment);
    ContainerNode* parent = m_openElements.rootNode();
    attachLater(parent, Comment::create(parent->document(), token->comment()));
}

// Creates the <head> element, remembers its stack item in m_head, queues it
// under the current node and pushes it onto the stack of open elements.
void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
{
    ASSERT(!shouldFosterParent());
    m_head = HTMLStackItem::create(createHTMLElement(token), token);
    attachLater(currentNode(), m_head->element());
    m_openElements.pushHTMLHeadElement(m_head);
}

// Creates the <body> element, queues it under the current node and pushes it
// onto the stack of open elements. When the document has a frame, its
// loader client is told a body is about to be inserted.
void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
{
    ASSERT(!shouldFosterParent());
    RefPtr<Element> body = createHTMLElement(token);
    attachLater(currentNode(), body);
    m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
    if (Frame* frame = m_document->frame())
        frame->loader()->client()->dispatchWillInsertBody();
}

// Creates a <form> element, makes it the current form (m_form), records
// |isDemoted| on it, queues it under the current node and pushes it onto the
// stack of open elements.
void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
{
    RefPtr<Element> element = createHTMLElement(token);
    ASSERT(element->hasTagName(formTag));
    m_form = static_pointer_cast<HTMLFormElement>(element.release());
    m_form->setDemoted(isDemoted);
    attachLater(currentNode(), m_form);
    m_openElements.push(HTMLStackItem::create(m_form, token));
}

// Creates an HTML element from |token|, queues it under the current node and
// pushes it onto the stack of open elements.
void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
{
    RefPtr<Element> element = createHTMLElement(token);
    attachLater(currentNode(), element);
    m_openElements.push(HTMLStackItem::create(element.release(), token));
}

// Creates an HTML element from |token| and queues it under the current node
// as a self-closing insertion. The element is not pushed onto the stack of
// open elements.
void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    // Normally HTMLElementStack is responsible for calling finishParsingChildren,
    // but self-closing elements are never in the element stack so the stack
    // doesn't get a chance to tell them that we're done parsing their children.
    attachLater(currentNode(), createHTMLElement(token), true);
    // FIXME: Do we want to acknowledge the token's self-closing flag?
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
}

// Inserts an HTML element for |token| and appends the resulting stack item
// to the list of active formatting elements.
void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
{
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
    // Possible active formatting elements include:
    // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
    insertHTMLElement(token);
    m_activeFormattingElements.append(currentElementRecord()->stackItem());
}

// Creates a <script> element from |token| and pushes it onto the stack of
// open elements. The element is only queued for insertion into the tree when
// the content policy allows scripting content.
void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
{
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
    // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
    // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
    // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
    // those flags or effects thereof.
    const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
    const bool alreadyStarted = m_isParsingFragment && parserInserted;
    RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
    setAttributes(element.get(), token, m_parserContentPolicy);
    if (scriptingContentIsAllowed(m_parserContentPolicy))
        attachLater(currentNode(), element);
    m_openElements.push(HTMLStackItem::create(element.release(), token));
}

// Creates an element in |namespaceURI| from |token|. The element is queued
// for insertion unless scripting content is disallowed and the element has a
// script loader. It is pushed onto the stack of open elements only when the
// token is not self-closing.
void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.

    RefPtr<Element> element = createElement(token, namespaceURI);
    if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get()))
        attachLater(currentNode(), element, token->selfClosing());
    if (!token->selfClosing())
        m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
}

// Inserts |characters| as text under the current node, or at the foster site
// when foster parenting applies. Text is first appended to a directly
// preceding text node; whatever remains is split into new Text nodes. Unlike
// the other insert* functions, the new nodes are inserted immediately through
// executeTask() rather than being queued.
void HTMLConstructionSite::insertTextNode(const String& characters, WhitespaceMode whitespaceMode)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    task.parent = currentNode();

    if (shouldFosterParent())
        findFosterSite(task);

    // Resolve a <template> parent to its content() up front, because the
    // lookups below (document(), lastChild()) go through task.parent.
    if (task.parent->hasTagName(templateTag))
        task.parent = toHTMLTemplateElement(task.parent.get())->content();

    // Strings composed entirely of whitespace are likely to be repeated.
    // Turn them into AtomicString so we share a single string for each.
    bool shouldUseAtomicString = whitespaceMode == AllWhitespace
        || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(characters));

    unsigned currentPosition = 0;
    unsigned lengthLimit = shouldUseLengthLimit(task.parent.get()) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();

    // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
    // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.

    // The node that would directly precede the new text: the sibling before
    // the insertion point, or the parent's last child when appending.
    Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
    if (previousChild && previousChild->isTextNode()) {
        // FIXME: We're only supposed to append to this text node if it
        // was the last text node inserted by the parser.
        CharacterData* textNode = static_cast<CharacterData*>(previousChild);
        // The return value becomes the position from which the remaining
        // text is processed below.
        currentPosition = textNode->parserAppendData(characters, 0, lengthLimit);
    }

    while (currentPosition < characters.length()) {
        RefPtr<Text> textNode = Text::createWithLengthLimit(task.parent->document(), shouldUseAtomicString ? AtomicString(characters).string() : characters, currentPosition, lengthLimit);
        // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
        if (!textNode->length()) {
            String substring = characters.substring(currentPosition);
            textNode = Text::create(task.parent->document(), shouldUseAtomicString ? AtomicString(substring).string() : substring);
        }

        currentPosition += textNode->length();
        ASSERT(currentPosition <= characters.length());
        task.child = textNode.release();

        executeTask(task);
    }
}

// Queues a Reparent task that moves |child|'s node under |newParent|'s node.
void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    task.parent = newParent->node();
    task.child = child->node();
    m_taskQueue.append(task);
}

// Same as the overload above, but takes the child as a stack item rather
// than an element record.
void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    task.parent = newParent->node();
    task.child = child->node();
    m_taskQueue.append(task);
}

// Queues insertion of an already-parsed |child| under |newParent|. When
// |newParent| causes foster parenting, the child is queued at the foster
// site instead.
void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
{
    if (newParent->causesFosterParenting()) {
        fosterParent(child->node());
        return;
    }

    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
    task.parent = newParent->node();
    task.child = child->node();
    m_taskQueue.append(task);
}

// Queues a TakeAllChildren task that moves every child of |oldParent|'s node
// to |newParent|'s node.
void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
    task.parent = newParent->node();
    // The old parent travels in the task's child field.
    task.child = oldParent->node();
    m_taskQueue.append(task);
}

// Creates an element named after |token| in |namespaceURI|, in the current
// node's owner document, and applies the token's attributes to it.
PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
{
    QualifiedName tagName(nullAtom, token->name(), namespaceURI);
    RefPtr<Element> element = ownerDocumentForCurrentNode()->createElement(tagName, true);
    setAttributes(element.get(), token, m_parserContentPolicy);
    return element.release();
}

// Returns the document in which children of the current node are created:
// for a <template> element that is the document of its content(), otherwise
// the current node's own document.
inline Document* HTMLConstructionSite::ownerDocumentForCurrentNode()
{
    if (currentNode()->hasTagName(templateTag))
        return toHTMLTemplateElement(currentElement())->content()->document();
    return currentNode()->document();
}

// Creates an HTML-namespace element named after |token| through
// HTMLElementFactory and applies the token's attributes to it.
PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
{
    QualifiedName tagName(nullAtom, token->name(), xhtmlNamespaceURI);
    Document* document = ownerDocumentForCurrentNode();
    // Only associate the element with the current form if we're creating the new element
    // in a document with a browsing context (rather than in <template> contents).
    HTMLFormElement* form = document->frame() ? m_form.get() : 0;
    // FIXME: This can't use HTMLConstructionSite::createElement because we
    // have to pass the current form element.  We should rework form association
    // to occur after construction to allow better code sharing here.
    RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, document, form, true);
    setAttributes(element.get(), token, m_parserContentPolicy);
    ASSERT(element->isHTMLElement());
    return element.release();
}

// Builds a fresh element, and a stack item wrapping it, from the local name,
// attributes and namespace saved in |item|. Used by
// reconstructTheActiveFormattingElements().
PassRefPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
{
    RefPtr<Element> element;
    // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
    AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
    if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
        element = createHTMLElement(&fakeToken);
    else
        element = createElement(&fakeToken, item->namespaceURI());
    return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
}

// Finds where reconstruction of the active formatting elements has to start.
// The list is scanned from its end towards its start for the first entry
// that is a marker or whose element is on the stack of open elements;
// |firstUnopenElementIndex| is set to the index just after that entry.
// Returns false when the list is empty or when that index is past the end
// (nothing to reconstruct). If no such entry exists, the index is 0 and the
// function returns true.
bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
{
    if (m_activeFormattingElements.isEmpty())
        return false;
    unsigned index = m_activeFormattingElements.size();
    // do/while with a pre-decrement so index 0 is examined without the
    // unsigned counter wrapping.
    do {
        --index;
        const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
        if (entry.isMarker() || m_openElements.contains(entry.element())) {
            firstUnopenElementIndex = index + 1;
            return firstUnopenElementIndex < m_activeFormattingElements.size();
        }
    } while (index);
    firstUnopenElementIndex = index;
    return true;
}

// For every active formatting entry from the first unopen one to the end of
// the list: creates a new element from the entry's saved stack item, queues
// it under the current node, pushes it onto the stack of open elements and
// makes the entry refer to the new element.
void HTMLConstructionSite::reconstructTheActiveFormattingElements()
{
    unsigned firstUnopenElementIndex;
    if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
        return;

    unsigned unopenEntryIndex = firstUnopenElementIndex;
    ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
    for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
        HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
        RefPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
        attachLater(currentNode(), reconstructed->node());
        m_openElements.push(reconstructed);
        unopenedEntry.replaceElement(reconstructed.release());
    }
}

// Pops the stack of open elements while the current item has an implied end
// tag, stopping early at an item that matches the HTML tag |tagName|.
void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
{
    while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
        m_openElements.pop();
}

// Pops the stack of open elements while the current item has an implied end
// tag.
void HTMLConstructionSite::generateImpliedEndTags()
{
    while (hasImpliedEndTag(currentStackItem()))
        m_openElements.pop();
}

// Returns the quirks flag cached by the constructors and
// setCompatibilityMode().
bool HTMLConstructionSite::inQuirksMode()
{
    return m_inQuirksMode;
}

// Fills in task.parent (and, when inserting before a table, task.nextChild)
// with the location at which a foster-parented node should be inserted.
void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
{
    // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
    HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
    if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
        task.parent = lastTemplateElement->element();
        return;
    }

    HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
    if (lastTableElementRecord) {
        Element* lastTableElement = lastTableElementRecord->element();
        ContainerNode* parent;
        // When the record after the table on the stack is a <template>, that
        // template is the candidate parent; otherwise the table's DOM parent
        // is.
        if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag))
            parent = lastTableElementRecord->next()->element();
        else
            parent = lastTableElement->parentNode();

        // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
        // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
        if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) {
            // Insert directly before the table.
            task.parent = parent;
            task.nextChild = lastTableElement;
            return;
        }
        // No usable parent: fall back to the element of the record that
        // next() returns for the table, with no nextChild set.
        task.parent = lastTableElementRecord->next()->element();
        return;
    }
    // Fragment case
    task.parent = m_openElements.rootNode(); // DocumentFragment
}

// Returns true when insertions must be redirected to the foster site: the
// redirect flag is set and the current stack item is an element that causes
// foster parenting.
bool HTMLConstructionSite::shouldFosterParent() const
{
    return m_redirectAttachToFosterParent
        && currentStackItem()->isElementNode()
        && currentStackItem()->causesFosterParenting();
}

// Queues an Insert task that places |node| at the location chosen by
// findFosterSite().
void HTMLConstructionSite::fosterParent(PassRefPtr<Node> node)
{
    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    findFosterSite(task);
    task.child = node;
    ASSERT(task.parent);

    m_taskQueue.append(task);
}

} // namespace WebCore