1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2011 Apple Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "HTMLTreeBuilder.h" 29 30 #include "Comment.h" 31 #include "DocumentFragment.h" 32 #include "DocumentType.h" 33 #include "Element.h" 34 #include "Frame.h" 35 #include "HTMLDocument.h" 36 #include "HTMLElementFactory.h" 37 #include "HTMLFormElement.h" 38 #include "HTMLHtmlElement.h" 39 #include "HTMLNames.h" 40 #include "HTMLScriptElement.h" 41 #include "HTMLToken.h" 42 #include "HTMLTokenizer.h" 43 #include "LocalizedStrings.h" 44 #if ENABLE(MATHML) 45 #include "MathMLNames.h" 46 #endif 47 #include "NotImplemented.h" 48 #if ENABLE(SVG) 49 #include "SVGNames.h" 50 #endif 51 #include "ScriptController.h" 52 #include "Settings.h" 53 #include "Text.h" 54 #include <wtf/UnusedParam.h> 55 56 namespace WebCore { 57 58 using namespace HTMLNames; 59 60 namespace { 61 62 bool hasImpliedEndTag(ContainerNode* node) 63 { 64 return node->hasTagName(ddTag) 65 || node->hasTagName(dtTag) 66 || node->hasTagName(liTag) 67 || node->hasTagName(optionTag) 68 || node->hasTagName(optgroupTag) 69 || node->hasTagName(pTag) 70 || node->hasTagName(rpTag) 71 || node->hasTagName(rtTag); 72 } 73 74 bool causesFosterParenting(const QualifiedName& tagName) 75 { 76 return tagName == tableTag 77 || tagName == tbodyTag 78 || tagName == tfootTag 79 || tagName == theadTag 80 || tagName == trTag; 81 } 82 83 } // namespace 84 85 template<typename ChildType> 86 PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* rawParent, PassRefPtr<ChildType> prpChild) 87 { 88 RefPtr<ChildType> child = prpChild; 89 RefPtr<ContainerNode> parent = rawParent; 90 91 // FIXME: It's confusing that HTMLConstructionSite::attach does the magic 92 // redirection to the foster parent but HTMLConstructionSite::attachAtSite 93 // doesn't. It feels like we're missing a concept somehow. 94 if (shouldFosterParent()) { 95 fosterParent(child.get()); 96 ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached()); 97 return child.release(); 98 } 99 100 parent->parserAddChild(child); 101 102 // An event handler (DOM Mutation, beforeload, et al.) could have removed 103 // the child, in which case we shouldn't try attaching it. 104 if (!child->parentNode()) 105 return child.release(); 106 107 if (parent->attached() && !child->attached()) 108 child->attach(); 109 return child.release(); 110 } 111 112 void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild) 113 { 114 // FIXME: It's unfortunate that we need to hold a reference to child 115 // here to call attach(). We should investigate whether we can rely on 116 // |site.parent| to hold a ref at this point. 117 RefPtr<Node> child = prpChild; 118 119 if (site.nextChild) 120 site.parent->parserInsertBefore(child, site.nextChild); 121 else 122 site.parent->parserAddChild(child); 123 124 // JavaScript run from beforeload (or DOM Mutation or event handlers) 125 // might have removed the child, in which case we should not attach it. 126 if (child->parentNode() && site.parent->attached() && !child->attached()) 127 child->attach(); 128 } 129 130 HTMLConstructionSite::HTMLConstructionSite(Document* document) 131 : m_document(document) 132 , m_attachmentRoot(document) 133 , m_fragmentScriptingPermission(FragmentScriptingAllowed) 134 , m_isParsingFragment(false) 135 , m_redirectAttachToFosterParent(false) 136 { 137 } 138 139 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission) 140 : m_document(fragment->document()) 141 , m_attachmentRoot(fragment) 142 , m_fragmentScriptingPermission(scriptingPermission) 143 , m_isParsingFragment(true) 144 , m_redirectAttachToFosterParent(false) 145 { 146 } 147 148 HTMLConstructionSite::~HTMLConstructionSite() 149 { 150 } 151 152 void HTMLConstructionSite::detach() 153 { 154 m_document = 0; 155 m_attachmentRoot = 0; 156 } 157 158 void HTMLConstructionSite::setForm(HTMLFormElement* form) 159 { 160 // This method should only be needed for HTMLTreeBuilder in the fragment case. 161 ASSERT(!m_form); 162 m_form = form; 163 } 164 165 PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm() 166 { 167 return m_form.release(); 168 } 169 170 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded() 171 { 172 ASSERT(m_document); 173 if (m_document->frame() && !m_isParsingFragment) 174 m_document->frame()->loader()->dispatchDocumentElementAvailable(); 175 } 176 177 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token) 178 { 179 RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document); 180 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); 181 m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get())); 182 #if ENABLE(OFFLINE_WEB_APPLICATIONS) 183 element->insertedByParser(); 184 #endif 185 dispatchDocumentElementAvailableIfNeeded(); 186 } 187 188 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element) 189 { 190 if (!token.attributes()) 191 return; 192 193 NamedNodeMap* attributes = element->attributes(false); 194 for (unsigned i = 0; i < token.attributes()->length(); ++i) { 195 Attribute* attribute = token.attributes()->attributeItem(i); 196 if (!attributes->getAttributeItem(attribute->name())) 197 element->setAttribute(attribute->name(), attribute->value()); 198 } 199 } 200 201 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token) 202 { 203 // FIXME: parse error 204 205 // Fragments do not have a root HTML element, so any additional HTML elements 206 // encountered during fragment parsing should be ignored. 207 if (m_isParsingFragment) 208 return; 209 210 mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement()); 211 } 212 213 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token) 214 { 215 // FIXME: parse error 216 mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement()); 217 } 218 219 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token) 220 { 221 ASSERT(token.type() == HTMLToken::DOCTYPE); 222 attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier()))); 223 224 // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which 225 // never occurs. However, if we ever chose to support such, this code is subtly wrong, 226 // because context-less fragments can determine their own quirks mode, and thus change 227 // parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this code 228 // in a fragment, as changing the owning document's compatibility mode would be wrong. 229 ASSERT(!m_isParsingFragment); 230 if (m_isParsingFragment) 231 return; 232 233 if (token.forceQuirks()) 234 m_document->setCompatibilityMode(Document::QuirksMode); 235 else 236 m_document->setCompatibilityModeFromDoctype(); 237 } 238 239 void HTMLConstructionSite::insertComment(AtomicHTMLToken& token) 240 { 241 ASSERT(token.type() == HTMLToken::Comment); 242 attach(currentNode(), Comment::create(currentNode()->document(), token.comment())); 243 } 244 245 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token) 246 { 247 ASSERT(token.type() == HTMLToken::Comment); 248 attach(m_attachmentRoot, Comment::create(m_document, token.comment())); 249 } 250 251 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token) 252 { 253 ASSERT(token.type() == HTMLToken::Comment); 254 ContainerNode* parent = m_openElements.rootNode(); 255 attach(parent, Comment::create(parent->document(), token.comment())); 256 } 257 258 PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child) 259 { 260 return attach(currentNode(), child); 261 } 262 263 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token) 264 { 265 ASSERT(!shouldFosterParent()); 266 m_head = attachToCurrent(createHTMLElement(token)); 267 m_openElements.pushHTMLHeadElement(m_head); 268 } 269 270 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token) 271 { 272 ASSERT(!shouldFosterParent()); 273 m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token))); 274 } 275 276 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted) 277 { 278 RefPtr<Element> element = createHTMLElement(token); 279 ASSERT(element->hasTagName(formTag)); 280 RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release()); 281 form->setDemoted(isDemoted); 282 m_openElements.push(attachToCurrent(form.release())); 283 ASSERT(currentElement()->isHTMLElement()); 284 ASSERT(currentElement()->hasTagName(formTag)); 285 m_form = static_cast<HTMLFormElement*>(currentElement()); 286 } 287 288 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token) 289 { 290 m_openElements.push(attachToCurrent(createHTMLElement(token))); 291 } 292 293 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token) 294 { 295 ASSERT(token.type() == HTMLToken::StartTag); 296 RefPtr<Element> element = attachToCurrent(createHTMLElement(token)); 297 // Normally HTMLElementStack is responsible for calling finishParsingChildren, 298 // but self-closing elements are never in the element stack so the stack 299 // doesn't get a chance to tell them that we're done parsing their children. 300 element->finishParsingChildren(); 301 // FIXME: Do we want to acknowledge the token's self-closing flag? 302 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag 303 } 304 305 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token) 306 { 307 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements 308 // Possible active formatting elements include: 309 // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u. 310 insertHTMLElement(token); 311 m_activeFormattingElements.append(currentElement()); 312 } 313 314 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token) 315 { 316 RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), true); 317 if (m_fragmentScriptingPermission == FragmentScriptingAllowed) 318 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); 319 m_openElements.push(attachToCurrent(element.release())); 320 } 321 322 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI) 323 { 324 ASSERT(token.type() == HTMLToken::StartTag); 325 notImplemented(); // parseError when xmlns or xmlns:xlink are wrong. 326 327 RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI)); 328 if (!token.selfClosing()) 329 m_openElements.push(element); 330 } 331 332 void HTMLConstructionSite::insertTextNode(const String& characters) 333 { 334 AttachmentSite site; 335 site.parent = currentNode(); 336 site.nextChild = 0; 337 if (shouldFosterParent()) 338 findFosterSite(site); 339 340 unsigned currentPosition = 0; 341 342 // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary 343 // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>. 344 345 Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild(); 346 if (previousChild && previousChild->isTextNode()) { 347 // FIXME: We're only supposed to append to this text node if it 348 // was the last text node inserted by the parser. 349 CharacterData* textNode = static_cast<CharacterData*>(previousChild); 350 currentPosition = textNode->parserAppendData(characters.characters(), characters.length(), Text::defaultLengthLimit); 351 } 352 353 while (currentPosition < characters.length()) { 354 RefPtr<Text> textNode = Text::createWithLengthLimit(site.parent->document(), characters, currentPosition); 355 // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil. 356 if (!textNode->length()) 357 textNode = Text::create(site.parent->document(), characters.substring(currentPosition)); 358 359 currentPosition += textNode->length(); 360 ASSERT(currentPosition <= characters.length()); 361 attachAtSite(site, textNode.release()); 362 } 363 } 364 365 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI) 366 { 367 QualifiedName tagName(nullAtom, token.name(), namespaceURI); 368 RefPtr<Element> element = currentNode()->document()->createElement(tagName, true); 369 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); 370 return element.release(); 371 } 372 373 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token) 374 { 375 QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI); 376 // FIXME: This can't use HTMLConstructionSite::createElement because we 377 // have to pass the current form element. We should rework form association 378 // to occur after construction to allow better code sharing here. 379 RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true); 380 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); 381 ASSERT(element->isHTMLElement()); 382 return element.release(); 383 } 384 385 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record) 386 { 387 return createHTMLElementFromSavedElement(record->element()); 388 } 389 390 namespace { 391 392 PassRefPtr<NamedNodeMap> cloneAttributes(Element* element) 393 { 394 NamedNodeMap* attributes = element->attributes(true); 395 if (!attributes) 396 return 0; 397 398 RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create(); 399 for (size_t i = 0; i < attributes->length(); ++i) { 400 Attribute* attribute = attributes->attributeItem(i); 401 RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value()); 402 newAttributes->addAttribute(clone); 403 } 404 return newAttributes.release(); 405 } 406 407 } 408 409 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element) 410 { 411 // FIXME: This method is wrong. We should be using the original token. 412 // Using an Element* causes us to fail examples like this: 413 // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b> 414 // When reconstructTheActiveFormattingElements calls this method to open 415 // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5 416 // spec implies it should be "1". Minefield matches the HTML5 spec here. 417 418 ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong. 419 AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element)); 420 return createHTMLElement(fakeToken); 421 } 422 423 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const 424 { 425 if (m_activeFormattingElements.isEmpty()) 426 return false; 427 unsigned index = m_activeFormattingElements.size(); 428 do { 429 --index; 430 const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index); 431 if (entry.isMarker() || m_openElements.contains(entry.element())) { 432 firstUnopenElementIndex = index + 1; 433 return firstUnopenElementIndex < m_activeFormattingElements.size(); 434 } 435 } while (index); 436 firstUnopenElementIndex = index; 437 return true; 438 } 439 440 void HTMLConstructionSite::reconstructTheActiveFormattingElements() 441 { 442 unsigned firstUnopenElementIndex; 443 if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex)) 444 return; 445 446 unsigned unopenEntryIndex = firstUnopenElementIndex; 447 ASSERT(unopenEntryIndex < m_activeFormattingElements.size()); 448 for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) { 449 HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex); 450 RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element()); 451 m_openElements.push(attachToCurrent(reconstructed.release())); 452 unopenedEntry.replaceElement(currentElement()); 453 } 454 } 455 456 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName) 457 { 458 while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName)) 459 m_openElements.pop(); 460 } 461 462 void HTMLConstructionSite::generateImpliedEndTags() 463 { 464 while (hasImpliedEndTag(currentNode())) 465 m_openElements.pop(); 466 } 467 468 void HTMLConstructionSite::findFosterSite(AttachmentSite& site) 469 { 470 HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName()); 471 if (lastTableElementRecord) { 472 Element* lastTableElement = lastTableElementRecord->element(); 473 if (ContainerNode* parent = lastTableElement->parentNode()) { 474 site.parent = parent; 475 site.nextChild = lastTableElement; 476 return; 477 } 478 site.parent = lastTableElementRecord->next()->element(); 479 site.nextChild = 0; 480 return; 481 } 482 // Fragment case 483 site.parent = m_openElements.rootNode(); // DocumentFragment 484 site.nextChild = 0; 485 } 486 487 bool HTMLConstructionSite::shouldFosterParent() const 488 { 489 return m_redirectAttachToFosterParent 490 && currentNode()->isElementNode() 491 && causesFosterParenting(currentElement()->tagQName()); 492 } 493 494 void HTMLConstructionSite::fosterParent(Node* node) 495 { 496 AttachmentSite site; 497 findFosterSite(site); 498 attachAtSite(site, node); 499 } 500 501 } 502