Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "HTMLTreeBuilder.h"
     29 
     30 #include "Comment.h"
     31 #include "DocumentFragment.h"
     32 #include "DocumentType.h"
     33 #include "Element.h"
     34 #include "Frame.h"
     35 #include "HTMLDocument.h"
     36 #include "HTMLElementFactory.h"
     37 #include "HTMLFormElement.h"
     38 #include "HTMLHtmlElement.h"
     39 #include "HTMLNames.h"
     40 #include "HTMLScriptElement.h"
     41 #include "HTMLToken.h"
     42 #include "HTMLTokenizer.h"
     43 #include "LocalizedStrings.h"
     44 #if ENABLE(MATHML)
     45 #include "MathMLNames.h"
     46 #endif
     47 #include "NotImplemented.h"
     48 #if ENABLE(SVG)
     49 #include "SVGNames.h"
     50 #endif
     51 #include "ScriptController.h"
     52 #include "Settings.h"
     53 #include "Text.h"
     54 #include <wtf/UnusedParam.h>
     55 
     56 namespace WebCore {
     57 
     58 using namespace HTMLNames;
     59 
     60 namespace {
     61 
     62 bool hasImpliedEndTag(ContainerNode* node)
     63 {
     64     return node->hasTagName(ddTag)
     65         || node->hasTagName(dtTag)
     66         || node->hasTagName(liTag)
     67         || node->hasTagName(optionTag)
     68         || node->hasTagName(optgroupTag)
     69         || node->hasTagName(pTag)
     70         || node->hasTagName(rpTag)
     71         || node->hasTagName(rtTag);
     72 }
     73 
     74 bool causesFosterParenting(const QualifiedName& tagName)
     75 {
     76     return tagName == tableTag
     77         || tagName == tbodyTag
     78         || tagName == tfootTag
     79         || tagName == theadTag
     80         || tagName == trTag;
     81 }
     82 
     83 } // namespace
     84 
     85 template<typename ChildType>
     86 PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* rawParent, PassRefPtr<ChildType> prpChild)
     87 {
     88     RefPtr<ChildType> child = prpChild;
     89     RefPtr<ContainerNode> parent = rawParent;
     90 
     91     // FIXME: It's confusing that HTMLConstructionSite::attach does the magic
     92     // redirection to the foster parent but HTMLConstructionSite::attachAtSite
     93     // doesn't. It feels like we're missing a concept somehow.
     94     if (shouldFosterParent()) {
     95         fosterParent(child.get());
     96         ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached());
     97         return child.release();
     98     }
     99 
    100     parent->parserAddChild(child);
    101 
    102     // An event handler (DOM Mutation, beforeload, et al.) could have removed
    103     // the child, in which case we shouldn't try attaching it.
    104     if (!child->parentNode())
    105         return child.release();
    106 
    107     if (parent->attached() && !child->attached())
    108         child->attach();
    109     return child.release();
    110 }
    111 
    112 void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
    113 {
    114     // FIXME: It's unfortunate that we need to hold a reference to child
    115     // here to call attach().  We should investigate whether we can rely on
    116     // |site.parent| to hold a ref at this point.
    117     RefPtr<Node> child = prpChild;
    118 
    119     if (site.nextChild)
    120         site.parent->parserInsertBefore(child, site.nextChild);
    121     else
    122         site.parent->parserAddChild(child);
    123 
    124     // JavaScript run from beforeload (or DOM Mutation or event handlers)
    125     // might have removed the child, in which case we should not attach it.
    126     if (child->parentNode() && site.parent->attached() && !child->attached())
    127         child->attach();
    128 }
    129 
// Constructs a site that builds directly into |document|: the document is
// also the attachment root, fragment mode is off, fragment scripting is
// allowed, and foster-parent redirection starts disabled.
HTMLConstructionSite::HTMLConstructionSite(Document* document)
    : m_document(document)
    , m_attachmentRoot(document)
    , m_fragmentScriptingPermission(FragmentScriptingAllowed)
    , m_isParsingFragment(false)
    , m_redirectAttachToFosterParent(false)
{
}
    138 
// Constructs a site that builds into |fragment|: the fragment is the
// attachment root, its owning document is recorded as m_document, fragment
// mode is on, and |scriptingPermission| is stored for later use when
// attributes are applied to created elements.
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
    : m_document(fragment->document())
    , m_attachmentRoot(fragment)
    , m_fragmentScriptingPermission(scriptingPermission)
    , m_isParsingFragment(true)
    , m_redirectAttachToFosterParent(false)
{
}
    147 
// Empty body: no explicit teardown is performed here.
HTMLConstructionSite::~HTMLConstructionSite()
{
}
    151 
// Clears the pointers to the document and to the attachment root. Methods
// that dereference m_document or m_attachmentRoot must not be called after
// this.
void HTMLConstructionSite::detach()
{
    m_document = 0;
    m_attachmentRoot = 0;
}
    157 
// Records |form| as the current form element. Asserts that no form has been
// recorded yet.
void HTMLConstructionSite::setForm(HTMLFormElement* form)
{
    // This method should only be needed for HTMLTreeBuilder in the fragment case.
    ASSERT(!m_form);
    m_form = form;
}
    164 
// Hands the stored form reference to the caller by releasing m_form.
PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
{
    return m_form.release();
}
    169 
    170 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
    171 {
    172     ASSERT(m_document);
    173     if (m_document->frame() && !m_isParsingFragment)
    174         m_document->frame()->loader()->dispatchDocumentElementAvailable();
    175 }
    176 
    177 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token)
    178 {
    179     RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
    180     element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
    181     m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get()));
    182 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
    183     element->insertedByParser();
    184 #endif
    185     dispatchDocumentElementAvailableIfNeeded();
    186 }
    187 
    188 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element)
    189 {
    190     if (!token.attributes())
    191         return;
    192 
    193     NamedNodeMap* attributes = element->attributes(false);
    194     for (unsigned i = 0; i < token.attributes()->length(); ++i) {
    195         Attribute* attribute = token.attributes()->attributeItem(i);
    196         if (!attributes->getAttributeItem(attribute->name()))
    197             element->setAttribute(attribute->name(), attribute->value());
    198     }
    199 }
    200 
    201 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token)
    202 {
    203     // FIXME: parse error
    204 
    205     // Fragments do not have a root HTML element, so any additional HTML elements
    206     // encountered during fragment parsing should be ignored.
    207     if (m_isParsingFragment)
    208         return;
    209 
    210     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
    211 }
    212 
    213 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
    214 {
    215     // FIXME: parse error
    216     mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
    217 }
    218 
    219 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
    220 {
    221     ASSERT(token.type() == HTMLToken::DOCTYPE);
    222     attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
    223 
    224     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
    225     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
    226     // because context-less fragments can determine their own quirks mode, and thus change
    227     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
    228     // in a fragment, as changing the owning document's compatibility mode would be wrong.
    229     ASSERT(!m_isParsingFragment);
    230     if (m_isParsingFragment)
    231         return;
    232 
    233     if (token.forceQuirks())
    234         m_document->setCompatibilityMode(Document::QuirksMode);
    235     else
    236         m_document->setCompatibilityModeFromDoctype();
    237 }
    238 
    239 void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
    240 {
    241     ASSERT(token.type() == HTMLToken::Comment);
    242     attach(currentNode(), Comment::create(currentNode()->document(), token.comment()));
    243 }
    244 
    245 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
    246 {
    247     ASSERT(token.type() == HTMLToken::Comment);
    248     attach(m_attachmentRoot, Comment::create(m_document, token.comment()));
    249 }
    250 
    251 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
    252 {
    253     ASSERT(token.type() == HTMLToken::Comment);
    254     ContainerNode* parent = m_openElements.rootNode();
    255     attach(parent, Comment::create(parent->document(), token.comment()));
    256 }
    257 
    258 PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child)
    259 {
    260     return attach(currentNode(), child);
    261 }
    262 
    263 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token)
    264 {
    265     ASSERT(!shouldFosterParent());
    266     m_head = attachToCurrent(createHTMLElement(token));
    267     m_openElements.pushHTMLHeadElement(m_head);
    268 }
    269 
    270 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token)
    271 {
    272     ASSERT(!shouldFosterParent());
    273     m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token)));
    274 }
    275 
    276 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted)
    277 {
    278     RefPtr<Element> element = createHTMLElement(token);
    279     ASSERT(element->hasTagName(formTag));
    280     RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release());
    281     form->setDemoted(isDemoted);
    282     m_openElements.push(attachToCurrent(form.release()));
    283     ASSERT(currentElement()->isHTMLElement());
    284     ASSERT(currentElement()->hasTagName(formTag));
    285     m_form = static_cast<HTMLFormElement*>(currentElement());
    286 }
    287 
    288 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token)
    289 {
    290     m_openElements.push(attachToCurrent(createHTMLElement(token)));
    291 }
    292 
    293 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token)
    294 {
    295     ASSERT(token.type() == HTMLToken::StartTag);
    296     RefPtr<Element> element = attachToCurrent(createHTMLElement(token));
    297     // Normally HTMLElementStack is responsible for calling finishParsingChildren,
    298     // but self-closing elements are never in the element stack so the stack
    299     // doesn't get a chance to tell them that we're done parsing their children.
    300     element->finishParsingChildren();
    301     // FIXME: Do we want to acknowledge the token's self-closing flag?
    302     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
    303 }
    304 
    305 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token)
    306 {
    307     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
    308     // Possible active formatting elements include:
    309     // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
    310     insertHTMLElement(token);
    311     m_activeFormattingElements.append(currentElement());
    312 }
    313 
    314 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token)
    315 {
    316     RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), true);
    317     if (m_fragmentScriptingPermission == FragmentScriptingAllowed)
    318         element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
    319     m_openElements.push(attachToCurrent(element.release()));
    320 }
    321 
    322 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
    323 {
    324     ASSERT(token.type() == HTMLToken::StartTag);
    325     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
    326 
    327     RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI));
    328     if (!token.selfClosing())
    329         m_openElements.push(element);
    330 }
    331 
    332 void HTMLConstructionSite::insertTextNode(const String& characters)
    333 {
    334     AttachmentSite site;
    335     site.parent = currentNode();
    336     site.nextChild = 0;
    337     if (shouldFosterParent())
    338         findFosterSite(site);
    339 
    340     unsigned currentPosition = 0;
    341 
    342     // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
    343     // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.
    344 
    345     Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild();
    346     if (previousChild && previousChild->isTextNode()) {
    347         // FIXME: We're only supposed to append to this text node if it
    348         // was the last text node inserted by the parser.
    349         CharacterData* textNode = static_cast<CharacterData*>(previousChild);
    350         currentPosition = textNode->parserAppendData(characters.characters(), characters.length(), Text::defaultLengthLimit);
    351     }
    352 
    353     while (currentPosition < characters.length()) {
    354         RefPtr<Text> textNode = Text::createWithLengthLimit(site.parent->document(), characters, currentPosition);
    355         // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
    356         if (!textNode->length())
    357             textNode = Text::create(site.parent->document(), characters.substring(currentPosition));
    358 
    359         currentPosition += textNode->length();
    360         ASSERT(currentPosition <= characters.length());
    361         attachAtSite(site, textNode.release());
    362     }
    363 }
    364 
    365 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
    366 {
    367     QualifiedName tagName(nullAtom, token.name(), namespaceURI);
    368     RefPtr<Element> element = currentNode()->document()->createElement(tagName, true);
    369     element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
    370     return element.release();
    371 }
    372 
    373 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
    374 {
    375     QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI);
    376     // FIXME: This can't use HTMLConstructionSite::createElement because we
    377     // have to pass the current form element.  We should rework form association
    378     // to occur after construction to allow better code sharing here.
    379     RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true);
    380     element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
    381     ASSERT(element->isHTMLElement());
    382     return element.release();
    383 }
    384 
    385 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record)
    386 {
    387     return createHTMLElementFromSavedElement(record->element());
    388 }
    389 
    390 namespace {
    391 
    392 PassRefPtr<NamedNodeMap> cloneAttributes(Element* element)
    393 {
    394     NamedNodeMap* attributes = element->attributes(true);
    395     if (!attributes)
    396         return 0;
    397 
    398     RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create();
    399     for (size_t i = 0; i < attributes->length(); ++i) {
    400         Attribute* attribute = attributes->attributeItem(i);
    401         RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value());
    402         newAttributes->addAttribute(clone);
    403     }
    404     return newAttributes.release();
    405 }
    406 
    407 }
    408 
    409 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element)
    410 {
    411     // FIXME: This method is wrong.  We should be using the original token.
    412     // Using an Element* causes us to fail examples like this:
    413     // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
    414     // When reconstructTheActiveFormattingElements calls this method to open
    415     // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
    416     // spec implies it should be "1".  Minefield matches the HTML5 spec here.
    417 
    418     ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
    419     AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element));
    420     return createHTMLElement(fakeToken);
    421 }
    422 
    423 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
    424 {
    425     if (m_activeFormattingElements.isEmpty())
    426         return false;
    427     unsigned index = m_activeFormattingElements.size();
    428     do {
    429         --index;
    430         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
    431         if (entry.isMarker() || m_openElements.contains(entry.element())) {
    432             firstUnopenElementIndex = index + 1;
    433             return firstUnopenElementIndex < m_activeFormattingElements.size();
    434         }
    435     } while (index);
    436     firstUnopenElementIndex = index;
    437     return true;
    438 }
    439 
    440 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
    441 {
    442     unsigned firstUnopenElementIndex;
    443     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
    444         return;
    445 
    446     unsigned unopenEntryIndex = firstUnopenElementIndex;
    447     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
    448     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
    449         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
    450         RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element());
    451         m_openElements.push(attachToCurrent(reconstructed.release()));
    452         unopenedEntry.replaceElement(currentElement());
    453     }
    454 }
    455 
    456 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
    457 {
    458     while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName))
    459         m_openElements.pop();
    460 }
    461 
    462 void HTMLConstructionSite::generateImpliedEndTags()
    463 {
    464     while (hasImpliedEndTag(currentNode()))
    465         m_openElements.pop();
    466 }
    467 
    468 void HTMLConstructionSite::findFosterSite(AttachmentSite& site)
    469 {
    470     HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
    471     if (lastTableElementRecord) {
    472         Element* lastTableElement = lastTableElementRecord->element();
    473         if (ContainerNode* parent = lastTableElement->parentNode()) {
    474             site.parent = parent;
    475             site.nextChild = lastTableElement;
    476             return;
    477         }
    478         site.parent = lastTableElementRecord->next()->element();
    479         site.nextChild = 0;
    480         return;
    481     }
    482     // Fragment case
    483     site.parent = m_openElements.rootNode(); // DocumentFragment
    484     site.nextChild = 0;
    485 }
    486 
    487 bool HTMLConstructionSite::shouldFosterParent() const
    488 {
    489     return m_redirectAttachToFosterParent
    490         && currentNode()->isElementNode()
    491         && causesFosterParenting(currentElement()->tagQName());
    492 }
    493 
    494 void HTMLConstructionSite::fosterParent(Node* node)
    495 {
    496     AttachmentSite site;
    497     findFosterSite(site);
    498     attachAtSite(site, node);
    499 }
    500 
    501 }
    502