Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/html/parser/HTMLTreeBuilder.h"
     29 
     30 #include "HTMLElementFactory.h"
     31 #include "HTMLNames.h"
     32 #include "core/dom/Comment.h"
     33 #include "core/dom/DocumentFragment.h"
     34 #include "core/dom/DocumentType.h"
     35 #include "core/dom/Element.h"
     36 #include "core/dom/ScriptLoader.h"
     37 #include "core/dom/Text.h"
     38 #include "core/html/HTMLFormElement.h"
     39 #include "core/html/HTMLHtmlElement.h"
     40 #include "core/html/HTMLOptGroupElement.h"
     41 #include "core/html/HTMLScriptElement.h"
     42 #include "core/html/HTMLTemplateElement.h"
     43 #include "core/html/parser/AtomicHTMLToken.h"
     44 #include "core/html/parser/HTMLParserIdioms.h"
     45 #include "core/html/parser/HTMLStackItem.h"
     46 #include "core/html/parser/HTMLToken.h"
     47 #include "core/loader/FrameLoader.h"
     48 #include "core/loader/FrameLoaderClient.h"
     49 #include "core/page/Frame.h"
     50 #include "core/platform/NotImplemented.h"
     51 #include <limits>
     52 
     53 namespace WebCore {
     54 
     55 using namespace HTMLNames;
     56 
// Cap on the open-element stack depth. Once the stack is deeper than this,
// attachLater() inserts new nodes as siblings of the current parent instead
// of nesting them any further.
static const unsigned maximumHTMLParserDOMTreeDepth = 512;
     58 
     59 static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
     60 {
     61     if (!scriptingContentIsAllowed(parserContentPolicy))
     62         element->stripScriptingAttributes(token->attributes());
     63     element->parserSetAttributes(token->attributes());
     64 }
     65 
     66 static bool hasImpliedEndTag(const HTMLStackItem* item)
     67 {
     68     return item->hasTagName(ddTag)
     69         || item->hasTagName(dtTag)
     70         || item->hasTagName(liTag)
     71         || item->hasTagName(optionTag)
     72         || isHTMLOptGroupElement(item->node())
     73         || item->hasTagName(pTag)
     74         || item->hasTagName(rpTag)
     75         || item->hasTagName(rtTag);
     76 }
     77 
     78 static bool shouldUseLengthLimit(const ContainerNode* node)
     79 {
     80     return !node->hasTagName(scriptTag)
     81         && !node->hasTagName(styleTag)
     82         && !node->hasTagName(SVGNames::scriptTag);
     83 }
     84 
     85 static inline bool isAllWhitespace(const String& string)
     86 {
     87     return string.isAllSpecialCharacters<isHTMLSpace>();
     88 }
     89 
     90 // The |lazyAttach| parameter to this function exists for historical reasons.
     91 // There used to be two code paths, one that used lazyAttach and one that
     92 // didn't. We should make the two code paths consistent and either use
     93 // lazyAttach or non-lazyAttach, but we wanted to make that change separately.
     94 static inline void insert(HTMLConstructionSiteTask& task, bool lazyAttach)
     95 {
     96     if (task.parent->hasTagName(templateTag))
     97         task.parent = toHTMLTemplateElement(task.parent.get())->content();
     98 
     99     if (ContainerNode* parent = task.child->parentNode())
    100         parent->parserRemoveChild(task.child.get());
    101 
    102     if (task.nextChild)
    103         task.parent->parserInsertBefore(task.child.get(), task.nextChild.get());
    104     else
    105         task.parent->parserAppendChild(task.child.get());
    106 
    107     // JavaScript run from beforeload (or DOM Mutation or event handlers)
    108     // might have removed the child, in which case we should not attach it.
    109 
    110     if (task.child->parentNode() && task.parent->attached() && !task.child->attached()) {
    111         if (lazyAttach)
    112             task.child->lazyAttach();
    113         else
    114             task.child->attach();
    115     }
    116 }
    117 
    118 static inline void executeInsertTask(HTMLConstructionSiteTask& task)
    119 {
    120     ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
    121 
    122     insert(task, false);
    123 
    124     task.child->beginParsingChildren();
    125 
    126     if (task.selfClosing)
    127         task.child->finishParsingChildren();
    128 }
    129 
    130 static inline void executeReparentTask(HTMLConstructionSiteTask& task)
    131 {
    132     ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
    133 
    134     if (ContainerNode* parent = task.child->parentNode())
    135         parent->parserRemoveChild(task.child.get());
    136 
    137     task.parent->parserAppendChild(task.child);
    138 
    139     if (task.child->parentElement()->attached() && !task.child->attached())
    140         task.child->lazyAttach();
    141 }
    142 
    143 static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
    144 {
    145     ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);
    146 
    147     insert(task, true);
    148 }
    149 
    150 static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
    151 {
    152     ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);
    153 
    154     task.parent->takeAllChildrenFrom(task.oldParent());
    155     // Notice that we don't need to manually attach the moved children
    156     // because takeAllChildrenFrom does that work for us.
    157 }
    158 
    159 static inline void executeTask(HTMLConstructionSiteTask& task)
    160 {
    161     if (task.operation == HTMLConstructionSiteTask::Insert)
    162         return executeInsertTask(task);
    163 
    164     // All the cases below this point are only used by the adoption agency.
    165 
    166     if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
    167         return executeInsertAlreadyParsedChildTask(task);
    168 
    169     if (task.operation == HTMLConstructionSiteTask::Reparent)
    170         return executeReparentTask(task);
    171 
    172     if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
    173         return executeTakeAllChildrenTask(task);
    174 
    175     ASSERT_NOT_REACHED();
    176 }
    177 
    178 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> prpChild, bool selfClosing)
    179 {
    180     ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get())));
    181     ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !prpChild->isPluginElement());
    182 
    183     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    184     task.parent = parent;
    185     task.child = prpChild;
    186     task.selfClosing = selfClosing;
    187 
    188     if (shouldFosterParent()) {
    189         fosterParent(task.child);
    190         return;
    191     }
    192 
    193     // Add as a sibling of the parent if we have reached the maximum depth allowed.
    194     if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode())
    195         task.parent = task.parent->parentNode();
    196 
    197     ASSERT(task.parent);
    198     m_taskQueue.append(task);
    199 }
    200 
    201 void HTMLConstructionSite::executeQueuedTasks()
    202 {
    203     const size_t size = m_taskQueue.size();
    204     if (!size)
    205         return;
    206 
    207     // Copy the task queue into a local variable in case executeTask
    208     // re-enters the parser.
    209     TaskQueue queue;
    210     queue.swap(m_taskQueue);
    211 
    212     for (size_t i = 0; i < size; ++i)
    213         executeTask(queue[i]);
    214 
    215     // We might be detached now.
    216 }
    217 
    218 HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
    219     : m_document(document)
    220     , m_attachmentRoot(document)
    221     , m_parserContentPolicy(parserContentPolicy)
    222     , m_isParsingFragment(false)
    223     , m_redirectAttachToFosterParent(false)
    224     , m_inQuirksMode(document->inQuirksMode())
    225 {
    226     ASSERT(m_document->isHTMLDocument() || m_document->isSVGDocument() || m_document->isXHTMLDocument());
    227 }
    228 
    229 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
    230     : m_document(fragment->document())
    231     , m_attachmentRoot(fragment)
    232     , m_parserContentPolicy(parserContentPolicy)
    233     , m_isParsingFragment(true)
    234     , m_redirectAttachToFosterParent(false)
    235     , m_inQuirksMode(fragment->document()->inQuirksMode())
    236 {
    237     ASSERT(m_document->isHTMLDocument() || m_document->isSVGDocument() || m_document->isXHTMLDocument());
    238 }
    239 
    240 HTMLConstructionSite::~HTMLConstructionSite()
    241 {
    242 }
    243 
    244 void HTMLConstructionSite::detach()
    245 {
    246     m_document = 0;
    247     m_attachmentRoot = 0;
    248 }
    249 
    250 void HTMLConstructionSite::setForm(HTMLFormElement* form)
    251 {
    252     // This method should only be needed for HTMLTreeBuilder in the fragment case.
    253     ASSERT(!m_form);
    254     m_form = form;
    255 }
    256 
    257 PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
    258 {
    259     return m_form.release();
    260 }
    261 
    262 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
    263 {
    264     ASSERT(m_document);
    265     if (m_document->frame() && !m_isParsingFragment)
    266         m_document->frame()->loader()->dispatchDocumentElementAvailable();
    267 }
    268 
    269 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
    270 {
    271     RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
    272     setAttributes(element.get(), token, m_parserContentPolicy);
    273     attachLater(m_attachmentRoot, element);
    274     m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
    275 
    276     executeQueuedTasks();
    277     element->insertedByParser();
    278     dispatchDocumentElementAvailableIfNeeded();
    279 }
    280 
    281 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
    282 {
    283     if (token->attributes().isEmpty())
    284         return;
    285 
    286     for (unsigned i = 0; i < token->attributes().size(); ++i) {
    287         const Attribute& tokenAttribute = token->attributes().at(i);
    288         if (!element->elementData() || !element->getAttributeItem(tokenAttribute.name()))
    289             element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
    290     }
    291 }
    292 
    293 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
    294 {
    295     // Fragments do not have a root HTML element, so any additional HTML elements
    296     // encountered during fragment parsing should be ignored.
    297     if (m_isParsingFragment)
    298         return;
    299 
    300     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
    301 }
    302 
    303 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
    304 {
    305     mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
    306 }
    307 
    308 void HTMLConstructionSite::setDefaultCompatibilityMode()
    309 {
    310     if (m_isParsingFragment)
    311         return;
    312     if (m_document->isSrcdocDocument())
    313         return;
    314     setCompatibilityMode(Document::QuirksMode);
    315 }
    316 
    317 void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
    318 {
    319     m_inQuirksMode = (mode == Document::QuirksMode);
    320     m_document->setCompatibilityMode(mode);
    321 }
    322 
    323 void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
    324 {
    325     // There are three possible compatibility modes:
    326     // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
    327     // be omitted from numbers.
    328     // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
    329     // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.
    330 
    331     // Check for Quirks Mode.
    332     if (name != "html"
    333         || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
    334         || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
    335         || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
    336         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
    337         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
    338         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
    339         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
    340         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
    341         || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
    342         || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
    343         || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
    344         || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
    345         || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
    346         || publicId.startsWith("-//IETF//DTD HTML 3//", false)
    347         || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
    348         || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
    349         || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
    350         || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
    351         || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
    352         || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
    353         || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
    354         || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
    355         || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
    356         || publicId.startsWith("-//IETF//DTD HTML//", false)
    357         || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
    358         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
    359         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
    360         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
    361         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
    362         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
    363         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
    364         || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
    365         || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
    366         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
    367         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
    368         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
    369         || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
    370         || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
    371         || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
    372         || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
    373         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
    374         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
    375         || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
    376         || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
    377         || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
    378         || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
    379         || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
    380         || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
    381         || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
    382         || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
    383         || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
    384         || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
    385         || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
    386         || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
    387         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
    388         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
    389         || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
    390         || equalIgnoringCase(publicId, "HTML")
    391         || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
    392         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
    393         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
    394         setCompatibilityMode(Document::QuirksMode);
    395         return;
    396     }
    397 
    398     // Check for Limited Quirks Mode.
    399     if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
    400         || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
    401         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
    402         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
    403         setCompatibilityMode(Document::LimitedQuirksMode);
    404         return;
    405     }
    406 
    407     // Otherwise we are No Quirks Mode.
    408     setCompatibilityMode(Document::NoQuirksMode);
    409 }
    410 
    411 void HTMLConstructionSite::finishedParsing()
    412 {
    413     m_document->finishedParsing();
    414 }
    415 
    416 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
    417 {
    418     ASSERT(token->type() == HTMLToken::DOCTYPE);
    419 
    420     const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
    421     const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
    422     RefPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
    423     attachLater(m_attachmentRoot, doctype.release());
    424 
    425     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
    426     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
    427     // because context-less fragments can determine their own quirks mode, and thus change
    428     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
    429     // in a fragment, as changing the owning document's compatibility mode would be wrong.
    430     ASSERT(!m_isParsingFragment);
    431     if (m_isParsingFragment)
    432         return;
    433 
    434     if (token->forceQuirks())
    435         setCompatibilityMode(Document::QuirksMode);
    436     else {
    437         setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
    438     }
    439 }
    440 
    441 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
    442 {
    443     ASSERT(token->type() == HTMLToken::Comment);
    444     attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
    445 }
    446 
    447 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
    448 {
    449     ASSERT(token->type() == HTMLToken::Comment);
    450     attachLater(m_attachmentRoot, Comment::create(m_document, token->comment()));
    451 }
    452 
    453 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
    454 {
    455     ASSERT(token->type() == HTMLToken::Comment);
    456     ContainerNode* parent = m_openElements.rootNode();
    457     attachLater(parent, Comment::create(parent->document(), token->comment()));
    458 }
    459 
    460 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
    461 {
    462     ASSERT(!shouldFosterParent());
    463     m_head = HTMLStackItem::create(createHTMLElement(token), token);
    464     attachLater(currentNode(), m_head->element());
    465     m_openElements.pushHTMLHeadElement(m_head);
    466 }
    467 
    468 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
    469 {
    470     ASSERT(!shouldFosterParent());
    471     RefPtr<Element> body = createHTMLElement(token);
    472     attachLater(currentNode(), body);
    473     m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
    474     if (Frame* frame = m_document->frame())
    475         frame->loader()->client()->dispatchWillInsertBody();
    476 }
    477 
    478 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
    479 {
    480     RefPtr<Element> element = createHTMLElement(token);
    481     ASSERT(element->hasTagName(formTag));
    482     m_form = static_pointer_cast<HTMLFormElement>(element.release());
    483     m_form->setDemoted(isDemoted);
    484     attachLater(currentNode(), m_form);
    485     m_openElements.push(HTMLStackItem::create(m_form, token));
    486 }
    487 
    488 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
    489 {
    490     RefPtr<Element> element = createHTMLElement(token);
    491     attachLater(currentNode(), element);
    492     m_openElements.push(HTMLStackItem::create(element.release(), token));
    493 }
    494 
    495 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
    496 {
    497     ASSERT(token->type() == HTMLToken::StartTag);
    498     // Normally HTMLElementStack is responsible for calling finishParsingChildren,
    499     // but self-closing elements are never in the element stack so the stack
    500     // doesn't get a chance to tell them that we're done parsing their children.
    501     attachLater(currentNode(), createHTMLElement(token), true);
    502     // FIXME: Do we want to acknowledge the token's self-closing flag?
    503     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
    504 }
    505 
    506 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
    507 {
    508     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
    509     // Possible active formatting elements include:
    510     // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
    511     insertHTMLElement(token);
    512     m_activeFormattingElements.append(currentElementRecord()->stackItem());
    513 }
    514 
    515 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
    516 {
    517     // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
    518     // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
    519     // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
    520     // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
    521     // those flags or effects thereof.
    522     const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
    523     const bool alreadyStarted = m_isParsingFragment && parserInserted;
    524     RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
    525     setAttributes(element.get(), token, m_parserContentPolicy);
    526     if (scriptingContentIsAllowed(m_parserContentPolicy))
    527         attachLater(currentNode(), element);
    528     m_openElements.push(HTMLStackItem::create(element.release(), token));
    529 }
    530 
    531 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
    532 {
    533     ASSERT(token->type() == HTMLToken::StartTag);
    534     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
    535 
    536     RefPtr<Element> element = createElement(token, namespaceURI);
    537     if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get()))
    538         attachLater(currentNode(), element, token->selfClosing());
    539     if (!token->selfClosing())
    540         m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
    541 }
    542 
    543 void HTMLConstructionSite::insertTextNode(const String& characters, WhitespaceMode whitespaceMode)
    544 {
    545     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    546     task.parent = currentNode();
    547 
    548     if (shouldFosterParent())
    549         findFosterSite(task);
    550 
    551     if (task.parent->hasTagName(templateTag))
    552         task.parent = toHTMLTemplateElement(task.parent.get())->content();
    553 
    554     // Strings composed entirely of whitespace are likely to be repeated.
    555     // Turn them into AtomicString so we share a single string for each.
    556     bool shouldUseAtomicString = whitespaceMode == AllWhitespace
    557         || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(characters));
    558 
    559     unsigned currentPosition = 0;
    560     unsigned lengthLimit = shouldUseLengthLimit(task.parent.get()) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
    561 
    562     // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
    563     // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.
    564 
    565     Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
    566     if (previousChild && previousChild->isTextNode()) {
    567         // FIXME: We're only supposed to append to this text node if it
    568         // was the last text node inserted by the parser.
    569         CharacterData* textNode = static_cast<CharacterData*>(previousChild);
    570         currentPosition = textNode->parserAppendData(characters, 0, lengthLimit);
    571     }
    572 
    573     while (currentPosition < characters.length()) {
    574         RefPtr<Text> textNode = Text::createWithLengthLimit(task.parent->document(), shouldUseAtomicString ? AtomicString(characters).string() : characters, currentPosition, lengthLimit);
    575         // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
    576         if (!textNode->length()) {
    577             String substring = characters.substring(currentPosition);
    578             textNode = Text::create(task.parent->document(), shouldUseAtomicString ? AtomicString(substring).string() : substring);
    579         }
    580 
    581         currentPosition += textNode->length();
    582         ASSERT(currentPosition <= characters.length());
    583         task.child = textNode.release();
    584 
    585         executeTask(task);
    586     }
    587 }
    588 
    589 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
    590 {
    591     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    592     task.parent = newParent->node();
    593     task.child = child->node();
    594     m_taskQueue.append(task);
    595 }
    596 
    597 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
    598 {
    599     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    600     task.parent = newParent->node();
    601     task.child = child->node();
    602     m_taskQueue.append(task);
    603 }
    604 
    605 void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
    606 {
    607     if (newParent->causesFosterParenting()) {
    608         fosterParent(child->node());
    609         return;
    610     }
    611 
    612     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
    613     task.parent = newParent->node();
    614     task.child = child->node();
    615     m_taskQueue.append(task);
    616 }
    617 
    618 void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
    619 {
    620     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
    621     task.parent = newParent->node();
    622     task.child = oldParent->node();
    623     m_taskQueue.append(task);
    624 }
    625 
    626 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
    627 {
    628     QualifiedName tagName(nullAtom, token->name(), namespaceURI);
    629     RefPtr<Element> element = ownerDocumentForCurrentNode()->createElement(tagName, true);
    630     setAttributes(element.get(), token, m_parserContentPolicy);
    631     return element.release();
    632 }
    633 
    634 inline Document* HTMLConstructionSite::ownerDocumentForCurrentNode()
    635 {
    636     if (currentNode()->hasTagName(templateTag))
    637         return toHTMLTemplateElement(currentElement())->content()->document();
    638     return currentNode()->document();
    639 }
    640 
    641 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
    642 {
    643     QualifiedName tagName(nullAtom, token->name(), xhtmlNamespaceURI);
    644     Document* document = ownerDocumentForCurrentNode();
    645     // Only associate the element with the current form if we're creating the new element
    646     // in a document with a browsing context (rather than in <template> contents).
    647     HTMLFormElement* form = document->frame() ? m_form.get() : 0;
    648     // FIXME: This can't use HTMLConstructionSite::createElement because we
    649     // have to pass the current form element.  We should rework form association
    650     // to occur after construction to allow better code sharing here.
    651     RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, document, form, true);
    652     setAttributes(element.get(), token, m_parserContentPolicy);
    653     ASSERT(element->isHTMLElement());
    654     return element.release();
    655 }
    656 
    657 PassRefPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
    658 {
    659     RefPtr<Element> element;
    660     // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
    661     AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
    662     if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
    663         element = createHTMLElement(&fakeToken);
    664     else
    665         element = createElement(&fakeToken, item->namespaceURI());
    666     return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
    667 }
    668 
    669 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
    670 {
    671     if (m_activeFormattingElements.isEmpty())
    672         return false;
    673     unsigned index = m_activeFormattingElements.size();
    674     do {
    675         --index;
    676         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
    677         if (entry.isMarker() || m_openElements.contains(entry.element())) {
    678             firstUnopenElementIndex = index + 1;
    679             return firstUnopenElementIndex < m_activeFormattingElements.size();
    680         }
    681     } while (index);
    682     firstUnopenElementIndex = index;
    683     return true;
    684 }
    685 
    686 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
    687 {
    688     unsigned firstUnopenElementIndex;
    689     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
    690         return;
    691 
    692     unsigned unopenEntryIndex = firstUnopenElementIndex;
    693     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
    694     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
    695         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
    696         RefPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
    697         attachLater(currentNode(), reconstructed->node());
    698         m_openElements.push(reconstructed);
    699         unopenedEntry.replaceElement(reconstructed.release());
    700     }
    701 }
    702 
    703 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
    704 {
    705     while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
    706         m_openElements.pop();
    707 }
    708 
    709 void HTMLConstructionSite::generateImpliedEndTags()
    710 {
    711     while (hasImpliedEndTag(currentStackItem()))
    712         m_openElements.pop();
    713 }
    714 
    715 bool HTMLConstructionSite::inQuirksMode()
    716 {
    717     return m_inQuirksMode;
    718 }
    719 
    720 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
    721 {
    722     // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
    723     HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
    724     if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
    725         task.parent = lastTemplateElement->element();
    726         return;
    727     }
    728 
    729     HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
    730     if (lastTableElementRecord) {
    731         Element* lastTableElement = lastTableElementRecord->element();
    732         ContainerNode* parent;
    733         if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag))
    734             parent = lastTableElementRecord->next()->element();
    735         else
    736             parent = lastTableElement->parentNode();
    737 
    738         // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
    739         // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
    740         if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) {
    741             task.parent = parent;
    742             task.nextChild = lastTableElement;
    743             return;
    744         }
    745         task.parent = lastTableElementRecord->next()->element();
    746         return;
    747     }
    748     // Fragment case
    749     task.parent = m_openElements.rootNode(); // DocumentFragment
    750 }
    751 
    752 bool HTMLConstructionSite::shouldFosterParent() const
    753 {
    754     return m_redirectAttachToFosterParent
    755         && currentStackItem()->isElementNode()
    756         && currentStackItem()->causesFosterParenting();
    757 }
    758 
    759 void HTMLConstructionSite::fosterParent(PassRefPtr<Node> node)
    760 {
    761     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    762     findFosterSite(task);
    763     task.child = node;
    764     ASSERT(task.parent);
    765 
    766     m_taskQueue.append(task);
    767 }
    768 
    769 }
    770