Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/html/parser/HTMLTreeBuilder.h"
     29 
     30 #include "HTMLElementFactory.h"
     31 #include "HTMLNames.h"
     32 #include "core/dom/Comment.h"
     33 #include "core/dom/DocumentFragment.h"
     34 #include "core/dom/DocumentType.h"
     35 #include "core/dom/Element.h"
     36 #include "core/dom/ScriptLoader.h"
     37 #include "core/dom/Text.h"
     38 #include "core/html/HTMLFormElement.h"
     39 #include "core/html/HTMLHtmlElement.h"
     40 #include "core/html/HTMLOptGroupElement.h"
     41 #include "core/html/HTMLScriptElement.h"
     42 #include "core/html/HTMLTemplateElement.h"
     43 #include "core/html/parser/AtomicHTMLToken.h"
     44 #include "core/html/parser/HTMLParserIdioms.h"
     45 #include "core/html/parser/HTMLStackItem.h"
     46 #include "core/html/parser/HTMLToken.h"
     47 #include "core/loader/FrameLoader.h"
     48 #include "core/loader/FrameLoaderClient.h"
     49 #include "core/frame/Frame.h"
     50 #include "platform/NotImplemented.h"
     51 #include "platform/text/TextBreakIterator.h"
     52 #include <limits>
     53 
     54 namespace WebCore {
     55 
     56 using namespace HTMLNames;
     57 
     58 static const unsigned maximumHTMLParserDOMTreeDepth = 512;
     59 
     60 static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
     61 {
     62     if (!scriptingContentIsAllowed(parserContentPolicy))
     63         element->stripScriptingAttributes(token->attributes());
     64     element->parserSetAttributes(token->attributes());
     65 }
     66 
     67 static bool hasImpliedEndTag(const HTMLStackItem* item)
     68 {
     69     return item->hasTagName(ddTag)
     70         || item->hasTagName(dtTag)
     71         || item->hasTagName(liTag)
     72         || item->hasTagName(optionTag)
     73         || isHTMLOptGroupElement(item->node())
     74         || item->hasTagName(pTag)
     75         || item->hasTagName(rpTag)
     76         || item->hasTagName(rtTag);
     77 }
     78 
     79 static bool shouldUseLengthLimit(const ContainerNode* node)
     80 {
     81     return !node->hasTagName(scriptTag)
     82         && !node->hasTagName(styleTag)
     83         && !node->hasTagName(SVGNames::scriptTag);
     84 }
     85 
     86 static unsigned textLengthLimitForContainer(const ContainerNode* node)
     87 {
     88     return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
     89 }
     90 
     91 static inline bool isAllWhitespace(const String& string)
     92 {
     93     return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
     94 }
     95 
     96 static inline void insert(HTMLConstructionSiteTask& task)
     97 {
     98     if (task.parent->hasTagName(templateTag))
     99         task.parent = toHTMLTemplateElement(task.parent.get())->content();
    100 
    101     if (ContainerNode* parent = task.child->parentNode())
    102         parent->parserRemoveChild(*task.child);
    103 
    104     if (task.nextChild)
    105         task.parent->parserInsertBefore(task.child.get(), *task.nextChild);
    106     else
    107         task.parent->parserAppendChild(task.child.get());
    108 }
    109 
    110 static inline void executeInsertTask(HTMLConstructionSiteTask& task)
    111 {
    112     ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
    113 
    114     insert(task);
    115 
    116     task.child->beginParsingChildren();
    117 
    118     if (task.selfClosing)
    119         task.child->finishParsingChildren();
    120 }
    121 
    122 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
    123 {
    124     ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
    125     ASSERT(task.child->isTextNode());
    126 
    127     // Merge text nodes into previous ones if possible:
    128     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
    129     Text* newText = toText(task.child.get());
    130     Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
    131     if (previousChild && previousChild->isTextNode()) {
    132         Text* previousText = toText(previousChild);
    133         unsigned lengthLimit = textLengthLimitForContainer(task.parent.get());
    134         if (previousText->length() + newText->length() < lengthLimit) {
    135             previousText->parserAppendData(newText->data());
    136             return;
    137         }
    138     }
    139 
    140     insert(task);
    141 }
    142 
    143 static inline void executeReparentTask(HTMLConstructionSiteTask& task)
    144 {
    145     ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
    146 
    147     if (ContainerNode* parent = task.child->parentNode())
    148         parent->parserRemoveChild(*task.child);
    149 
    150     task.parent->parserAppendChild(task.child);
    151 }
    152 
    153 static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
    154 {
    155     ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);
    156 
    157     insert(task);
    158 }
    159 
    160 static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
    161 {
    162     ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);
    163 
    164     task.parent->parserTakeAllChildrenFrom(*task.oldParent());
    165 }
    166 
    167 void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)
    168 {
    169     ASSERT(m_taskQueue.isEmpty());
    170     if (task.operation == HTMLConstructionSiteTask::Insert)
    171         return executeInsertTask(task);
    172 
    173     if (task.operation == HTMLConstructionSiteTask::InsertText)
    174         return executeInsertTextTask(task);
    175 
    176     // All the cases below this point are only used by the adoption agency.
    177 
    178     if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
    179         return executeInsertAlreadyParsedChildTask(task);
    180 
    181     if (task.operation == HTMLConstructionSiteTask::Reparent)
    182         return executeReparentTask(task);
    183 
    184     if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
    185         return executeTakeAllChildrenTask(task);
    186 
    187     ASSERT_NOT_REACHED();
    188 }
    189 
    190 // This is only needed for TextDocuments where we might have text nodes
    191 // approaching the default length limit (~64k) and we don't want to
    192 // break a text node in the middle of a combining character.
    193 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)
    194 {
    195     ASSERT(currentPosition < proposedBreakIndex);
    196     ASSERT(proposedBreakIndex <= string.length());
    197     // The end of the string is always a valid break.
    198     if (proposedBreakIndex == string.length())
    199         return proposedBreakIndex;
    200 
    201     // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong.
    202     if (string.is8Bit())
    203         return proposedBreakIndex;
    204 
    205     const UChar* breakSearchCharacters = string.characters16() + currentPosition;
    206     // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer!
    207     unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition);
    208     NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength);
    209 
    210     if (it.isBreak(proposedBreakIndex - currentPosition))
    211         return proposedBreakIndex;
    212 
    213     int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition);
    214     if (adjustedBreakIndexInSubstring > 0)
    215         return currentPosition + adjustedBreakIndexInSubstring;
    216     // We failed to find a breakable point, let the caller figure out what to do.
    217     return 0;
    218 }
    219 
    220 static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode)
    221 {
    222     // Strings composed entirely of whitespace are likely to be repeated.
    223     // Turn them into AtomicString so we share a single string for each.
    224     if (whitespaceMode == AllWhitespace || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(string)))
    225         return AtomicString(string).string();
    226     return string;
    227 }
    228 
    229 void HTMLConstructionSite::flushPendingText()
    230 {
    231     if (m_pendingText.isEmpty())
    232         return;
    233 
    234     PendingText pendingText;
    235     // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
    236     m_pendingText.swap(pendingText);
    237     ASSERT(m_pendingText.isEmpty());
    238 
    239     // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
    240     // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
    241     unsigned lengthLimit = textLengthLimitForContainer(pendingText.parent.get());
    242 
    243     unsigned currentPosition = 0;
    244     const StringBuilder& string = pendingText.stringBuilder;
    245     while (currentPosition < string.length()) {
    246         unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
    247         unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
    248         ASSERT(breakIndex <= string.length());
    249         String substring = string.substring(currentPosition, breakIndex - currentPosition);
    250         substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);
    251 
    252         HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
    253         task.parent = pendingText.parent;
    254         task.nextChild = pendingText.nextChild;
    255         task.child = Text::create(task.parent->document(), substring);
    256         queueTask(task);
    257 
    258         ASSERT(breakIndex > currentPosition);
    259         ASSERT(breakIndex - currentPosition == substring.length());
    260         ASSERT(toText(task.child.get())->length() == substring.length());
    261         currentPosition = breakIndex;
    262     }
    263 }
    264 
    265 void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
    266 {
    267     flushPendingText();
    268     ASSERT(m_pendingText.isEmpty());
    269     m_taskQueue.append(task);
    270 }
    271 
    272 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> prpChild, bool selfClosing)
    273 {
    274     ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get())));
    275     ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !prpChild->isPluginElement());
    276 
    277     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    278     task.parent = parent;
    279     task.child = prpChild;
    280     task.selfClosing = selfClosing;
    281 
    282     if (shouldFosterParent()) {
    283         fosterParent(task.child);
    284         return;
    285     }
    286 
    287     // Add as a sibling of the parent if we have reached the maximum depth allowed.
    288     if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode())
    289         task.parent = task.parent->parentNode();
    290 
    291     ASSERT(task.parent);
    292     queueTask(task);
    293 }
    294 
    295 void HTMLConstructionSite::executeQueuedTasks()
    296 {
    297     // This has no affect on pendingText, and we may have pendingText
    298     // remaining after executing all other queued tasks.
    299     const size_t size = m_taskQueue.size();
    300     if (!size)
    301         return;
    302 
    303     // Copy the task queue into a local variable in case executeTask
    304     // re-enters the parser.
    305     TaskQueue queue;
    306     queue.swap(m_taskQueue);
    307 
    308     for (size_t i = 0; i < size; ++i)
    309         executeTask(queue[i]);
    310 
    311     // We might be detached now.
    312 }
    313 
    314 HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
    315     : m_document(document)
    316     , m_attachmentRoot(document)
    317     , m_parserContentPolicy(parserContentPolicy)
    318     , m_isParsingFragment(false)
    319     , m_redirectAttachToFosterParent(false)
    320     , m_inQuirksMode(document->inQuirksMode())
    321 {
    322     ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
    323 }
    324 
    325 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
    326     : m_document(&fragment->document())
    327     , m_attachmentRoot(fragment)
    328     , m_parserContentPolicy(parserContentPolicy)
    329     , m_isParsingFragment(true)
    330     , m_redirectAttachToFosterParent(false)
    331     , m_inQuirksMode(fragment->document().inQuirksMode())
    332 {
    333     ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
    334 }
    335 
    336 HTMLConstructionSite::~HTMLConstructionSite()
    337 {
    338     // Depending on why we're being destroyed it might be OK
    339     // to forget queued tasks, but currently we don't expect to.
    340     ASSERT(m_taskQueue.isEmpty());
    341     // Currently we assume that text will never be the last token in the
    342     // document and that we'll always queue some additional task to cause it to flush.
    343     ASSERT(m_pendingText.isEmpty());
    344 }
    345 
    346 void HTMLConstructionSite::detach()
    347 {
    348     // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
    349     // text that really should have made it into the DOM earlier, but there
    350     // doesn't seem to be a nice way to do that.
    351     m_pendingText.discard();
    352     m_document = 0;
    353     m_attachmentRoot = 0;
    354 }
    355 
    356 void HTMLConstructionSite::setForm(HTMLFormElement* form)
    357 {
    358     // This method should only be needed for HTMLTreeBuilder in the fragment case.
    359     ASSERT(!m_form);
    360     m_form = form;
    361 }
    362 
    363 PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
    364 {
    365     return m_form.release();
    366 }
    367 
    368 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
    369 {
    370     ASSERT(m_document);
    371     if (m_document->frame() && !m_isParsingFragment)
    372         m_document->frame()->loader().dispatchDocumentElementAvailable();
    373 }
    374 
    375 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
    376 {
    377     ASSERT(m_document);
    378     RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document);
    379     setAttributes(element.get(), token, m_parserContentPolicy);
    380     attachLater(m_attachmentRoot, element);
    381     m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
    382 
    383     executeQueuedTasks();
    384     element->insertedByParser();
    385     dispatchDocumentElementAvailableIfNeeded();
    386 }
    387 
    388 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
    389 {
    390     if (token->attributes().isEmpty())
    391         return;
    392 
    393     for (unsigned i = 0; i < token->attributes().size(); ++i) {
    394         const Attribute& tokenAttribute = token->attributes().at(i);
    395         if (!element->elementData() || !element->getAttributeItem(tokenAttribute.name()))
    396             element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
    397     }
    398 }
    399 
    400 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
    401 {
    402     // Fragments do not have a root HTML element, so any additional HTML elements
    403     // encountered during fragment parsing should be ignored.
    404     if (m_isParsingFragment)
    405         return;
    406 
    407     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
    408 }
    409 
    410 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
    411 {
    412     mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
    413 }
    414 
    415 void HTMLConstructionSite::setDefaultCompatibilityMode()
    416 {
    417     if (m_isParsingFragment)
    418         return;
    419     if (m_document->isSrcdocDocument())
    420         return;
    421     setCompatibilityMode(Document::QuirksMode);
    422 }
    423 
    424 void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
    425 {
    426     m_inQuirksMode = (mode == Document::QuirksMode);
    427     m_document->setCompatibilityMode(mode);
    428 }
    429 
    430 void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
    431 {
    432     // There are three possible compatibility modes:
    433     // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
    434     // be omitted from numbers.
    435     // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
    436     // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.
    437 
    438     // Check for Quirks Mode.
    439     if (name != "html"
    440         || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
    441         || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
    442         || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
    443         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
    444         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
    445         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
    446         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
    447         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
    448         || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
    449         || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
    450         || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
    451         || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
    452         || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
    453         || publicId.startsWith("-//IETF//DTD HTML 3//", false)
    454         || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
    455         || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
    456         || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
    457         || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
    458         || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
    459         || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
    460         || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
    461         || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
    462         || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
    463         || publicId.startsWith("-//IETF//DTD HTML//", false)
    464         || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
    465         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
    466         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
    467         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
    468         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
    469         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
    470         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
    471         || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
    472         || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
    473         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
    474         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
    475         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
    476         || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
    477         || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
    478         || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
    479         || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
    480         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
    481         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
    482         || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
    483         || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
    484         || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
    485         || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
    486         || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
    487         || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
    488         || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
    489         || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
    490         || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
    491         || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
    492         || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
    493         || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
    494         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
    495         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
    496         || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
    497         || equalIgnoringCase(publicId, "HTML")
    498         || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
    499         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
    500         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
    501         setCompatibilityMode(Document::QuirksMode);
    502         return;
    503     }
    504 
    505     // Check for Limited Quirks Mode.
    506     if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
    507         || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
    508         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
    509         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
    510         setCompatibilityMode(Document::LimitedQuirksMode);
    511         return;
    512     }
    513 
    514     // Otherwise we are No Quirks Mode.
    515     setCompatibilityMode(Document::NoQuirksMode);
    516 }
    517 
    518 void HTMLConstructionSite::processEndOfFile()
    519 {
    520     ASSERT(currentNode());
    521     flush();
    522     openElements()->popAll();
    523 }
    524 
    525 void HTMLConstructionSite::finishedParsing()
    526 {
    527     // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.
    528     ASSERT(m_taskQueue.isEmpty());
    529     flush();
    530     m_document->finishedParsing();
    531 }
    532 
    533 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
    534 {
    535     ASSERT(token->type() == HTMLToken::DOCTYPE);
    536 
    537     const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
    538     const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
    539     RefPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
    540     attachLater(m_attachmentRoot, doctype.release());
    541 
    542     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
    543     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
    544     // because context-less fragments can determine their own quirks mode, and thus change
    545     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
    546     // in a fragment, as changing the owning document's compatibility mode would be wrong.
    547     ASSERT(!m_isParsingFragment);
    548     if (m_isParsingFragment)
    549         return;
    550 
    551     if (token->forceQuirks())
    552         setCompatibilityMode(Document::QuirksMode);
    553     else {
    554         setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
    555     }
    556 }
    557 
    558 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
    559 {
    560     ASSERT(token->type() == HTMLToken::Comment);
    561     attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
    562 }
    563 
    564 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
    565 {
    566     ASSERT(token->type() == HTMLToken::Comment);
    567     ASSERT(m_document);
    568     attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment()));
    569 }
    570 
    571 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
    572 {
    573     ASSERT(token->type() == HTMLToken::Comment);
    574     ContainerNode* parent = m_openElements.rootNode();
    575     attachLater(parent, Comment::create(parent->document(), token->comment()));
    576 }
    577 
    578 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
    579 {
    580     ASSERT(!shouldFosterParent());
    581     m_head = HTMLStackItem::create(createHTMLElement(token), token);
    582     attachLater(currentNode(), m_head->element());
    583     m_openElements.pushHTMLHeadElement(m_head);
    584 }
    585 
    586 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
    587 {
    588     ASSERT(!shouldFosterParent());
    589     RefPtr<Element> body = createHTMLElement(token);
    590     attachLater(currentNode(), body);
    591     m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
    592     if (Frame* frame = m_document->frame())
    593         frame->loader().client()->dispatchWillInsertBody();
    594 }
    595 
    596 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
    597 {
    598     RefPtr<Element> element = createHTMLElement(token);
    599     ASSERT(element->hasTagName(formTag));
    600     m_form = static_pointer_cast<HTMLFormElement>(element.release());
    601     m_form->setDemoted(isDemoted);
    602     attachLater(currentNode(), m_form);
    603     m_openElements.push(HTMLStackItem::create(m_form, token));
    604 }
    605 
    606 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
    607 {
    608     RefPtr<Element> element = createHTMLElement(token);
    609     attachLater(currentNode(), element);
    610     m_openElements.push(HTMLStackItem::create(element.release(), token));
    611 }
    612 
    613 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
    614 {
    615     ASSERT(token->type() == HTMLToken::StartTag);
    616     // Normally HTMLElementStack is responsible for calling finishParsingChildren,
    617     // but self-closing elements are never in the element stack so the stack
    618     // doesn't get a chance to tell them that we're done parsing their children.
    619     attachLater(currentNode(), createHTMLElement(token), true);
    620     // FIXME: Do we want to acknowledge the token's self-closing flag?
    621     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
    622 }
    623 
    624 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
    625 {
    626     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
    627     // Possible active formatting elements include:
    628     // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
    629     insertHTMLElement(token);
    630     m_activeFormattingElements.append(currentElementRecord()->stackItem());
    631 }
    632 
    633 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
    634 {
    635     // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
    636     // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
    637     // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
    638     // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
    639     // those flags or effects thereof.
    640     const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
    641     const bool alreadyStarted = m_isParsingFragment && parserInserted;
    642     RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
    643     setAttributes(element.get(), token, m_parserContentPolicy);
    644     if (scriptingContentIsAllowed(m_parserContentPolicy))
    645         attachLater(currentNode(), element);
    646     m_openElements.push(HTMLStackItem::create(element.release(), token));
    647 }
    648 
    649 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
    650 {
    651     ASSERT(token->type() == HTMLToken::StartTag);
    652     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
    653 
    654     RefPtr<Element> element = createElement(token, namespaceURI);
    655     if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get()))
    656         attachLater(currentNode(), element, token->selfClosing());
    657     if (!token->selfClosing())
    658         m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
    659 }
    660 
    661 void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode)
    662 {
    663     HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
    664     dummyTask.parent = currentNode();
    665 
    666     if (shouldFosterParent())
    667         findFosterSite(dummyTask);
    668 
    669     // FIXME: This probably doesn't need to be done both here and in insert(Task).
    670     if (dummyTask.parent->hasTagName(templateTag))
    671         dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
    672 
    673     // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token.
    674     // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"
    675     // In either case we have to flush the pending text into the task queue before making more.
    676     if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent ||  m_pendingText.nextChild != dummyTask.nextChild))
    677         flushPendingText();
    678     m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);
    679 }
    680 
    681 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
    682 {
    683     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    684     task.parent = newParent->node();
    685     task.child = child->node();
    686     queueTask(task);
    687 }
    688 
    689 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
    690 {
    691     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    692     task.parent = newParent->node();
    693     task.child = child->node();
    694     queueTask(task);
    695 }
    696 
    697 void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
    698 {
    699     if (newParent->causesFosterParenting()) {
    700         fosterParent(child->node());
    701         return;
    702     }
    703 
    704     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
    705     task.parent = newParent->node();
    706     task.child = child->node();
    707     queueTask(task);
    708 }
    709 
    710 void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
    711 {
    712     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
    713     task.parent = newParent->node();
    714     task.child = oldParent->node();
    715     queueTask(task);
    716 }
    717 
    718 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
    719 {
    720     QualifiedName tagName(nullAtom, token->name(), namespaceURI);
    721     RefPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true);
    722     setAttributes(element.get(), token, m_parserContentPolicy);
    723     return element.release();
    724 }
    725 
    726 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
    727 {
    728     if (currentNode()->hasTagName(templateTag))
    729         return toHTMLTemplateElement(currentElement())->content()->document();
    730     return currentNode()->document();
    731 }
    732 
    733 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
    734 {
    735     Document& document = ownerDocumentForCurrentNode();
    736     // Only associate the element with the current form if we're creating the new element
    737     // in a document with a browsing context (rather than in <template> contents).
    738     HTMLFormElement* form = document.frame() ? m_form.get() : 0;
    739     // FIXME: This can't use HTMLConstructionSite::createElement because we
    740     // have to pass the current form element.  We should rework form association
    741     // to occur after construction to allow better code sharing here.
    742     RefPtr<Element> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true);
    743     setAttributes(element.get(), token, m_parserContentPolicy);
    744     ASSERT(element->isHTMLElement());
    745     return element.release();
    746 }
    747 
    748 PassRefPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
    749 {
    750     RefPtr<Element> element;
    751     // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
    752     AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
    753     if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
    754         element = createHTMLElement(&fakeToken);
    755     else
    756         element = createElement(&fakeToken, item->namespaceURI());
    757     return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
    758 }
    759 
    760 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
    761 {
    762     if (m_activeFormattingElements.isEmpty())
    763         return false;
    764     unsigned index = m_activeFormattingElements.size();
    765     do {
    766         --index;
    767         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
    768         if (entry.isMarker() || m_openElements.contains(entry.element())) {
    769             firstUnopenElementIndex = index + 1;
    770             return firstUnopenElementIndex < m_activeFormattingElements.size();
    771         }
    772     } while (index);
    773     firstUnopenElementIndex = index;
    774     return true;
    775 }
    776 
    777 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
    778 {
    779     unsigned firstUnopenElementIndex;
    780     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
    781         return;
    782 
    783     unsigned unopenEntryIndex = firstUnopenElementIndex;
    784     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
    785     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
    786         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
    787         RefPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
    788         attachLater(currentNode(), reconstructed->node());
    789         m_openElements.push(reconstructed);
    790         unopenedEntry.replaceElement(reconstructed.release());
    791     }
    792 }
    793 
    794 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
    795 {
    796     while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
    797         m_openElements.pop();
    798 }
    799 
    800 void HTMLConstructionSite::generateImpliedEndTags()
    801 {
    802     while (hasImpliedEndTag(currentStackItem()))
    803         m_openElements.pop();
    804 }
    805 
    806 bool HTMLConstructionSite::inQuirksMode()
    807 {
    808     return m_inQuirksMode;
    809 }
    810 
    811 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
    812 {
    813     // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
    814     HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
    815     if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
    816         task.parent = lastTemplateElement->element();
    817         return;
    818     }
    819 
    820     HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
    821     if (lastTableElementRecord) {
    822         Element* lastTableElement = lastTableElementRecord->element();
    823         ContainerNode* parent;
    824         if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag))
    825             parent = lastTableElementRecord->next()->element();
    826         else
    827             parent = lastTableElement->parentNode();
    828 
    829         // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
    830         // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
    831         if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) {
    832             task.parent = parent;
    833             task.nextChild = lastTableElement;
    834             return;
    835         }
    836         task.parent = lastTableElementRecord->next()->element();
    837         return;
    838     }
    839     // Fragment case
    840     task.parent = m_openElements.rootNode(); // DocumentFragment
    841 }
    842 
    843 bool HTMLConstructionSite::shouldFosterParent() const
    844 {
    845     return m_redirectAttachToFosterParent
    846         && currentStackItem()->isElementNode()
    847         && currentStackItem()->causesFosterParenting();
    848 }
    849 
    850 void HTMLConstructionSite::fosterParent(PassRefPtr<Node> node)
    851 {
    852     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    853     findFosterSite(task);
    854     task.child = node;
    855     ASSERT(task.parent);
    856     queueTask(task);
    857 }
    858 
    859 }
    860