Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/html/parser/HTMLConstructionSite.h"
     29 
     30 #include "core/HTMLElementFactory.h"
     31 #include "core/HTMLNames.h"
     32 #include "core/dom/Comment.h"
     33 #include "core/dom/DocumentFragment.h"
     34 #include "core/dom/DocumentType.h"
     35 #include "core/dom/Element.h"
     36 #include "core/dom/ScriptLoader.h"
     37 #include "core/dom/Text.h"
     38 #include "core/frame/LocalFrame.h"
     39 #include "core/html/HTMLFormElement.h"
     40 #include "core/html/HTMLHtmlElement.h"
     41 #include "core/html/HTMLPlugInElement.h"
     42 #include "core/html/HTMLScriptElement.h"
     43 #include "core/html/HTMLTemplateElement.h"
     44 #include "core/html/parser/AtomicHTMLToken.h"
     45 #include "core/html/parser/HTMLParserIdioms.h"
     46 #include "core/html/parser/HTMLStackItem.h"
     47 #include "core/html/parser/HTMLToken.h"
     48 #include "core/loader/FrameLoader.h"
     49 #include "core/loader/FrameLoaderClient.h"
     50 #include "core/svg/SVGScriptElement.h"
     51 #include "platform/NotImplemented.h"
     52 #include "platform/text/TextBreakIterator.h"
     53 #include <limits>
     54 
     55 namespace blink {
     56 
     57 using namespace HTMLNames;
     58 
// Open-element stack depth beyond which attachLater() attaches a new node to
// the parent of its intended parent rather than nesting it any deeper.
static const unsigned maximumHTMLParserDOMTreeDepth = 512;
     60 
     61 static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
     62 {
     63     if (!scriptingContentIsAllowed(parserContentPolicy))
     64         element->stripScriptingAttributes(token->attributes());
     65     element->parserSetAttributes(token->attributes());
     66 }
     67 
     68 static bool hasImpliedEndTag(const HTMLStackItem* item)
     69 {
     70     return item->hasTagName(ddTag)
     71         || item->hasTagName(dtTag)
     72         || item->hasTagName(liTag)
     73         || item->hasTagName(optionTag)
     74         || item->hasTagName(optgroupTag)
     75         || item->hasTagName(pTag)
     76         || item->hasTagName(rbTag)
     77         || item->hasTagName(rpTag)
     78         || item->hasTagName(rtTag)
     79         || item->hasTagName(rtcTag);
     80 }
     81 
     82 static bool shouldUseLengthLimit(const ContainerNode& node)
     83 {
     84     return !isHTMLScriptElement(node)
     85         && !isHTMLStyleElement(node)
     86         && !isSVGScriptElement(node);
     87 }
     88 
     89 static unsigned textLengthLimitForContainer(const ContainerNode& node)
     90 {
     91     return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
     92 }
     93 
     94 static inline bool isAllWhitespace(const String& string)
     95 {
     96     return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
     97 }
     98 
     99 static inline void insert(HTMLConstructionSiteTask& task)
    100 {
    101     if (isHTMLTemplateElement(*task.parent))
    102         task.parent = toHTMLTemplateElement(task.parent.get())->content();
    103 
    104     if (ContainerNode* parent = task.child->parentNode())
    105         parent->parserRemoveChild(*task.child);
    106 
    107     if (task.nextChild)
    108         task.parent->parserInsertBefore(task.child.get(), *task.nextChild);
    109     else
    110         task.parent->parserAppendChild(task.child.get());
    111 }
    112 
    113 static inline void executeInsertTask(HTMLConstructionSiteTask& task)
    114 {
    115     ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
    116 
    117     insert(task);
    118 
    119     if (task.child->isElementNode()) {
    120         Element& child = toElement(*task.child);
    121         child.beginParsingChildren();
    122         if (task.selfClosing)
    123             child.finishParsingChildren();
    124     }
    125 }
    126 
    127 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
    128 {
    129     ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
    130     ASSERT(task.child->isTextNode());
    131 
    132     // Merge text nodes into previous ones if possible:
    133     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
    134     Text* newText = toText(task.child.get());
    135     Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
    136     if (previousChild && previousChild->isTextNode()) {
    137         Text* previousText = toText(previousChild);
    138         unsigned lengthLimit = textLengthLimitForContainer(*task.parent);
    139         if (previousText->length() + newText->length() < lengthLimit) {
    140             previousText->parserAppendData(newText->data());
    141             return;
    142         }
    143     }
    144 
    145     insert(task);
    146 }
    147 
    148 static inline void executeReparentTask(HTMLConstructionSiteTask& task)
    149 {
    150     ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
    151 
    152     if (ContainerNode* parent = task.child->parentNode())
    153         parent->parserRemoveChild(*task.child);
    154 
    155     task.parent->parserAppendChild(task.child);
    156 }
    157 
// Runs an InsertAlreadyParsedChild task. Unlike executeInsertTask, this only
// performs the insertion and does not call beginParsingChildren() or
// finishParsingChildren() on the child.
static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);

    insert(task);
}
    164 
// Runs a TakeAllChildren task: asks task.parent to take all children from the
// node returned by task.oldParent().
static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
{
    ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);

    task.parent->parserTakeAllChildrenFrom(*task.oldParent());
}
    171 
    172 void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)
    173 {
    174     ASSERT(m_taskQueue.isEmpty());
    175     if (task.operation == HTMLConstructionSiteTask::Insert)
    176         return executeInsertTask(task);
    177 
    178     if (task.operation == HTMLConstructionSiteTask::InsertText)
    179         return executeInsertTextTask(task);
    180 
    181     // All the cases below this point are only used by the adoption agency.
    182 
    183     if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
    184         return executeInsertAlreadyParsedChildTask(task);
    185 
    186     if (task.operation == HTMLConstructionSiteTask::Reparent)
    187         return executeReparentTask(task);
    188 
    189     if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
    190         return executeTakeAllChildrenTask(task);
    191 
    192     ASSERT_NOT_REACHED();
    193 }
    194 
    195 // This is only needed for TextDocuments where we might have text nodes
    196 // approaching the default length limit (~64k) and we don't want to
    197 // break a text node in the middle of a combining character.
    198 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)
    199 {
    200     ASSERT(currentPosition < proposedBreakIndex);
    201     ASSERT(proposedBreakIndex <= string.length());
    202     // The end of the string is always a valid break.
    203     if (proposedBreakIndex == string.length())
    204         return proposedBreakIndex;
    205 
    206     // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong.
    207     if (string.is8Bit())
    208         return proposedBreakIndex;
    209 
    210     const UChar* breakSearchCharacters = string.characters16() + currentPosition;
    211     // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer!
    212     unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition);
    213     NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength);
    214 
    215     if (it.isBreak(proposedBreakIndex - currentPosition))
    216         return proposedBreakIndex;
    217 
    218     int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition);
    219     if (adjustedBreakIndexInSubstring > 0)
    220         return currentPosition + adjustedBreakIndexInSubstring;
    221     // We failed to find a breakable point, let the caller figure out what to do.
    222     return 0;
    223 }
    224 
    225 static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode)
    226 {
    227     // Strings composed entirely of whitespace are likely to be repeated.
    228     // Turn them into AtomicString so we share a single string for each.
    229     if (whitespaceMode == AllWhitespace || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(string)))
    230         return AtomicString(string).string();
    231     return string;
    232 }
    233 
    234 void HTMLConstructionSite::flushPendingText(FlushMode mode)
    235 {
    236     if (m_pendingText.isEmpty())
    237         return;
    238 
    239     if (mode == FlushIfAtTextLimit
    240         && !shouldUseLengthLimit(*m_pendingText.parent))
    241         return;
    242 
    243     PendingText pendingText;
    244     // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
    245     m_pendingText.swap(pendingText);
    246     ASSERT(m_pendingText.isEmpty());
    247 
    248     // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
    249     // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
    250     unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);
    251 
    252     unsigned currentPosition = 0;
    253     const StringBuilder& string = pendingText.stringBuilder;
    254     while (currentPosition < string.length()) {
    255         unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
    256         unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
    257         ASSERT(breakIndex <= string.length());
    258         String substring = string.substring(currentPosition, breakIndex - currentPosition);
    259         substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);
    260 
    261         HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
    262         task.parent = pendingText.parent;
    263         task.nextChild = pendingText.nextChild;
    264         task.child = Text::create(task.parent->document(), substring);
    265         queueTask(task);
    266 
    267         ASSERT(breakIndex > currentPosition);
    268         ASSERT(breakIndex - currentPosition == substring.length());
    269         ASSERT(toText(task.child.get())->length() == substring.length());
    270         currentPosition = breakIndex;
    271     }
    272 }
    273 
// Appends |task| to the task queue. Any pending text is flushed first, so
// text tasks are queued ahead of |task|.
void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
{
    flushPendingText(FlushAlways);
    ASSERT(m_pendingText.isEmpty());
    m_taskQueue.append(task);
}
    280 
    281 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> prpChild, bool selfClosing)
    282 {
    283     ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get())));
    284     ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !isHTMLPlugInElement(prpChild));
    285 
    286     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    287     task.parent = parent;
    288     task.child = prpChild;
    289     task.selfClosing = selfClosing;
    290 
    291     if (shouldFosterParent()) {
    292         fosterParent(task.child);
    293         return;
    294     }
    295 
    296     // Add as a sibling of the parent if we have reached the maximum depth allowed.
    297     if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode())
    298         task.parent = task.parent->parentNode();
    299 
    300     ASSERT(task.parent);
    301     queueTask(task);
    302 }
    303 
    304 void HTMLConstructionSite::executeQueuedTasks()
    305 {
    306     // This has no affect on pendingText, and we may have pendingText
    307     // remaining after executing all other queued tasks.
    308     const size_t size = m_taskQueue.size();
    309     if (!size)
    310         return;
    311 
    312     // Copy the task queue into a local variable in case executeTask
    313     // re-enters the parser.
    314     TaskQueue queue;
    315     queue.swap(m_taskQueue);
    316 
    317     for (size_t i = 0; i < size; ++i)
    318         executeTask(queue[i]);
    319 
    320     // We might be detached now.
    321 }
    322 
// Constructs a construction site for parsing a whole document. |document| is
// both the owning document and the attachment root, fragment parsing is off,
// and the initial quirks-mode flag is copied from the document.
HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
    : m_document(document)
    , m_attachmentRoot(document)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(false)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(document->inQuirksMode())
{
    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}
    333 
// Constructs a construction site for fragment parsing. |fragment| is the
// attachment root, its document is the owning document, and the initial
// quirks-mode flag is copied from that document.
HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
    : m_document(&fragment->document())
    , m_attachmentRoot(fragment)
    , m_parserContentPolicy(parserContentPolicy)
    , m_isParsingFragment(true)
    , m_redirectAttachToFosterParent(false)
    , m_inQuirksMode(fragment->document().inQuirksMode())
{
    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}
    344 
// Destructor. Performs no cleanup of its own; it only asserts that no queued
// tasks and no pending text are left behind.
HTMLConstructionSite::~HTMLConstructionSite()
{
    // Depending on why we're being destroyed it might be OK
    // to forget queued tasks, but currently we don't expect to.
    ASSERT(m_taskQueue.isEmpty());
    // Currently we assume that text will never be the last token in the
    // document and that we'll always queue some additional task to cause it to flush.
    ASSERT(m_pendingText.isEmpty());
}
    354 
// Reports the members listed below to |visitor|.
// NOTE(review): presumably part of garbage-collection tracing; confirm that
// every traced member is covered when adding new ones.
void HTMLConstructionSite::trace(Visitor* visitor)
{
    visitor->trace(m_document);
    visitor->trace(m_attachmentRoot);
    visitor->trace(m_head);
    visitor->trace(m_form);
    visitor->trace(m_openElements);
    visitor->trace(m_activeFormattingElements);
    visitor->trace(m_taskQueue);
    visitor->trace(m_pendingText);
}
    366 
// Detaches the construction site: discards any pending text and clears the
// document and attachment-root pointers.
void HTMLConstructionSite::detach()
{
    // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
    // text that really should have made it into the DOM earlier, but there
    // doesn't seem to be a nice way to do that.
    m_pendingText.discard();
    m_document = nullptr;
    m_attachmentRoot = nullptr;
}
    376 
// Stores |form| as the current form element. Asserts that no form element is
// currently stored.
void HTMLConstructionSite::setForm(HTMLFormElement* form)
{
    // This method should only be needed for HTMLTreeBuilder in the fragment case.
    ASSERT(!m_form);
    m_form = form;
}
    383 
// Hands the stored form element to the caller via m_form.release().
PassRefPtrWillBeRawPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
{
    return m_form.release();
}
    388 
    389 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
    390 {
    391     ASSERT(m_document);
    392     if (m_document->frame() && !m_isParsingFragment)
    393         m_document->frame()->loader().dispatchDocumentElementAvailable();
    394 }
    395 
    396 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
    397 {
    398     ASSERT(m_document);
    399     RefPtrWillBeRawPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document);
    400     setAttributes(element.get(), token, m_parserContentPolicy);
    401     attachLater(m_attachmentRoot, element);
    402     m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
    403 
    404     executeQueuedTasks();
    405     element->insertedByParser();
    406     dispatchDocumentElementAvailableIfNeeded();
    407 }
    408 
    409 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
    410 {
    411     if (token->attributes().isEmpty())
    412         return;
    413 
    414     for (unsigned i = 0; i < token->attributes().size(); ++i) {
    415         const Attribute& tokenAttribute = token->attributes().at(i);
    416         if (element->attributesWithoutUpdate().findIndex(tokenAttribute.name()) == kNotFound)
    417             element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
    418     }
    419 }
    420 
    421 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
    422 {
    423     // Fragments do not have a root HTML element, so any additional HTML elements
    424     // encountered during fragment parsing should be ignored.
    425     if (m_isParsingFragment)
    426         return;
    427 
    428     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
    429 }
    430 
    431 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
    432 {
    433     mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
    434 }
    435 
    436 void HTMLConstructionSite::setDefaultCompatibilityMode()
    437 {
    438     if (m_isParsingFragment)
    439         return;
    440     setCompatibilityMode(Document::QuirksMode);
    441 }
    442 
    443 void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
    444 {
    445     m_inQuirksMode = (mode == Document::QuirksMode);
    446     m_document->setCompatibilityMode(mode);
    447 }
    448 
    449 void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
    450 {
    451     // There are three possible compatibility modes:
    452     // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
    453     // be omitted from numbers.
    454     // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
    455     // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.
    456 
    457     // Check for Quirks Mode.
    458     if (name != "html"
    459         || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
    460         || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
    461         || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
    462         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
    463         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
    464         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
    465         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
    466         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
    467         || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
    468         || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
    469         || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
    470         || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
    471         || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
    472         || publicId.startsWith("-//IETF//DTD HTML 3//", false)
    473         || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
    474         || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
    475         || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
    476         || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
    477         || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
    478         || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
    479         || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
    480         || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
    481         || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
    482         || publicId.startsWith("-//IETF//DTD HTML//", false)
    483         || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
    484         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
    485         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
    486         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
    487         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
    488         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
    489         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
    490         || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
    491         || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
    492         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
    493         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
    494         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
    495         || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
    496         || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
    497         || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
    498         || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
    499         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
    500         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
    501         || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
    502         || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
    503         || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
    504         || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
    505         || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
    506         || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
    507         || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
    508         || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
    509         || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
    510         || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
    511         || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
    512         || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
    513         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
    514         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
    515         || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
    516         || equalIgnoringCase(publicId, "HTML")
    517         || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
    518         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
    519         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
    520         setCompatibilityMode(Document::QuirksMode);
    521         return;
    522     }
    523 
    524     // Check for Limited Quirks Mode.
    525     if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
    526         || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
    527         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
    528         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
    529         setCompatibilityMode(Document::LimitedQuirksMode);
    530         return;
    531     }
    532 
    533     // Otherwise we are No Quirks Mode.
    534     setCompatibilityMode(Document::NoQuirksMode);
    535 }
    536 
// Handles end of input: flushes with FlushAlways and then pops every element
// off the open-element stack. Expects a current node to exist.
void HTMLConstructionSite::processEndOfFile()
{
    ASSERT(currentNode());
    flush(FlushAlways);
    openElements()->popAll();
}
    543 
// Flushes whatever is still pending and then tells the document that parsing
// has finished.
void HTMLConstructionSite::finishedParsing()
{
    // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.
    ASSERT(m_taskQueue.isEmpty());
    flush(FlushAlways);
    m_document->finishedParsing();
}
    551 
    552 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
    553 {
    554     ASSERT(token->type() == HTMLToken::DOCTYPE);
    555 
    556     const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
    557     const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
    558     RefPtrWillBeRawPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
    559     attachLater(m_attachmentRoot, doctype.release());
    560 
    561     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
    562     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
    563     // because context-less fragments can determine their own quirks mode, and thus change
    564     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
    565     // in a fragment, as changing the owning document's compatibility mode would be wrong.
    566     ASSERT(!m_isParsingFragment);
    567     if (m_isParsingFragment)
    568         return;
    569 
    570     if (token->forceQuirks())
    571         setCompatibilityMode(Document::QuirksMode);
    572     else {
    573         setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
    574     }
    575 }
    576 
    577 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
    578 {
    579     ASSERT(token->type() == HTMLToken::Comment);
    580     attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
    581 }
    582 
    583 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
    584 {
    585     ASSERT(token->type() == HTMLToken::Comment);
    586     ASSERT(m_document);
    587     attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment()));
    588 }
    589 
    590 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
    591 {
    592     ASSERT(token->type() == HTMLToken::Comment);
    593     ContainerNode* parent = m_openElements.rootNode();
    594     attachLater(parent, Comment::create(parent->document(), token->comment()));
    595 }
    596 
    597 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
    598 {
    599     ASSERT(!shouldFosterParent());
    600     m_head = HTMLStackItem::create(createHTMLElement(token), token);
    601     attachLater(currentNode(), m_head->element());
    602     m_openElements.pushHTMLHeadElement(m_head);
    603 }
    604 
    605 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
    606 {
    607     ASSERT(!shouldFosterParent());
    608     RefPtrWillBeRawPtr<HTMLElement> body = createHTMLElement(token);
    609     attachLater(currentNode(), body);
    610     m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
    611     if (LocalFrame* frame = m_document->frame())
    612         frame->loader().client()->dispatchWillInsertBody();
    613 }
    614 
    615 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
    616 {
    617     RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token);
    618     ASSERT(isHTMLFormElement(element));
    619     m_form = static_pointer_cast<HTMLFormElement>(element.release());
    620     m_form->setDemoted(isDemoted);
    621     attachLater(currentNode(), m_form.get());
    622     m_openElements.push(HTMLStackItem::create(m_form.get(), token));
    623 }
    624 
    625 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
    626 {
    627     RefPtrWillBeRawPtr<HTMLElement> element = createHTMLElement(token);
    628     attachLater(currentNode(), element);
    629     m_openElements.push(HTMLStackItem::create(element.release(), token));
    630 }
    631 
    632 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
    633 {
    634     ASSERT(token->type() == HTMLToken::StartTag);
    635     // Normally HTMLElementStack is responsible for calling finishParsingChildren,
    636     // but self-closing elements are never in the element stack so the stack
    637     // doesn't get a chance to tell them that we're done parsing their children.
    638     attachLater(currentNode(), createHTMLElement(token), true);
    639     // FIXME: Do we want to acknowledge the token's self-closing flag?
    640     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
    641 }
    642 
    643 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
    644 {
    645     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
    646     // Possible active formatting elements include:
    647     // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
    648     insertHTMLElement(token);
    649     m_activeFormattingElements.append(currentElementRecord()->stackItem());
    650 }
    651 
    652 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
    653 {
    654     // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
    655     // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
    656     // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
    657     // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
    658     // those flags or effects thereof.
    659     const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
    660     const bool alreadyStarted = m_isParsingFragment && parserInserted;
    661     RefPtrWillBeRawPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
    662     setAttributes(element.get(), token, m_parserContentPolicy);
    663     if (scriptingContentIsAllowed(m_parserContentPolicy))
    664         attachLater(currentNode(), element);
    665     m_openElements.push(HTMLStackItem::create(element.release(), token));
    666 }
    667 
    668 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
    669 {
    670     ASSERT(token->type() == HTMLToken::StartTag);
    671     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
    672 
    673     RefPtrWillBeRawPtr<Element> element = createElement(token, namespaceURI);
    674     if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get()))
    675         attachLater(currentNode(), element, token->selfClosing());
    676     if (!token->selfClosing())
    677         m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
    678 }
    679 
    680 void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode)
    681 {
    682     HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
    683     dummyTask.parent = currentNode();
    684 
    685     if (shouldFosterParent())
    686         findFosterSite(dummyTask);
    687 
    688     // FIXME: This probably doesn't need to be done both here and in insert(Task).
    689     if (isHTMLTemplateElement(*dummyTask.parent))
    690         dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
    691 
    692     // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token.
    693     // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"
    694     // In either case we have to flush the pending text into the task queue before making more.
    695     if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent ||  m_pendingText.nextChild != dummyTask.nextChild))
    696         flushPendingText(FlushAlways);
    697     m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);
    698 }
    699 
    700 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
    701 {
    702     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    703     task.parent = newParent->node();
    704     task.child = child->node();
    705     queueTask(task);
    706 }
    707 
    708 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
    709 {
    710     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    711     task.parent = newParent->node();
    712     task.child = child->node();
    713     queueTask(task);
    714 }
    715 
    716 void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
    717 {
    718     if (newParent->causesFosterParenting()) {
    719         fosterParent(child->node());
    720         return;
    721     }
    722 
    723     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
    724     task.parent = newParent->node();
    725     task.child = child->node();
    726     queueTask(task);
    727 }
    728 
    729 void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
    730 {
    731     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
    732     task.parent = newParent->node();
    733     task.child = oldParent->node();
    734     queueTask(task);
    735 }
    736 
    737 PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
    738 {
    739     QualifiedName tagName(nullAtom, token->name(), namespaceURI);
    740     RefPtrWillBeRawPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true);
    741     setAttributes(element.get(), token, m_parserContentPolicy);
    742     return element.release();
    743 }
    744 
    745 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
    746 {
    747     if (isHTMLTemplateElement(*currentNode()))
    748         return toHTMLTemplateElement(currentElement())->content()->document();
    749     return currentNode()->document();
    750 }
    751 
    752 PassRefPtrWillBeRawPtr<HTMLElement> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
    753 {
    754     Document& document = ownerDocumentForCurrentNode();
    755     // Only associate the element with the current form if we're creating the new element
    756     // in a document with a browsing context (rather than in <template> contents).
    757     HTMLFormElement* form = document.frame() ? m_form.get() : 0;
    758     // FIXME: This can't use HTMLConstructionSite::createElement because we
    759     // have to pass the current form element.  We should rework form association
    760     // to occur after construction to allow better code sharing here.
    761     RefPtrWillBeRawPtr<HTMLElement> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true);
    762     setAttributes(element.get(), token, m_parserContentPolicy);
    763     return element.release();
    764 }
    765 
    766 PassRefPtrWillBeRawPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
    767 {
    768     RefPtrWillBeRawPtr<Element> element;
    769     // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
    770     AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
    771     if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
    772         element = createHTMLElement(&fakeToken);
    773     else
    774         element = createElement(&fakeToken, item->namespaceURI());
    775     return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
    776 }
    777 
    778 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
    779 {
    780     if (m_activeFormattingElements.isEmpty())
    781         return false;
    782     unsigned index = m_activeFormattingElements.size();
    783     do {
    784         --index;
    785         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
    786         if (entry.isMarker() || m_openElements.contains(entry.element())) {
    787             firstUnopenElementIndex = index + 1;
    788             return firstUnopenElementIndex < m_activeFormattingElements.size();
    789         }
    790     } while (index);
    791     firstUnopenElementIndex = index;
    792     return true;
    793 }
    794 
    795 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
    796 {
    797     unsigned firstUnopenElementIndex;
    798     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
    799         return;
    800 
    801     unsigned unopenEntryIndex = firstUnopenElementIndex;
    802     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
    803     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
    804         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
    805         RefPtrWillBeRawPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
    806         attachLater(currentNode(), reconstructed->node());
    807         m_openElements.push(reconstructed);
    808         unopenedEntry.replaceElement(reconstructed.release());
    809     }
    810 }
    811 
    812 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
    813 {
    814     while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
    815         m_openElements.pop();
    816 }
    817 
    818 void HTMLConstructionSite::generateImpliedEndTags()
    819 {
    820     while (hasImpliedEndTag(currentStackItem()))
    821         m_openElements.pop();
    822 }
    823 
    824 bool HTMLConstructionSite::inQuirksMode()
    825 {
    826     return m_inQuirksMode;
    827 }
    828 
    829 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
    830 {
    831     // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
    832     HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
    833     if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
    834         task.parent = lastTemplateElement->element();
    835         return;
    836     }
    837 
    838     HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
    839     if (lastTableElementRecord) {
    840         Element* lastTableElement = lastTableElementRecord->element();
    841         ContainerNode* parent;
    842         if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag))
    843             parent = lastTableElementRecord->next()->element();
    844         else
    845             parent = lastTableElement->parentNode();
    846 
    847         // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
    848         // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
    849         if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) {
    850             task.parent = parent;
    851             task.nextChild = lastTableElement;
    852             return;
    853         }
    854         task.parent = lastTableElementRecord->next()->element();
    855         return;
    856     }
    857     // Fragment case
    858     task.parent = m_openElements.rootNode(); // DocumentFragment
    859 }
    860 
    861 bool HTMLConstructionSite::shouldFosterParent() const
    862 {
    863     return m_redirectAttachToFosterParent
    864         && currentStackItem()->isElementNode()
    865         && currentStackItem()->causesFosterParenting();
    866 }
    867 
    868 void HTMLConstructionSite::fosterParent(PassRefPtrWillBeRawPtr<Node> node)
    869 {
    870     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    871     findFosterSite(task);
    872     task.child = node;
    873     ASSERT(task.parent);
    874     queueTask(task);
    875 }
    876 
    877 void HTMLConstructionSite::PendingText::trace(Visitor* visitor)
    878 {
    879     visitor->trace(parent);
    880     visitor->trace(nextChild);
    881 }
    882 
    883 
    884 }
    885