Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/html/parser/HTMLConstructionSite.h"
     29 
     30 #include "core/HTMLElementFactory.h"
     31 #include "core/HTMLNames.h"
     32 #include "core/dom/Comment.h"
     33 #include "core/dom/DocumentFragment.h"
     34 #include "core/dom/DocumentType.h"
     35 #include "core/dom/Element.h"
     36 #include "core/dom/ScriptLoader.h"
     37 #include "core/dom/Text.h"
     38 #include "core/frame/LocalFrame.h"
     39 #include "core/html/HTMLFormElement.h"
     40 #include "core/html/HTMLHtmlElement.h"
     41 #include "core/html/HTMLPlugInElement.h"
     42 #include "core/html/HTMLScriptElement.h"
     43 #include "core/html/HTMLTemplateElement.h"
     44 #include "core/html/parser/AtomicHTMLToken.h"
     45 #include "core/html/parser/HTMLParserIdioms.h"
     46 #include "core/html/parser/HTMLStackItem.h"
     47 #include "core/html/parser/HTMLToken.h"
     48 #include "core/loader/FrameLoader.h"
     49 #include "core/loader/FrameLoaderClient.h"
     50 #include "core/svg/SVGScriptElement.h"
     51 #include "platform/NotImplemented.h"
     52 #include "platform/text/TextBreakIterator.h"
     53 #include <limits>
     54 
     55 namespace WebCore {
     56 
     57 using namespace HTMLNames;
     58 
     // Once the stack of open elements is deeper than this, attachLater() attaches
     // new nodes to the intended parent's own parent (when it has one) instead.
     59 static const unsigned maximumHTMLParserDOMTreeDepth = 512;
     60 
     61 static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
     62 {
     63     if (!scriptingContentIsAllowed(parserContentPolicy))
     64         element->stripScriptingAttributes(token->attributes());
     65     element->parserSetAttributes(token->attributes());
     66 }
     67 
     68 static bool hasImpliedEndTag(const HTMLStackItem* item)
     69 {
     70     return item->hasTagName(ddTag)
     71         || item->hasTagName(dtTag)
     72         || item->hasTagName(liTag)
     73         || item->hasTagName(optionTag)
     74         || item->hasTagName(optgroupTag)
     75         || item->hasTagName(pTag)
     76         || item->hasTagName(rpTag)
     77         || item->hasTagName(rtTag);
     78 }
     79 
     80 static bool shouldUseLengthLimit(const ContainerNode& node)
     81 {
     82     return !isHTMLScriptElement(node)
     83         && !isHTMLStyleElement(node)
     84         && !isSVGScriptElement(node);
     85 }
     86 
     87 static unsigned textLengthLimitForContainer(const ContainerNode& node)
     88 {
     89     return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
     90 }
     91 
     92 static inline bool isAllWhitespace(const String& string)
     93 {
     94     return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
     95 }
     96 
     97 static inline void insert(HTMLConstructionSiteTask& task)
     98 {
     99     if (isHTMLTemplateElement(*task.parent))
    100         task.parent = toHTMLTemplateElement(task.parent.get())->content();
    101 
    102     if (ContainerNode* parent = task.child->parentNode())
    103         parent->parserRemoveChild(*task.child);
    104 
    105     if (task.nextChild)
    106         task.parent->parserInsertBefore(task.child.get(), *task.nextChild);
    107     else
    108         task.parent->parserAppendChild(task.child.get());
    109 }
    110 
    111 static inline void executeInsertTask(HTMLConstructionSiteTask& task)
    112 {
    113     ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
    114 
    115     insert(task);
    116 
    117     if (task.child->isElementNode()) {
    118         Element& child = toElement(*task.child);
    119         child.beginParsingChildren();
    120         if (task.selfClosing)
    121             child.finishParsingChildren();
    122     }
    123 }
    124 
    125 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
    126 {
    127     ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
    128     ASSERT(task.child->isTextNode());
    129 
    130     // Merge text nodes into previous ones if possible:
    131     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
    132     Text* newText = toText(task.child.get());
    133     Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
    134     if (previousChild && previousChild->isTextNode()) {
    135         Text* previousText = toText(previousChild);
    136         unsigned lengthLimit = textLengthLimitForContainer(*task.parent);
    137         if (previousText->length() + newText->length() < lengthLimit) {
    138             previousText->parserAppendData(newText->data());
    139             return;
    140         }
    141     }
    142 
    143     insert(task);
    144 }
    145 
    146 static inline void executeReparentTask(HTMLConstructionSiteTask& task)
    147 {
    148     ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
    149 
    150     if (ContainerNode* parent = task.child->parentNode())
    151         parent->parserRemoveChild(*task.child);
    152 
    153     task.parent->parserAppendChild(task.child);
    154 }
    155 
    156 static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
    157 {
    158     ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);
    159 
    160     insert(task);
    161 }
    162 
    163 static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
    164 {
    165     ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);
    166 
    167     task.parent->parserTakeAllChildrenFrom(*task.oldParent());
    168 }
    169 
    170 void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)
    171 {
    172     ASSERT(m_taskQueue.isEmpty());
    173     if (task.operation == HTMLConstructionSiteTask::Insert)
    174         return executeInsertTask(task);
    175 
    176     if (task.operation == HTMLConstructionSiteTask::InsertText)
    177         return executeInsertTextTask(task);
    178 
    179     // All the cases below this point are only used by the adoption agency.
    180 
    181     if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
    182         return executeInsertAlreadyParsedChildTask(task);
    183 
    184     if (task.operation == HTMLConstructionSiteTask::Reparent)
    185         return executeReparentTask(task);
    186 
    187     if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
    188         return executeTakeAllChildrenTask(task);
    189 
    190     ASSERT_NOT_REACHED();
    191 }
    192 
    193 // This is only needed for TextDocuments where we might have text nodes
    194 // approaching the default length limit (~64k) and we don't want to
    195 // break a text node in the middle of a combining character.
    196 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)
    197 {
    198     ASSERT(currentPosition < proposedBreakIndex);
    199     ASSERT(proposedBreakIndex <= string.length());
    200     // The end of the string is always a valid break.
    201     if (proposedBreakIndex == string.length())
    202         return proposedBreakIndex;
    203 
    204     // Latin-1 does not have breakable boundaries. If we ever moved to a differnet 8-bit encoding this could be wrong.
    205     if (string.is8Bit())
    206         return proposedBreakIndex;
    207 
    208     const UChar* breakSearchCharacters = string.characters16() + currentPosition;
    209     // We need at least two characters look-ahead to account for UTF-16 surrogates, but can't search off the end of the buffer!
    210     unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition);
    211     NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength);
    212 
    213     if (it.isBreak(proposedBreakIndex - currentPosition))
    214         return proposedBreakIndex;
    215 
    216     int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - currentPosition);
    217     if (adjustedBreakIndexInSubstring > 0)
    218         return currentPosition + adjustedBreakIndexInSubstring;
    219     // We failed to find a breakable point, let the caller figure out what to do.
    220     return 0;
    221 }
    222 
    223 static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whitespaceMode)
    224 {
    225     // Strings composed entirely of whitespace are likely to be repeated.
    226     // Turn them into AtomicString so we share a single string for each.
    227     if (whitespaceMode == AllWhitespace || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(string)))
    228         return AtomicString(string).string();
    229     return string;
    230 }
    231 
    232 void HTMLConstructionSite::flushPendingText()
    233 {
    234     if (m_pendingText.isEmpty())
    235         return;
    236 
    237     PendingText pendingText;
    238     // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
    239     m_pendingText.swap(pendingText);
    240     ASSERT(m_pendingText.isEmpty());
    241 
    242     // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
    243     // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
    244     unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);
    245 
    246     unsigned currentPosition = 0;
    247     const StringBuilder& string = pendingText.stringBuilder;
    248     while (currentPosition < string.length()) {
    249         unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
    250         unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
    251         ASSERT(breakIndex <= string.length());
    252         String substring = string.substring(currentPosition, breakIndex - currentPosition);
    253         substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);
    254 
    255         HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
    256         task.parent = pendingText.parent;
    257         task.nextChild = pendingText.nextChild;
    258         task.child = Text::create(task.parent->document(), substring);
    259         queueTask(task);
    260 
    261         ASSERT(breakIndex > currentPosition);
    262         ASSERT(breakIndex - currentPosition == substring.length());
    263         ASSERT(toText(task.child.get())->length() == substring.length());
    264         currentPosition = breakIndex;
    265     }
    266 }
    267 
    268 void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
    269 {
    270     flushPendingText();
    271     ASSERT(m_pendingText.isEmpty());
    272     m_taskQueue.append(task);
    273 }
    274 
    275 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> prpChild, bool selfClosing)
    276 {
    277     ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptLoaderIfPossible(toElement(prpChild.get())));
    278     ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !isHTMLPlugInElement(prpChild));
    279 
    280     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    281     task.parent = parent;
    282     task.child = prpChild;
    283     task.selfClosing = selfClosing;
    284 
    285     if (shouldFosterParent()) {
    286         fosterParent(task.child);
    287         return;
    288     }
    289 
    290     // Add as a sibling of the parent if we have reached the maximum depth allowed.
    291     if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.parent->parentNode())
    292         task.parent = task.parent->parentNode();
    293 
    294     ASSERT(task.parent);
    295     queueTask(task);
    296 }
    297 
    298 void HTMLConstructionSite::executeQueuedTasks()
    299 {
    300     // This has no affect on pendingText, and we may have pendingText
    301     // remaining after executing all other queued tasks.
    302     const size_t size = m_taskQueue.size();
    303     if (!size)
    304         return;
    305 
    306     // Copy the task queue into a local variable in case executeTask
    307     // re-enters the parser.
    308     TaskQueue queue;
    309     queue.swap(m_taskQueue);
    310 
    311     for (size_t i = 0; i < size; ++i)
    312         executeTask(queue[i]);
    313 
    314     // We might be detached now.
    315 }
    316 
    317 HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy)
    318     : m_document(document)
    319     , m_attachmentRoot(document)
    320     , m_parserContentPolicy(parserContentPolicy)
    321     , m_isParsingFragment(false)
    322     , m_redirectAttachToFosterParent(false)
    323     , m_inQuirksMode(document->inQuirksMode())
    324 {
    325     ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
    326 }
    327 
    328 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy)
    329     : m_document(&fragment->document())
    330     , m_attachmentRoot(fragment)
    331     , m_parserContentPolicy(parserContentPolicy)
    332     , m_isParsingFragment(true)
    333     , m_redirectAttachToFosterParent(false)
    334     , m_inQuirksMode(fragment->document().inQuirksMode())
    335 {
    336     ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
    337 }
    338 
    339 HTMLConstructionSite::~HTMLConstructionSite()
    340 {
    341     // Depending on why we're being destroyed it might be OK
    342     // to forget queued tasks, but currently we don't expect to.
    343     ASSERT(m_taskQueue.isEmpty());
    344     // Currently we assume that text will never be the last token in the
    345     // document and that we'll always queue some additional task to cause it to flush.
    346     ASSERT(m_pendingText.isEmpty());
    347 }
    348 
    349 void HTMLConstructionSite::trace(Visitor* visitor)
    350 {
    351     visitor->trace(m_document);
    352     visitor->trace(m_attachmentRoot);
    353     visitor->trace(m_head);
    354     visitor->trace(m_form);
    355     visitor->trace(m_openElements);
    356     visitor->trace(m_activeFormattingElements);
    357     visitor->trace(m_taskQueue);
    358     visitor->trace(m_pendingText);
    359 }
    360 
    361 void HTMLConstructionSite::detach()
    362 {
    363     // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
    364     // text that really should have made it into the DOM earlier, but there
    365     // doesn't seem to be a nice way to do that.
    366     m_pendingText.discard();
    367     m_document = nullptr;
    368     m_attachmentRoot = nullptr;
    369 }
    370 
    371 void HTMLConstructionSite::setForm(HTMLFormElement* form)
    372 {
    373     // This method should only be needed for HTMLTreeBuilder in the fragment case.
    374     ASSERT(!m_form);
    375     m_form = form;
    376 }
    377 
    378 PassRefPtrWillBeRawPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
    379 {
    380     return m_form.release();
    381 }
    382 
    383 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
    384 {
    385     ASSERT(m_document);
    386     if (m_document->frame() && !m_isParsingFragment)
    387         m_document->frame()->loader().dispatchDocumentElementAvailable();
    388 }
    389 
    390 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
    391 {
    392     ASSERT(m_document);
    393     RefPtrWillBeRawPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(*m_document);
    394     setAttributes(element.get(), token, m_parserContentPolicy);
    395     attachLater(m_attachmentRoot, element);
    396     m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
    397 
    398     executeQueuedTasks();
    399     element->insertedByParser();
    400     dispatchDocumentElementAvailableIfNeeded();
    401 }
    402 
    403 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
    404 {
    405     if (token->attributes().isEmpty())
    406         return;
    407 
    408     for (unsigned i = 0; i < token->attributes().size(); ++i) {
    409         const Attribute& tokenAttribute = token->attributes().at(i);
    410         if (!element->elementData() || !element->findAttributeByName(tokenAttribute.name()))
    411             element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
    412     }
    413 }
    414 
    415 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
    416 {
    417     // Fragments do not have a root HTML element, so any additional HTML elements
    418     // encountered during fragment parsing should be ignored.
    419     if (m_isParsingFragment)
    420         return;
    421 
    422     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
    423 }
    424 
    425 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
    426 {
    427     mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
    428 }
    429 
    430 void HTMLConstructionSite::setDefaultCompatibilityMode()
    431 {
    432     if (m_isParsingFragment)
    433         return;
    434     setCompatibilityMode(Document::QuirksMode);
    435 }
    436 
    437 void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
    438 {
    439     m_inQuirksMode = (mode == Document::QuirksMode);
    440     m_document->setCompatibilityMode(mode);
    441 }
    442 
    443 void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
    444 {
    445     // There are three possible compatibility modes:
    446     // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
    447     // be omitted from numbers.
    448     // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
    449     // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.
    450 
    451     // Check for Quirks Mode.
    452     if (name != "html"
    453         || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
    454         || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
    455         || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
    456         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
    457         || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
    458         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
    459         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
    460         || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
    461         || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
    462         || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
    463         || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
    464         || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
    465         || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
    466         || publicId.startsWith("-//IETF//DTD HTML 3//", false)
    467         || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
    468         || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
    469         || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
    470         || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
    471         || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
    472         || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
    473         || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
    474         || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
    475         || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
    476         || publicId.startsWith("-//IETF//DTD HTML//", false)
    477         || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
    478         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
    479         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
    480         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
    481         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
    482         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
    483         || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
    484         || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
    485         || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
    486         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
    487         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
    488         || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
    489         || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
    490         || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
    491         || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
    492         || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
    493         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
    494         || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
    495         || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
    496         || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
    497         || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
    498         || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
    499         || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
    500         || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
    501         || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
    502         || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
    503         || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
    504         || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
    505         || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
    506         || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
    507         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
    508         || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
    509         || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
    510         || equalIgnoringCase(publicId, "HTML")
    511         || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
    512         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
    513         || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
    514         setCompatibilityMode(Document::QuirksMode);
    515         return;
    516     }
    517 
    518     // Check for Limited Quirks Mode.
    519     if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
    520         || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
    521         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
    522         || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
    523         setCompatibilityMode(Document::LimitedQuirksMode);
    524         return;
    525     }
    526 
    527     // Otherwise we are No Quirks Mode.
    528     setCompatibilityMode(Document::NoQuirksMode);
    529 }
    530 
    531 void HTMLConstructionSite::processEndOfFile()
    532 {
    533     ASSERT(currentNode());
    534     flush();
    535     openElements()->popAll();
    536 }
    537 
    538 void HTMLConstructionSite::finishedParsing()
    539 {
    540     // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.
    541     ASSERT(m_taskQueue.isEmpty());
    542     flush();
    543     m_document->finishedParsing();
    544 }
    545 
    546 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
    547 {
    548     ASSERT(token->type() == HTMLToken::DOCTYPE);
    549 
    550     const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
    551     const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
    552     RefPtrWillBeRawPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
    553     attachLater(m_attachmentRoot, doctype.release());
    554 
    555     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
    556     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
    557     // because context-less fragments can determine their own quirks mode, and thus change
    558     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
    559     // in a fragment, as changing the owning document's compatibility mode would be wrong.
    560     ASSERT(!m_isParsingFragment);
    561     if (m_isParsingFragment)
    562         return;
    563 
    564     if (token->forceQuirks())
    565         setCompatibilityMode(Document::QuirksMode);
    566     else {
    567         setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
    568     }
    569 }
    570 
    571 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
    572 {
    573     ASSERT(token->type() == HTMLToken::Comment);
    574     attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
    575 }
    576 
    577 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
    578 {
    579     ASSERT(token->type() == HTMLToken::Comment);
    580     ASSERT(m_document);
    581     attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment()));
    582 }
    583 
    584 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
    585 {
    586     ASSERT(token->type() == HTMLToken::Comment);
    587     ContainerNode* parent = m_openElements.rootNode();
    588     attachLater(parent, Comment::create(parent->document(), token->comment()));
    589 }
    590 
    591 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
    592 {
    593     ASSERT(!shouldFosterParent());
    594     m_head = HTMLStackItem::create(createHTMLElement(token), token);
    595     attachLater(currentNode(), m_head->element());
    596     m_openElements.pushHTMLHeadElement(m_head);
    597 }
    598 
    599 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
    600 {
    601     ASSERT(!shouldFosterParent());
    602     RefPtrWillBeRawPtr<Element> body = createHTMLElement(token);
    603     attachLater(currentNode(), body);
    604     m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
    605     if (LocalFrame* frame = m_document->frame())
    606         frame->loader().client()->dispatchWillInsertBody();
    607 }
    608 
    609 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
    610 {
    611     RefPtrWillBeRawPtr<Element> element = createHTMLElement(token);
    612     ASSERT(isHTMLFormElement(element));
    613     m_form = static_pointer_cast<HTMLFormElement>(element.release());
    614     m_form->setDemoted(isDemoted);
    615     attachLater(currentNode(), m_form.get());
    616     m_openElements.push(HTMLStackItem::create(m_form.get(), token));
    617 }
    618 
    619 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
    620 {
    621     RefPtrWillBeRawPtr<Element> element = createHTMLElement(token);
    622     attachLater(currentNode(), element);
    623     m_openElements.push(HTMLStackItem::create(element.release(), token));
    624 }
    625 
    626 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
    627 {
    628     ASSERT(token->type() == HTMLToken::StartTag);
    629     // Normally HTMLElementStack is responsible for calling finishParsingChildren,
    630     // but self-closing elements are never in the element stack so the stack
    631     // doesn't get a chance to tell them that we're done parsing their children.
    632     attachLater(currentNode(), createHTMLElement(token), true);
    633     // FIXME: Do we want to acknowledge the token's self-closing flag?
    634     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
    635 }
    636 
    637 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
    638 {
    639     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
    640     // Possible active formatting elements include:
    641     // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
    642     insertHTMLElement(token);
    643     m_activeFormattingElements.append(currentElementRecord()->stackItem());
    644 }
    645 
    646 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
    647 {
    648     // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
    649     // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
    650     // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
    651     // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
    652     // those flags or effects thereof.
    653     const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
    654     const bool alreadyStarted = m_isParsingFragment && parserInserted;
    655     RefPtrWillBeRawPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
    656     setAttributes(element.get(), token, m_parserContentPolicy);
    657     if (scriptingContentIsAllowed(m_parserContentPolicy))
    658         attachLater(currentNode(), element);
    659     m_openElements.push(HTMLStackItem::create(element.release(), token));
    660 }
    661 
    662 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
    663 {
    664     ASSERT(token->type() == HTMLToken::StartTag);
    665     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
    666 
    667     RefPtrWillBeRawPtr<Element> element = createElement(token, namespaceURI);
    668     if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptLoaderIfPossible(element.get()))
    669         attachLater(currentNode(), element, token->selfClosing());
    670     if (!token->selfClosing())
    671         m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
    672 }
    673 
    674 void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode)
    675 {
    676     HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
    677     dummyTask.parent = currentNode();
    678 
    679     if (shouldFosterParent())
    680         findFosterSite(dummyTask);
    681 
    682     // FIXME: This probably doesn't need to be done both here and in insert(Task).
    683     if (isHTMLTemplateElement(*dummyTask.parent))
    684         dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
    685 
    686     // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token.
    687     // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"
    688     // In either case we have to flush the pending text into the task queue before making more.
    689     if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent ||  m_pendingText.nextChild != dummyTask.nextChild))
    690         flushPendingText();
    691     m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);
    692 }
    693 
    694 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)
    695 {
    696     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    697     task.parent = newParent->node();
    698     task.child = child->node();
    699     queueTask(task);
    700 }
    701 
    702 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child)
    703 {
    704     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
    705     task.parent = newParent->node();
    706     task.child = child->node();
    707     queueTask(task);
    708 }
    709 
    710 void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child)
    711 {
    712     if (newParent->causesFosterParenting()) {
    713         fosterParent(child->node());
    714         return;
    715     }
    716 
    717     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
    718     task.parent = newParent->node();
    719     task.child = child->node();
    720     queueTask(task);
    721 }
    722 
    723 void HTMLConstructionSite::takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent)
    724 {
    725     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
    726     task.parent = newParent->node();
    727     task.child = oldParent->node();
    728     queueTask(task);
    729 }
    730 
    731 PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
    732 {
    733     QualifiedName tagName(nullAtom, token->name(), namespaceURI);
    734     RefPtrWillBeRawPtr<Element> element = ownerDocumentForCurrentNode().createElement(tagName, true);
    735     setAttributes(element.get(), token, m_parserContentPolicy);
    736     return element.release();
    737 }
    738 
    739 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
    740 {
    741     if (isHTMLTemplateElement(*currentNode()))
    742         return toHTMLTemplateElement(currentElement())->content()->document();
    743     return currentNode()->document();
    744 }
    745 
    746 PassRefPtrWillBeRawPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
    747 {
    748     Document& document = ownerDocumentForCurrentNode();
    749     // Only associate the element with the current form if we're creating the new element
    750     // in a document with a browsing context (rather than in <template> contents).
    751     HTMLFormElement* form = document.frame() ? m_form.get() : 0;
    752     // FIXME: This can't use HTMLConstructionSite::createElement because we
    753     // have to pass the current form element.  We should rework form association
    754     // to occur after construction to allow better code sharing here.
    755     RefPtrWillBeRawPtr<Element> element = HTMLElementFactory::createHTMLElement(token->name(), document, form, true);
    756     setAttributes(element.get(), token, m_parserContentPolicy);
    757     ASSERT(element->isHTMLElement());
    758     return element.release();
    759 }
    760 
    761 PassRefPtrWillBeRawPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
    762 {
    763     RefPtrWillBeRawPtr<Element> element;
    764     // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
    765     AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
    766     if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
    767         element = createHTMLElement(&fakeToken);
    768     else
    769         element = createElement(&fakeToken, item->namespaceURI());
    770     return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
    771 }
    772 
    773 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
    774 {
    775     if (m_activeFormattingElements.isEmpty())
    776         return false;
    777     unsigned index = m_activeFormattingElements.size();
    778     do {
    779         --index;
    780         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
    781         if (entry.isMarker() || m_openElements.contains(entry.element())) {
    782             firstUnopenElementIndex = index + 1;
    783             return firstUnopenElementIndex < m_activeFormattingElements.size();
    784         }
    785     } while (index);
    786     firstUnopenElementIndex = index;
    787     return true;
    788 }
    789 
    790 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
    791 {
    792     unsigned firstUnopenElementIndex;
    793     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
    794         return;
    795 
    796     unsigned unopenEntryIndex = firstUnopenElementIndex;
    797     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
    798     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
    799         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
    800         RefPtrWillBeRawPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
    801         attachLater(currentNode(), reconstructed->node());
    802         m_openElements.push(reconstructed);
    803         unopenedEntry.replaceElement(reconstructed.release());
    804     }
    805 }
    806 
    807 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
    808 {
    809     while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
    810         m_openElements.pop();
    811 }
    812 
    813 void HTMLConstructionSite::generateImpliedEndTags()
    814 {
    815     while (hasImpliedEndTag(currentStackItem()))
    816         m_openElements.pop();
    817 }
    818 
    819 bool HTMLConstructionSite::inQuirksMode()
    820 {
    821     return m_inQuirksMode;
    822 }
    823 
    824 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
    825 {
    826     // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
    827     HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
    828     if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
    829         task.parent = lastTemplateElement->element();
    830         return;
    831     }
    832 
    833     HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
    834     if (lastTableElementRecord) {
    835         Element* lastTableElement = lastTableElementRecord->element();
    836         ContainerNode* parent;
    837         if (lastTableElementRecord->next()->stackItem()->hasTagName(templateTag))
    838             parent = lastTableElementRecord->next()->element();
    839         else
    840             parent = lastTableElement->parentNode();
    841 
    842         // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
    843         // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
    844         if (parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()))) {
    845             task.parent = parent;
    846             task.nextChild = lastTableElement;
    847             return;
    848         }
    849         task.parent = lastTableElementRecord->next()->element();
    850         return;
    851     }
    852     // Fragment case
    853     task.parent = m_openElements.rootNode(); // DocumentFragment
    854 }
    855 
    856 bool HTMLConstructionSite::shouldFosterParent() const
    857 {
    858     return m_redirectAttachToFosterParent
    859         && currentStackItem()->isElementNode()
    860         && currentStackItem()->causesFosterParenting();
    861 }
    862 
    863 void HTMLConstructionSite::fosterParent(PassRefPtrWillBeRawPtr<Node> node)
    864 {
    865     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
    866     findFosterSite(task);
    867     task.child = node;
    868     ASSERT(task.parent);
    869     queueTask(task);
    870 }
    871 
    872 void HTMLConstructionSite::PendingText::trace(Visitor* visitor)
    873 {
    874     visitor->trace(parent);
    875     visitor->trace(nextChild);
    876 }
    877 
    878 
    879 }
    880