Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "HTMLTreeBuilder.h"
     29 
     30 #include "Comment.h"
     31 #include "DOMWindow.h"
     32 #include "DocumentFragment.h"
     33 #include "DocumentType.h"
     34 #include "Frame.h"
     35 #include "HTMLDocument.h"
     36 #include "HTMLDocumentParser.h"
     37 #include "HTMLElementFactory.h"
     38 #include "HTMLFormElement.h"
     39 #include "HTMLHtmlElement.h"
     40 #include "HTMLNames.h"
     41 #include "HTMLParserIdioms.h"
     42 #include "HTMLScriptElement.h"
     43 #include "HTMLToken.h"
     44 #include "HTMLTokenizer.h"
     45 #include "LocalizedStrings.h"
     46 #include "MathMLNames.h"
     47 #include "NotImplemented.h"
     48 #include "SVGNames.h"
     49 #include "ScriptController.h"
     50 #include "Text.h"
     51 #include "XLinkNames.h"
     52 #include "XMLNSNames.h"
     53 #include "XMLNames.h"
     54 #include <wtf/unicode/CharacterNames.h>
     55 
     56 namespace WebCore {
     57 
     58 using namespace HTMLNames;
     59 
     60 static const int uninitializedLineNumberValue = -1;
     61 
     62 static TextPosition1 uninitializedPositionValue1()
     63 {
     64     return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(-1), WTF::OneBasedNumber::base());
     65 }
     66 
     67 namespace {
     68 
     69 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
     70 {
     71     return isHTMLSpace(character) || character == replacementCharacter;
     72 }
     73 
     74 inline bool isAllWhitespace(const String& string)
     75 {
     76     return string.isAllSpecialCharacters<isHTMLSpace>();
     77 }
     78 
     79 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
     80 {
     81     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
     82 }
     83 
     84 bool isNumberedHeaderTag(const AtomicString& tagName)
     85 {
     86     return tagName == h1Tag
     87         || tagName == h2Tag
     88         || tagName == h3Tag
     89         || tagName == h4Tag
     90         || tagName == h5Tag
     91         || tagName == h6Tag;
     92 }
     93 
     94 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
     95 {
     96     return tagName == captionTag
     97         || tagName == colTag
     98         || tagName == colgroupTag;
     99 }
    100 
    101 bool isTableCellContextTag(const AtomicString& tagName)
    102 {
    103     return tagName == thTag || tagName == tdTag;
    104 }
    105 
    106 bool isTableBodyContextTag(const AtomicString& tagName)
    107 {
    108     return tagName == tbodyTag
    109         || tagName == tfootTag
    110         || tagName == theadTag;
    111 }
    112 
    113 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
    114 bool isSpecialNode(Node* node)
    115 {
    116     if (node->hasTagName(MathMLNames::miTag)
    117         || node->hasTagName(MathMLNames::moTag)
    118         || node->hasTagName(MathMLNames::mnTag)
    119         || node->hasTagName(MathMLNames::msTag)
    120         || node->hasTagName(MathMLNames::mtextTag)
    121         || node->hasTagName(MathMLNames::annotation_xmlTag)
    122         || node->hasTagName(SVGNames::foreignObjectTag)
    123         || node->hasTagName(SVGNames::descTag)
    124         || node->hasTagName(SVGNames::titleTag))
    125         return true;
    126     if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
    127         return true;
    128     if (!isInHTMLNamespace(node))
    129         return false;
    130     const AtomicString& tagName = node->localName();
    131     return tagName == addressTag
    132         || tagName == appletTag
    133         || tagName == areaTag
    134         || tagName == articleTag
    135         || tagName == asideTag
    136         || tagName == baseTag
    137         || tagName == basefontTag
    138         || tagName == bgsoundTag
    139         || tagName == blockquoteTag
    140         || tagName == bodyTag
    141         || tagName == brTag
    142         || tagName == buttonTag
    143         || tagName == captionTag
    144         || tagName == centerTag
    145         || tagName == colTag
    146         || tagName == colgroupTag
    147         || tagName == commandTag
    148         || tagName == ddTag
    149         || tagName == detailsTag
    150         || tagName == dirTag
    151         || tagName == divTag
    152         || tagName == dlTag
    153         || tagName == dtTag
    154         || tagName == embedTag
    155         || tagName == fieldsetTag
    156         || tagName == figcaptionTag
    157         || tagName == figureTag
    158         || tagName == footerTag
    159         || tagName == formTag
    160         || tagName == frameTag
    161         || tagName == framesetTag
    162         || isNumberedHeaderTag(tagName)
    163         || tagName == headTag
    164         || tagName == headerTag
    165         || tagName == hgroupTag
    166         || tagName == hrTag
    167         || tagName == htmlTag
    168         || tagName == iframeTag
    169         || tagName == imgTag
    170         || tagName == inputTag
    171         || tagName == isindexTag
    172         || tagName == liTag
    173         || tagName == linkTag
    174         || tagName == listingTag
    175         || tagName == marqueeTag
    176         || tagName == menuTag
    177         || tagName == metaTag
    178         || tagName == navTag
    179         || tagName == noembedTag
    180         || tagName == noframesTag
    181         || tagName == noscriptTag
    182         || tagName == objectTag
    183         || tagName == olTag
    184         || tagName == pTag
    185         || tagName == paramTag
    186         || tagName == plaintextTag
    187         || tagName == preTag
    188         || tagName == scriptTag
    189         || tagName == sectionTag
    190         || tagName == selectTag
    191         || tagName == styleTag
    192         || tagName == summaryTag
    193         || tagName == tableTag
    194         || isTableBodyContextTag(tagName)
    195         || tagName == tdTag
    196         || tagName == textareaTag
    197         || tagName == thTag
    198         || tagName == titleTag
    199         || tagName == trTag
    200         || tagName == ulTag
    201         || tagName == wbrTag
    202         || tagName == xmpTag;
    203 }
    204 
    205 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
    206 {
    207     return tagName == bTag
    208         || tagName == bigTag
    209         || tagName == codeTag
    210         || tagName == emTag
    211         || tagName == fontTag
    212         || tagName == iTag
    213         || tagName == sTag
    214         || tagName == smallTag
    215         || tagName == strikeTag
    216         || tagName == strongTag
    217         || tagName == ttTag
    218         || tagName == uTag;
    219 }
    220 
    221 bool isNonAnchorFormattingTag(const AtomicString& tagName)
    222 {
    223     return tagName == nobrTag
    224         || isNonAnchorNonNobrFormattingTag(tagName);
    225 }
    226 
    227 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
    228 bool isFormattingTag(const AtomicString& tagName)
    229 {
    230     return tagName == aTag || isNonAnchorFormattingTag(tagName);
    231 }
    232 
    233 HTMLFormElement* closestFormAncestor(Element* element)
    234 {
    235     while (element) {
    236         if (element->hasTagName(formTag))
    237             return static_cast<HTMLFormElement*>(element);
    238         ContainerNode* parent = element->parentNode();
    239         if (!parent || !parent->isElementNode())
    240             return 0;
    241         element = static_cast<Element*>(parent);
    242     }
    243     return 0;
    244 }
    245 
    246 } // namespace
    247 
    248 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
    249     WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
    250 public:
    251     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
    252         : m_current(token.characters().data())
    253         , m_end(m_current + token.characters().size())
    254     {
    255         ASSERT(!isEmpty());
    256     }
    257 
    258     explicit ExternalCharacterTokenBuffer(const String& string)
    259         : m_current(string.characters())
    260         , m_end(m_current + string.length())
    261     {
    262         ASSERT(!isEmpty());
    263     }
    264 
    265     ~ExternalCharacterTokenBuffer()
    266     {
    267         ASSERT(isEmpty());
    268     }
    269 
    270     bool isEmpty() const { return m_current == m_end; }
    271 
    272     void skipLeadingWhitespace()
    273     {
    274         skipLeading<isHTMLSpace>();
    275     }
    276 
    277     String takeLeadingWhitespace()
    278     {
    279         return takeLeading<isHTMLSpace>();
    280     }
    281 
    282     String takeLeadingNonWhitespace()
    283     {
    284         return takeLeading<isNotHTMLSpace>();
    285     }
    286 
    287     String takeRemaining()
    288     {
    289         ASSERT(!isEmpty());
    290         const UChar* start = m_current;
    291         m_current = m_end;
    292         return String(start, m_current - start);
    293     }
    294 
    295     void giveRemainingTo(Vector<UChar>& recipient)
    296     {
    297         recipient.append(m_current, m_end - m_current);
    298         m_current = m_end;
    299     }
    300 
    301     String takeRemainingWhitespace()
    302     {
    303         ASSERT(!isEmpty());
    304         Vector<UChar> whitespace;
    305         do {
    306             UChar cc = *m_current++;
    307             if (isHTMLSpace(cc))
    308                 whitespace.append(cc);
    309         } while (m_current < m_end);
    310         // Returning the null string when there aren't any whitespace
    311         // characters is slightly cleaner semantically because we don't want
    312         // to insert a text node (as opposed to inserting an empty text node).
    313         if (whitespace.isEmpty())
    314             return String();
    315         return String::adopt(whitespace);
    316     }
    317 
    318 private:
    319     template<bool characterPredicate(UChar)>
    320     void skipLeading()
    321     {
    322         ASSERT(!isEmpty());
    323         while (characterPredicate(*m_current)) {
    324             if (++m_current == m_end)
    325                 return;
    326         }
    327     }
    328 
    329     template<bool characterPredicate(UChar)>
    330     String takeLeading()
    331     {
    332         ASSERT(!isEmpty());
    333         const UChar* start = m_current;
    334         skipLeading<characterPredicate>();
    335         if (start == m_current)
    336             return String();
    337         return String(start, m_current - start);
    338     }
    339 
    340     const UChar* m_current;
    341     const UChar* m_end;
    342 };
    343 
    344 
// Creates a tree builder that constructs nodes into |document|.
// The builder starts unpaused in InitialMode with frameset-ok set; no
// fragment context is supplied. |reportErrors| and |usePreHTML5ParserQuirks|
// are stored as given.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
    : m_framesetOk(true)
    , m_document(document)
    , m_tree(document)
    , m_reportErrors(reportErrors)
    , m_isPaused(false)
    , m_insertionMode(InitialMode)
    , m_originalInsertionMode(InitialMode)
    , m_parser(parser)
    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
    , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
    , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
    , m_hasPendingForeignInsertionModeSteps(false)
{
}
    360 
    361 // FIXME: Member variables should be grouped into self-initializing structs to
    362 // minimize code duplication between these constructors.
    363 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
    364     : m_framesetOk(true)
    365     , m_fragmentContext(fragment, contextElement, scriptingPermission)
    366     , m_document(fragment->document())
    367     , m_tree(fragment, scriptingPermission)
    368     , m_reportErrors(false) // FIXME: Why not report errors in fragments?
    369     , m_isPaused(false)
    370     , m_insertionMode(InitialMode)
    371     , m_originalInsertionMode(InitialMode)
    372     , m_parser(parser)
    373     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
    374     , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
    375     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
    376     , m_hasPendingForeignInsertionModeSteps(false)
    377 {
    378     if (contextElement) {
    379         // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
    380         // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
    381         // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
    382         // and instead use the DocumentFragment as a root node.
    383         m_tree.openElements()->pushRootNode(fragment);
    384         resetInsertionModeAppropriately();
    385         m_tree.setForm(closestFormAncestor(contextElement));
    386     }
    387 }
    388 
// Nothing to do explicitly; members clean up through their own destructors.
HTMLTreeBuilder::~HTMLTreeBuilder()
{
}
    392 
// Drops the builder's document pointer and detaches the construction site.
void HTMLTreeBuilder::detach()
{
    // This call makes little sense in fragment mode, but for consistency
    // DocumentParser expects detach() to always be called before it's destroyed.
    m_document = 0;
    // HTMLConstructionSite might be on the callstack when detach() is called
    // otherwise we'd just call m_tree.clear() here instead.
    m_tree.detach();
}
    402 
// Default context: no fragment, no context element, scripting allowed.
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
    : m_fragment(0)
    , m_contextElement(0)
    , m_scriptingPermission(FragmentScriptingAllowed)
{
}
    409 
// Records the fragment, context element and scripting permission for a
// fragment parse. |fragment| is dereferenced unconditionally and is asserted
// to have no child nodes yet.
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
    : m_fragment(fragment)
    , m_contextElement(contextElement)
    , m_scriptingPermission(scriptingPermission)
{
    ASSERT(!fragment->hasChildNodes());
}
    417 
// Nothing to release explicitly.
HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
{
}
    421 
    422 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
    423 {
    424     // Unpause ourselves, callers may pause us again when processing the script.
    425     // The HTML5 spec is written as though scripts are executed inside the tree
    426     // builder.  We pause the parser to exit the tree builder, and then resume
    427     // before running scripts.
    428     m_isPaused = false;
    429     scriptStartPosition = m_scriptToProcessStartPosition;
    430     m_scriptToProcessStartPosition = uninitializedPositionValue1();
    431     return m_scriptToProcess.release();
    432 }
    433 
// Converts |rawToken| into an AtomicHTMLToken, feeds it to the tree builder,
// and leaves |rawToken| cleared on return.
void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
{
    AtomicHTMLToken token(rawToken);

    // We clear the rawToken in case constructTreeFromAtomicToken
    // synchronously re-enters the parser. We don't clear the token immediately
    // for Character tokens because the AtomicHTMLToken avoids copying the
    // characters by keeping a pointer to the underlying buffer in the
    // HTMLToken. Fortunately, Character tokens can't cause us to re-enter
    // the parser.
    //
    // FIXME: Stop clearing the rawToken once we start running the parser off
    // the main thread or once we stop allowing synchronous JavaScript
    // execution from parseMappedAttribute.
    if (rawToken.type() != HTMLToken::Character)
        rawToken.clear();

    constructTreeFromAtomicToken(token);

    // Only a Character token can still be live here; clear it now that the
    // atomic token no longer needs the underlying buffer.
    if (!rawToken.isUninitialized()) {
        ASSERT(rawToken.type() == HTMLToken::Character);
        rawToken.clear();
    }
}
    458 
    459 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
    460 {
    461     processToken(token);
    462 
    463     // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
    464     // the U+0000 characters into replacement characters has compatibility
    465     // problems.
    466     m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
    467     m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && !isInHTMLNamespace(m_tree.currentNode()));
    468 }
    469 
    470 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
    471 {
    472     switch (token.type()) {
    473     case HTMLToken::Uninitialized:
    474         ASSERT_NOT_REACHED();
    475         break;
    476     case HTMLToken::DOCTYPE:
    477         processDoctypeToken(token);
    478         break;
    479     case HTMLToken::StartTag:
    480         processStartTag(token);
    481         break;
    482     case HTMLToken::EndTag:
    483         processEndTag(token);
    484         break;
    485     case HTMLToken::Comment:
    486         processComment(token);
    487         return;
    488     case HTMLToken::Character:
    489         processCharacter(token);
    490         break;
    491     case HTMLToken::EndOfFile:
    492         processEndOfFile(token);
    493         break;
    494     }
    495 }
    496 
    497 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
    498 {
    499     ASSERT(token.type() == HTMLToken::DOCTYPE);
    500     if (m_insertionMode == InitialMode) {
    501         m_tree.insertDoctype(token);
    502         setInsertionMode(BeforeHTMLMode);
    503         return;
    504     }
    505     if (m_insertionMode == InTableTextMode) {
    506         defaultForInTableText();
    507         processDoctypeToken(token);
    508         return;
    509     }
    510     parseError(token);
    511 }
    512 
    513 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
    514 {
    515     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
    516     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
    517     processStartTag(fakeToken);
    518 }
    519 
    520 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
    521 {
    522     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
    523     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
    524     processEndTag(fakeToken);
    525 }
    526 
    527 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
    528 {
    529     ASSERT(!characters.isEmpty());
    530     ExternalCharacterTokenBuffer buffer(characters);
    531     processCharacterBuffer(buffer);
    532 }
    533 
    534 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
    535 {
    536     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
    537         return;
    538     AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
    539     processEndTag(endP);
    540 }
    541 
    542 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
    543 {
    544     RefPtr<NamedNodeMap> attributes = token.takeAtributes();
    545     if (!attributes)
    546         attributes = NamedNodeMap::create();
    547     else {
    548         attributes->removeAttribute(nameAttr);
    549         attributes->removeAttribute(actionAttr);
    550         attributes->removeAttribute(promptAttr);
    551     }
    552 
    553     RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
    554     attributes->insertAttribute(mappedAttribute.release(), false);
    555     return attributes.release();
    556 }
    557 
    558 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
    559 {
    560     ASSERT(token.type() == HTMLToken::StartTag);
    561     ASSERT(token.name() == isindexTag);
    562     parseError(token);
    563     if (m_tree.form())
    564         return;
    565     notImplemented(); // Acknowledge self-closing flag
    566     processFakeStartTag(formTag);
    567     RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
    568     if (actionAttribute) {
    569         ASSERT(m_tree.currentElement()->hasTagName(formTag));
    570         m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
    571     }
    572     processFakeStartTag(hrTag);
    573     processFakeStartTag(labelTag);
    574     RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
    575     if (promptAttribute)
    576         processFakeCharacters(promptAttribute->value());
    577     else
    578         processFakeCharacters(searchableIndexIntroduction());
    579     processFakeStartTag(inputTag, attributesForIsindexInput(token));
    580     notImplemented(); // This second set of characters may be needed by non-english locales.
    581     processFakeEndTag(labelTag);
    582     processFakeStartTag(hrTag);
    583     processFakeEndTag(formTag);
    584 }
    585 
    586 namespace {
    587 
    588 bool isLi(const ContainerNode* element)
    589 {
    590     return element->hasTagName(liTag);
    591 }
    592 
    593 bool isDdOrDt(const ContainerNode* element)
    594 {
    595     return element->hasTagName(ddTag)
    596         || element->hasTagName(dtTag);
    597 }
    598 
    599 }
    600 
    601 template <bool shouldClose(const ContainerNode*)>
    602 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
    603 {
    604     m_framesetOk = false;
    605     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
    606     while (1) {
    607         ContainerNode* node = nodeRecord->node();
    608         if (shouldClose(node)) {
    609             ASSERT(node->isElementNode());
    610             processFakeEndTag(toElement(node)->tagQName());
    611             break;
    612         }
    613         if (isSpecialNode(node) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
    614             break;
    615         nodeRecord = nodeRecord->next();
    616     }
    617     processFakePEndTagIfPInButtonScope();
    618     m_tree.insertHTMLElement(token);
    619 }
    620 
    621 namespace {
    622 
// Lookup table from an attribute or tag name string, as it appears in a token,
// to the QualifiedName it should be rewritten to.
typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
    624 
    625 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
    626 {
    627     for (size_t i = 0; i < length; ++i) {
    628         const QualifiedName& name = *names[i];
    629         const AtomicString& localName = name.localName();
    630         AtomicString loweredLocalName = localName.lower();
    631         if (loweredLocalName != localName)
    632             map->add(loweredLocalName, name);
    633     }
    634 }
    635 
    636 void adjustSVGTagNameCase(AtomicHTMLToken& token)
    637 {
    638     static PrefixedNameToQualifiedNameMap* caseMap = 0;
    639     if (!caseMap) {
    640         caseMap = new PrefixedNameToQualifiedNameMap;
    641         size_t length = 0;
    642         QualifiedName** svgTags = SVGNames::getSVGTags(&length);
    643         mapLoweredLocalNameToName(caseMap, svgTags, length);
    644     }
    645 
    646     const QualifiedName& casedName = caseMap->get(token.name());
    647     if (casedName.localName().isNull())
    648         return;
    649     token.setName(casedName.localName());
    650 }
    651 
    652 template<QualifiedName** getAttrs(size_t* length)>
    653 void adjustAttributes(AtomicHTMLToken& token)
    654 {
    655     static PrefixedNameToQualifiedNameMap* caseMap = 0;
    656     if (!caseMap) {
    657         caseMap = new PrefixedNameToQualifiedNameMap;
    658         size_t length = 0;
    659         QualifiedName** attrs = getAttrs(&length);
    660         mapLoweredLocalNameToName(caseMap, attrs, length);
    661     }
    662 
    663     NamedNodeMap* attributes = token.attributes();
    664     if (!attributes)
    665         return;
    666 
    667     for (unsigned x = 0; x < attributes->length(); ++x) {
    668         Attribute* attribute = attributes->attributeItem(x);
    669         const QualifiedName& casedName = caseMap->get(attribute->localName());
    670         if (!casedName.localName().isNull())
    671             attribute->parserSetName(casedName);
    672     }
    673 }
    674 
// Applies adjustAttributes() using the names from SVGNames::getSVGAttrs.
void adjustSVGAttributes(AtomicHTMLToken& token)
{
    adjustAttributes<SVGNames::getSVGAttrs>(token);
}
    679 
// Applies adjustAttributes() using the names from MathMLNames::getMathMLAttrs.
void adjustMathMLAttributes(AtomicHTMLToken& token)
{
    adjustAttributes<MathMLNames::getMathMLAttrs>(token);
}
    684 
    685 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
    686 {
    687     for (size_t i = 0; i < length; ++i) {
    688         QualifiedName* name = names[i];
    689         const AtomicString& localName = name->localName();
    690         AtomicString prefixColonLocalName(prefix + ":" + localName);
    691         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
    692         map->add(prefixColonLocalName, nameWithPrefix);
    693     }
    694 }
    695 
    696 void adjustForeignAttributes(AtomicHTMLToken& token)
    697 {
    698     static PrefixedNameToQualifiedNameMap* map = 0;
    699     if (!map) {
    700         map = new PrefixedNameToQualifiedNameMap;
    701         size_t length = 0;
    702         QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
    703         addNamesWithPrefix(map, "xlink", attrs, length);
    704 
    705         attrs = XMLNames::getXMLAttrs(&length);
    706         addNamesWithPrefix(map, "xml", attrs, length);
    707 
    708         map->add("xmlns", XMLNSNames::xmlnsAttr);
    709         map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
    710     }
    711 
    712     NamedNodeMap* attributes = token.attributes();
    713     if (!attributes)
    714         return;
    715 
    716     for (unsigned x = 0; x < attributes->length(); ++x) {
    717         Attribute* attribute = attributes->attributeItem(x);
    718         const QualifiedName& name = map->get(attribute->localName());
    719         if (!name.localName().isNull())
    720             attribute->parserSetName(name);
    721     }
    722 }
    723 
    724 }
    725 
    726 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
    727 {
    728     ASSERT(token.type() == HTMLToken::StartTag);
    729     if (token.name() == htmlTag) {
    730         m_tree.insertHTMLHtmlStartTagInBody(token);
    731         return;
    732     }
    733     if (token.name() == baseTag
    734         || token.name() == basefontTag
    735         || token.name() == bgsoundTag
    736         || token.name() == commandTag
    737         || token.name() == linkTag
    738         || token.name() == metaTag
    739         || token.name() == noframesTag
    740         || token.name() == scriptTag
    741         || token.name() == styleTag
    742         || token.name() == titleTag) {
    743         bool didProcess = processStartTagForInHead(token);
    744         ASSERT_UNUSED(didProcess, didProcess);
    745         return;
    746     }
    747     if (token.name() == bodyTag) {
    748         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
    749             ASSERT(isParsingFragment());
    750             return;
    751         }
    752         m_tree.insertHTMLBodyStartTagInBody(token);
    753         return;
    754     }
    755     if (token.name() == framesetTag) {
    756         parseError(token);
    757         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
    758             ASSERT(isParsingFragment());
    759             return;
    760         }
    761         if (!m_framesetOk)
    762             return;
    763         ExceptionCode ec = 0;
    764         m_tree.openElements()->bodyElement()->remove(ec);
    765         ASSERT(!ec);
    766         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
    767         m_tree.openElements()->popHTMLBodyElement();
    768         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
    769         m_tree.insertHTMLElement(token);
    770         setInsertionMode(InFramesetMode);
    771         return;
    772     }
    773     if (token.name() == addressTag
    774         || token.name() == articleTag
    775         || token.name() == asideTag
    776         || token.name() == blockquoteTag
    777         || token.name() == centerTag
    778         || token.name() == detailsTag
    779         || token.name() == dirTag
    780         || token.name() == divTag
    781         || token.name() == dlTag
    782         || token.name() == fieldsetTag
    783         || token.name() == figcaptionTag
    784         || token.name() == figureTag
    785         || token.name() == footerTag
    786         || token.name() == headerTag
    787         || token.name() == hgroupTag
    788         || token.name() == menuTag
    789         || token.name() == navTag
    790         || token.name() == olTag
    791         || token.name() == pTag
    792         || token.name() == sectionTag
    793         || token.name() == summaryTag
    794         || token.name() == ulTag) {
    795         processFakePEndTagIfPInButtonScope();
    796         m_tree.insertHTMLElement(token);
    797         return;
    798     }
    799     if (isNumberedHeaderTag(token.name())) {
    800         processFakePEndTagIfPInButtonScope();
    801         if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
    802             parseError(token);
    803             m_tree.openElements()->pop();
    804         }
    805         m_tree.insertHTMLElement(token);
    806         return;
    807     }
    808     if (token.name() == preTag || token.name() == listingTag) {
    809         processFakePEndTagIfPInButtonScope();
    810         m_tree.insertHTMLElement(token);
    811         m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
    812         m_framesetOk = false;
    813         return;
    814     }
    815     if (token.name() == formTag) {
    816         if (m_tree.form()) {
    817             parseError(token);
    818             return;
    819         }
    820         processFakePEndTagIfPInButtonScope();
    821         m_tree.insertHTMLFormElement(token);
    822         return;
    823     }
    824     if (token.name() == liTag) {
    825         processCloseWhenNestedTag<isLi>(token);
    826         return;
    827     }
    828     if (token.name() == ddTag || token.name() == dtTag) {
    829         processCloseWhenNestedTag<isDdOrDt>(token);
    830         return;
    831     }
    832     if (token.name() == plaintextTag) {
    833         processFakePEndTagIfPInButtonScope();
    834         m_tree.insertHTMLElement(token);
    835         m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
    836         return;
    837     }
    838     if (token.name() == buttonTag) {
    839         if (m_tree.openElements()->inScope(buttonTag)) {
    840             parseError(token);
    841             processFakeEndTag(buttonTag);
    842             reprocessStartTag(token); // FIXME: Could we just fall through here?
    843             return;
    844         }
    845         m_tree.reconstructTheActiveFormattingElements();
    846         m_tree.insertHTMLElement(token);
    847         m_framesetOk = false;
    848         return;
    849     }
    850     if (token.name() == aTag) {
    851         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
    852         if (activeATag) {
    853             parseError(token);
    854             processFakeEndTag(aTag);
    855             m_tree.activeFormattingElements()->remove(activeATag);
    856             if (m_tree.openElements()->contains(activeATag))
    857                 m_tree.openElements()->remove(activeATag);
    858         }
    859         m_tree.reconstructTheActiveFormattingElements();
    860         m_tree.insertFormattingElement(token);
    861         return;
    862     }
    863     if (isNonAnchorNonNobrFormattingTag(token.name())) {
    864         m_tree.reconstructTheActiveFormattingElements();
    865         m_tree.insertFormattingElement(token);
    866         return;
    867     }
    868     if (token.name() == nobrTag) {
    869         m_tree.reconstructTheActiveFormattingElements();
    870         if (m_tree.openElements()->inScope(nobrTag)) {
    871             parseError(token);
    872             processFakeEndTag(nobrTag);
    873             m_tree.reconstructTheActiveFormattingElements();
    874         }
    875         m_tree.insertFormattingElement(token);
    876         return;
    877     }
    878     if (token.name() == appletTag
    879         || token.name() == marqueeTag
    880         || token.name() == objectTag) {
    881         m_tree.reconstructTheActiveFormattingElements();
    882         m_tree.insertHTMLElement(token);
    883         m_tree.activeFormattingElements()->appendMarker();
    884         m_framesetOk = false;
    885         return;
    886     }
    887     if (token.name() == tableTag) {
    888         if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
    889             processFakeEndTag(pTag);
    890         m_tree.insertHTMLElement(token);
    891         m_framesetOk = false;
    892         setInsertionMode(InTableMode);
    893         return;
    894     }
    895     if (token.name() == imageTag) {
    896         parseError(token);
    897         // Apparently we're not supposed to ask.
    898         token.setName(imgTag.localName());
    899         prepareToReprocessToken();
    900         // Note the fall through to the imgTag handling below!
    901     }
    902     if (token.name() == areaTag
    903         || token.name() == brTag
    904         || token.name() == embedTag
    905         || token.name() == imgTag
    906         || token.name() == keygenTag
    907         || token.name() == wbrTag) {
    908         m_tree.reconstructTheActiveFormattingElements();
    909         m_tree.insertSelfClosingHTMLElement(token);
    910         m_framesetOk = false;
    911         return;
    912     }
    913     if (token.name() == inputTag) {
    914         RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
    915         m_tree.reconstructTheActiveFormattingElements();
    916         m_tree.insertSelfClosingHTMLElement(token);
    917         if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
    918             m_framesetOk = false;
    919         return;
    920     }
    921     if (token.name() == paramTag
    922         || token.name() == sourceTag
    923         || token.name() == trackTag) {
    924         m_tree.insertSelfClosingHTMLElement(token);
    925         return;
    926     }
    927     if (token.name() == hrTag) {
    928         processFakePEndTagIfPInButtonScope();
    929         m_tree.insertSelfClosingHTMLElement(token);
    930         m_framesetOk = false;
    931         return;
    932     }
    933     if (token.name() == isindexTag) {
    934         processIsindexStartTagForInBody(token);
    935         return;
    936     }
    937     if (token.name() == textareaTag) {
    938         m_tree.insertHTMLElement(token);
    939         m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
    940         m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
    941         m_originalInsertionMode = m_insertionMode;
    942         m_framesetOk = false;
    943         setInsertionMode(TextMode);
    944         return;
    945     }
    946     if (token.name() == xmpTag) {
    947         processFakePEndTagIfPInButtonScope();
    948         m_tree.reconstructTheActiveFormattingElements();
    949         m_framesetOk = false;
    950         processGenericRawTextStartTag(token);
    951         return;
    952     }
    953     if (token.name() == iframeTag) {
    954         m_framesetOk = false;
    955         processGenericRawTextStartTag(token);
    956         return;
    957     }
    958     if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
    959         processGenericRawTextStartTag(token);
    960         return;
    961     }
    962     if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
    963         processGenericRawTextStartTag(token);
    964         return;
    965     }
    966     if (token.name() == selectTag) {
    967         m_tree.reconstructTheActiveFormattingElements();
    968         m_tree.insertHTMLElement(token);
    969         m_framesetOk = false;
    970         if (m_insertionMode == InTableMode
    971              || m_insertionMode == InCaptionMode
    972              || m_insertionMode == InColumnGroupMode
    973              || m_insertionMode == InTableBodyMode
    974              || m_insertionMode == InRowMode
    975              || m_insertionMode == InCellMode)
    976             setInsertionMode(InSelectInTableMode);
    977         else
    978             setInsertionMode(InSelectMode);
    979         return;
    980     }
    981     if (token.name() == optgroupTag || token.name() == optionTag) {
    982         if (m_tree.openElements()->inScope(optionTag.localName())) {
    983             AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
    984             processEndTag(endOption);
    985         }
    986         m_tree.reconstructTheActiveFormattingElements();
    987         m_tree.insertHTMLElement(token);
    988         return;
    989     }
    990     if (token.name() == rpTag || token.name() == rtTag) {
    991         if (m_tree.openElements()->inScope(rubyTag.localName())) {
    992             m_tree.generateImpliedEndTags();
    993             if (!m_tree.currentNode()->hasTagName(rubyTag)) {
    994                 parseError(token);
    995                 m_tree.openElements()->popUntil(rubyTag.localName());
    996             }
    997         }
    998         m_tree.insertHTMLElement(token);
    999         return;
   1000     }
   1001     if (token.name() == MathMLNames::mathTag.localName()) {
   1002         m_tree.reconstructTheActiveFormattingElements();
   1003         adjustMathMLAttributes(token);
   1004         adjustForeignAttributes(token);
   1005         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
   1006         if (m_insertionMode != InForeignContentMode && !token.selfClosing())
   1007             setInsertionMode(InForeignContentMode);
   1008         return;
   1009     }
   1010     if (token.name() == SVGNames::svgTag.localName()) {
   1011         m_tree.reconstructTheActiveFormattingElements();
   1012         adjustSVGAttributes(token);
   1013         adjustForeignAttributes(token);
   1014         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
   1015         if (m_insertionMode != InForeignContentMode && !token.selfClosing())
   1016             setInsertionMode(InForeignContentMode);
   1017         return;
   1018     }
   1019     if (isCaptionColOrColgroupTag(token.name())
   1020         || token.name() == frameTag
   1021         || token.name() == headTag
   1022         || isTableBodyContextTag(token.name())
   1023         || isTableCellContextTag(token.name())
   1024         || token.name() == trTag) {
   1025         parseError(token);
   1026         return;
   1027     }
   1028     m_tree.reconstructTheActiveFormattingElements();
   1029     m_tree.insertHTMLElement(token);
   1030 }
   1031 
   1032 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
   1033 {
   1034     if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
   1035         ASSERT(isParsingFragment());
   1036         // FIXME: parse error
   1037         return false;
   1038     }
   1039     m_tree.openElements()->pop();
   1040     setInsertionMode(InTableMode);
   1041     return true;
   1042 }
   1043 
   1044 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
   1045 void HTMLTreeBuilder::closeTheCell()
   1046 {
   1047     ASSERT(insertionMode() == InCellMode);
   1048     if (m_tree.openElements()->inTableScope(tdTag)) {
   1049         ASSERT(!m_tree.openElements()->inTableScope(thTag));
   1050         processFakeEndTag(tdTag);
   1051         return;
   1052     }
   1053     ASSERT(m_tree.openElements()->inTableScope(thTag));
   1054     processFakeEndTag(thTag);
   1055     ASSERT(insertionMode() == InRowMode);
   1056 }
   1057 
   1058 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
   1059 {
   1060     ASSERT(token.type() == HTMLToken::StartTag);
   1061     if (token.name() == captionTag) {
   1062         m_tree.openElements()->popUntilTableScopeMarker();
   1063         m_tree.activeFormattingElements()->appendMarker();
   1064         m_tree.insertHTMLElement(token);
   1065         setInsertionMode(InCaptionMode);
   1066         return;
   1067     }
   1068     if (token.name() == colgroupTag) {
   1069         m_tree.openElements()->popUntilTableScopeMarker();
   1070         m_tree.insertHTMLElement(token);
   1071         setInsertionMode(InColumnGroupMode);
   1072         return;
   1073     }
   1074     if (token.name() == colTag) {
   1075         processFakeStartTag(colgroupTag);
   1076         ASSERT(InColumnGroupMode);
   1077         reprocessStartTag(token);
   1078         return;
   1079     }
   1080     if (isTableBodyContextTag(token.name())) {
   1081         m_tree.openElements()->popUntilTableScopeMarker();
   1082         m_tree.insertHTMLElement(token);
   1083         setInsertionMode(InTableBodyMode);
   1084         return;
   1085     }
   1086     if (isTableCellContextTag(token.name())
   1087         || token.name() == trTag) {
   1088         processFakeStartTag(tbodyTag);
   1089         ASSERT(insertionMode() == InTableBodyMode);
   1090         reprocessStartTag(token);
   1091         return;
   1092     }
   1093     if (token.name() == tableTag) {
   1094         parseError(token);
   1095         if (!processTableEndTagForInTable()) {
   1096             ASSERT(isParsingFragment());
   1097             return;
   1098         }
   1099         reprocessStartTag(token);
   1100         return;
   1101     }
   1102     if (token.name() == styleTag || token.name() == scriptTag) {
   1103         processStartTagForInHead(token);
   1104         return;
   1105     }
   1106     if (token.name() == inputTag) {
   1107         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
   1108         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
   1109             parseError(token);
   1110             m_tree.insertSelfClosingHTMLElement(token);
   1111             return;
   1112         }
   1113         // Fall through to "anything else" case.
   1114     }
   1115     if (token.name() == formTag) {
   1116         parseError(token);
   1117         if (m_tree.form())
   1118             return;
   1119         m_tree.insertHTMLFormElement(token, true);
   1120         m_tree.openElements()->pop();
   1121         return;
   1122     }
   1123     parseError(token);
   1124     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
   1125     processStartTagForInBody(token);
   1126 }
   1127 
   1128 namespace {
   1129 
   1130 bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, ContainerNode* currentElement)
   1131 {
   1132     ASSERT(token.type() == HTMLToken::StartTag);
   1133     if (currentElement->hasTagName(MathMLNames::miTag)
   1134         || currentElement->hasTagName(MathMLNames::moTag)
   1135         || currentElement->hasTagName(MathMLNames::mnTag)
   1136         || currentElement->hasTagName(MathMLNames::msTag)
   1137         || currentElement->hasTagName(MathMLNames::mtextTag)) {
   1138         return token.name() != MathMLNames::mglyphTag
   1139             && token.name() != MathMLNames::malignmarkTag;
   1140     }
   1141     if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
   1142         return token.name() == SVGNames::svgTag;
   1143     if (currentElement->hasTagName(SVGNames::foreignObjectTag)
   1144         || currentElement->hasTagName(SVGNames::descTag)
   1145         || currentElement->hasTagName(SVGNames::titleTag))
   1146         return true;
   1147     return isInHTMLNamespace(currentElement);
   1148 }
   1149 
   1150 }
   1151 
   1152 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
   1153 {
   1154     ASSERT(token.type() == HTMLToken::StartTag);
   1155     switch (insertionMode()) {
   1156     case InitialMode:
   1157         ASSERT(insertionMode() == InitialMode);
   1158         defaultForInitial();
   1159         // Fall through.
   1160     case BeforeHTMLMode:
   1161         ASSERT(insertionMode() == BeforeHTMLMode);
   1162         if (token.name() == htmlTag) {
   1163             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
   1164             setInsertionMode(BeforeHeadMode);
   1165             return;
   1166         }
   1167         defaultForBeforeHTML();
   1168         // Fall through.
   1169     case BeforeHeadMode:
   1170         ASSERT(insertionMode() == BeforeHeadMode);
   1171         if (token.name() == htmlTag) {
   1172             m_tree.insertHTMLHtmlStartTagInBody(token);
   1173             return;
   1174         }
   1175         if (token.name() == headTag) {
   1176             m_tree.insertHTMLHeadElement(token);
   1177             setInsertionMode(InHeadMode);
   1178             return;
   1179         }
   1180         defaultForBeforeHead();
   1181         // Fall through.
   1182     case InHeadMode:
   1183         ASSERT(insertionMode() == InHeadMode);
   1184         if (processStartTagForInHead(token))
   1185             return;
   1186         defaultForInHead();
   1187         // Fall through.
   1188     case AfterHeadMode:
   1189         ASSERT(insertionMode() == AfterHeadMode);
   1190         if (token.name() == htmlTag) {
   1191             m_tree.insertHTMLHtmlStartTagInBody(token);
   1192             return;
   1193         }
   1194         if (token.name() == bodyTag) {
   1195             m_framesetOk = false;
   1196             m_tree.insertHTMLBodyElement(token);
   1197             setInsertionMode(InBodyMode);
   1198             return;
   1199         }
   1200         if (token.name() == framesetTag) {
   1201             m_tree.insertHTMLElement(token);
   1202             setInsertionMode(InFramesetMode);
   1203             return;
   1204         }
   1205         if (token.name() == baseTag
   1206             || token.name() == basefontTag
   1207             || token.name() == bgsoundTag
   1208             || token.name() == linkTag
   1209             || token.name() == metaTag
   1210             || token.name() == noframesTag
   1211             || token.name() == scriptTag
   1212             || token.name() == styleTag
   1213             || token.name() == titleTag) {
   1214             parseError(token);
   1215             ASSERT(m_tree.head());
   1216             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
   1217             processStartTagForInHead(token);
   1218             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
   1219             return;
   1220         }
   1221         if (token.name() == headTag) {
   1222             parseError(token);
   1223             return;
   1224         }
   1225         defaultForAfterHead();
   1226         // Fall through
   1227     case InBodyMode:
   1228         ASSERT(insertionMode() == InBodyMode);
   1229         processStartTagForInBody(token);
   1230         break;
   1231     case InTableMode:
   1232         ASSERT(insertionMode() == InTableMode);
   1233         processStartTagForInTable(token);
   1234         break;
   1235     case InCaptionMode:
   1236         ASSERT(insertionMode() == InCaptionMode);
   1237         if (isCaptionColOrColgroupTag(token.name())
   1238             || isTableBodyContextTag(token.name())
   1239             || isTableCellContextTag(token.name())
   1240             || token.name() == trTag) {
   1241             parseError(token);
   1242             if (!processCaptionEndTagForInCaption()) {
   1243                 ASSERT(isParsingFragment());
   1244                 return;
   1245             }
   1246             reprocessStartTag(token);
   1247             return;
   1248         }
   1249         processStartTagForInBody(token);
   1250         break;
   1251     case InColumnGroupMode:
   1252         ASSERT(insertionMode() == InColumnGroupMode);
   1253         if (token.name() == htmlTag) {
   1254             m_tree.insertHTMLHtmlStartTagInBody(token);
   1255             return;
   1256         }
   1257         if (token.name() == colTag) {
   1258             m_tree.insertSelfClosingHTMLElement(token);
   1259             return;
   1260         }
   1261         if (!processColgroupEndTagForInColumnGroup()) {
   1262             ASSERT(isParsingFragment());
   1263             return;
   1264         }
   1265         reprocessStartTag(token);
   1266         break;
   1267     case InTableBodyMode:
   1268         ASSERT(insertionMode() == InTableBodyMode);
   1269         if (token.name() == trTag) {
   1270             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
   1271             m_tree.insertHTMLElement(token);
   1272             setInsertionMode(InRowMode);
   1273             return;
   1274         }
   1275         if (isTableCellContextTag(token.name())) {
   1276             parseError(token);
   1277             processFakeStartTag(trTag);
   1278             ASSERT(insertionMode() == InRowMode);
   1279             reprocessStartTag(token);
   1280             return;
   1281         }
   1282         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
   1283             // FIXME: This is slow.
   1284             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
   1285                 ASSERT(isParsingFragment());
   1286                 parseError(token);
   1287                 return;
   1288             }
   1289             m_tree.openElements()->popUntilTableBodyScopeMarker();
   1290             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
   1291             processFakeEndTag(m_tree.currentElement()->tagQName());
   1292             reprocessStartTag(token);
   1293             return;
   1294         }
   1295         processStartTagForInTable(token);
   1296         break;
   1297     case InRowMode:
   1298         ASSERT(insertionMode() == InRowMode);
   1299         if (isTableCellContextTag(token.name())) {
   1300             m_tree.openElements()->popUntilTableRowScopeMarker();
   1301             m_tree.insertHTMLElement(token);
   1302             setInsertionMode(InCellMode);
   1303             m_tree.activeFormattingElements()->appendMarker();
   1304             return;
   1305         }
   1306         if (token.name() == trTag
   1307             || isCaptionColOrColgroupTag(token.name())
   1308             || isTableBodyContextTag(token.name())) {
   1309             if (!processTrEndTagForInRow()) {
   1310                 ASSERT(isParsingFragment());
   1311                 return;
   1312             }
   1313             ASSERT(insertionMode() == InTableBodyMode);
   1314             reprocessStartTag(token);
   1315             return;
   1316         }
   1317         processStartTagForInTable(token);
   1318         break;
   1319     case InCellMode:
   1320         ASSERT(insertionMode() == InCellMode);
   1321         if (isCaptionColOrColgroupTag(token.name())
   1322             || isTableCellContextTag(token.name())
   1323             || token.name() == trTag
   1324             || isTableBodyContextTag(token.name())) {
   1325             // FIXME: This could be more efficient.
   1326             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
   1327                 ASSERT(isParsingFragment());
   1328                 parseError(token);
   1329                 return;
   1330             }
   1331             closeTheCell();
   1332             reprocessStartTag(token);
   1333             return;
   1334         }
   1335         processStartTagForInBody(token);
   1336         break;
   1337     case AfterBodyMode:
   1338     case AfterAfterBodyMode:
   1339         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
   1340         if (token.name() == htmlTag) {
   1341             m_tree.insertHTMLHtmlStartTagInBody(token);
   1342             return;
   1343         }
   1344         setInsertionMode(InBodyMode);
   1345         reprocessStartTag(token);
   1346         break;
   1347     case InHeadNoscriptMode:
   1348         ASSERT(insertionMode() == InHeadNoscriptMode);
   1349         if (token.name() == htmlTag) {
   1350             m_tree.insertHTMLHtmlStartTagInBody(token);
   1351             return;
   1352         }
   1353         if (token.name() == basefontTag
   1354             || token.name() == bgsoundTag
   1355             || token.name() == linkTag
   1356             || token.name() == metaTag
   1357             || token.name() == noframesTag
   1358             || token.name() == styleTag) {
   1359             bool didProcess = processStartTagForInHead(token);
   1360             ASSERT_UNUSED(didProcess, didProcess);
   1361             return;
   1362         }
   1363         if (token.name() == htmlTag || token.name() == noscriptTag) {
   1364             parseError(token);
   1365             return;
   1366         }
   1367         defaultForInHeadNoscript();
   1368         processToken(token);
   1369         break;
   1370     case InFramesetMode:
   1371         ASSERT(insertionMode() == InFramesetMode);
   1372         if (token.name() == htmlTag) {
   1373             m_tree.insertHTMLHtmlStartTagInBody(token);
   1374             return;
   1375         }
   1376         if (token.name() == framesetTag) {
   1377             m_tree.insertHTMLElement(token);
   1378             return;
   1379         }
   1380         if (token.name() == frameTag) {
   1381             m_tree.insertSelfClosingHTMLElement(token);
   1382             return;
   1383         }
   1384         if (token.name() == noframesTag) {
   1385             processStartTagForInHead(token);
   1386             return;
   1387         }
   1388         parseError(token);
   1389         break;
   1390     case AfterFramesetMode:
   1391     case AfterAfterFramesetMode:
   1392         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
   1393         if (token.name() == htmlTag) {
   1394             m_tree.insertHTMLHtmlStartTagInBody(token);
   1395             return;
   1396         }
   1397         if (token.name() == noframesTag) {
   1398             processStartTagForInHead(token);
   1399             return;
   1400         }
   1401         parseError(token);
   1402         break;
   1403     case InSelectInTableMode:
   1404         ASSERT(insertionMode() == InSelectInTableMode);
   1405         if (token.name() == captionTag
   1406             || token.name() == tableTag
   1407             || isTableBodyContextTag(token.name())
   1408             || token.name() == trTag
   1409             || isTableCellContextTag(token.name())) {
   1410             parseError(token);
   1411             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
   1412             processEndTag(endSelect);
   1413             reprocessStartTag(token);
   1414             return;
   1415         }
   1416         // Fall through
   1417     case InSelectMode:
   1418         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
   1419         if (token.name() == htmlTag) {
   1420             m_tree.insertHTMLHtmlStartTagInBody(token);
   1421             return;
   1422         }
   1423         if (token.name() == optionTag) {
   1424             if (m_tree.currentNode()->hasTagName(optionTag)) {
   1425                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
   1426                 processEndTag(endOption);
   1427             }
   1428             m_tree.insertHTMLElement(token);
   1429             return;
   1430         }
   1431         if (token.name() == optgroupTag) {
   1432             if (m_tree.currentNode()->hasTagName(optionTag)) {
   1433                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
   1434                 processEndTag(endOption);
   1435             }
   1436             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
   1437                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
   1438                 processEndTag(endOptgroup);
   1439             }
   1440             m_tree.insertHTMLElement(token);
   1441             return;
   1442         }
   1443         if (token.name() == selectTag) {
   1444             parseError(token);
   1445             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
   1446             processEndTag(endSelect);
   1447             return;
   1448         }
   1449         if (token.name() == inputTag
   1450             || token.name() == keygenTag
   1451             || token.name() == textareaTag) {
   1452             parseError(token);
   1453             if (!m_tree.openElements()->inSelectScope(selectTag)) {
   1454                 ASSERT(isParsingFragment());
   1455                 return;
   1456             }
   1457             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
   1458             processEndTag(endSelect);
   1459             reprocessStartTag(token);
   1460             return;
   1461         }
   1462         if (token.name() == scriptTag) {
   1463             bool didProcess = processStartTagForInHead(token);
   1464             ASSERT_UNUSED(didProcess, didProcess);
   1465             return;
   1466         }
   1467         break;
   1468     case InTableTextMode:
   1469         defaultForInTableText();
   1470         processStartTag(token);
   1471         break;
   1472     case InForeignContentMode: {
   1473         if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentNode())) {
   1474             processForeignContentUsingInBodyModeAndResetMode(token);
   1475             return;
   1476         }
   1477         if (token.name() == bTag
   1478             || token.name() == bigTag
   1479             || token.name() == blockquoteTag
   1480             || token.name() == bodyTag
   1481             || token.name() == brTag
   1482             || token.name() == centerTag
   1483             || token.name() == codeTag
   1484             || token.name() == ddTag
   1485             || token.name() == divTag
   1486             || token.name() == dlTag
   1487             || token.name() == dtTag
   1488             || token.name() == emTag
   1489             || token.name() == embedTag
   1490             || isNumberedHeaderTag(token.name())
   1491             || token.name() == headTag
   1492             || token.name() == hrTag
   1493             || token.name() == iTag
   1494             || token.name() == imgTag
   1495             || token.name() == liTag
   1496             || token.name() == listingTag
   1497             || token.name() == menuTag
   1498             || token.name() == metaTag
   1499             || token.name() == nobrTag
   1500             || token.name() == olTag
   1501             || token.name() == pTag
   1502             || token.name() == preTag
   1503             || token.name() == rubyTag
   1504             || token.name() == sTag
   1505             || token.name() == smallTag
   1506             || token.name() == spanTag
   1507             || token.name() == strongTag
   1508             || token.name() == strikeTag
   1509             || token.name() == subTag
   1510             || token.name() == supTag
   1511             || token.name() == tableTag
   1512             || token.name() == ttTag
   1513             || token.name() == uTag
   1514             || token.name() == ulTag
   1515             || token.name() == varTag
   1516             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
   1517             parseError(token);
   1518             m_tree.openElements()->popUntilForeignContentScopeMarker();
   1519             resetInsertionModeAppropriately();
   1520             reprocessStartTag(token);
   1521             return;
   1522         }
   1523         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
   1524         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
   1525             adjustMathMLAttributes(token);
   1526         if (currentNamespace == SVGNames::svgNamespaceURI) {
   1527             adjustSVGTagNameCase(token);
   1528             adjustSVGAttributes(token);
   1529         }
   1530         adjustForeignAttributes(token);
   1531         m_tree.insertForeignElement(token, currentNamespace);
   1532         break;
   1533     }
   1534     case TextMode:
   1535         ASSERT_NOT_REACHED();
   1536         break;
   1537     }
   1538 }
   1539 
   1540 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
   1541 {
   1542     ASSERT(token.type() == HTMLToken::EndTag);
   1543     ASSERT(token.name() == bodyTag);
   1544     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
   1545         parseError(token);
   1546         return false;
   1547     }
   1548     notImplemented(); // Emit a more specific parse error based on stack contents.
   1549     setInsertionMode(AfterBodyMode);
   1550     return true;
   1551 }
   1552 
   1553 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
   1554 {
   1555     ASSERT(token.type() == HTMLToken::EndTag);
   1556     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
   1557     while (1) {
   1558         ContainerNode* node = record->node();
   1559         if (node->hasLocalName(token.name())) {
   1560             m_tree.generateImpliedEndTags();
   1561             // FIXME: The ElementRecord pointed to by record might be deleted by
   1562             // the preceding call. Perhaps we should hold a RefPtr so that it
   1563             // stays alive for the duration of record's scope.
   1564             record = 0;
   1565             if (!m_tree.currentNode()->hasLocalName(token.name())) {
   1566                 parseError(token);
   1567                 // FIXME: This is either a bug in the spec, or a bug in our
   1568                 // implementation.  Filed a bug with HTML5:
   1569                 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
   1570                 // We might have already popped the node for the token in
   1571                 // generateImpliedEndTags, just abort.
   1572                 if (!m_tree.openElements()->contains(toElement(node)))
   1573                     return;
   1574             }
   1575             m_tree.openElements()->popUntilPopped(toElement(node));
   1576             return;
   1577         }
   1578         if (isSpecialNode(node)) {
   1579             parseError(token);
   1580             return;
   1581         }
   1582         record = record->next();
   1583     }
   1584 }
   1585 
   1586 // FIXME: This probably belongs on HTMLElementStack.
   1587 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
   1588 {
   1589     HTMLElementStack::ElementRecord* furthestBlock = 0;
   1590     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
   1591     for (; record; record = record->next()) {
   1592         if (record->element() == formattingElement)
   1593             return furthestBlock;
   1594         if (isSpecialNode(record->element()))
   1595             furthestBlock = record;
   1596     }
   1597     ASSERT_NOT_REACHED();
   1598     return 0;
   1599 }
   1600 
   1601 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
   1602 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
   1603 {
   1604     // The adoption agency algorithm is N^2.  We limit the number of iterations
   1605     // to stop from hanging the whole browser.  This limit is copied from the
   1606     // legacy tree builder and might need to be tweaked in the future.
   1607     static const int adoptionAgencyIterationLimit = 10;
   1608 
   1609     for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
   1610         // 1.
   1611         Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
   1612         if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
   1613             parseError(token);
   1614             notImplemented(); // Check the stack of open elements for a more specific parse error.
   1615             return;
   1616         }
   1617         HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
   1618         if (!formattingElementRecord) {
   1619             parseError(token);
   1620             m_tree.activeFormattingElements()->remove(formattingElement);
   1621             return;
   1622         }
   1623         if (formattingElement != m_tree.currentElement())
   1624             parseError(token);
   1625         // 2.
   1626         HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
   1627         // 3.
   1628         if (!furthestBlock) {
   1629             m_tree.openElements()->popUntilPopped(formattingElement);
   1630             m_tree.activeFormattingElements()->remove(formattingElement);
   1631             return;
   1632         }
   1633         // 4.
   1634         ASSERT(furthestBlock->isAbove(formattingElementRecord));
   1635         ContainerNode* commonAncestor = formattingElementRecord->next()->node();
   1636         // 5.
   1637         HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
   1638         // 6.
   1639         HTMLElementStack::ElementRecord* node = furthestBlock;
   1640         HTMLElementStack::ElementRecord* nextNode = node->next();
   1641         HTMLElementStack::ElementRecord* lastNode = furthestBlock;
   1642         for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
   1643             // 6.1
   1644             node = nextNode;
   1645             ASSERT(node);
   1646             nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
   1647             // 6.2
   1648             if (!m_tree.activeFormattingElements()->contains(node->element())) {
   1649                 m_tree.openElements()->remove(node->element());
   1650                 node = 0;
   1651                 continue;
   1652             }
   1653             // 6.3
   1654             if (node == formattingElementRecord)
   1655                 break;
   1656             // 6.5
   1657             RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
   1658             HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
   1659             nodeEntry->replaceElement(newElement.get());
   1660             node->replaceElement(newElement.release());
   1661             // 6.4 -- Intentionally out of order to handle the case where node
   1662             // was replaced in 6.5.
   1663             // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
   1664             if (lastNode == furthestBlock)
   1665                 bookmark.moveToAfter(nodeEntry);
   1666             // 6.6
   1667             if (Element* parent = lastNode->element()->parentElement())
   1668                 parent->parserRemoveChild(lastNode->element());
   1669             node->element()->parserAddChild(lastNode->element());
   1670             if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
   1671                 lastNode->element()->lazyAttach();
   1672             // 6.7
   1673             lastNode = node;
   1674         }
   1675         // 7
   1676         const AtomicString& commonAncestorTag = commonAncestor->localName();
   1677         if (Element* parent = lastNode->element()->parentElement())
   1678             parent->parserRemoveChild(lastNode->element());
   1679         // FIXME: If this moves to HTMLConstructionSite, this check should use
   1680         // causesFosterParenting(tagName) instead.
   1681         if (commonAncestorTag == tableTag
   1682             || commonAncestorTag == trTag
   1683             || isTableBodyContextTag(commonAncestorTag))
   1684             m_tree.fosterParent(lastNode->element());
   1685         else {
   1686             commonAncestor->parserAddChild(lastNode->element());
   1687             ASSERT(lastNode->node()->isElementNode());
   1688             ASSERT(lastNode->element()->parentNode());
   1689             if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
   1690                 lastNode->element()->lazyAttach();
   1691         }
   1692         // 8
   1693         RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
   1694         // 9
   1695         newElement->takeAllChildrenFrom(furthestBlock->element());
   1696         // 10
   1697         Element* furthestBlockElement = furthestBlock->element();
   1698         // FIXME: All this creation / parserAddChild / attach business should
   1699         //        be in HTMLConstructionSite.  My guess is that steps 8--12
   1700         //        should all be in some HTMLConstructionSite function.
   1701         furthestBlockElement->parserAddChild(newElement);
   1702         if (furthestBlockElement->attached() && !newElement->attached()) {
   1703             // Notice that newElement might already be attached if, for example, one of the reparented
   1704             // children is a style element, which attaches itself automatically.
   1705             newElement->attach();
   1706         }
   1707         // 11
   1708         m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
   1709         // 12
   1710         m_tree.openElements()->remove(formattingElement);
   1711         m_tree.openElements()->insertAbove(newElement, furthestBlock);
   1712     }
   1713 }
   1714 
   1715 void HTMLTreeBuilder::resetInsertionModeAppropriately()
   1716 {
   1717     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
   1718     bool last = false;
   1719     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
   1720     while (1) {
   1721         ContainerNode* node = nodeRecord->node();
   1722         if (node == m_tree.openElements()->rootNode()) {
   1723             ASSERT(isParsingFragment());
   1724             last = true;
   1725             node = m_fragmentContext.contextElement();
   1726         }
   1727         if (node->hasTagName(selectTag)) {
   1728             ASSERT(isParsingFragment());
   1729             return setInsertionMode(InSelectMode);
   1730         }
   1731         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
   1732             return setInsertionMode(InCellMode);
   1733         if (node->hasTagName(trTag))
   1734             return setInsertionMode(InRowMode);
   1735         if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
   1736             return setInsertionMode(InTableBodyMode);
   1737         if (node->hasTagName(captionTag))
   1738             return setInsertionMode(InCaptionMode);
   1739         if (node->hasTagName(colgroupTag)) {
   1740             ASSERT(isParsingFragment());
   1741             return setInsertionMode(InColumnGroupMode);
   1742         }
   1743         if (node->hasTagName(tableTag))
   1744             return setInsertionMode(InTableMode);
   1745         if (node->hasTagName(headTag)) {
   1746             ASSERT(isParsingFragment());
   1747             return setInsertionMode(InBodyMode);
   1748         }
   1749         if (node->hasTagName(bodyTag))
   1750             return setInsertionMode(InBodyMode);
   1751         if (node->hasTagName(framesetTag)) {
   1752             ASSERT(isParsingFragment());
   1753             return setInsertionMode(InFramesetMode);
   1754         }
   1755         if (node->hasTagName(htmlTag)) {
   1756             ASSERT(isParsingFragment());
   1757             return setInsertionMode(BeforeHeadMode);
   1758         }
   1759         if (node->namespaceURI() == SVGNames::svgNamespaceURI
   1760             || node->namespaceURI() == MathMLNames::mathmlNamespaceURI)
   1761             return setInsertionMode(InForeignContentMode);
   1762         if (last) {
   1763             ASSERT(isParsingFragment());
   1764             return setInsertionMode(InBodyMode);
   1765         }
   1766         nodeRecord = nodeRecord->next();
   1767     }
   1768 }
   1769 
   1770 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
   1771 {
   1772     ASSERT(token.type() == HTMLToken::EndTag);
   1773     if (isTableBodyContextTag(token.name())) {
   1774         if (!m_tree.openElements()->inTableScope(token.name())) {
   1775             parseError(token);
   1776             return;
   1777         }
   1778         m_tree.openElements()->popUntilTableBodyScopeMarker();
   1779         m_tree.openElements()->pop();
   1780         setInsertionMode(InTableMode);
   1781         return;
   1782     }
   1783     if (token.name() == tableTag) {
   1784         // FIXME: This is slow.
   1785         if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
   1786             ASSERT(isParsingFragment());
   1787             parseError(token);
   1788             return;
   1789         }
   1790         m_tree.openElements()->popUntilTableBodyScopeMarker();
   1791         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
   1792         processFakeEndTag(m_tree.currentElement()->tagQName());
   1793         reprocessEndTag(token);
   1794         return;
   1795     }
   1796     if (token.name() == bodyTag
   1797         || isCaptionColOrColgroupTag(token.name())
   1798         || token.name() == htmlTag
   1799         || isTableCellContextTag(token.name())
   1800         || token.name() == trTag) {
   1801         parseError(token);
   1802         return;
   1803     }
   1804     processEndTagForInTable(token);
   1805 }
   1806 
   1807 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
   1808 {
   1809     ASSERT(token.type() == HTMLToken::EndTag);
   1810     if (token.name() == trTag) {
   1811         processTrEndTagForInRow();
   1812         return;
   1813     }
   1814     if (token.name() == tableTag) {
   1815         if (!processTrEndTagForInRow()) {
   1816             ASSERT(isParsingFragment());
   1817             return;
   1818         }
   1819         ASSERT(insertionMode() == InTableBodyMode);
   1820         reprocessEndTag(token);
   1821         return;
   1822     }
   1823     if (isTableBodyContextTag(token.name())) {
   1824         if (!m_tree.openElements()->inTableScope(token.name())) {
   1825             parseError(token);
   1826             return;
   1827         }
   1828         processFakeEndTag(trTag);
   1829         ASSERT(insertionMode() == InTableBodyMode);
   1830         reprocessEndTag(token);
   1831         return;
   1832     }
   1833     if (token.name() == bodyTag
   1834         || isCaptionColOrColgroupTag(token.name())
   1835         || token.name() == htmlTag
   1836         || isTableCellContextTag(token.name())) {
   1837         parseError(token);
   1838         return;
   1839     }
   1840     processEndTagForInTable(token);
   1841 }
   1842 
   1843 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
   1844 {
   1845     ASSERT(token.type() == HTMLToken::EndTag);
   1846     if (isTableCellContextTag(token.name())) {
   1847         if (!m_tree.openElements()->inTableScope(token.name())) {
   1848             parseError(token);
   1849             return;
   1850         }
   1851         m_tree.generateImpliedEndTags();
   1852         if (!m_tree.currentNode()->hasLocalName(token.name()))
   1853             parseError(token);
   1854         m_tree.openElements()->popUntilPopped(token.name());
   1855         m_tree.activeFormattingElements()->clearToLastMarker();
   1856         setInsertionMode(InRowMode);
   1857         return;
   1858     }
   1859     if (token.name() == bodyTag
   1860         || isCaptionColOrColgroupTag(token.name())
   1861         || token.name() == htmlTag) {
   1862         parseError(token);
   1863         return;
   1864     }
   1865     if (token.name() == tableTag
   1866         || token.name() == trTag
   1867         || isTableBodyContextTag(token.name())) {
   1868         if (!m_tree.openElements()->inTableScope(token.name())) {
   1869             ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
   1870             parseError(token);
   1871             return;
   1872         }
   1873         closeTheCell();
   1874         reprocessEndTag(token);
   1875         return;
   1876     }
   1877     processEndTagForInBody(token);
   1878 }
   1879 
   1880 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
   1881 {
   1882     ASSERT(token.type() == HTMLToken::EndTag);
   1883     if (token.name() == bodyTag) {
   1884         processBodyEndTagForInBody(token);
   1885         return;
   1886     }
   1887     if (token.name() == htmlTag) {
   1888         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
   1889         if (processBodyEndTagForInBody(endBody))
   1890             reprocessEndTag(token);
   1891         return;
   1892     }
   1893     if (token.name() == addressTag
   1894         || token.name() == articleTag
   1895         || token.name() == asideTag
   1896         || token.name() == blockquoteTag
   1897         || token.name() == buttonTag
   1898         || token.name() == centerTag
   1899         || token.name() == detailsTag
   1900         || token.name() == dirTag
   1901         || token.name() == divTag
   1902         || token.name() == dlTag
   1903         || token.name() == fieldsetTag
   1904         || token.name() == figcaptionTag
   1905         || token.name() == figureTag
   1906         || token.name() == footerTag
   1907         || token.name() == headerTag
   1908         || token.name() == hgroupTag
   1909         || token.name() == listingTag
   1910         || token.name() == menuTag
   1911         || token.name() == navTag
   1912         || token.name() == olTag
   1913         || token.name() == preTag
   1914         || token.name() == sectionTag
   1915         || token.name() == summaryTag
   1916         || token.name() == ulTag) {
   1917         if (!m_tree.openElements()->inScope(token.name())) {
   1918             parseError(token);
   1919             return;
   1920         }
   1921         m_tree.generateImpliedEndTags();
   1922         if (!m_tree.currentNode()->hasLocalName(token.name()))
   1923             parseError(token);
   1924         m_tree.openElements()->popUntilPopped(token.name());
   1925         return;
   1926     }
   1927     if (token.name() == formTag) {
   1928         RefPtr<Element> node = m_tree.takeForm();
   1929         if (!node || !m_tree.openElements()->inScope(node.get())) {
   1930             parseError(token);
   1931             return;
   1932         }
   1933         m_tree.generateImpliedEndTags();
   1934         if (m_tree.currentElement() != node.get())
   1935             parseError(token);
   1936         m_tree.openElements()->remove(node.get());
   1937     }
   1938     if (token.name() == pTag) {
   1939         if (!m_tree.openElements()->inButtonScope(token.name())) {
   1940             parseError(token);
   1941             processFakeStartTag(pTag);
   1942             ASSERT(m_tree.openElements()->inScope(token.name()));
   1943             reprocessEndTag(token);
   1944             return;
   1945         }
   1946         m_tree.generateImpliedEndTagsWithExclusion(token.name());
   1947         if (!m_tree.currentNode()->hasLocalName(token.name()))
   1948             parseError(token);
   1949         m_tree.openElements()->popUntilPopped(token.name());
   1950         return;
   1951     }
   1952     if (token.name() == liTag) {
   1953         if (!m_tree.openElements()->inListItemScope(token.name())) {
   1954             parseError(token);
   1955             return;
   1956         }
   1957         m_tree.generateImpliedEndTagsWithExclusion(token.name());
   1958         if (!m_tree.currentNode()->hasLocalName(token.name()))
   1959             parseError(token);
   1960         m_tree.openElements()->popUntilPopped(token.name());
   1961         return;
   1962     }
   1963     if (token.name() == ddTag
   1964         || token.name() == dtTag) {
   1965         if (!m_tree.openElements()->inScope(token.name())) {
   1966             parseError(token);
   1967             return;
   1968         }
   1969         m_tree.generateImpliedEndTagsWithExclusion(token.name());
   1970         if (!m_tree.currentNode()->hasLocalName(token.name()))
   1971             parseError(token);
   1972         m_tree.openElements()->popUntilPopped(token.name());
   1973         return;
   1974     }
   1975     if (isNumberedHeaderTag(token.name())) {
   1976         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
   1977             parseError(token);
   1978             return;
   1979         }
   1980         m_tree.generateImpliedEndTags();
   1981         if (!m_tree.currentNode()->hasLocalName(token.name()))
   1982             parseError(token);
   1983         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
   1984         return;
   1985     }
   1986     if (isFormattingTag(token.name())) {
   1987         callTheAdoptionAgency(token);
   1988         return;
   1989     }
   1990     if (token.name() == appletTag
   1991         || token.name() == marqueeTag
   1992         || token.name() == objectTag) {
   1993         if (!m_tree.openElements()->inScope(token.name())) {
   1994             parseError(token);
   1995             return;
   1996         }
   1997         m_tree.generateImpliedEndTags();
   1998         if (!m_tree.currentNode()->hasLocalName(token.name()))
   1999             parseError(token);
   2000         m_tree.openElements()->popUntilPopped(token.name());
   2001         m_tree.activeFormattingElements()->clearToLastMarker();
   2002         return;
   2003     }
   2004     if (token.name() == brTag) {
   2005         parseError(token);
   2006         processFakeStartTag(brTag);
   2007         return;
   2008     }
   2009     processAnyOtherEndTagForInBody(token);
   2010 }
   2011 
   2012 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
   2013 {
   2014     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
   2015         ASSERT(isParsingFragment());
   2016         // FIXME: parse error
   2017         return false;
   2018     }
   2019     m_tree.generateImpliedEndTags();
   2020     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
   2021     m_tree.openElements()->popUntilPopped(captionTag.localName());
   2022     m_tree.activeFormattingElements()->clearToLastMarker();
   2023     setInsertionMode(InTableMode);
   2024     return true;
   2025 }
   2026 
   2027 bool HTMLTreeBuilder::processTrEndTagForInRow()
   2028 {
   2029     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
   2030         ASSERT(isParsingFragment());
   2031         // FIXME: parse error
   2032         return false;
   2033     }
   2034     m_tree.openElements()->popUntilTableRowScopeMarker();
   2035     ASSERT(m_tree.currentElement()->hasTagName(trTag));
   2036     m_tree.openElements()->pop();
   2037     setInsertionMode(InTableBodyMode);
   2038     return true;
   2039 }
   2040 
   2041 bool HTMLTreeBuilder::processTableEndTagForInTable()
   2042 {
   2043     if (!m_tree.openElements()->inTableScope(tableTag)) {
   2044         ASSERT(isParsingFragment());
   2045         // FIXME: parse error.
   2046         return false;
   2047     }
   2048     m_tree.openElements()->popUntilPopped(tableTag.localName());
   2049     resetInsertionModeAppropriately();
   2050     return true;
   2051 }
   2052 
   2053 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
   2054 {
   2055     ASSERT(token.type() == HTMLToken::EndTag);
   2056     if (token.name() == tableTag) {
   2057         processTableEndTagForInTable();
   2058         return;
   2059     }
   2060     if (token.name() == bodyTag
   2061         || isCaptionColOrColgroupTag(token.name())
   2062         || token.name() == htmlTag
   2063         || isTableBodyContextTag(token.name())
   2064         || isTableCellContextTag(token.name())
   2065         || token.name() == trTag) {
   2066         parseError(token);
   2067         return;
   2068     }
   2069     // Is this redirection necessary here?
   2070     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
   2071     processEndTagForInBody(token);
   2072 }
   2073 
   2074 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
   2075 {
   2076     ASSERT(token.type() == HTMLToken::EndTag);
   2077     switch (insertionMode()) {
   2078     case InitialMode:
   2079         ASSERT(insertionMode() == InitialMode);
   2080         defaultForInitial();
   2081         // Fall through.
   2082     case BeforeHTMLMode:
   2083         ASSERT(insertionMode() == BeforeHTMLMode);
   2084         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
   2085             parseError(token);
   2086             return;
   2087         }
   2088         defaultForBeforeHTML();
   2089         // Fall through.
   2090     case BeforeHeadMode:
   2091         ASSERT(insertionMode() == BeforeHeadMode);
   2092         if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
   2093             parseError(token);
   2094             return;
   2095         }
   2096         defaultForBeforeHead();
   2097         // Fall through.
   2098     case InHeadMode:
   2099         ASSERT(insertionMode() == InHeadMode);
   2100         if (token.name() == headTag) {
   2101             m_tree.openElements()->popHTMLHeadElement();
   2102             setInsertionMode(AfterHeadMode);
   2103             return;
   2104         }
   2105         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
   2106             parseError(token);
   2107             return;
   2108         }
   2109         defaultForInHead();
   2110         // Fall through.
   2111     case AfterHeadMode:
   2112         ASSERT(insertionMode() == AfterHeadMode);
   2113         if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
   2114             parseError(token);
   2115             return;
   2116         }
   2117         defaultForAfterHead();
   2118         // Fall through
   2119     case InBodyMode:
   2120         ASSERT(insertionMode() == InBodyMode);
   2121         processEndTagForInBody(token);
   2122         break;
   2123     case InTableMode:
   2124         ASSERT(insertionMode() == InTableMode);
   2125         processEndTagForInTable(token);
   2126         break;
   2127     case InCaptionMode:
   2128         ASSERT(insertionMode() == InCaptionMode);
   2129         if (token.name() == captionTag) {
   2130             processCaptionEndTagForInCaption();
   2131             return;
   2132         }
   2133         if (token.name() == tableTag) {
   2134             parseError(token);
   2135             if (!processCaptionEndTagForInCaption()) {
   2136                 ASSERT(isParsingFragment());
   2137                 return;
   2138             }
   2139             reprocessEndTag(token);
   2140             return;
   2141         }
   2142         if (token.name() == bodyTag
   2143             || token.name() == colTag
   2144             || token.name() == colgroupTag
   2145             || token.name() == htmlTag
   2146             || isTableBodyContextTag(token.name())
   2147             || isTableCellContextTag(token.name())
   2148             || token.name() == trTag) {
   2149             parseError(token);
   2150             return;
   2151         }
   2152         processEndTagForInBody(token);
   2153         break;
   2154     case InColumnGroupMode:
   2155         ASSERT(insertionMode() == InColumnGroupMode);
   2156         if (token.name() == colgroupTag) {
   2157             processColgroupEndTagForInColumnGroup();
   2158             return;
   2159         }
   2160         if (token.name() == colTag) {
   2161             parseError(token);
   2162             return;
   2163         }
   2164         if (!processColgroupEndTagForInColumnGroup()) {
   2165             ASSERT(isParsingFragment());
   2166             return;
   2167         }
   2168         reprocessEndTag(token);
   2169         break;
   2170     case InRowMode:
   2171         ASSERT(insertionMode() == InRowMode);
   2172         processEndTagForInRow(token);
   2173         break;
   2174     case InCellMode:
   2175         ASSERT(insertionMode() == InCellMode);
   2176         processEndTagForInCell(token);
   2177         break;
   2178     case InTableBodyMode:
   2179         ASSERT(insertionMode() == InTableBodyMode);
   2180         processEndTagForInTableBody(token);
   2181         break;
   2182     case AfterBodyMode:
   2183         ASSERT(insertionMode() == AfterBodyMode);
   2184         if (token.name() == htmlTag) {
   2185             if (isParsingFragment()) {
   2186                 parseError(token);
   2187                 return;
   2188             }
   2189             setInsertionMode(AfterAfterBodyMode);
   2190             return;
   2191         }
   2192         prepareToReprocessToken();
   2193         // Fall through.
   2194     case AfterAfterBodyMode:
   2195         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
   2196         parseError(token);
   2197         setInsertionMode(InBodyMode);
   2198         reprocessEndTag(token);
   2199         break;
   2200     case InHeadNoscriptMode:
   2201         ASSERT(insertionMode() == InHeadNoscriptMode);
   2202         if (token.name() == noscriptTag) {
   2203             ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
   2204             m_tree.openElements()->pop();
   2205             ASSERT(m_tree.currentElement()->hasTagName(headTag));
   2206             setInsertionMode(InHeadMode);
   2207             return;
   2208         }
   2209         if (token.name() != brTag) {
   2210             parseError(token);
   2211             return;
   2212         }
   2213         defaultForInHeadNoscript();
   2214         processToken(token);
   2215         break;
   2216     case TextMode:
   2217         if (token.name() == scriptTag) {
   2218             // Pause ourselves so that parsing stops until the script can be processed by the caller.
   2219             m_isPaused = true;
   2220             ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
   2221             m_scriptToProcess = m_tree.currentElement();
   2222             m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
   2223             m_tree.openElements()->pop();
   2224             if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
   2225                 m_scriptToProcess->removeAllChildren();
   2226             setInsertionMode(m_originalInsertionMode);
   2227 
   2228             // This token will not have been created by the tokenizer if a
   2229             // self-closing script tag was encountered and pre-HTML5 parser
   2230             // quirks are enabled. We must set the tokenizer's state to
   2231             // DataState explicitly if the tokenizer didn't have a chance to.
   2232             ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
   2233             m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
   2234             return;
   2235         }
   2236         m_tree.openElements()->pop();
   2237         setInsertionMode(m_originalInsertionMode);
   2238         break;
   2239     case InFramesetMode:
   2240         ASSERT(insertionMode() == InFramesetMode);
   2241         if (token.name() == framesetTag) {
   2242             if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
   2243                 parseError(token);
   2244                 return;
   2245             }
   2246             m_tree.openElements()->pop();
   2247             if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
   2248                 setInsertionMode(AfterFramesetMode);
   2249             return;
   2250         }
   2251         break;
   2252     case AfterFramesetMode:
   2253         ASSERT(insertionMode() == AfterFramesetMode);
   2254         if (token.name() == htmlTag) {
   2255             setInsertionMode(AfterAfterFramesetMode);
   2256             return;
   2257         }
   2258         // Fall through.
   2259     case AfterAfterFramesetMode:
   2260         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
   2261         parseError(token);
   2262         break;
   2263     case InSelectInTableMode:
   2264         ASSERT(insertionMode() == InSelectInTableMode);
   2265         if (token.name() == captionTag
   2266             || token.name() == tableTag
   2267             || isTableBodyContextTag(token.name())
   2268             || token.name() == trTag
   2269             || isTableCellContextTag(token.name())) {
   2270             parseError(token);
   2271             if (m_tree.openElements()->inTableScope(token.name())) {
   2272                 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
   2273                 processEndTag(endSelect);
   2274                 reprocessEndTag(token);
   2275             }
   2276             return;
   2277         }
   2278         // Fall through.
   2279     case InSelectMode:
   2280         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
   2281         if (token.name() == optgroupTag) {
   2282             if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
   2283                 processFakeEndTag(optionTag);
   2284             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
   2285                 m_tree.openElements()->pop();
   2286                 return;
   2287             }
   2288             parseError(token);
   2289             return;
   2290         }
   2291         if (token.name() == optionTag) {
   2292             if (m_tree.currentNode()->hasTagName(optionTag)) {
   2293                 m_tree.openElements()->pop();
   2294                 return;
   2295             }
   2296             parseError(token);
   2297             return;
   2298         }
   2299         if (token.name() == selectTag) {
   2300             if (!m_tree.openElements()->inSelectScope(token.name())) {
   2301                 ASSERT(isParsingFragment());
   2302                 parseError(token);
   2303                 return;
   2304             }
   2305             m_tree.openElements()->popUntilPopped(selectTag.localName());
   2306             resetInsertionModeAppropriately();
   2307             return;
   2308         }
   2309         break;
   2310     case InTableTextMode:
   2311         defaultForInTableText();
   2312         processEndTag(token);
   2313         break;
   2314     case InForeignContentMode:
   2315         if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
   2316             notImplemented();
   2317             return;
   2318         }
   2319         if (!isInHTMLNamespace(m_tree.currentNode())) {
   2320             // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
   2321             HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
   2322             if (!nodeRecord->node()->hasLocalName(token.name()))
   2323                 parseError(token);
   2324             while (1) {
   2325                 if (nodeRecord->node()->hasLocalName(token.name())) {
   2326                     m_tree.openElements()->popUntilPopped(nodeRecord->element());
   2327                     resetForeignInsertionMode();
   2328                     return;
   2329                 }
   2330                 nodeRecord = nodeRecord->next();
   2331 
   2332                 if (isInHTMLNamespace(nodeRecord->node()))
   2333                     break;
   2334             }
   2335         }
   2336         // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
   2337         processForeignContentUsingInBodyModeAndResetMode(token);
   2338         break;
   2339     }
   2340 }
   2341 
   2342 void HTMLTreeBuilder::prepareToReprocessToken()
   2343 {
   2344     if (m_hasPendingForeignInsertionModeSteps) {
   2345         resetForeignInsertionMode();
   2346         m_hasPendingForeignInsertionModeSteps = false;
   2347     }
   2348 }
   2349 
   2350 void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token)
   2351 {
   2352     prepareToReprocessToken();
   2353     processStartTag(token);
   2354 }
   2355 
   2356 void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token)
   2357 {
   2358     prepareToReprocessToken();
   2359     processEndTag(token);
   2360 }
   2361 
   2362 class HTMLTreeBuilder::FakeInsertionMode {
   2363     WTF_MAKE_NONCOPYABLE(FakeInsertionMode);
   2364 public:
   2365     FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
   2366         : m_treeBuilder(treeBuilder)
   2367         , m_originalMode(treeBuilder->insertionMode())
   2368     {
   2369         m_treeBuilder->setFakeInsertionMode(mode);
   2370     }
   2371 
   2372     ~FakeInsertionMode()
   2373     {
   2374         if (m_treeBuilder->isFakeInsertionMode())
   2375             m_treeBuilder->setInsertionMode(m_originalMode);
   2376     }
   2377 
   2378 private:
   2379     HTMLTreeBuilder* m_treeBuilder;
   2380     InsertionMode m_originalMode;
   2381 };
   2382 
   2383 void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
   2384 {
   2385     m_hasPendingForeignInsertionModeSteps = true;
   2386     {
   2387         FakeInsertionMode fakeMode(this, InBodyMode);
   2388         processToken(token);
   2389     }
   2390     if (m_hasPendingForeignInsertionModeSteps)
   2391         resetForeignInsertionMode();
   2392 }
   2393 
   2394 void HTMLTreeBuilder::resetForeignInsertionMode()
   2395 {
   2396     if (insertionMode() == InForeignContentMode)
   2397         resetInsertionModeAppropriately();
   2398 }
   2399 
   2400 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
   2401 {
   2402     ASSERT(token.type() == HTMLToken::Comment);
   2403     if (m_insertionMode == InitialMode
   2404         || m_insertionMode == BeforeHTMLMode
   2405         || m_insertionMode == AfterAfterBodyMode
   2406         || m_insertionMode == AfterAfterFramesetMode) {
   2407         m_tree.insertCommentOnDocument(token);
   2408         return;
   2409     }
   2410     if (m_insertionMode == AfterBodyMode) {
   2411         m_tree.insertCommentOnHTMLHtmlElement(token);
   2412         return;
   2413     }
   2414     if (m_insertionMode == InTableTextMode) {
   2415         defaultForInTableText();
   2416         processComment(token);
   2417         return;
   2418     }
   2419     m_tree.insertComment(token);
   2420 }
   2421 
   2422 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
   2423 {
   2424     ASSERT(token.type() == HTMLToken::Character);
   2425     ExternalCharacterTokenBuffer buffer(token);
   2426     processCharacterBuffer(buffer);
   2427 }
   2428 
   2429 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
   2430 {
   2431 ReprocessBuffer:
   2432     switch (insertionMode()) {
   2433     case InitialMode: {
   2434         ASSERT(insertionMode() == InitialMode);
   2435         buffer.skipLeadingWhitespace();
   2436         if (buffer.isEmpty())
   2437             return;
   2438         defaultForInitial();
   2439         // Fall through.
   2440     }
   2441     case BeforeHTMLMode: {
   2442         ASSERT(insertionMode() == BeforeHTMLMode);
   2443         buffer.skipLeadingWhitespace();
   2444         if (buffer.isEmpty())
   2445             return;
   2446         defaultForBeforeHTML();
   2447         // Fall through.
   2448     }
   2449     case BeforeHeadMode: {
   2450         ASSERT(insertionMode() == BeforeHeadMode);
   2451         buffer.skipLeadingWhitespace();
   2452         if (buffer.isEmpty())
   2453             return;
   2454         defaultForBeforeHead();
   2455         // Fall through.
   2456     }
   2457     case InHeadMode: {
   2458         ASSERT(insertionMode() == InHeadMode);
   2459         String leadingWhitespace = buffer.takeLeadingWhitespace();
   2460         if (!leadingWhitespace.isEmpty())
   2461             m_tree.insertTextNode(leadingWhitespace);
   2462         if (buffer.isEmpty())
   2463             return;
   2464         defaultForInHead();
   2465         // Fall through.
   2466     }
   2467     case AfterHeadMode: {
   2468         ASSERT(insertionMode() == AfterHeadMode);
   2469         String leadingWhitespace = buffer.takeLeadingWhitespace();
   2470         if (!leadingWhitespace.isEmpty())
   2471             m_tree.insertTextNode(leadingWhitespace);
   2472         if (buffer.isEmpty())
   2473             return;
   2474         defaultForAfterHead();
   2475         // Fall through.
   2476     }
   2477     case InBodyMode:
   2478     case InCaptionMode:
   2479     case InCellMode: {
   2480         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
   2481         m_tree.reconstructTheActiveFormattingElements();
   2482         String characters = buffer.takeRemaining();
   2483         m_tree.insertTextNode(characters);
   2484         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
   2485             m_framesetOk = false;
   2486         break;
   2487     }
   2488     case InTableMode:
   2489     case InTableBodyMode:
   2490     case InRowMode: {
   2491         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
   2492         ASSERT(m_pendingTableCharacters.isEmpty());
   2493         m_originalInsertionMode = m_insertionMode;
   2494         setInsertionMode(InTableTextMode);
   2495         prepareToReprocessToken();
   2496         // Fall through.
   2497     }
   2498     case InTableTextMode: {
   2499         buffer.giveRemainingTo(m_pendingTableCharacters);
   2500         break;
   2501     }
   2502     case InColumnGroupMode: {
   2503         ASSERT(insertionMode() == InColumnGroupMode);
   2504         String leadingWhitespace = buffer.takeLeadingWhitespace();
   2505         if (!leadingWhitespace.isEmpty())
   2506             m_tree.insertTextNode(leadingWhitespace);
   2507         if (buffer.isEmpty())
   2508             return;
   2509         if (!processColgroupEndTagForInColumnGroup()) {
   2510             ASSERT(isParsingFragment());
   2511             // The spec tells us to drop these characters on the floor.
   2512             buffer.takeLeadingNonWhitespace();
   2513             if (buffer.isEmpty())
   2514                 return;
   2515         }
   2516         prepareToReprocessToken();
   2517         goto ReprocessBuffer;
   2518     }
   2519     case AfterBodyMode:
   2520     case AfterAfterBodyMode: {
   2521         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
   2522         // FIXME: parse error
   2523         setInsertionMode(InBodyMode);
   2524         prepareToReprocessToken();
   2525         goto ReprocessBuffer;
   2526         break;
   2527     }
   2528     case TextMode: {
   2529         ASSERT(insertionMode() == TextMode);
   2530         m_tree.insertTextNode(buffer.takeRemaining());
   2531         break;
   2532     }
   2533     case InHeadNoscriptMode: {
   2534         ASSERT(insertionMode() == InHeadNoscriptMode);
   2535         String leadingWhitespace = buffer.takeLeadingWhitespace();
   2536         if (!leadingWhitespace.isEmpty())
   2537             m_tree.insertTextNode(leadingWhitespace);
   2538         if (buffer.isEmpty())
   2539             return;
   2540         defaultForInHeadNoscript();
   2541         goto ReprocessBuffer;
   2542         break;
   2543     }
   2544     case InFramesetMode:
   2545     case AfterFramesetMode: {
   2546         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
   2547         String leadingWhitespace = buffer.takeRemainingWhitespace();
   2548         if (!leadingWhitespace.isEmpty())
   2549             m_tree.insertTextNode(leadingWhitespace);
   2550         // FIXME: We should generate a parse error if we skipped over any
   2551         // non-whitespace characters.
   2552         break;
   2553     }
   2554     case InSelectInTableMode:
   2555     case InSelectMode: {
   2556         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
   2557         m_tree.insertTextNode(buffer.takeRemaining());
   2558         break;
   2559     }
   2560     case InForeignContentMode: {
   2561         ASSERT(insertionMode() == InForeignContentMode);
   2562         String characters = buffer.takeRemaining();
   2563         m_tree.insertTextNode(characters);
   2564         if (m_framesetOk && !isAllWhitespace(characters))
   2565             m_framesetOk = false;
   2566         break;
   2567     }
   2568     case AfterAfterFramesetMode: {
   2569         String leadingWhitespace = buffer.takeRemainingWhitespace();
   2570         if (!leadingWhitespace.isEmpty()) {
   2571             m_tree.reconstructTheActiveFormattingElements();
   2572             m_tree.insertTextNode(leadingWhitespace);
   2573         }
   2574         // FIXME: We should generate a parse error if we skipped over any
   2575         // non-whitespace characters.
   2576         break;
   2577     }
   2578     }
   2579 }
   2580 
   2581 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
   2582 {
   2583     ASSERT(token.type() == HTMLToken::EndOfFile);
   2584     switch (insertionMode()) {
   2585     case InitialMode:
   2586         ASSERT(insertionMode() == InitialMode);
   2587         defaultForInitial();
   2588         // Fall through.
   2589     case BeforeHTMLMode:
   2590         ASSERT(insertionMode() == BeforeHTMLMode);
   2591         defaultForBeforeHTML();
   2592         // Fall through.
   2593     case BeforeHeadMode:
   2594         ASSERT(insertionMode() == BeforeHeadMode);
   2595         defaultForBeforeHead();
   2596         // Fall through.
   2597     case InHeadMode:
   2598         ASSERT(insertionMode() == InHeadMode);
   2599         defaultForInHead();
   2600         // Fall through.
   2601     case AfterHeadMode:
   2602         ASSERT(insertionMode() == AfterHeadMode);
   2603         defaultForAfterHead();
   2604         // Fall through
   2605     case InBodyMode:
   2606     case InCellMode:
   2607     case InCaptionMode:
   2608     case InRowMode:
   2609         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
   2610         notImplemented(); // Emit parse error based on what elements are still open.
   2611         break;
   2612     case AfterBodyMode:
   2613     case AfterAfterBodyMode:
   2614         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
   2615         break;
   2616     case InHeadNoscriptMode:
   2617         ASSERT(insertionMode() == InHeadNoscriptMode);
   2618         defaultForInHeadNoscript();
   2619         processEndOfFile(token);
   2620         return;
   2621     case AfterFramesetMode:
   2622     case AfterAfterFramesetMode:
   2623         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
   2624         break;
   2625     case InFramesetMode:
   2626     case InTableMode:
   2627     case InTableBodyMode:
   2628     case InSelectInTableMode:
   2629     case InSelectMode:
   2630         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
   2631         if (m_tree.currentNode() != m_tree.openElements()->rootNode())
   2632             parseError(token);
   2633         break;
   2634     case InColumnGroupMode:
   2635         if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
   2636             ASSERT(isParsingFragment());
   2637             return; // FIXME: Should we break here instead of returning?
   2638         }
   2639         if (!processColgroupEndTagForInColumnGroup()) {
   2640             ASSERT(isParsingFragment());
   2641             return; // FIXME: Should we break here instead of returning?
   2642         }
   2643         prepareToReprocessToken();
   2644         processEndOfFile(token);
   2645         return;
   2646     case InForeignContentMode:
   2647         setInsertionMode(InBodyMode);
   2648         processEndOfFile(token);
   2649         return;
   2650     case InTableTextMode:
   2651         defaultForInTableText();
   2652         processEndOfFile(token);
   2653         return;
   2654     case TextMode:
   2655         parseError(token);
   2656         if (m_tree.currentNode()->hasTagName(scriptTag))
   2657             notImplemented(); // mark the script element as "already started".
   2658         m_tree.openElements()->pop();
   2659         setInsertionMode(m_originalInsertionMode);
   2660         prepareToReprocessToken();
   2661         processEndOfFile(token);
   2662         return;
   2663     }
   2664     ASSERT(m_tree.currentNode());
   2665     m_tree.openElements()->popAll();
   2666 }
   2667 
   2668 void HTMLTreeBuilder::defaultForInitial()
   2669 {
   2670     notImplemented();
   2671     if (!m_fragmentContext.fragment())
   2672         m_document->setCompatibilityMode(Document::QuirksMode);
   2673     // FIXME: parse error
   2674     setInsertionMode(BeforeHTMLMode);
   2675     prepareToReprocessToken();
   2676 }
   2677 
   2678 void HTMLTreeBuilder::defaultForBeforeHTML()
   2679 {
   2680     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
   2681     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
   2682     setInsertionMode(BeforeHeadMode);
   2683     prepareToReprocessToken();
   2684 }
   2685 
   2686 void HTMLTreeBuilder::defaultForBeforeHead()
   2687 {
   2688     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
   2689     processStartTag(startHead);
   2690     prepareToReprocessToken();
   2691 }
   2692 
   2693 void HTMLTreeBuilder::defaultForInHead()
   2694 {
   2695     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
   2696     processEndTag(endHead);
   2697     prepareToReprocessToken();
   2698 }
   2699 
   2700 void HTMLTreeBuilder::defaultForInHeadNoscript()
   2701 {
   2702     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
   2703     processEndTag(endNoscript);
   2704     prepareToReprocessToken();
   2705 }
   2706 
   2707 void HTMLTreeBuilder::defaultForAfterHead()
   2708 {
   2709     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
   2710     processStartTag(startBody);
   2711     m_framesetOk = true;
   2712     prepareToReprocessToken();
   2713 }
   2714 
   2715 void HTMLTreeBuilder::defaultForInTableText()
   2716 {
   2717     String characters = String::adopt(m_pendingTableCharacters);
   2718     if (!isAllWhitespace(characters)) {
   2719         // FIXME: parse error
   2720         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
   2721         m_tree.reconstructTheActiveFormattingElements();
   2722         m_tree.insertTextNode(characters);
   2723         m_framesetOk = false;
   2724         setInsertionMode(m_originalInsertionMode);
   2725         prepareToReprocessToken();
   2726         return;
   2727     }
   2728     m_tree.insertTextNode(characters);
   2729     setInsertionMode(m_originalInsertionMode);
   2730     prepareToReprocessToken();
   2731 }
   2732 
   2733 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
   2734 {
   2735     ASSERT(token.type() == HTMLToken::StartTag);
   2736     if (token.name() == htmlTag) {
   2737         m_tree.insertHTMLHtmlStartTagInBody(token);
   2738         return true;
   2739     }
   2740     if (token.name() == baseTag
   2741         || token.name() == basefontTag
   2742         || token.name() == bgsoundTag
   2743         || token.name() == commandTag
   2744         || token.name() == linkTag
   2745         || token.name() == metaTag) {
   2746         m_tree.insertSelfClosingHTMLElement(token);
   2747         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
   2748         return true;
   2749     }
   2750     if (token.name() == titleTag) {
   2751         processGenericRCDATAStartTag(token);
   2752         return true;
   2753     }
   2754     if (token.name() == noscriptTag) {
   2755         if (scriptEnabled(m_document->frame())) {
   2756             processGenericRawTextStartTag(token);
   2757             return true;
   2758         }
   2759         m_tree.insertHTMLElement(token);
   2760         setInsertionMode(InHeadNoscriptMode);
   2761         return true;
   2762     }
   2763     if (token.name() == noframesTag || token.name() == styleTag) {
   2764         processGenericRawTextStartTag(token);
   2765         return true;
   2766     }
   2767     if (token.name() == scriptTag) {
   2768         processScriptStartTag(token);
   2769         if (m_usePreHTML5ParserQuirks && token.selfClosing())
   2770             processFakeEndTag(scriptTag);
   2771         return true;
   2772     }
   2773     if (token.name() == headTag) {
   2774         parseError(token);
   2775         return true;
   2776     }
   2777     return false;
   2778 }
   2779 
   2780 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
   2781 {
   2782     ASSERT(token.type() == HTMLToken::StartTag);
   2783     m_tree.insertHTMLElement(token);
   2784     m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
   2785     m_originalInsertionMode = m_insertionMode;
   2786     setInsertionMode(TextMode);
   2787 }
   2788 
   2789 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
   2790 {
   2791     ASSERT(token.type() == HTMLToken::StartTag);
   2792     m_tree.insertHTMLElement(token);
   2793     m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
   2794     m_originalInsertionMode = m_insertionMode;
   2795     setInsertionMode(TextMode);
   2796 }
   2797 
   2798 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
   2799 {
   2800     ASSERT(token.type() == HTMLToken::StartTag);
   2801     m_tree.insertScriptElement(token);
   2802     m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
   2803     m_originalInsertionMode = m_insertionMode;
   2804 
   2805     TextPosition0 position = m_parser->textPosition();
   2806 
   2807     ASSERT(position.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber());
   2808 
   2809     m_lastScriptElementStartPosition = position;
   2810 
   2811     setInsertionMode(TextMode);
   2812 }
   2813 
   2814 void HTMLTreeBuilder::finished()
   2815 {
   2816     if (isParsingFragment())
   2817         return;
   2818 
   2819     ASSERT(m_document);
   2820     // Warning, this may detach the parser. Do not do anything else after this.
   2821     m_document->finishedParsing();
   2822 }
   2823 
   2824 void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
   2825 {
   2826 }
   2827 
   2828 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
   2829 {
   2830     if (!frame)
   2831         return false;
   2832     return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
   2833 }
   2834 
   2835 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
   2836 {
   2837     if (!frame)
   2838         return false;
   2839     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
   2840 }
   2841 
   2842 }
   2843