Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2013 Google, Inc. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #include "config.h"
     27 #include "core/html/parser/HTMLTreeBuilderSimulator.h"
     28 
     29 #include "HTMLNames.h"
     30 #include "MathMLNames.h"
     31 #include "SVGNames.h"
     32 #include "core/html/parser/HTMLParserIdioms.h"
     33 #include "core/html/parser/HTMLTokenizer.h"
     34 #include "core/html/parser/HTMLTreeBuilder.h"
     35 
     36 namespace WebCore {
     37 
     38 using namespace HTMLNames;
     39 
     40 static bool tokenExitsForeignContent(const CompactHTMLToken& token)
     41 {
     42     // FIXME: This is copied from HTMLTreeBuilder::processTokenInForeignContent and changed to use threadSafeHTMLNamesMatch.
     43     const HTMLIdentifier& tagName = token.data();
     44     return threadSafeHTMLNamesMatch(tagName, bTag)
     45         || threadSafeHTMLNamesMatch(tagName, bigTag)
     46         || threadSafeHTMLNamesMatch(tagName, blockquoteTag)
     47         || threadSafeHTMLNamesMatch(tagName, bodyTag)
     48         || threadSafeHTMLNamesMatch(tagName, brTag)
     49         || threadSafeHTMLNamesMatch(tagName, centerTag)
     50         || threadSafeHTMLNamesMatch(tagName, codeTag)
     51         || threadSafeHTMLNamesMatch(tagName, ddTag)
     52         || threadSafeHTMLNamesMatch(tagName, divTag)
     53         || threadSafeHTMLNamesMatch(tagName, dlTag)
     54         || threadSafeHTMLNamesMatch(tagName, dtTag)
     55         || threadSafeHTMLNamesMatch(tagName, emTag)
     56         || threadSafeHTMLNamesMatch(tagName, embedTag)
     57         || threadSafeHTMLNamesMatch(tagName, h1Tag)
     58         || threadSafeHTMLNamesMatch(tagName, h2Tag)
     59         || threadSafeHTMLNamesMatch(tagName, h3Tag)
     60         || threadSafeHTMLNamesMatch(tagName, h4Tag)
     61         || threadSafeHTMLNamesMatch(tagName, h5Tag)
     62         || threadSafeHTMLNamesMatch(tagName, h6Tag)
     63         || threadSafeHTMLNamesMatch(tagName, headTag)
     64         || threadSafeHTMLNamesMatch(tagName, hrTag)
     65         || threadSafeHTMLNamesMatch(tagName, iTag)
     66         || threadSafeHTMLNamesMatch(tagName, imgTag)
     67         || threadSafeHTMLNamesMatch(tagName, liTag)
     68         || threadSafeHTMLNamesMatch(tagName, listingTag)
     69         || threadSafeHTMLNamesMatch(tagName, menuTag)
     70         || threadSafeHTMLNamesMatch(tagName, metaTag)
     71         || threadSafeHTMLNamesMatch(tagName, nobrTag)
     72         || threadSafeHTMLNamesMatch(tagName, olTag)
     73         || threadSafeHTMLNamesMatch(tagName, pTag)
     74         || threadSafeHTMLNamesMatch(tagName, preTag)
     75         || threadSafeHTMLNamesMatch(tagName, rubyTag)
     76         || threadSafeHTMLNamesMatch(tagName, sTag)
     77         || threadSafeHTMLNamesMatch(tagName, smallTag)
     78         || threadSafeHTMLNamesMatch(tagName, spanTag)
     79         || threadSafeHTMLNamesMatch(tagName, strongTag)
     80         || threadSafeHTMLNamesMatch(tagName, strikeTag)
     81         || threadSafeHTMLNamesMatch(tagName, subTag)
     82         || threadSafeHTMLNamesMatch(tagName, supTag)
     83         || threadSafeHTMLNamesMatch(tagName, tableTag)
     84         || threadSafeHTMLNamesMatch(tagName, ttTag)
     85         || threadSafeHTMLNamesMatch(tagName, uTag)
     86         || threadSafeHTMLNamesMatch(tagName, ulTag)
     87         || threadSafeHTMLNamesMatch(tagName, varTag)
     88         || (threadSafeHTMLNamesMatch(tagName, fontTag) && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)));
     89 }
     90 
     91 static bool tokenExitsSVG(const CompactHTMLToken& token)
     92 {
     93     // FIXME: It's very fragile that we special case foreignObject here to be case-insensitive.
     94     return equalIgnoringCaseNonNull(token.data().asStringImpl(), SVGNames::foreignObjectTag.localName().impl());
     95 }
     96 
     97 static bool tokenExitsMath(const CompactHTMLToken& token)
     98 {
     99     // FIXME: This is copied from HTMLElementStack::isMathMLTextIntegrationPoint and changed to use threadSafeMatch.
    100     const HTMLIdentifier& tagName = token.data();
    101     return threadSafeMatch(tagName, MathMLNames::miTag)
    102         || threadSafeMatch(tagName, MathMLNames::moTag)
    103         || threadSafeMatch(tagName, MathMLNames::mnTag)
    104         || threadSafeMatch(tagName, MathMLNames::msTag)
    105         || threadSafeMatch(tagName, MathMLNames::mtextTag);
    106 }
    107 
    108 HTMLTreeBuilderSimulator::HTMLTreeBuilderSimulator(const HTMLParserOptions& options)
    109     : m_options(options)
    110 {
    111     m_namespaceStack.append(HTML);
    112 }
    113 
    114 HTMLTreeBuilderSimulator::State HTMLTreeBuilderSimulator::stateFor(HTMLTreeBuilder* treeBuilder)
    115 {
    116     ASSERT(isMainThread());
    117     State namespaceStack;
    118     for (HTMLElementStack::ElementRecord* record = treeBuilder->openElements()->topRecord(); record; record = record->next()) {
    119         Namespace currentNamespace = HTML;
    120         if (record->namespaceURI() == SVGNames::svgNamespaceURI)
    121             currentNamespace = SVG;
    122         else if (record->namespaceURI() == MathMLNames::mathmlNamespaceURI)
    123             currentNamespace = MathML;
    124 
    125         if (namespaceStack.isEmpty() || namespaceStack.last() != currentNamespace)
    126             namespaceStack.append(currentNamespace);
    127     }
    128     namespaceStack.reverse();
    129     return namespaceStack;
    130 }
    131 
    132 bool HTMLTreeBuilderSimulator::simulate(const CompactHTMLToken& token, HTMLTokenizer* tokenizer)
    133 {
    134     if (token.type() == HTMLToken::StartTag) {
    135         const HTMLIdentifier& tagName = token.data();
    136         if (threadSafeMatch(tagName, SVGNames::svgTag))
    137             m_namespaceStack.append(SVG);
    138         if (threadSafeMatch(tagName, MathMLNames::mathTag))
    139             m_namespaceStack.append(MathML);
    140         if (inForeignContent() && tokenExitsForeignContent(token))
    141             m_namespaceStack.removeLast();
    142         if ((m_namespaceStack.last() == SVG && tokenExitsSVG(token))
    143             || (m_namespaceStack.last() == MathML && tokenExitsMath(token)))
    144             m_namespaceStack.append(HTML);
    145         if (!inForeignContent()) {
    146             // FIXME: This is just a copy of Tokenizer::updateStateFor which uses threadSafeMatches.
    147             if (threadSafeHTMLNamesMatch(tagName, textareaTag) || threadSafeHTMLNamesMatch(tagName, titleTag))
    148                 tokenizer->setState(HTMLTokenizer::RCDATAState);
    149             else if (threadSafeHTMLNamesMatch(tagName, plaintextTag))
    150                 tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
    151             else if (threadSafeHTMLNamesMatch(tagName, scriptTag))
    152                 tokenizer->setState(HTMLTokenizer::ScriptDataState);
    153             else if (threadSafeHTMLNamesMatch(tagName, styleTag)
    154                 || threadSafeHTMLNamesMatch(tagName, iframeTag)
    155                 || threadSafeHTMLNamesMatch(tagName, xmpTag)
    156                 || (threadSafeHTMLNamesMatch(tagName, noembedTag) && m_options.pluginsEnabled)
    157                 || threadSafeHTMLNamesMatch(tagName, noframesTag)
    158                 || (threadSafeHTMLNamesMatch(tagName, noscriptTag) && m_options.scriptEnabled))
    159                 tokenizer->setState(HTMLTokenizer::RAWTEXTState);
    160         }
    161     }
    162 
    163     if (token.type() == HTMLToken::EndTag) {
    164         const HTMLIdentifier& tagName = token.data();
    165         if ((m_namespaceStack.last() == SVG && threadSafeMatch(tagName, SVGNames::svgTag))
    166             || (m_namespaceStack.last() == MathML && threadSafeMatch(tagName, MathMLNames::mathTag))
    167             || (m_namespaceStack.contains(SVG) && m_namespaceStack.last() == HTML && tokenExitsSVG(token))
    168             || (m_namespaceStack.contains(MathML) && m_namespaceStack.last() == HTML && tokenExitsMath(token)))
    169             m_namespaceStack.removeLast();
    170         if (threadSafeHTMLNamesMatch(tagName, scriptTag)) {
    171             if (!inForeignContent())
    172                 tokenizer->setState(HTMLTokenizer::DataState);
    173             return false;
    174         }
    175     }
    176 
    177     // FIXME: Also setForceNullCharacterReplacement when in text mode.
    178     tokenizer->setForceNullCharacterReplacement(inForeignContent());
    179     tokenizer->setShouldAllowCDATA(inForeignContent());
    180     return true;
    181 }
    182 
    183 }
    184