1 /* 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "core/html/parser/HTMLTreeBuilderSimulator.h" 28 29 #include "HTMLNames.h" 30 #include "MathMLNames.h" 31 #include "SVGNames.h" 32 #include "core/html/parser/HTMLParserIdioms.h" 33 #include "core/html/parser/HTMLTokenizer.h" 34 #include "core/html/parser/HTMLTreeBuilder.h" 35 36 namespace WebCore { 37 38 using namespace HTMLNames; 39 40 static bool tokenExitsForeignContent(const CompactHTMLToken& token) 41 { 42 // FIXME: This is copied from HTMLTreeBuilder::processTokenInForeignContent and changed to use threadSafeHTMLNamesMatch. 43 const String& tagName = token.data(); 44 return threadSafeMatch(tagName, bTag) 45 || threadSafeMatch(tagName, bigTag) 46 || threadSafeMatch(tagName, blockquoteTag) 47 || threadSafeMatch(tagName, bodyTag) 48 || threadSafeMatch(tagName, brTag) 49 || threadSafeMatch(tagName, centerTag) 50 || threadSafeMatch(tagName, codeTag) 51 || threadSafeMatch(tagName, ddTag) 52 || threadSafeMatch(tagName, divTag) 53 || threadSafeMatch(tagName, dlTag) 54 || threadSafeMatch(tagName, dtTag) 55 || threadSafeMatch(tagName, emTag) 56 || threadSafeMatch(tagName, embedTag) 57 || threadSafeMatch(tagName, h1Tag) 58 || threadSafeMatch(tagName, h2Tag) 59 || threadSafeMatch(tagName, h3Tag) 60 || threadSafeMatch(tagName, h4Tag) 61 || threadSafeMatch(tagName, h5Tag) 62 || threadSafeMatch(tagName, h6Tag) 63 || threadSafeMatch(tagName, headTag) 64 || threadSafeMatch(tagName, hrTag) 65 || threadSafeMatch(tagName, iTag) 66 || threadSafeMatch(tagName, imgTag) 67 || threadSafeMatch(tagName, liTag) 68 || threadSafeMatch(tagName, listingTag) 69 || threadSafeMatch(tagName, menuTag) 70 || threadSafeMatch(tagName, metaTag) 71 || threadSafeMatch(tagName, nobrTag) 72 || threadSafeMatch(tagName, olTag) 73 || threadSafeMatch(tagName, pTag) 74 || threadSafeMatch(tagName, preTag) 75 || threadSafeMatch(tagName, rubyTag) 76 || threadSafeMatch(tagName, sTag) 77 || threadSafeMatch(tagName, smallTag) 78 || threadSafeMatch(tagName, spanTag) 79 || threadSafeMatch(tagName, strongTag) 80 || threadSafeMatch(tagName, strikeTag) 81 || threadSafeMatch(tagName, subTag) 82 || threadSafeMatch(tagName, supTag) 83 || threadSafeMatch(tagName, tableTag) 84 || threadSafeMatch(tagName, ttTag) 85 || threadSafeMatch(tagName, uTag) 86 || threadSafeMatch(tagName, ulTag) 87 || threadSafeMatch(tagName, varTag) 88 || (threadSafeMatch(tagName, fontTag) && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr))); 89 } 90 91 static bool tokenExitsSVG(const CompactHTMLToken& token) 92 { 93 // FIXME: It's very fragile that we special case foreignObject here to be case-insensitive. 94 return equalIgnoringCaseNonNull(token.data().impl(), SVGNames::foreignObjectTag.localName().impl()); 95 } 96 97 static bool tokenExitsMath(const CompactHTMLToken& token) 98 { 99 // FIXME: This is copied from HTMLElementStack::isMathMLTextIntegrationPoint and changed to use threadSafeMatch. 100 const String& tagName = token.data(); 101 return threadSafeMatch(tagName, MathMLNames::miTag) 102 || threadSafeMatch(tagName, MathMLNames::moTag) 103 || threadSafeMatch(tagName, MathMLNames::mnTag) 104 || threadSafeMatch(tagName, MathMLNames::msTag) 105 || threadSafeMatch(tagName, MathMLNames::mtextTag); 106 } 107 108 HTMLTreeBuilderSimulator::HTMLTreeBuilderSimulator(const HTMLParserOptions& options) 109 : m_options(options) 110 { 111 m_namespaceStack.append(HTML); 112 } 113 114 HTMLTreeBuilderSimulator::State HTMLTreeBuilderSimulator::stateFor(HTMLTreeBuilder* treeBuilder) 115 { 116 ASSERT(isMainThread()); 117 State namespaceStack; 118 for (HTMLElementStack::ElementRecord* record = treeBuilder->openElements()->topRecord(); record; record = record->next()) { 119 Namespace currentNamespace = HTML; 120 if (record->namespaceURI() == SVGNames::svgNamespaceURI) 121 currentNamespace = SVG; 122 else if (record->namespaceURI() == MathMLNames::mathmlNamespaceURI) 123 currentNamespace = MathML; 124 125 if (namespaceStack.isEmpty() || namespaceStack.last() != currentNamespace) 126 namespaceStack.append(currentNamespace); 127 } 128 namespaceStack.reverse(); 129 return namespaceStack; 130 } 131 132 bool HTMLTreeBuilderSimulator::simulate(const CompactHTMLToken& token, HTMLTokenizer* tokenizer) 133 { 134 if (token.type() == HTMLToken::StartTag) { 135 const String& tagName = token.data(); 136 if (threadSafeMatch(tagName, SVGNames::svgTag)) 137 m_namespaceStack.append(SVG); 138 if (threadSafeMatch(tagName, MathMLNames::mathTag)) 139 m_namespaceStack.append(MathML); 140 if (inForeignContent() && tokenExitsForeignContent(token)) 141 m_namespaceStack.removeLast(); 142 if ((m_namespaceStack.last() == SVG && tokenExitsSVG(token)) 143 || (m_namespaceStack.last() == MathML && tokenExitsMath(token))) 144 m_namespaceStack.append(HTML); 145 if (!inForeignContent()) { 146 // FIXME: This is just a copy of Tokenizer::updateStateFor which uses threadSafeMatches. 147 if (threadSafeMatch(tagName, textareaTag) || threadSafeMatch(tagName, titleTag)) 148 tokenizer->setState(HTMLTokenizer::RCDATAState); 149 else if (threadSafeMatch(tagName, plaintextTag)) 150 tokenizer->setState(HTMLTokenizer::PLAINTEXTState); 151 else if (threadSafeMatch(tagName, scriptTag)) 152 tokenizer->setState(HTMLTokenizer::ScriptDataState); 153 else if (threadSafeMatch(tagName, styleTag) 154 || threadSafeMatch(tagName, iframeTag) 155 || threadSafeMatch(tagName, xmpTag) 156 || (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled) 157 || threadSafeMatch(tagName, noframesTag) 158 || (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled)) 159 tokenizer->setState(HTMLTokenizer::RAWTEXTState); 160 } 161 } 162 163 if (token.type() == HTMLToken::EndTag) { 164 const String& tagName = token.data(); 165 if ((m_namespaceStack.last() == SVG && threadSafeMatch(tagName, SVGNames::svgTag)) 166 || (m_namespaceStack.last() == MathML && threadSafeMatch(tagName, MathMLNames::mathTag)) 167 || (m_namespaceStack.contains(SVG) && m_namespaceStack.last() == HTML && tokenExitsSVG(token)) 168 || (m_namespaceStack.contains(MathML) && m_namespaceStack.last() == HTML && tokenExitsMath(token))) 169 m_namespaceStack.removeLast(); 170 if (threadSafeMatch(tagName, scriptTag)) { 171 if (!inForeignContent()) 172 tokenizer->setState(HTMLTokenizer::DataState); 173 return false; 174 } 175 } 176 177 // FIXME: Also setForceNullCharacterReplacement when in text mode. 178 tokenizer->setForceNullCharacterReplacement(inForeignContent()); 179 tokenizer->setShouldAllowCDATA(inForeignContent()); 180 return true; 181 } 182 183 } 184