1 /* 2 * Copyright (C) 2000 Peter Kelly (pmk (at) post.com) 3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved. 4 * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org) 5 * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org) 6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 7 * Copyright (C) 2008 Holger Hans Peter Freyther 8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 9 * 10 * This library is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Library General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Library General Public License for more details. 19 * 20 * You should have received a copy of the GNU Library General Public License 21 * along with this library; see the file COPYING.LIB. If not, write to 22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 23 * Boston, MA 02110-1301, USA. 24 */ 25 26 #include "config.h" 27 #include "XMLDocumentParser.h" 28 29 #include "CDATASection.h" 30 #include "CachedScript.h" 31 #include "Comment.h" 32 #include "CachedResourceLoader.h" 33 #include "Document.h" 34 #include "DocumentFragment.h" 35 #include "DocumentType.h" 36 #include "Frame.h" 37 #include "FrameLoader.h" 38 #include "FrameView.h" 39 #include "HTMLLinkElement.h" 40 #include "HTMLNames.h" 41 #include "HTMLStyleElement.h" 42 #include "ImageLoader.h" 43 #include "ProcessingInstruction.h" 44 #include "ResourceError.h" 45 #include "ResourceHandle.h" 46 #include "ResourceRequest.h" 47 #include "ResourceResponse.h" 48 #include "ScriptController.h" 49 #include "ScriptElement.h" 50 #include "ScriptSourceCode.h" 51 #include "ScriptValue.h" 52 #include "TextResourceDecoder.h" 53 #include "TreeDepthLimit.h" 54 #include <wtf/text/StringConcatenate.h> 55 #include <wtf/StringExtras.h> 56 #include <wtf/Threading.h> 57 #include <wtf/Vector.h> 58 59 #if ENABLE(SVG) 60 #include "SVGNames.h" 61 #include "SVGStyleElement.h" 62 #endif 63 64 using namespace std; 65 66 namespace WebCore { 67 68 using namespace HTMLNames; 69 70 const int maxErrors = 25; 71 72 #if ENABLE(WML) 73 bool XMLDocumentParser::isWMLDocument() const 74 { 75 return document()->isWMLDocument(); 76 } 77 #endif 78 79 void XMLDocumentParser::pushCurrentNode(Node* n) 80 { 81 ASSERT(n); 82 ASSERT(m_currentNode); 83 if (n != document()) 84 n->ref(); 85 m_currentNodeStack.append(m_currentNode); 86 m_currentNode = n; 87 if (m_currentNodeStack.size() > maxDOMTreeDepth) 88 handleError(fatal, "Excessive node nesting.", lineNumber(), columnNumber()); 89 } 90 91 void XMLDocumentParser::popCurrentNode() 92 { 93 if (!m_currentNode) 94 return; 95 ASSERT(m_currentNodeStack.size()); 96 97 if (m_currentNode != document()) 98 m_currentNode->deref(); 99 100 m_currentNode = m_currentNodeStack.last(); 101 m_currentNodeStack.removeLast(); 102 } 103 104 void XMLDocumentParser::clearCurrentNodeStack() 105 { 106 if (m_currentNode && m_currentNode != document()) 107 m_currentNode->deref(); 108 m_currentNode = 0; 109 110 if (m_currentNodeStack.size()) { // Aborted parsing. 111 for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i) 112 m_currentNodeStack[i]->deref(); 113 if (m_currentNodeStack[0] && m_currentNodeStack[0] != document()) 114 m_currentNodeStack[0]->deref(); 115 m_currentNodeStack.clear(); 116 } 117 } 118 119 void XMLDocumentParser::insert(const SegmentedString&) 120 { 121 ASSERT_NOT_REACHED(); 122 } 123 124 void XMLDocumentParser::append(const SegmentedString& s) 125 { 126 String parseString = s.toString(); 127 128 if (m_sawXSLTransform || !m_sawFirstElement) 129 m_originalSourceForTransform += parseString; 130 131 if (isStopped() || m_sawXSLTransform) 132 return; 133 134 if (m_parserPaused) { 135 m_pendingSrc.append(s); 136 return; 137 } 138 139 doWrite(s.toString()); 140 141 // After parsing, go ahead and dispatch image beforeload events. 142 ImageLoader::dispatchPendingBeforeLoadEvents(); 143 } 144 145 void XMLDocumentParser::handleError(ErrorType type, const char* m, int lineNumber, int columnNumber) 146 { 147 handleError(type, m, TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(lineNumber), WTF::OneBasedNumber::fromOneBasedInt(columnNumber))); 148 } 149 150 void XMLDocumentParser::handleError(ErrorType type, const char* m, TextPosition1 position) 151 { 152 if (type == fatal || (m_errorCount < maxErrors && m_lastErrorPosition.m_line != position.m_line && m_lastErrorPosition.m_column != position.m_column)) { 153 switch (type) { 154 case warning: 155 m_errorMessages += makeString("warning on line ", String::number(position.m_line.oneBasedInt()), " at column ", String::number(position.m_column.oneBasedInt()), ": ", m); 156 break; 157 case fatal: 158 case nonFatal: 159 m_errorMessages += makeString("error on line ", String::number(position.m_line.oneBasedInt()), " at column ", String::number(position.m_column.oneBasedInt()), ": ", m); 160 } 161 162 m_lastErrorPosition = position; 163 ++m_errorCount; 164 } 165 166 if (type != warning) 167 m_sawError = true; 168 169 if (type == fatal) 170 stopParsing(); 171 } 172 173 void XMLDocumentParser::enterText() 174 { 175 #if !USE(QXMLSTREAM) 176 ASSERT(m_bufferedText.size() == 0); 177 #endif 178 RefPtr<Node> newNode = Text::create(document(), ""); 179 m_currentNode->deprecatedParserAddChild(newNode.get()); 180 pushCurrentNode(newNode.get()); 181 } 182 183 #if !USE(QXMLSTREAM) 184 static inline String toString(const xmlChar* string, size_t size) 185 { 186 return String::fromUTF8(reinterpret_cast<const char*>(string), size); 187 } 188 #endif 189 190 191 void XMLDocumentParser::exitText() 192 { 193 if (isStopped()) 194 return; 195 196 if (!m_currentNode || !m_currentNode->isTextNode()) 197 return; 198 199 #if !USE(QXMLSTREAM) 200 ExceptionCode ec = 0; 201 static_cast<Text*>(m_currentNode)->appendData(toString(m_bufferedText.data(), m_bufferedText.size()), ec); 202 Vector<xmlChar> empty; 203 m_bufferedText.swap(empty); 204 #endif 205 206 if (m_view && m_currentNode && !m_currentNode->attached()) 207 m_currentNode->attach(); 208 209 popCurrentNode(); 210 } 211 212 void XMLDocumentParser::detach() 213 { 214 clearCurrentNodeStack(); 215 ScriptableDocumentParser::detach(); 216 } 217 218 void XMLDocumentParser::end() 219 { 220 // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called. 221 // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case. 222 ASSERT(!m_parsingFragment); 223 224 doEnd(); 225 226 // doEnd() could process a script tag, thus pausing parsing. 227 if (m_parserPaused) 228 return; 229 230 if (m_sawError) 231 insertErrorMessageBlock(); 232 else { 233 exitText(); 234 document()->styleSelectorChanged(RecalcStyleImmediately); 235 } 236 237 if (isParsing()) 238 prepareToStopParsing(); 239 document()->setReadyState(Document::Interactive); 240 clearCurrentNodeStack(); 241 document()->finishedParsing(); 242 } 243 244 void XMLDocumentParser::finish() 245 { 246 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not 247 // makes sense to call any methods on DocumentParser once it's been stopped. 248 // However, FrameLoader::stop calls Document::finishParsing unconditionally 249 // which in turn calls m_parser->finish(). 250 251 if (m_parserPaused) 252 m_finishCalled = true; 253 else 254 end(); 255 } 256 257 bool XMLDocumentParser::finishWasCalled() 258 { 259 return m_finishCalled; 260 } 261 262 static inline RefPtr<Element> createXHTMLParserErrorHeader(Document* doc, const String& errorMessages) 263 { 264 RefPtr<Element> reportElement = doc->createElement(QualifiedName(nullAtom, "parsererror", xhtmlNamespaceURI), false); 265 reportElement->setAttribute(styleAttr, "display: block; white-space: pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black"); 266 267 ExceptionCode ec = 0; 268 RefPtr<Element> h3 = doc->createElement(h3Tag, false); 269 reportElement->appendChild(h3.get(), ec); 270 h3->appendChild(doc->createTextNode("This page contains the following errors:"), ec); 271 272 RefPtr<Element> fixed = doc->createElement(divTag, false); 273 reportElement->appendChild(fixed.get(), ec); 274 fixed->setAttribute(styleAttr, "font-family:monospace;font-size:12px"); 275 fixed->appendChild(doc->createTextNode(errorMessages), ec); 276 277 h3 = doc->createElement(h3Tag, false); 278 reportElement->appendChild(h3.get(), ec); 279 h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), ec); 280 281 return reportElement; 282 } 283 284 void XMLDocumentParser::insertErrorMessageBlock() 285 { 286 #if USE(QXMLSTREAM) 287 if (m_parsingFragment) 288 return; 289 #endif 290 // One or more errors occurred during parsing of the code. Display an error block to the user above 291 // the normal content (the DOM tree is created manually and includes line/col info regarding 292 // where the errors are located) 293 294 // Create elements for display 295 ExceptionCode ec = 0; 296 Document* document = this->document(); 297 RefPtr<Element> documentElement = document->documentElement(); 298 if (!documentElement) { 299 RefPtr<Element> rootElement = document->createElement(htmlTag, false); 300 document->appendChild(rootElement, ec); 301 RefPtr<Element> body = document->createElement(bodyTag, false); 302 rootElement->appendChild(body, ec); 303 documentElement = body.get(); 304 } 305 #if ENABLE(SVG) 306 else if (documentElement->namespaceURI() == SVGNames::svgNamespaceURI) { 307 RefPtr<Element> rootElement = document->createElement(htmlTag, false); 308 RefPtr<Element> body = document->createElement(bodyTag, false); 309 rootElement->appendChild(body, ec); 310 body->appendChild(documentElement, ec); 311 document->appendChild(rootElement.get(), ec); 312 documentElement = body.get(); 313 } 314 #endif 315 #if ENABLE(WML) 316 else if (isWMLDocument()) { 317 RefPtr<Element> rootElement = document->createElement(htmlTag, false); 318 RefPtr<Element> body = document->createElement(bodyTag, false); 319 rootElement->appendChild(body, ec); 320 body->appendChild(documentElement, ec); 321 document->appendChild(rootElement.get(), ec); 322 documentElement = body.get(); 323 } 324 #endif 325 326 RefPtr<Element> reportElement = createXHTMLParserErrorHeader(document, m_errorMessages); 327 documentElement->insertBefore(reportElement, documentElement->firstChild(), ec); 328 #if ENABLE(XSLT) 329 if (document->transformSourceDocument()) { 330 RefPtr<Element> paragraph = document->createElement(pTag, false); 331 paragraph->setAttribute(styleAttr, "white-space: normal"); 332 paragraph->appendChild(document->createTextNode("This document was created as the result of an XSL transformation. The line and column numbers given are from the transformed result."), ec); 333 reportElement->appendChild(paragraph.release(), ec); 334 } 335 #endif 336 document->updateStyleIfNeeded(); 337 } 338 339 void XMLDocumentParser::notifyFinished(CachedResource* unusedResource) 340 { 341 ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript); 342 ASSERT(m_pendingScript->accessCount() > 0); 343 344 ScriptSourceCode sourceCode(m_pendingScript.get()); 345 bool errorOccurred = m_pendingScript->errorOccurred(); 346 347 m_pendingScript->removeClient(this); 348 m_pendingScript = 0; 349 350 RefPtr<Element> e = m_scriptElement; 351 m_scriptElement = 0; 352 353 ScriptElement* scriptElement = toScriptElement(e.get()); 354 ASSERT(scriptElement); 355 356 // JavaScript can detach this parser, make sure it's kept alive even if detached. 357 RefPtr<XMLDocumentParser> protect(this); 358 359 if (errorOccurred) 360 scriptElement->dispatchErrorEvent(); 361 else { 362 scriptElement->executeScript(sourceCode); 363 scriptElement->dispatchLoadEvent(); 364 } 365 366 m_scriptElement = 0; 367 368 if (!isDetached() && !m_requestingScript) 369 resumeParsing(); 370 } 371 372 bool XMLDocumentParser::isWaitingForScripts() const 373 { 374 return m_pendingScript; 375 } 376 377 void XMLDocumentParser::pauseParsing() 378 { 379 if (m_parsingFragment) 380 return; 381 382 m_parserPaused = true; 383 } 384 385 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission) 386 { 387 if (!chunk.length()) 388 return true; 389 390 // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm: 391 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm 392 // For now we have a hack for script/style innerHTML support: 393 if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) { 394 fragment->parserAddChild(fragment->document()->createTextNode(chunk)); 395 return true; 396 } 397 398 RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, scriptingPermission); 399 bool wellFormed = parser->appendFragmentSource(chunk); 400 // Do not call finish(). Current finish() and doEnd() implementations touch the main Document/loader 401 // and can cause crashes in the fragment case. 402 parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction. 403 return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than wellFormed(). 404 } 405 406 } // namespace WebCore 407