1 /* 2 * Copyright (C) 2000 Peter Kelly (pmk (at) post.com) 3 * Copyright (C) 2005, 2006, 2008, 2014 Apple Inc. All rights reserved. 4 * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org) 5 * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org) 6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 7 * Copyright (C) 2008 Holger Hans Peter Freyther 8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 9 * 10 * This library is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Library General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Library General Public License for more details. 19 * 20 * You should have received a copy of the GNU Library General Public License 21 * along with this library; see the file COPYING.LIB. If not, write to 22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 23 * Boston, MA 02110-1301, USA. 24 */ 25 26 #include "config.h" 27 #include "core/xml/parser/XMLDocumentParser.h" 28 29 #include "bindings/core/v8/ExceptionState.h" 30 #include "bindings/core/v8/ExceptionStatePlaceholder.h" 31 #include "bindings/core/v8/ScriptController.h" 32 #include "bindings/core/v8/ScriptSourceCode.h" 33 #include "bindings/core/v8/V8Document.h" 34 #include "core/FetchInitiatorTypeNames.h" 35 #include "core/HTMLNames.h" 36 #include "core/XMLNSNames.h" 37 #include "core/dom/CDATASection.h" 38 #include "core/dom/Comment.h" 39 #include "core/dom/Document.h" 40 #include "core/dom/DocumentFragment.h" 41 #include "core/dom/DocumentType.h" 42 #include "core/dom/ProcessingInstruction.h" 43 #include "core/dom/ScriptLoader.h" 44 #include "core/dom/TransformSource.h" 45 #include "core/fetch/ResourceFetcher.h" 46 #include "core/fetch/ScriptResource.h" 47 #include "core/frame/LocalFrame.h" 48 #include "core/frame/UseCounter.h" 49 #include "core/html/HTMLHtmlElement.h" 50 #include "core/html/HTMLTemplateElement.h" 51 #include "core/html/parser/HTMLEntityParser.h" 52 #include "core/html/parser/TextResourceDecoder.h" 53 #include "core/loader/FrameLoader.h" 54 #include "core/loader/ImageLoader.h" 55 #include "core/svg/graphics/SVGImage.h" 56 #include "core/xml/parser/SharedBufferReader.h" 57 #include "core/xml/parser/XMLDocumentParserScope.h" 58 #include "core/xml/parser/XMLParserInput.h" 59 #include "platform/RuntimeEnabledFeatures.h" 60 #include "platform/SharedBuffer.h" 61 #include "platform/TraceEvent.h" 62 #include "platform/network/ResourceError.h" 63 #include "platform/network/ResourceRequest.h" 64 #include "platform/network/ResourceResponse.h" 65 #include "platform/weborigin/SecurityOrigin.h" 66 #include "wtf/StringExtras.h" 67 #include "wtf/TemporaryChange.h" 68 #include "wtf/Threading.h" 69 #include "wtf/Vector.h" 70 #include "wtf/unicode/UTF8.h" 71 #include <libxml/catalog.h> 72 #include <libxml/parser.h> 73 #include <libxml/parserInternals.h> 74 #include <libxslt/xslt.h> 75 76 namespace blink { 77 78 using namespace HTMLNames; 79 80 // FIXME: HTMLConstructionSite has a limit of 512, should these match? 81 static const unsigned maxXMLTreeDepth = 5000; 82 83 static inline String toString(const xmlChar* string, size_t length) 84 { 85 return String::fromUTF8(reinterpret_cast<const char*>(string), length); 86 } 87 88 static inline String toString(const xmlChar* string) 89 { 90 return String::fromUTF8(reinterpret_cast<const char*>(string)); 91 } 92 93 static inline AtomicString toAtomicString(const xmlChar* string, size_t length) 94 { 95 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length); 96 } 97 98 static inline AtomicString toAtomicString(const xmlChar* string) 99 { 100 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string)); 101 } 102 103 static inline bool hasNoStyleInformation(Document* document) 104 { 105 if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument()) 106 return false; 107 108 if (!document->frame() || !document->frame()->page()) 109 return false; 110 111 if (document->frame()->tree().parent()) 112 return false; // This document is not in a top frame 113 114 if (SVGImage::isInSVGImage(document)) 115 return false; 116 117 return true; 118 } 119 120 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback { 121 public: 122 PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, 123 int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes) 124 : m_localName(localName) 125 , m_prefix(prefix) 126 , m_uri(uri) 127 , m_namespaceCount(namespaceCount) 128 , m_attributeCount(attributeCount) 129 , m_defaultedCount(defaultedCount) 130 { 131 m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2)); 132 for (int i = 0; i < namespaceCount * 2 ; ++i) 133 m_namespaces[i] = xmlStrdup(namespaces[i]); 134 m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5)); 135 for (int i = 0; i < attributeCount; ++i) { 136 // Each attribute has 5 elements in the array: 137 // name, prefix, uri, value and an end pointer. 138 for (int j = 0; j < 3; ++j) 139 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]); 140 int length = attributes[i * 5 + 4] - attributes[i * 5 + 3]; 141 m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length); 142 m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length; 143 } 144 } 145 146 virtual ~PendingStartElementNSCallback() 147 { 148 for (int i = 0; i < m_namespaceCount * 2; ++i) 149 xmlFree(m_namespaces[i]); 150 xmlFree(m_namespaces); 151 for (int i = 0; i < m_attributeCount; ++i) 152 for (int j = 0; j < 4; ++j) 153 xmlFree(m_attributes[i * 5 + j]); 154 xmlFree(m_attributes); 155 } 156 157 virtual void call(XMLDocumentParser* parser) OVERRIDE 158 { 159 parser->startElementNs(m_localName, m_prefix, m_uri, 160 m_namespaceCount, const_cast<const xmlChar**>(m_namespaces), 161 m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes)); 162 } 163 164 private: 165 AtomicString m_localName; 166 AtomicString m_prefix; 167 AtomicString m_uri; 168 int m_namespaceCount; 169 xmlChar** m_namespaces; 170 int m_attributeCount; 171 int m_defaultedCount; 172 xmlChar** m_attributes; 173 }; 174 175 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback { 176 public: 177 virtual void call(XMLDocumentParser* parser) OVERRIDE 178 { 179 parser->endElementNs(); 180 } 181 }; 182 183 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback { 184 public: 185 PendingCharactersCallback(const xmlChar* chars, int length) 186 : m_chars(xmlStrndup(chars, length)) 187 , m_length(length) 188 { 189 } 190 191 virtual ~PendingCharactersCallback() 192 { 193 xmlFree(m_chars); 194 } 195 196 virtual void call(XMLDocumentParser* parser) OVERRIDE 197 { 198 parser->characters(m_chars, m_length); 199 } 200 201 private: 202 xmlChar* m_chars; 203 int m_length; 204 }; 205 206 class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback { 207 public: 208 PendingProcessingInstructionCallback(const String& target, const String& data) 209 : m_target(target) 210 , m_data(data) 211 { 212 } 213 214 virtual void call(XMLDocumentParser* parser) OVERRIDE 215 { 216 parser->processingInstruction(m_target, m_data); 217 } 218 219 private: 220 String m_target; 221 String m_data; 222 }; 223 224 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback { 225 public: 226 explicit PendingCDATABlockCallback(const String& text) : m_text(text) { } 227 228 virtual void call(XMLDocumentParser* parser) OVERRIDE 229 { 230 parser->cdataBlock(m_text); 231 } 232 233 private: 234 String m_text; 235 }; 236 237 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback { 238 public: 239 explicit PendingCommentCallback(const String& text) : m_text(text) { } 240 241 virtual void call(XMLDocumentParser* parser) OVERRIDE 242 { 243 parser->comment(m_text); 244 } 245 246 private: 247 String m_text; 248 }; 249 250 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback { 251 public: 252 PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID) 253 : m_name(name) 254 , m_externalID(externalID) 255 , m_systemID(systemID) 256 { 257 } 258 259 virtual void call(XMLDocumentParser* parser) OVERRIDE 260 { 261 parser->internalSubset(m_name, m_externalID, m_systemID); 262 } 263 264 private: 265 String m_name; 266 String m_externalID; 267 String m_systemID; 268 }; 269 270 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback { 271 public: 272 PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber) 273 : m_type(type) 274 , m_message(xmlStrdup(message)) 275 , m_lineNumber(lineNumber) 276 , m_columnNumber(columnNumber) 277 { 278 } 279 280 virtual ~PendingErrorCallback() 281 { 282 xmlFree(m_message); 283 } 284 285 virtual void call(XMLDocumentParser* parser) OVERRIDE 286 { 287 parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber)); 288 } 289 290 private: 291 XMLErrors::ErrorType m_type; 292 xmlChar* m_message; 293 OrdinalNumber m_lineNumber; 294 OrdinalNumber m_columnNumber; 295 }; 296 297 void XMLDocumentParser::pushCurrentNode(ContainerNode* n) 298 { 299 ASSERT(n); 300 ASSERT(m_currentNode); 301 #if !ENABLE(OILPAN) 302 if (n != document()) 303 n->ref(); 304 #endif 305 m_currentNodeStack.append(m_currentNode); 306 m_currentNode = n; 307 if (m_currentNodeStack.size() > maxXMLTreeDepth) 308 handleError(XMLErrors::ErrorTypeFatal, "Excessive node nesting.", textPosition()); 309 } 310 311 void XMLDocumentParser::popCurrentNode() 312 { 313 if (!m_currentNode) 314 return; 315 ASSERT(m_currentNodeStack.size()); 316 #if !ENABLE(OILPAN) 317 if (m_currentNode != document()) 318 m_currentNode->deref(); 319 #endif 320 m_currentNode = m_currentNodeStack.last(); 321 m_currentNodeStack.removeLast(); 322 } 323 324 void XMLDocumentParser::clearCurrentNodeStack() 325 { 326 #if !ENABLE(OILPAN) 327 if (m_currentNode && m_currentNode != document()) 328 m_currentNode->deref(); 329 #endif 330 m_currentNode = nullptr; 331 m_leafTextNode = nullptr; 332 333 if (m_currentNodeStack.size()) { // Aborted parsing. 334 #if !ENABLE(OILPAN) 335 for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i) 336 m_currentNodeStack[i]->deref(); 337 if (m_currentNodeStack[0] && m_currentNodeStack[0] != document()) 338 m_currentNodeStack[0]->deref(); 339 #endif 340 m_currentNodeStack.clear(); 341 } 342 } 343 344 void XMLDocumentParser::insert(const SegmentedString&) 345 { 346 ASSERT_NOT_REACHED(); 347 } 348 349 void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource) 350 { 351 SegmentedString source(inputSource); 352 if (m_sawXSLTransform || !m_sawFirstElement) 353 m_originalSourceForTransform.append(source); 354 355 if (isStopped() || m_sawXSLTransform) 356 return; 357 358 if (m_parserPaused) { 359 m_pendingSrc.append(source); 360 return; 361 } 362 363 // JavaScript can detach the parser. Make sure this is not released 364 // before the end of this method. 365 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this); 366 367 doWrite(source.toString()); 368 } 369 370 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position) 371 { 372 m_xmlErrors.handleError(type, formattedMessage, position); 373 if (type != XMLErrors::ErrorTypeWarning) 374 m_sawError = true; 375 if (type == XMLErrors::ErrorTypeFatal) 376 stopParsing(); 377 } 378 379 void XMLDocumentParser::enterText() 380 { 381 ASSERT(m_bufferedText.size() == 0); 382 ASSERT(!m_leafTextNode); 383 m_leafTextNode = Text::create(m_currentNode->document(), ""); 384 m_currentNode->parserAppendChild(m_leafTextNode.get()); 385 } 386 387 void XMLDocumentParser::exitText() 388 { 389 if (isStopped()) 390 return; 391 392 if (!m_leafTextNode) 393 return; 394 395 m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size())); 396 m_bufferedText.clear(); 397 m_leafTextNode = nullptr; 398 } 399 400 void XMLDocumentParser::detach() 401 { 402 clearCurrentNodeStack(); 403 ScriptableDocumentParser::detach(); 404 } 405 406 void XMLDocumentParser::end() 407 { 408 TRACE_EVENT0("blink", "XMLDocumentParser::end"); 409 // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called. 410 // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case. 411 ASSERT(!m_parsingFragment); 412 413 doEnd(); 414 415 // doEnd() call above can detach the parser and null out its document. 416 // In that case, we just bail out. 417 if (isDetached()) 418 return; 419 420 // doEnd() could process a script tag, thus pausing parsing. 421 if (m_parserPaused) 422 return; 423 424 if (m_sawError) { 425 insertErrorMessageBlock(); 426 } else { 427 exitText(); 428 document()->styleResolverChanged(); 429 } 430 431 if (isParsing()) 432 prepareToStopParsing(); 433 document()->setReadyState(Document::Interactive); 434 clearCurrentNodeStack(); 435 document()->finishedParsing(); 436 } 437 438 void XMLDocumentParser::finish() 439 { 440 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not 441 // makes sense to call any methods on DocumentParser once it's been stopped. 442 // However, FrameLoader::stop calls DocumentParser::finish unconditionally. 443 444 // flush may ending up executing arbitrary script, and possibly detach the parser. 445 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this); 446 flush(); 447 if (isDetached()) 448 return; 449 450 if (m_parserPaused) 451 m_finishCalled = true; 452 else 453 end(); 454 } 455 456 void XMLDocumentParser::insertErrorMessageBlock() 457 { 458 m_xmlErrors.insertErrorMessageBlock(); 459 } 460 461 void XMLDocumentParser::notifyFinished(Resource* unusedResource) 462 { 463 ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript); 464 465 ScriptSourceCode sourceCode(m_pendingScript.get()); 466 bool errorOccurred = m_pendingScript->errorOccurred(); 467 bool wasCanceled = m_pendingScript->wasCanceled(); 468 469 m_pendingScript->removeClient(this); 470 m_pendingScript = 0; 471 472 RefPtrWillBeRawPtr<Element> e = m_scriptElement; 473 m_scriptElement = nullptr; 474 475 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get()); 476 ASSERT(scriptLoader); 477 478 // JavaScript can detach this parser, make sure it's kept alive even if 479 // detached. 480 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this); 481 482 if (errorOccurred) { 483 scriptLoader->dispatchErrorEvent(); 484 } else if (!wasCanceled) { 485 scriptLoader->executeScript(sourceCode); 486 scriptLoader->dispatchLoadEvent(); 487 } 488 489 m_scriptElement = nullptr; 490 491 if (!isDetached() && !m_requestingScript) 492 resumeParsing(); 493 } 494 495 bool XMLDocumentParser::isWaitingForScripts() const 496 { 497 return m_pendingScript; 498 } 499 500 void XMLDocumentParser::pauseParsing() 501 { 502 if (!m_parsingFragment) 503 m_parserPaused = true; 504 } 505 506 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 507 { 508 if (!chunk.length()) 509 return true; 510 511 // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm: 512 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm 513 // For now we have a hack for script/style innerHTML support: 514 if (contextElement && (contextElement->hasLocalName(scriptTag.localName()) || contextElement->hasLocalName(styleTag.localName()))) { 515 fragment->parserAppendChild(fragment->document().createTextNode(chunk)); 516 return true; 517 } 518 519 RefPtrWillBeRawPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy); 520 bool wellFormed = parser->appendFragmentSource(chunk); 521 522 // Do not call finish(). Current finish() and doEnd() implementations touch 523 // the main Document/loader and can cause crashes in the fragment case. 524 525 // Allows ~DocumentParser to assert it was detached before destruction. 526 parser->detach(); 527 // appendFragmentSource()'s wellFormed is more permissive than wellFormed(). 528 return wellFormed; 529 } 530 531 static int globalDescriptor = 0; 532 static ThreadIdentifier libxmlLoaderThread = 0; 533 534 static int matchFunc(const char*) 535 { 536 // Only match loads initiated due to uses of libxml2 from within 537 // XMLDocumentParser to avoid interfering with client applications that also 538 // use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353 539 return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread; 540 } 541 542 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy) 543 { 544 if (!scriptingContentIsAllowed(parserContentPolicy)) 545 element->stripScriptingAttributes(attributeVector); 546 element->parserSetAttributes(attributeVector); 547 } 548 549 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit) 550 { 551 // Hack around libxml2's lack of encoding overide support by manually 552 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml 553 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks and 554 // switch encodings, causing the parse to fail. 555 if (is8Bit) { 556 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); 557 return; 558 } 559 560 const UChar BOM = 0xFEFF; 561 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); 562 xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); 563 } 564 565 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk) 566 { 567 bool is8Bit = chunk.is8Bit(); 568 switchEncoding(ctxt, is8Bit); 569 if (is8Bit) 570 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0); 571 else 572 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0); 573 } 574 575 static void finishParsing(xmlParserCtxtPtr ctxt) 576 { 577 xmlParseChunk(ctxt, 0, 0, 1); 578 } 579 580 #define xmlParseChunk #error "Use parseChunk instead to select the correct encoding." 581 582 static bool isLibxmlDefaultCatalogFile(const String& urlString) 583 { 584 // On non-Windows platforms libxml asks for this URL, the 585 // "XML_XML_DEFAULT_CATALOG", on initialization. 586 if (urlString == "file:///etc/xml/catalog") 587 return true; 588 589 // On Windows, libxml computes a URL relative to where its DLL resides. 590 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false)) 591 return true; 592 return false; 593 } 594 595 static bool shouldAllowExternalLoad(const KURL& url) 596 { 597 String urlString = url.string(); 598 599 // This isn't really necessary now that initializeLibXMLIfNecessary 600 // disables catalog support in libxml, but keeping it for defense in depth. 601 if (isLibxmlDefaultCatalogFile(url)) 602 return false; 603 604 // The most common DTD. There isn't much point in hammering www.w3c.org by 605 // requesting this URL for every XHTML document. 606 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false)) 607 return false; 608 609 // Similarly, there isn't much point in requesting the SVG DTD. 610 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false)) 611 return false; 612 613 // The libxml doesn't give us a lot of context for deciding whether to allow 614 // this request. In the worst case, this load could be for an external 615 // entity and the resulting document could simply read the retrieved 616 // content. If we had more context, we could potentially allow the parser to 617 // load a DTD. As things stand, we take the conservative route and allow 618 // same-origin requests only. 619 if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) { 620 XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url); 621 return false; 622 } 623 624 return true; 625 } 626 627 static void* openFunc(const char* uri) 628 { 629 ASSERT(XMLDocumentParserScope::currentFetcher); 630 ASSERT(currentThread() == libxmlLoaderThread); 631 632 KURL url(KURL(), uri); 633 634 if (!shouldAllowExternalLoad(url)) 635 return &globalDescriptor; 636 637 KURL finalURL; 638 RefPtr<SharedBuffer> data; 639 640 { 641 ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher; 642 XMLDocumentParserScope scope(0); 643 // FIXME: We should restore the original global error handler as well. 644 645 if (fetcher->frame()) { 646 FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions()); 647 ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request); 648 if (resource && !resource->errorOccurred()) { 649 data = resource->resourceBuffer(); 650 finalURL = resource->response().url(); 651 } 652 } 653 } 654 655 // We have to check the URL again after the load to catch redirects. 656 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>. 657 if (!shouldAllowExternalLoad(finalURL)) 658 return &globalDescriptor; 659 660 UseCounter::count(XMLDocumentParserScope::currentFetcher->document(), UseCounter::XMLExternalResourceLoad); 661 662 return new SharedBufferReader(data); 663 } 664 665 static int readFunc(void* context, char* buffer, int len) 666 { 667 // Do 0-byte reads in case of a null descriptor 668 if (context == &globalDescriptor) 669 return 0; 670 671 SharedBufferReader* data = static_cast<SharedBufferReader*>(context); 672 return data->readData(buffer, len); 673 } 674 675 static int writeFunc(void*, const char*, int) 676 { 677 // Always just do 0-byte writes 678 return 0; 679 } 680 681 static int closeFunc(void* context) 682 { 683 if (context != &globalDescriptor) { 684 SharedBufferReader* data = static_cast<SharedBufferReader*>(context); 685 delete data; 686 } 687 return 0; 688 } 689 690 static void errorFunc(void*, const char*, ...) 691 { 692 // FIXME: It would be nice to display error messages somewhere. 693 } 694 695 static void initializeLibXMLIfNecessary() 696 { 697 static bool didInit = false; 698 if (didInit) 699 return; 700 701 // We don't want libxml to try and load catalogs. 702 // FIXME: It's not nice to set global settings in libxml, embedders of Blink 703 // could be trying to use libxml themselves. 704 xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); 705 xmlInitParser(); 706 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); 707 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); 708 libxmlLoaderThread = currentThread(); 709 didInit = true; 710 } 711 712 713 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData) 714 { 715 initializeLibXMLIfNecessary(); 716 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0); 717 parser->_private = userData; 718 parser->replaceEntities = true; 719 return adoptRef(new XMLParserContext(parser)); 720 } 721 722 // Chunk should be encoded in UTF-8 723 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk) 724 { 725 initializeLibXMLIfNecessary(); 726 727 // appendFragmentSource() checks that the length doesn't overflow an int. 728 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length()); 729 730 if (!parser) 731 return nullptr; 732 733 // Copy the sax handler 734 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler)); 735 736 // Set parser options. 737 // XML_PARSE_NODICT: default dictionary option. 738 // XML_PARSE_NOENT: force entities substitutions. 739 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT); 740 741 // Internal initialization 742 parser->sax2 = 1; 743 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT 744 parser->depth = 0; 745 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3); 746 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5); 747 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36); 748 parser->_private = userData; 749 750 return adoptRef(new XMLParserContext(parser)); 751 } 752 753 // -------------------------------- 754 755 bool XMLDocumentParser::supportsXMLVersion(const String& version) 756 { 757 return version == "1.0"; 758 } 759 760 XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView) 761 : ScriptableDocumentParser(document) 762 , m_hasView(frameView) 763 , m_context(nullptr) 764 , m_currentNode(&document) 765 , m_isCurrentlyParsing8BitChunk(false) 766 , m_sawError(false) 767 , m_sawCSS(false) 768 , m_sawXSLTransform(false) 769 , m_sawFirstElement(false) 770 , m_isXHTMLDocument(false) 771 , m_parserPaused(false) 772 , m_requestingScript(false) 773 , m_finishCalled(false) 774 , m_xmlErrors(&document) 775 , m_pendingScript(0) 776 , m_scriptStartPosition(TextPosition::belowRangePosition()) 777 , m_parsingFragment(false) 778 { 779 // This is XML being used as a document resource. 780 if (frameView && document.isXMLDocument()) 781 UseCounter::count(document, UseCounter::XMLDocument); 782 } 783 784 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy) 785 : ScriptableDocumentParser(fragment->document(), parserContentPolicy) 786 , m_hasView(false) 787 , m_context(nullptr) 788 , m_currentNode(fragment) 789 , m_isCurrentlyParsing8BitChunk(false) 790 , m_sawError(false) 791 , m_sawCSS(false) 792 , m_sawXSLTransform(false) 793 , m_sawFirstElement(false) 794 , m_isXHTMLDocument(false) 795 , m_parserPaused(false) 796 , m_requestingScript(false) 797 , m_finishCalled(false) 798 , m_xmlErrors(&fragment->document()) 799 , m_pendingScript(0) 800 , m_scriptStartPosition(TextPosition::belowRangePosition()) 801 , m_parsingFragment(true) 802 { 803 #if !ENABLE(OILPAN) 804 fragment->ref(); 805 #endif 806 807 // Add namespaces based on the parent node 808 WillBeHeapVector<RawPtrWillBeMember<Element> > elemStack; 809 while (parentElement) { 810 elemStack.append(parentElement); 811 812 Element* grandParentElement = parentElement->parentElement(); 813 if (!grandParentElement) 814 break; 815 parentElement = grandParentElement; 816 } 817 818 if (elemStack.isEmpty()) 819 return; 820 821 for (; !elemStack.isEmpty(); elemStack.removeLast()) { 822 Element* element = elemStack.last(); 823 AttributeCollection attributes = element->attributes(); 824 AttributeCollection::iterator end = attributes.end(); 825 for (AttributeCollection::iterator it = attributes.begin(); it != end; ++it) { 826 if (it->localName() == xmlnsAtom) 827 m_defaultNamespaceURI = it->value(); 828 else if (it->prefix() == xmlnsAtom) 829 m_prefixToNamespaceMap.set(it->localName(), it->value()); 830 } 831 } 832 833 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace. 834 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument()) 835 m_defaultNamespaceURI = parentElement->namespaceURI(); 836 } 837 838 XMLParserContext::~XMLParserContext() 839 { 840 if (m_context->myDoc) 841 xmlFreeDoc(m_context->myDoc); 842 xmlFreeParserCtxt(m_context); 843 } 844 845 XMLDocumentParser::~XMLDocumentParser() 846 { 847 #if !ENABLE(OILPAN) 848 // The XMLDocumentParser will always be detached before being destroyed. 849 ASSERT(m_currentNodeStack.isEmpty()); 850 ASSERT(!m_currentNode); 851 #endif 852 853 // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp! 854 if (m_pendingScript) 855 m_pendingScript->removeClient(this); 856 } 857 858 void XMLDocumentParser::trace(Visitor* visitor) 859 { 860 visitor->trace(m_currentNode); 861 #if ENABLE(OILPAN) 862 visitor->trace(m_currentNodeStack); 863 #endif 864 visitor->trace(m_leafTextNode); 865 visitor->trace(m_xmlErrors); 866 visitor->trace(m_scriptElement); 867 ScriptableDocumentParser::trace(visitor); 868 } 869 870 void XMLDocumentParser::doWrite(const String& parseString) 871 { 872 TRACE_EVENT0("blink", "XMLDocumentParser::doWrite"); 873 ASSERT(!isDetached()); 874 if (!m_context) 875 initializeParserContext(); 876 877 // Protect the libxml context from deletion during a callback 878 RefPtr<XMLParserContext> context = m_context; 879 880 // libXML throws an error if you try to switch the encoding for an empty 881 // string. 882 if (parseString.length()) { 883 // JavaScript may cause the parser to detach during parseChunk 884 // keep this alive until this function is done. 885 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this); 886 887 XMLDocumentParserScope scope(document()->fetcher()); 888 TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit()); 889 parseChunk(context->context(), parseString); 890 891 // JavaScript (which may be run under the parseChunk callstack) may 892 // cause the parser to be stopped or detached. 893 if (isStopped()) 894 return; 895 } 896 897 // FIXME: Why is this here? And why is it after we process the passed 898 // source? 899 if (document()->sawDecodingError()) { 900 // If the decoder saw an error, report it as fatal (stops parsing) 901 TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col)); 902 handleError(XMLErrors::ErrorTypeFatal, "Encoding error", position); 903 } 904 } 905 906 struct xmlSAX2Namespace { 907 const xmlChar* prefix; 908 const xmlChar* uri; 909 }; 910 911 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState) 912 { 913 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces); 914 for (int i = 0; i < nbNamespaces; ++i) { 915 AtomicString namespaceQName = xmlnsAtom; 916 AtomicString namespaceURI = toAtomicString(namespaces[i].uri); 917 if (namespaces[i].prefix) 918 namespaceQName = WTF::xmlnsWithColon + namespaces[i].prefix; 919 920 QualifiedName parsedName = anyName; 921 if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState)) 922 return; 923 924 prefixedAttributes.append(Attribute(parsedName, namespaceURI)); 925 } 926 } 927 928 struct xmlSAX2Attributes { 929 const xmlChar* localname; 930 const xmlChar* prefix; 931 const xmlChar* uri; 932 const xmlChar* value; 933 const xmlChar* end; 934 }; 935 936 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState) 937 { 938 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); 939 for (int i = 0; i < nbAttributes; ++i) { 940 int valueLength = static_cast<int>(attributes[i].end - attributes[i].value); 941 AtomicString attrValue = toAtomicString(attributes[i].value, valueLength); 942 String attrPrefix = toString(attributes[i].prefix); 943 AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri); 944 AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname); 945 946 QualifiedName parsedName = anyName; 947 if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState)) 948 return; 949 950 prefixedAttributes.append(Attribute(parsedName, attrValue)); 951 } 952 } 953 954 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces, 955 const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes) 956 { 957 if (isStopped()) 958 return; 959 960 if (m_parserPaused) { 961 m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces, 962 nbAttributes, nbDefaulted, libxmlAttributes))); 963 return; 964 } 965 966 exitText(); 967 968 AtomicString adjustedURI = uri; 969 if (m_parsingFragment && adjustedURI.isNull()) { 970 if (!prefix.isNull()) 971 adjustedURI = m_prefixToNamespaceMap.get(prefix); 972 else 973 adjustedURI = m_defaultNamespaceURI; 974 } 975 976 bool isFirstElement = !m_sawFirstElement; 977 m_sawFirstElement = true; 978 979 QualifiedName qName(prefix, localName, adjustedURI); 980 RefPtrWillBeRawPtr<Element> newElement = m_currentNode->document().createElement(qName, true); 981 if (!newElement) { 982 stopParsing(); 983 return; 984 } 985 986 Vector<Attribute> prefixedAttributes; 987 TrackExceptionState exceptionState; 988 handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState); 989 if (exceptionState.hadException()) { 990 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy()); 991 stopParsing(); 992 return; 993 } 994 995 handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState); 996 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy()); 997 if (exceptionState.hadException()) { 998 stopParsing(); 999 return; 1000 } 1001 1002 newElement->beginParsingChildren(); 1003 1004 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get()); 1005 if (scriptLoader) 1006 m_scriptStartPosition = textPosition(); 1007 1008 m_currentNode->parserAppendChild(newElement.get()); 1009 1010 // Event handlers may synchronously trigger removal of the 1011 // document and cancellation of this parser. 1012 if (isStopped()) { 1013 stopParsing(); 1014 return; 1015 } 1016 1017 if (isHTMLTemplateElement(*newElement)) 1018 pushCurrentNode(toHTMLTemplateElement(*newElement).content()); 1019 else 1020 pushCurrentNode(newElement.get()); 1021 1022 if (isHTMLHtmlElement(*newElement)) 1023 toHTMLHtmlElement(*newElement).insertedByParser(); 1024 1025 if (!m_parsingFragment && isFirstElement && document()->frame()) 1026 document()->frame()->loader().dispatchDocumentElementAvailable(); 1027 } 1028 1029 void XMLDocumentParser::endElementNs() 1030 { 1031 if (isStopped()) 1032 return; 1033 1034 if (m_parserPaused) { 1035 m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback())); 1036 return; 1037 } 1038 1039 // JavaScript can detach the parser. Make sure this is not released before 1040 // the end of this method. 1041 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this); 1042 1043 exitText(); 1044 1045 RefPtrWillBeRawPtr<ContainerNode> n = m_currentNode; 1046 if (m_currentNode->isElementNode()) 1047 toElement(n.get())->finishParsingChildren(); 1048 1049 if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) { 1050 popCurrentNode(); 1051 n->remove(IGNORE_EXCEPTION); 1052 return; 1053 } 1054 1055 if (!n->isElementNode() || !m_hasView) { 1056 popCurrentNode(); 1057 return; 1058 } 1059 1060 Element* element = toElement(n); 1061 1062 // The element's parent may have already been removed from document. 1063 // Parsing continues in this case, but scripts aren't executed. 1064 if (!element->inDocument()) { 1065 popCurrentNode(); 1066 return; 1067 } 1068 1069 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element); 1070 if (!scriptLoader) { 1071 popCurrentNode(); 1072 return; 1073 } 1074 1075 // Don't load external scripts for standalone documents (for now). 1076 ASSERT(!m_pendingScript); 1077 m_requestingScript = true; 1078 1079 if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) { 1080 // FIXME: Script execution should be shared between 1081 // the libxml2 and Qt XMLDocumentParser implementations. 1082 1083 if (scriptLoader->readyToBeParserExecuted()) { 1084 scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition)); 1085 } else if (scriptLoader->willBeParserExecuted()) { 1086 m_pendingScript = scriptLoader->resource(); 1087 m_scriptElement = element; 1088 m_pendingScript->addClient(this); 1089 1090 // m_pendingScript will be 0 if script was already loaded and 1091 // addClient() executed it. 1092 if (m_pendingScript) 1093 pauseParsing(); 1094 } else { 1095 m_scriptElement = nullptr; 1096 } 1097 1098 // JavaScript may have detached the parser 1099 if (isDetached()) 1100 return; 1101 } 1102 m_requestingScript = false; 1103 popCurrentNode(); 1104 } 1105 1106 void XMLDocumentParser::characters(const xmlChar* chars, int length) 1107 { 1108 if (isStopped()) 1109 return; 1110 1111 if (m_parserPaused) { 1112 m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length))); 1113 return; 1114 } 1115 1116 if (!m_leafTextNode) 1117 enterText(); 1118 m_bufferedText.append(chars, length); 1119 } 1120 1121 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args) 1122 { 1123 if (isStopped()) 1124 return; 1125 1126 char formattedMessage[1024]; 1127 vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args); 1128 1129 if (m_parserPaused) { 1130 m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber()))); 1131 return; 1132 } 1133 1134 handleError(type, formattedMessage, textPosition()); 1135 } 1136 1137 void XMLDocumentParser::processingInstruction(const String& target, const String& data) 1138 { 1139 if (isStopped()) 1140 return; 1141 1142 if (m_parserPaused) { 1143 m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target, data))); 1144 return; 1145 } 1146 1147 exitText(); 1148 1149 // ### handle exceptions 1150 TrackExceptionState exceptionState; 1151 RefPtrWillBeRawPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState); 1152 if (exceptionState.hadException()) 1153 return; 1154 1155 pi->setCreatedByParser(true); 1156 1157 m_currentNode->parserAppendChild(pi.get()); 1158 1159 pi->setCreatedByParser(false); 1160 1161 if (pi->isCSS()) 1162 m_sawCSS = true; 1163 1164 if (!RuntimeEnabledFeatures::xsltEnabled()) 1165 return; 1166 1167 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL(); 1168 if (m_sawXSLTransform && !document()->transformSourceDocument()) { 1169 // This behavior is very tricky. We call stopParsing() here because we 1170 // want to stop processing the document until we're ready to apply the 1171 // transform, but we actually still want to be fed decoded string pieces 1172 // to accumulate in m_originalSourceForTransform. So, we call 1173 // stopParsing() here and check isStopped() in element callbacks. 1174 // FIXME: This contradicts the contract of DocumentParser. 1175 stopParsing(); 1176 } 1177 } 1178 1179 void XMLDocumentParser::cdataBlock(const String& text) 1180 { 1181 if (isStopped()) 1182 return; 1183 1184 if (m_parserPaused) { 1185 m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text))); 1186 return; 1187 } 1188 1189 exitText(); 1190 1191 m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), text)); 1192 } 1193 1194 void XMLDocumentParser::comment(const String& text) 1195 { 1196 if (isStopped()) 1197 return; 1198 1199 if (m_parserPaused) { 1200 m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text))); 1201 return; 1202 } 1203 1204 exitText(); 1205 1206 m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), text)); 1207 } 1208 1209 enum StandaloneInfo { 1210 StandaloneUnspecified = -2, 1211 NoXMlDeclaration, 1212 StandaloneNo, 1213 StandaloneYes 1214 }; 1215 1216 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone) 1217 { 1218 StandaloneInfo standaloneInfo = static_cast<StandaloneInfo>(standalone); 1219 if (standaloneInfo == NoXMlDeclaration) { 1220 document()->setHasXMLDeclaration(false); 1221 return; 1222 } 1223 1224 if (!version.isNull()) 1225 document()->setXMLVersion(version, ASSERT_NO_EXCEPTION); 1226 if (standalone != StandaloneUnspecified) 1227 document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION); 1228 if (!encoding.isNull()) 1229 document()->setXMLEncoding(encoding); 1230 document()->setHasXMLDeclaration(true); 1231 } 1232 1233 void XMLDocumentParser::endDocument() 1234 { 1235 exitText(); 1236 } 1237 1238 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID) 1239 { 1240 if (isStopped()) 1241 return; 1242 1243 if (m_parserPaused) { 1244 m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID))); 1245 return; 1246 } 1247 1248 if (document()) 1249 document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID)); 1250 } 1251 1252 static inline XMLDocumentParser* getParser(void* closure) 1253 { 1254 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1255 return static_cast<XMLDocumentParser*>(ctxt->_private); 1256 } 1257 1258 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes) 1259 { 1260 getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes); 1261 } 1262 1263 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*) 1264 { 1265 getParser(closure)->endElementNs(); 1266 } 1267 1268 static void charactersHandler(void* closure, const xmlChar* chars, int length) 1269 { 1270 getParser(closure)->characters(chars, length); 1271 } 1272 1273 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data) 1274 { 1275 getParser(closure)->processingInstruction(toString(target), toString(data)); 1276 } 1277 1278 static void cdataBlockHandler(void* closure, const xmlChar* text, int length) 1279 { 1280 getParser(closure)->cdataBlock(toString(text, length)); 1281 } 1282 1283 static void commentHandler(void* closure, const xmlChar* text) 1284 { 1285 getParser(closure)->comment(toString(text)); 1286 } 1287 1288 WTF_ATTRIBUTE_PRINTF(2, 3) 1289 static void warningHandler(void* closure, const char* message, ...) 1290 { 1291 va_list args; 1292 va_start(args, message); 1293 getParser(closure)->error(XMLErrors::ErrorTypeWarning, message, args); 1294 va_end(args); 1295 } 1296 1297 WTF_ATTRIBUTE_PRINTF(2, 3) 1298 static void fatalErrorHandler(void* closure, const char* message, ...) 1299 { 1300 va_list args; 1301 va_start(args, message); 1302 getParser(closure)->error(XMLErrors::ErrorTypeFatal, message, args); 1303 va_end(args); 1304 } 1305 1306 WTF_ATTRIBUTE_PRINTF(2, 3) 1307 static void normalErrorHandler(void* closure, const char* message, ...) 1308 { 1309 va_list args; 1310 va_start(args, message); 1311 getParser(closure)->error(XMLErrors::ErrorTypeNonFatal, message, args); 1312 va_end(args); 1313 } 1314 1315 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is a hack 1316 // to avoid malloc/free. Using a global variable like this could cause trouble 1317 // if libxml implementation details were to change 1318 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 1319 1320 static xmlEntityPtr sharedXHTMLEntity() 1321 { 1322 static xmlEntity entity; 1323 if (!entity.type) { 1324 entity.type = XML_ENTITY_DECL; 1325 entity.orig = sharedXHTMLEntityResult; 1326 entity.content = sharedXHTMLEntityResult; 1327 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY; 1328 } 1329 return &entity; 1330 } 1331 1332 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize) 1333 { 1334 const char* originalTarget = target; 1335 WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity, 1336 utf16Entity + numberOfCodeUnits, &target, target + targetSize); 1337 if (conversionResult != WTF::Unicode::conversionOK) 1338 return 0; 1339 1340 // Even though we must pass the length, libxml expects the entity string to be null terminated. 1341 ASSERT(target > originalTarget + 1); 1342 *target = '\0'; 1343 return target - originalTarget; 1344 } 1345 1346 static xmlEntityPtr getXHTMLEntity(const xmlChar* name) 1347 { 1348 UChar utf16DecodedEntity[4]; 1349 size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity); 1350 if (!numberOfCodeUnits) 1351 return 0; 1352 1353 ASSERT(numberOfCodeUnits <= 4); 1354 size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits, 1355 reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult)); 1356 if (!entityLengthInUTF8) 1357 return 0; 1358 1359 xmlEntityPtr entity = sharedXHTMLEntity(); 1360 entity->length = entityLengthInUTF8; 1361 entity->name = name; 1362 return entity; 1363 } 1364 1365 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name) 1366 { 1367 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1368 xmlEntityPtr ent = xmlGetPredefinedEntity(name); 1369 if (ent) { 1370 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY; 1371 return ent; 1372 } 1373 1374 ent = xmlGetDocEntity(ctxt->myDoc, name); 1375 if (!ent && getParser(closure)->isXHTMLDocument()) { 1376 ent = getXHTMLEntity(name); 1377 if (ent) 1378 ent->etype = XML_INTERNAL_GENERAL_ENTITY; 1379 } 1380 1381 return ent; 1382 } 1383 1384 static void startDocumentHandler(void* closure) 1385 { 1386 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure); 1387 XMLDocumentParser* parser = getParser(closure); 1388 switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk()); 1389 parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone); 1390 xmlSAX2StartDocument(closure); 1391 } 1392 1393 static void endDocumentHandler(void* closure) 1394 { 1395 getParser(closure)->endDocument(); 1396 xmlSAX2EndDocument(closure); 1397 } 1398 1399 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID) 1400 { 1401 getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID)); 1402 xmlSAX2InternalSubset(closure, name, externalID, systemID); 1403 } 1404 1405 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*) 1406 { 1407 String extId = toString(externalId); 1408 if (extId == "-//W3C//DTD XHTML 1.0 Transitional//EN" 1409 || extId == "-//W3C//DTD XHTML 1.1//EN" 1410 || extId == "-//W3C//DTD XHTML 1.0 Strict//EN" 1411 || extId == "-//W3C//DTD XHTML 1.0 Frameset//EN" 1412 || extId == "-//W3C//DTD XHTML Basic 1.0//EN" 1413 || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" 1414 || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" 1415 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" 1416 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN" 1417 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN") { 1418 // Controls if we replace entities or not. 1419 getParser(closure)->setIsXHTMLDocument(true); 1420 } 1421 } 1422 1423 static void ignorableWhitespaceHandler(void*, const xmlChar*, int) 1424 { 1425 // Nothing to do, but we need this to work around a crasher. 1426 // http://bugzilla.gnome.org/show_bug.cgi?id=172255 1427 // http://bugs.webkit.org/show_bug.cgi?id=5792 1428 } 1429 1430 void XMLDocumentParser::initializeParserContext(const CString& chunk) 1431 { 1432 xmlSAXHandler sax; 1433 memset(&sax, 0, sizeof(sax)); 1434 1435 sax.error = normalErrorHandler; 1436 sax.fatalError = fatalErrorHandler; 1437 sax.characters = charactersHandler; 1438 sax.processingInstruction = processingInstructionHandler; 1439 sax.cdataBlock = cdataBlockHandler; 1440 sax.comment = commentHandler; 1441 sax.warning = warningHandler; 1442 sax.startElementNs = startElementNsHandler; 1443 sax.endElementNs = endElementNsHandler; 1444 sax.getEntity = getEntityHandler; 1445 sax.startDocument = startDocumentHandler; 1446 sax.endDocument = endDocumentHandler; 1447 sax.internalSubset = internalSubsetHandler; 1448 sax.externalSubset = externalSubsetHandler; 1449 sax.ignorableWhitespace = ignorableWhitespaceHandler; 1450 sax.entityDecl = xmlSAX2EntityDecl; 1451 sax.initialized = XML_SAX2_MAGIC; 1452 m_sawError = false; 1453 m_sawCSS = false; 1454 m_sawXSLTransform = false; 1455 m_sawFirstElement = false; 1456 1457 XMLDocumentParserScope scope(document()->fetcher()); 1458 if (m_parsingFragment) { 1459 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk); 1460 } else { 1461 ASSERT(!chunk.data()); 1462 m_context = XMLParserContext::createStringParser(&sax, this); 1463 } 1464 } 1465 1466 void XMLDocumentParser::doEnd() 1467 { 1468 if (!isStopped()) { 1469 if (m_context) { 1470 // Tell libxml we're done. 1471 { 1472 XMLDocumentParserScope scope(document()->fetcher()); 1473 finishParsing(context()); 1474 } 1475 1476 m_context = nullptr; 1477 } 1478 } 1479 1480 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document()); 1481 if (xmlViewerMode) { 1482 const char noStyleMessage[] = "This XML file does not appear to have any style information associated with it. The document tree is shown below."; 1483 document()->setIsViewSource(true); 1484 V8Document::PrivateScript::transformDocumentToTreeViewMethod(document()->frame(), document(), noStyleMessage); 1485 } else if (m_sawXSLTransform) { 1486 xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string()); 1487 document()->setTransformSource(adoptPtr(new TransformSource(doc))); 1488 // Make the document think it's done, so it will apply XSL stylesheets. 1489 document()->setParsing(false); 1490 document()->styleResolverChanged(); 1491 1492 // styleResolverChanged() call can detach the parser and null out its 1493 // document. In that case, we just bail out. 1494 if (isDetached()) 1495 return; 1496 1497 document()->setParsing(true); 1498 DocumentParser::stopParsing(); 1499 } 1500 } 1501 1502 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url) 1503 { 1504 if (source.isEmpty()) 1505 return 0; 1506 // Parse in a single chunk into an xmlDocPtr 1507 // FIXME: Hook up error handlers so that a failure to parse the main 1508 // document results in good error messages. 1509 XMLDocumentParserScope scope(fetcher, errorFunc, 0); 1510 XMLParserInput input(source); 1511 return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS); 1512 } 1513 1514 OrdinalNumber XMLDocumentParser::lineNumber() const 1515 { 1516 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1); 1517 } 1518 1519 OrdinalNumber XMLDocumentParser::columnNumber() const 1520 { 1521 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1); 1522 } 1523 1524 TextPosition XMLDocumentParser::textPosition() const 1525 { 1526 xmlParserCtxtPtr context = this->context(); 1527 if (!context) 1528 return TextPosition::minimumPosition(); 1529 return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), OrdinalNumber::fromOneBasedInt(context->input->col)); 1530 } 1531 1532 void XMLDocumentParser::stopParsing() 1533 { 1534 DocumentParser::stopParsing(); 1535 if (context()) 1536 xmlStopParser(context()); 1537 } 1538 1539 void XMLDocumentParser::resumeParsing() 1540 { 1541 ASSERT(!isDetached()); 1542 ASSERT(m_parserPaused); 1543 1544 m_parserPaused = false; 1545 1546 // First, execute any pending callbacks 1547 while (!m_pendingCallbacks.isEmpty()) { 1548 OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst(); 1549 callback->call(this); 1550 1551 // A callback paused the parser 1552 if (m_parserPaused) 1553 return; 1554 } 1555 1556 // Then, write any pending data 1557 SegmentedString rest = m_pendingSrc; 1558 m_pendingSrc.clear(); 1559 // There is normally only one string left, so toString() shouldn't copy. 1560 // In any case, the XML parser runs on the main thread and it's OK if 1561 // the passed string has more than one reference. 1562 append(rest.toString().impl()); 1563 1564 // Finally, if finish() has been called and write() didn't result 1565 // in any further callbacks being queued, call end() 1566 if (m_finishCalled && m_pendingCallbacks.isEmpty()) 1567 end(); 1568 } 1569 1570 bool XMLDocumentParser::appendFragmentSource(const String& chunk) 1571 { 1572 ASSERT(!m_context); 1573 ASSERT(m_parsingFragment); 1574 1575 CString chunkAsUtf8 = chunk.utf8(); 1576 1577 // libxml2 takes an int for a length, and therefore can't handle XML chunks 1578 // larger than 2 GiB. 1579 if (chunkAsUtf8.length() > INT_MAX) 1580 return false; 1581 1582 TRACE_EVENT0("blink", "XMLDocumentParser::appendFragmentSource"); 1583 initializeParserContext(chunkAsUtf8); 1584 xmlParseContent(context()); 1585 endDocument(); // Close any open text nodes. 1586 1587 // FIXME: If this code is actually needed, it should probably move to 1588 // finish() 1589 // XMLDocumentParserQt has a similar check (m_stream.error() == 1590 // QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). Check if all 1591 // the chunk has been processed. 1592 long bytesProcessed = xmlByteConsumed(context()); 1593 if (bytesProcessed == -1 || static_cast<unsigned long>(bytesProcessed) != chunkAsUtf8.length()) { 1594 // FIXME: I don't believe we can hit this case without also having seen 1595 // an error or a null byte. If we hit this ASSERT, we've found a test 1596 // case which demonstrates the need for this code. 1597 ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed])); 1598 return false; 1599 } 1600 1601 // No error if the chunk is well formed or it is not but we have no error. 1602 return context()->wellFormed || !xmlCtxtGetLastError(context()); 1603 } 1604 1605 // -------------------------------- 1606 1607 struct AttributeParseState { 1608 HashMap<String, String> attributes; 1609 bool gotAttributes; 1610 }; 1611 1612 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/, 1613 const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/, 1614 int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes) 1615 { 1616 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0) 1617 return; 1618 1619 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1620 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private); 1621 1622 state->gotAttributes = true; 1623 1624 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); 1625 for (int i = 0; i < nbAttributes; ++i) { 1626 String attrLocalName = toString(attributes[i].localname); 1627 int valueLength = (int) (attributes[i].end - attributes[i].value); 1628 String attrValue = toString(attributes[i].value, valueLength); 1629 String attrPrefix = toString(attributes[i].prefix); 1630 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName; 1631 1632 state->attributes.set(attrQName, attrValue); 1633 } 1634 } 1635 1636 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK) 1637 { 1638 AttributeParseState state; 1639 state.gotAttributes = false; 1640 1641 xmlSAXHandler sax; 1642 memset(&sax, 0, sizeof(sax)); 1643 sax.startElementNs = attributesStartElementNsHandler; 1644 sax.initialized = XML_SAX2_MAGIC; 1645 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state); 1646 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />"; 1647 parseChunk(parser->context(), parseString); 1648 finishParsing(parser->context()); 1649 attrsOK = state.gotAttributes; 1650 return state.attributes; 1651 } 1652 1653 } // namespace blink 1654