1 /* 2 * Copyright (C) 2000 Peter Kelly (pmk (at) post.com) 3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved. 4 * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org) 5 * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org) 6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 7 * Copyright (C) 2008 Holger Hans Peter Freyther 8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 9 * 10 * This library is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Library General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Library General Public License for more details. 19 * 20 * You should have received a copy of the GNU Library General Public License 21 * along with this library; see the file COPYING.LIB. If not, write to 22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 23 * Boston, MA 02110-1301, USA. 24 */ 25 26 #include "config.h" 27 #include "core/xml/parser/XMLDocumentParser.h" 28 29 #include <libxml/catalog.h> 30 #include <libxml/parser.h> 31 #include <libxml/parserInternals.h> 32 #include <libxslt/xslt.h> 33 #include "FetchInitiatorTypeNames.h" 34 #include "HTMLNames.h" 35 #include "RuntimeEnabledFeatures.h" 36 #include "XMLNSNames.h" 37 #include "bindings/v8/ExceptionState.h" 38 #include "bindings/v8/ExceptionStatePlaceholder.h" 39 #include "bindings/v8/ScriptController.h" 40 #include "bindings/v8/ScriptSourceCode.h" 41 #include "core/dom/CDATASection.h" 42 #include "core/dom/Comment.h" 43 #include "core/dom/Document.h" 44 #include "core/dom/DocumentFragment.h" 45 #include "core/dom/DocumentType.h" 46 #include "core/dom/ProcessingInstruction.h" 47 #include "core/dom/ScriptLoader.h" 48 #include "core/dom/TransformSource.h" 49 #include "core/fetch/ResourceFetcher.h" 50 #include "core/fetch/ScriptResource.h" 51 #include "core/fetch/TextResourceDecoder.h" 52 #include "core/frame/Frame.h" 53 #include "core/html/HTMLHtmlElement.h" 54 #include "core/html/HTMLTemplateElement.h" 55 #include "core/html/parser/HTMLEntityParser.h" 56 #include "core/loader/FrameLoader.h" 57 #include "core/loader/ImageLoader.h" 58 #include "core/frame/UseCounter.h" 59 #include "core/xml/XMLTreeViewer.h" 60 #include "core/xml/parser/XMLDocumentParserScope.h" 61 #include "core/xml/parser/XMLParserInput.h" 62 #include "platform/SharedBuffer.h" 63 #include "platform/network/ResourceError.h" 64 #include "platform/network/ResourceRequest.h" 65 #include "platform/network/ResourceResponse.h" 66 #include "platform/weborigin/SecurityOrigin.h" 67 #include "wtf/StringExtras.h" 68 #include "wtf/TemporaryChange.h" 69 #include "wtf/Threading.h" 70 #include "wtf/Vector.h" 71 #include "wtf/unicode/UTF8.h" 72 73 using namespace std; 74 75 namespace WebCore { 76 77 using namespace HTMLNames; 78 79 // FIXME: HTMLConstructionSite has a limit of 512, should these match? 80 static const unsigned maxXMLTreeDepth = 5000; 81 82 static inline String toString(const xmlChar* string, size_t length) 83 { 84 return String::fromUTF8(reinterpret_cast<const char*>(string), length); 85 } 86 87 static inline String toString(const xmlChar* string) 88 { 89 return String::fromUTF8(reinterpret_cast<const char*>(string)); 90 } 91 92 static inline AtomicString toAtomicString(const xmlChar* string, size_t length) 93 { 94 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length); 95 } 96 97 static inline AtomicString toAtomicString(const xmlChar* string) 98 { 99 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string)); 100 } 101 102 static inline bool hasNoStyleInformation(Document* document) 103 { 104 if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument()) 105 return false; 106 107 if (!document->frame() || !document->frame()->page()) 108 return false; 109 110 if (document->frame()->tree().parent()) 111 return false; // This document is not in a top frame 112 113 return true; 114 } 115 116 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback { 117 public: 118 PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, 119 int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes) 120 : m_localName(localName) 121 , m_prefix(prefix) 122 , m_uri(uri) 123 , m_namespaceCount(namespaceCount) 124 , m_attributeCount(attributeCount) 125 , m_defaultedCount(defaultedCount) 126 { 127 m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2)); 128 for (int i = 0; i < namespaceCount * 2 ; i++) 129 m_namespaces[i] = xmlStrdup(namespaces[i]); 130 m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5)); 131 for (int i = 0; i < attributeCount; i++) { 132 // Each attribute has 5 elements in the array: 133 // name, prefix, uri, value and an end pointer. 134 for (int j = 0; j < 3; j++) 135 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]); 136 int length = attributes[i * 5 + 4] - attributes[i * 5 + 3]; 137 m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length); 138 m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length; 139 } 140 } 141 142 virtual ~PendingStartElementNSCallback() 143 { 144 for (int i = 0; i < m_namespaceCount * 2; i++) 145 xmlFree(m_namespaces[i]); 146 xmlFree(m_namespaces); 147 for (int i = 0; i < m_attributeCount; i++) 148 for (int j = 0; j < 4; j++) 149 xmlFree(m_attributes[i * 5 + j]); 150 xmlFree(m_attributes); 151 } 152 153 virtual void call(XMLDocumentParser* parser) OVERRIDE 154 { 155 parser->startElementNs(m_localName, m_prefix, m_uri, 156 m_namespaceCount, const_cast<const xmlChar**>(m_namespaces), 157 m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes)); 158 } 159 160 private: 161 AtomicString m_localName; 162 AtomicString m_prefix; 163 AtomicString m_uri; 164 int m_namespaceCount; 165 xmlChar** m_namespaces; 166 int m_attributeCount; 167 int m_defaultedCount; 168 xmlChar** m_attributes; 169 }; 170 171 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback { 172 public: 173 virtual void call(XMLDocumentParser* parser) OVERRIDE 174 { 175 parser->endElementNs(); 176 } 177 }; 178 179 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback { 180 public: 181 PendingCharactersCallback(const xmlChar* chars, int length) 182 : m_chars(xmlStrndup(chars, length)) 183 , m_length(length) 184 { 185 } 186 187 virtual ~PendingCharactersCallback() 188 { 189 xmlFree(m_chars); 190 } 191 192 virtual void call(XMLDocumentParser* parser) OVERRIDE 193 { 194 parser->characters(m_chars, m_length); 195 } 196 197 private: 198 xmlChar* m_chars; 199 int m_length; 200 }; 201 202 class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback { 203 public: 204 PendingProcessingInstructionCallback(const String& target, const String& data) 205 : m_target(target) 206 , m_data(data) 207 { 208 } 209 210 virtual void call(XMLDocumentParser* parser) OVERRIDE 211 { 212 parser->processingInstruction(m_target, m_data); 213 } 214 215 private: 216 String m_target; 217 String m_data; 218 }; 219 220 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback { 221 public: 222 explicit PendingCDATABlockCallback(const String& text) : m_text(text) { } 223 224 virtual void call(XMLDocumentParser* parser) OVERRIDE 225 { 226 parser->cdataBlock(m_text); 227 } 228 229 private: 230 String m_text; 231 }; 232 233 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback { 234 public: 235 explicit PendingCommentCallback(const String& text) : m_text(text) { } 236 237 virtual void call(XMLDocumentParser* parser) OVERRIDE 238 { 239 parser->comment(m_text); 240 } 241 242 private: 243 String m_text; 244 }; 245 246 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback { 247 public: 248 PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID) 249 : m_name(name) 250 , m_externalID(externalID) 251 , m_systemID(systemID) 252 { 253 } 254 255 virtual void call(XMLDocumentParser* parser) OVERRIDE 256 { 257 parser->internalSubset(m_name, m_externalID, m_systemID); 258 } 259 260 private: 261 String m_name; 262 String m_externalID; 263 String m_systemID; 264 }; 265 266 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback { 267 public: 268 PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber) 269 : m_type(type) 270 , m_message(xmlStrdup(message)) 271 , m_lineNumber(lineNumber) 272 , m_columnNumber(columnNumber) 273 { 274 } 275 276 virtual ~PendingErrorCallback() 277 { 278 xmlFree(m_message); 279 } 280 281 virtual void call(XMLDocumentParser* parser) OVERRIDE 282 { 283 parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber)); 284 } 285 286 private: 287 XMLErrors::ErrorType m_type; 288 xmlChar* m_message; 289 OrdinalNumber m_lineNumber; 290 OrdinalNumber m_columnNumber; 291 }; 292 293 void XMLDocumentParser::pushCurrentNode(ContainerNode* n) 294 { 295 ASSERT(n); 296 ASSERT(m_currentNode); 297 if (n != document()) 298 n->ref(); 299 m_currentNodeStack.append(m_currentNode); 300 m_currentNode = n; 301 if (m_currentNodeStack.size() > maxXMLTreeDepth) 302 handleError(XMLErrors::fatal, "Excessive node nesting.", textPosition()); 303 } 304 305 void XMLDocumentParser::popCurrentNode() 306 { 307 if (!m_currentNode) 308 return; 309 ASSERT(m_currentNodeStack.size()); 310 311 if (m_currentNode != document()) 312 m_currentNode->deref(); 313 314 m_currentNode = m_currentNodeStack.last(); 315 m_currentNodeStack.removeLast(); 316 } 317 318 void XMLDocumentParser::clearCurrentNodeStack() 319 { 320 if (m_currentNode && m_currentNode != document()) 321 m_currentNode->deref(); 322 m_currentNode = 0; 323 m_leafTextNode = 0; 324 325 if (m_currentNodeStack.size()) { // Aborted parsing. 326 for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i) 327 m_currentNodeStack[i]->deref(); 328 if (m_currentNodeStack[0] && m_currentNodeStack[0] != document()) 329 m_currentNodeStack[0]->deref(); 330 m_currentNodeStack.clear(); 331 } 332 } 333 334 void XMLDocumentParser::insert(const SegmentedString&) 335 { 336 ASSERT_NOT_REACHED(); 337 } 338 339 void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource) 340 { 341 SegmentedString source(inputSource); 342 if (m_sawXSLTransform || !m_sawFirstElement) 343 m_originalSourceForTransform.append(source); 344 345 if (isStopped() || m_sawXSLTransform) 346 return; 347 348 if (m_parserPaused) { 349 m_pendingSrc.append(source); 350 return; 351 } 352 353 // JavaScript can detach the parser. Make sure this is not released 354 // before the end of this method. 355 RefPtr<XMLDocumentParser> protect(this); 356 357 doWrite(source.toString()); 358 359 if (isStopped()) 360 return; 361 362 if (document()->frame() && document()->frame()->script().canExecuteScripts(NotAboutToExecuteScript)) 363 ImageLoader::dispatchPendingBeforeLoadEvents(); 364 } 365 366 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position) 367 { 368 m_xmlErrors.handleError(type, formattedMessage, position); 369 if (type != XMLErrors::warning) 370 m_sawError = true; 371 if (type == XMLErrors::fatal) 372 stopParsing(); 373 } 374 375 void XMLDocumentParser::enterText() 376 { 377 ASSERT(m_bufferedText.size() == 0); 378 ASSERT(!m_leafTextNode); 379 m_leafTextNode = Text::create(m_currentNode->document(), ""); 380 m_currentNode->parserAppendChild(m_leafTextNode.get()); 381 } 382 383 void XMLDocumentParser::exitText() 384 { 385 if (isStopped()) 386 return; 387 388 if (!m_leafTextNode) 389 return; 390 391 m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size())); 392 m_bufferedText.clear(); 393 m_leafTextNode = 0; 394 } 395 396 void XMLDocumentParser::detach() 397 { 398 clearCurrentNodeStack(); 399 ScriptableDocumentParser::detach(); 400 } 401 402 void XMLDocumentParser::end() 403 { 404 // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called. 405 // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case. 406 ASSERT(!m_parsingFragment); 407 408 doEnd(); 409 410 // doEnd() call above can detach the parser and null out its document. 411 // In that case, we just bail out. 412 if (isDetached()) 413 return; 414 415 // doEnd() could process a script tag, thus pausing parsing. 416 if (m_parserPaused) 417 return; 418 419 if (m_sawError) 420 insertErrorMessageBlock(); 421 else { 422 exitText(); 423 document()->styleResolverChanged(RecalcStyleImmediately); 424 } 425 426 if (isParsing()) 427 prepareToStopParsing(); 428 document()->setReadyState(Document::Interactive); 429 clearCurrentNodeStack(); 430 document()->finishedParsing(); 431 } 432 433 void XMLDocumentParser::finish() 434 { 435 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not 436 // makes sense to call any methods on DocumentParser once it's been stopped. 437 // However, FrameLoader::stop calls DocumentParser::finish unconditionally. 438 439 if (m_parserPaused) 440 m_finishCalled = true; 441 else 442 end(); 443 } 444 445 void XMLDocumentParser::insertErrorMessageBlock() 446 { 447 m_xmlErrors.insertErrorMessageBlock(); 448 } 449 450 void XMLDocumentParser::notifyFinished(Resource* unusedResource) 451 { 452 ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript); 453 ASSERT(m_pendingScript->accessCount() > 0); 454 455 ScriptSourceCode sourceCode(m_pendingScript.get()); 456 bool errorOccurred = m_pendingScript->errorOccurred(); 457 bool wasCanceled = m_pendingScript->wasCanceled(); 458 459 m_pendingScript->removeClient(this); 460 m_pendingScript = 0; 461 462 RefPtr<Element> e = m_scriptElement; 463 m_scriptElement = 0; 464 465 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get()); 466 ASSERT(scriptLoader); 467 468 // JavaScript can detach this parser, make sure it's kept alive even if detached. 469 RefPtr<XMLDocumentParser> protect(this); 470 471 if (errorOccurred) 472 scriptLoader->dispatchErrorEvent(); 473 else if (!wasCanceled) { 474 if (scriptLoader->executePotentiallyCrossOriginScript(sourceCode)) 475 scriptLoader->dispatchLoadEvent(); 476 } 477 478 m_scriptElement = 0; 479 480 if (!isDetached() && !m_requestingScript) 481 resumeParsing(); 482 } 483 484 bool XMLDocumentParser::isWaitingForScripts() const 485 { 486 return m_pendingScript; 487 } 488 489 void XMLDocumentParser::pauseParsing() 490 { 491 if (m_parsingFragment) 492 return; 493 494 m_parserPaused = true; 495 } 496 497 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 498 { 499 if (!chunk.length()) 500 return true; 501 502 // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm: 503 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm 504 // For now we have a hack for script/style innerHTML support: 505 if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) { 506 fragment->parserAppendChild(fragment->document().createTextNode(chunk)); 507 return true; 508 } 509 510 RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy); 511 bool wellFormed = parser->appendFragmentSource(chunk); 512 // Do not call finish(). Current finish() and doEnd() implementations touch the main Document/loader 513 // and can cause crashes in the fragment case. 514 parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction. 515 return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than wellFormed(). 516 } 517 518 static int globalDescriptor = 0; 519 static ThreadIdentifier libxmlLoaderThread = 0; 520 521 static int matchFunc(const char*) 522 { 523 // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid 524 // interfering with client applications that also use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353 525 return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread; 526 } 527 528 class OffsetBuffer { 529 WTF_MAKE_FAST_ALLOCATED; 530 public: 531 OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { } 532 533 int readOutBytes(char* outputBuffer, unsigned askedToRead) 534 { 535 unsigned bytesLeft = m_buffer.size() - m_currentOffset; 536 unsigned lenToCopy = min(askedToRead, bytesLeft); 537 if (lenToCopy) { 538 memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy); 539 m_currentOffset += lenToCopy; 540 } 541 return lenToCopy; 542 } 543 544 private: 545 Vector<char> m_buffer; 546 unsigned m_currentOffset; 547 }; 548 549 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy) 550 { 551 if (!scriptingContentIsAllowed(parserContentPolicy)) 552 element->stripScriptingAttributes(attributeVector); 553 element->parserSetAttributes(attributeVector); 554 } 555 556 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit) 557 { 558 // Hack around libxml2's lack of encoding overide support by manually 559 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml 560 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks 561 // and switch encodings, causing the parse to fail. 562 if (is8Bit) { 563 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); 564 return; 565 } 566 567 const UChar BOM = 0xFEFF; 568 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); 569 xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); 570 } 571 572 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk) 573 { 574 bool is8Bit = chunk.is8Bit(); 575 switchEncoding(ctxt, is8Bit); 576 if (is8Bit) 577 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0); 578 else 579 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0); 580 } 581 582 static void finishParsing(xmlParserCtxtPtr ctxt) 583 { 584 xmlParseChunk(ctxt, 0, 0, 1); 585 } 586 587 #define xmlParseChunk #error "Use parseChunk instead to select the correct encoding." 588 589 static bool isLibxmlDefaultCatalogFile(const String& urlString) 590 { 591 // On non-Windows platforms libxml asks for this URL, the 592 // "XML_XML_DEFAULT_CATALOG", on initialization. 593 if (urlString == "file:///etc/xml/catalog") 594 return true; 595 596 // On Windows, libxml computes a URL relative to where its DLL resides. 597 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false)) 598 return true; 599 return false; 600 } 601 602 static bool shouldAllowExternalLoad(const KURL& url) 603 { 604 String urlString = url.string(); 605 606 // This isn't really necessary now that initializeLibXMLIfNecessary 607 // disables catalog support in libxml, but keeping it for defense in depth. 608 if (isLibxmlDefaultCatalogFile(url)) 609 return false; 610 611 // The most common DTD. There isn't much point in hammering www.w3c.org 612 // by requesting this URL for every XHTML document. 613 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false)) 614 return false; 615 616 // Similarly, there isn't much point in requesting the SVG DTD. 617 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false)) 618 return false; 619 620 // The libxml doesn't give us a lot of context for deciding whether to 621 // allow this request. In the worst case, this load could be for an 622 // external entity and the resulting document could simply read the 623 // retrieved content. If we had more context, we could potentially allow 624 // the parser to load a DTD. As things stand, we take the conservative 625 // route and allow same-origin requests only. 626 if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) { 627 XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url); 628 return false; 629 } 630 631 return true; 632 } 633 634 static void* openFunc(const char* uri) 635 { 636 ASSERT(XMLDocumentParserScope::currentFetcher); 637 ASSERT(currentThread() == libxmlLoaderThread); 638 639 KURL url(KURL(), uri); 640 641 if (!shouldAllowExternalLoad(url)) 642 return &globalDescriptor; 643 644 KURL finalURL; 645 Vector<char> data; 646 647 { 648 ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher; 649 XMLDocumentParserScope scope(0); 650 // FIXME: We should restore the original global error handler as well. 651 652 if (fetcher->frame()) { 653 FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions()); 654 ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request); 655 if (resource && !resource->errorOccurred()) { 656 resource->resourceBuffer()->moveTo(data); 657 finalURL = resource->response().url(); 658 } 659 } 660 } 661 662 // We have to check the URL again after the load to catch redirects. 663 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>. 664 if (!shouldAllowExternalLoad(finalURL)) 665 return &globalDescriptor; 666 667 return new OffsetBuffer(data); 668 } 669 670 static int readFunc(void* context, char* buffer, int len) 671 { 672 // Do 0-byte reads in case of a null descriptor 673 if (context == &globalDescriptor) 674 return 0; 675 676 OffsetBuffer* data = static_cast<OffsetBuffer*>(context); 677 return data->readOutBytes(buffer, len); 678 } 679 680 static int writeFunc(void*, const char*, int) 681 { 682 // Always just do 0-byte writes 683 return 0; 684 } 685 686 static int closeFunc(void* context) 687 { 688 if (context != &globalDescriptor) { 689 OffsetBuffer* data = static_cast<OffsetBuffer*>(context); 690 delete data; 691 } 692 return 0; 693 } 694 695 static void errorFunc(void*, const char*, ...) 696 { 697 // FIXME: It would be nice to display error messages somewhere. 698 } 699 700 static void initializeLibXMLIfNecessary() 701 { 702 static bool didInit = false; 703 if (didInit) 704 return; 705 706 // We don't want libxml to try and load catalogs. 707 // FIXME: It's not nice to set global settings in libxml, embedders of Blink 708 // could be trying to use libxml themselves. 709 xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); 710 xmlInitParser(); 711 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); 712 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); 713 libxmlLoaderThread = currentThread(); 714 didInit = true; 715 } 716 717 718 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData) 719 { 720 initializeLibXMLIfNecessary(); 721 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0); 722 parser->_private = userData; 723 parser->replaceEntities = true; 724 return adoptRef(new XMLParserContext(parser)); 725 } 726 727 // Chunk should be encoded in UTF-8 728 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk) 729 { 730 initializeLibXMLIfNecessary(); 731 732 // appendFragmentSource() checks that the length doesn't overflow an int. 733 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length()); 734 735 if (!parser) 736 return 0; 737 738 // Copy the sax handler 739 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler)); 740 741 // Set parser options. 742 // XML_PARSE_NODICT: default dictionary option. 743 // XML_PARSE_NOENT: force entities substitutions. 744 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT); 745 746 // Internal initialization 747 parser->sax2 = 1; 748 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT 749 parser->depth = 0; 750 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3); 751 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5); 752 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36); 753 parser->_private = userData; 754 755 return adoptRef(new XMLParserContext(parser)); 756 } 757 758 // -------------------------------- 759 760 bool XMLDocumentParser::supportsXMLVersion(const String& version) 761 { 762 return version == "1.0"; 763 } 764 765 XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView) 766 : ScriptableDocumentParser(document) 767 , m_view(frameView) 768 , m_context(0) 769 , m_currentNode(document) 770 , m_isCurrentlyParsing8BitChunk(false) 771 , m_sawError(false) 772 , m_sawCSS(false) 773 , m_sawXSLTransform(false) 774 , m_sawFirstElement(false) 775 , m_isXHTMLDocument(false) 776 , m_parserPaused(false) 777 , m_requestingScript(false) 778 , m_finishCalled(false) 779 , m_xmlErrors(document) 780 , m_pendingScript(0) 781 , m_scriptStartPosition(TextPosition::belowRangePosition()) 782 , m_parsingFragment(false) 783 { 784 // This is XML being used as a document resource. 785 UseCounter::count(*document, UseCounter::XMLDocument); 786 } 787 788 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy) 789 : ScriptableDocumentParser(&fragment->document(), parserContentPolicy) 790 , m_view(0) 791 , m_context(0) 792 , m_currentNode(fragment) 793 , m_isCurrentlyParsing8BitChunk(false) 794 , m_sawError(false) 795 , m_sawCSS(false) 796 , m_sawXSLTransform(false) 797 , m_sawFirstElement(false) 798 , m_isXHTMLDocument(false) 799 , m_parserPaused(false) 800 , m_requestingScript(false) 801 , m_finishCalled(false) 802 , m_xmlErrors(&fragment->document()) 803 , m_pendingScript(0) 804 , m_scriptStartPosition(TextPosition::belowRangePosition()) 805 , m_parsingFragment(true) 806 { 807 fragment->ref(); 808 809 // Add namespaces based on the parent node 810 Vector<Element*> elemStack; 811 while (parentElement) { 812 elemStack.append(parentElement); 813 814 ContainerNode* n = parentElement->parentNode(); 815 if (!n || !n->isElementNode()) 816 break; 817 parentElement = toElement(n); 818 } 819 820 if (elemStack.isEmpty()) 821 return; 822 823 for (; !elemStack.isEmpty(); elemStack.removeLast()) { 824 Element* element = elemStack.last(); 825 if (element->hasAttributes()) { 826 for (unsigned i = 0; i < element->attributeCount(); i++) { 827 const Attribute* attribute = element->attributeItem(i); 828 if (attribute->localName() == xmlnsAtom) 829 m_defaultNamespaceURI = attribute->value(); 830 else if (attribute->prefix() == xmlnsAtom) 831 m_prefixToNamespaceMap.set(attribute->localName(), attribute->value()); 832 } 833 } 834 } 835 836 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace. 837 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument()) 838 m_defaultNamespaceURI = parentElement->namespaceURI(); 839 } 840 841 XMLParserContext::~XMLParserContext() 842 { 843 if (m_context->myDoc) 844 xmlFreeDoc(m_context->myDoc); 845 xmlFreeParserCtxt(m_context); 846 } 847 848 XMLDocumentParser::~XMLDocumentParser() 849 { 850 // The XMLDocumentParser will always be detached before being destroyed. 851 ASSERT(m_currentNodeStack.isEmpty()); 852 ASSERT(!m_currentNode); 853 854 // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp! 855 if (m_pendingScript) 856 m_pendingScript->removeClient(this); 857 } 858 859 void XMLDocumentParser::doWrite(const String& parseString) 860 { 861 ASSERT(!isDetached()); 862 if (!m_context) 863 initializeParserContext(); 864 865 // Protect the libxml context from deletion during a callback 866 RefPtr<XMLParserContext> context = m_context; 867 868 // libXML throws an error if you try to switch the encoding for an empty string. 869 if (parseString.length()) { 870 // JavaScript may cause the parser to detach during parseChunk 871 // keep this alive until this function is done. 872 RefPtr<XMLDocumentParser> protect(this); 873 874 XMLDocumentParserScope scope(document()->fetcher()); 875 TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit()); 876 parseChunk(context->context(), parseString); 877 878 // JavaScript (which may be run under the parseChunk callstack) may 879 // cause the parser to be stopped or detached. 880 if (isStopped()) 881 return; 882 } 883 884 // FIXME: Why is this here? And why is it after we process the passed source? 885 if (document()->sawDecodingError()) { 886 // If the decoder saw an error, report it as fatal (stops parsing) 887 TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col)); 888 handleError(XMLErrors::fatal, "Encoding error", position); 889 } 890 } 891 892 struct _xmlSAX2Namespace { 893 const xmlChar* prefix; 894 const xmlChar* uri; 895 }; 896 typedef struct _xmlSAX2Namespace xmlSAX2Namespace; 897 898 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState) 899 { 900 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces); 901 for (int i = 0; i < nbNamespaces; i++) { 902 AtomicString namespaceQName = xmlnsAtom; 903 AtomicString namespaceURI = toAtomicString(namespaces[i].uri); 904 if (namespaces[i].prefix) 905 namespaceQName = "xmlns:" + toString(namespaces[i].prefix); 906 907 QualifiedName parsedName = anyName; 908 if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState)) 909 return; 910 911 prefixedAttributes.append(Attribute(parsedName, namespaceURI)); 912 } 913 } 914 915 struct _xmlSAX2Attributes { 916 const xmlChar* localname; 917 const xmlChar* prefix; 918 const xmlChar* uri; 919 const xmlChar* value; 920 const xmlChar* end; 921 }; 922 typedef struct _xmlSAX2Attributes xmlSAX2Attributes; 923 924 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState) 925 { 926 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); 927 for (int i = 0; i < nbAttributes; i++) { 928 int valueLength = static_cast<int>(attributes[i].end - attributes[i].value); 929 AtomicString attrValue = toAtomicString(attributes[i].value, valueLength); 930 String attrPrefix = toString(attributes[i].prefix); 931 AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri); 932 AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname); 933 934 QualifiedName parsedName = anyName; 935 if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState)) 936 return; 937 938 prefixedAttributes.append(Attribute(parsedName, attrValue)); 939 } 940 } 941 942 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces, 943 const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes) 944 { 945 if (isStopped()) 946 return; 947 948 if (m_parserPaused) { 949 m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces, 950 nbAttributes, nbDefaulted, libxmlAttributes))); 951 return; 952 } 953 954 exitText(); 955 956 AtomicString adjustedURI = uri; 957 if (m_parsingFragment && adjustedURI.isNull()) { 958 if (!prefix.isNull()) 959 adjustedURI = m_prefixToNamespaceMap.get(prefix); 960 else 961 adjustedURI = m_defaultNamespaceURI; 962 } 963 964 bool isFirstElement = !m_sawFirstElement; 965 m_sawFirstElement = true; 966 967 QualifiedName qName(prefix, localName, adjustedURI); 968 RefPtr<Element> newElement = m_currentNode->document().createElement(qName, true); 969 if (!newElement) { 970 stopParsing(); 971 return; 972 } 973 974 Vector<Attribute> prefixedAttributes; 975 TrackExceptionState exceptionState; 976 handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState); 977 if (exceptionState.hadException()) { 978 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy()); 979 stopParsing(); 980 return; 981 } 982 983 handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState); 984 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy()); 985 if (exceptionState.hadException()) { 986 stopParsing(); 987 return; 988 } 989 990 newElement->beginParsingChildren(); 991 992 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get()); 993 if (scriptLoader) 994 m_scriptStartPosition = textPosition(); 995 996 m_currentNode->parserAppendChild(newElement.get()); 997 998 if (newElement->hasTagName(HTMLNames::templateTag)) 999 pushCurrentNode(toHTMLTemplateElement(newElement.get())->content()); 1000 else 1001 pushCurrentNode(newElement.get()); 1002 1003 if (isHTMLHtmlElement(newElement.get())) 1004 toHTMLHtmlElement(newElement)->insertedByParser(); 1005 1006 if (!m_parsingFragment && isFirstElement && document()->frame()) 1007 document()->frame()->loader().dispatchDocumentElementAvailable(); 1008 } 1009 1010 void XMLDocumentParser::endElementNs() 1011 { 1012 if (isStopped()) 1013 return; 1014 1015 if (m_parserPaused) { 1016 m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback())); 1017 return; 1018 } 1019 1020 // JavaScript can detach the parser. Make sure this is not released 1021 // before the end of this method. 1022 RefPtr<XMLDocumentParser> protect(this); 1023 1024 exitText(); 1025 1026 RefPtr<ContainerNode> n = m_currentNode; 1027 n->finishParsingChildren(); 1028 1029 if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) { 1030 popCurrentNode(); 1031 n->remove(IGNORE_EXCEPTION); 1032 return; 1033 } 1034 1035 if (!n->isElementNode() || !m_view) { 1036 popCurrentNode(); 1037 return; 1038 } 1039 1040 Element* element = toElement(n); 1041 1042 // The element's parent may have already been removed from document. 1043 // Parsing continues in this case, but scripts aren't executed. 1044 if (!element->inDocument()) { 1045 popCurrentNode(); 1046 return; 1047 } 1048 1049 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element); 1050 if (!scriptLoader) { 1051 popCurrentNode(); 1052 return; 1053 } 1054 1055 // Don't load external scripts for standalone documents (for now). 1056 ASSERT(!m_pendingScript); 1057 m_requestingScript = true; 1058 1059 if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) { 1060 // FIXME: Script execution should be shared between 1061 // the libxml2 and Qt XMLDocumentParser implementations. 1062 1063 if (scriptLoader->readyToBeParserExecuted()) { 1064 scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition)); 1065 } else if (scriptLoader->willBeParserExecuted()) { 1066 m_pendingScript = scriptLoader->resource(); 1067 m_scriptElement = element; 1068 m_pendingScript->addClient(this); 1069 1070 // m_pendingScript will be 0 if script was already loaded and addClient() executed it. 1071 if (m_pendingScript) 1072 pauseParsing(); 1073 } else { 1074 m_scriptElement = 0; 1075 } 1076 1077 // JavaScript may have detached the parser 1078 if (isDetached()) 1079 return; 1080 } 1081 m_requestingScript = false; 1082 popCurrentNode(); 1083 } 1084 1085 void XMLDocumentParser::characters(const xmlChar* chars, int length) 1086 { 1087 if (isStopped()) 1088 return; 1089 1090 if (m_parserPaused) { 1091 m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length))); 1092 return; 1093 } 1094 1095 if (!m_leafTextNode) 1096 enterText(); 1097 m_bufferedText.append(chars, length); 1098 } 1099 1100 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args) 1101 { 1102 if (isStopped()) 1103 return; 1104 1105 #if HAVE(VASPRINTF) 1106 char* formattedMessage; 1107 if (vasprintf(&formattedMessage, message, args) == -1) 1108 return; 1109 #else 1110 char formattedMessage[1024]; 1111 vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args); 1112 #endif 1113 1114 if (m_parserPaused) { 1115 m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber()))); 1116 #if HAVE(VASPRINTF) 1117 free(formattedMessage); 1118 #endif 1119 return; 1120 } 1121 1122 handleError(type, formattedMessage, textPosition()); 1123 1124 #if HAVE(VASPRINTF) 1125 free(formattedMessage); 1126 #endif 1127 } 1128 1129 void XMLDocumentParser::processingInstruction(const String& target, const String& data) 1130 { 1131 if (isStopped()) 1132 return; 1133 1134 if (m_parserPaused) { 1135 m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target ,data))); 1136 return; 1137 } 1138 1139 exitText(); 1140 1141 // ### handle exceptions 1142 TrackExceptionState exceptionState; 1143 RefPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState); 1144 if (exceptionState.hadException()) 1145 return; 1146 1147 pi->setCreatedByParser(true); 1148 1149 m_currentNode->parserAppendChild(pi.get()); 1150 1151 pi->finishParsingChildren(); 1152 1153 if (pi->isCSS()) 1154 m_sawCSS = true; 1155 1156 if (!RuntimeEnabledFeatures::xsltEnabled()) 1157 return; 1158 1159 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL(); 1160 if (m_sawXSLTransform && !document()->transformSourceDocument()) { 1161 // This behavior is very tricky. We call stopParsing() here because we want to stop processing the document 1162 // until we're ready to apply the transform, but we actually still want to be fed decoded string pieces to 1163 // accumulate in m_originalSourceForTransform. So, we call stopParsing() here and 1164 // check isStopped() in element callbacks. 1165 // FIXME: This contradicts the contract of DocumentParser. 1166 stopParsing(); 1167 } 1168 } 1169 1170 void XMLDocumentParser::cdataBlock(const String& text) 1171 { 1172 if (isStopped()) 1173 return; 1174 1175 if (m_parserPaused) { 1176 m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text))); 1177 return; 1178 } 1179 1180 exitText(); 1181 1182 RefPtr<CDATASection> newNode = CDATASection::create(m_currentNode->document(), text); 1183 m_currentNode->parserAppendChild(newNode.get()); 1184 } 1185 1186 void XMLDocumentParser::comment(const String& text) 1187 { 1188 if (isStopped()) 1189 return; 1190 1191 if (m_parserPaused) { 1192 m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text))); 1193 return; 1194 } 1195 1196 exitText(); 1197 1198 RefPtr<Comment> newNode = Comment::create(m_currentNode->document(), text); 1199 m_currentNode->parserAppendChild(newNode.get()); 1200 } 1201 1202 enum StandaloneInfo { 1203 StandaloneUnspecified = -2, 1204 NoXMlDeclaration, 1205 StandaloneNo, 1206 StandaloneYes 1207 }; 1208 1209 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone) 1210 { 1211 StandaloneInfo standaloneInfo = (StandaloneInfo)standalone; 1212 if (standaloneInfo == NoXMlDeclaration) { 1213 document()->setHasXMLDeclaration(false); 1214 return; 1215 } 1216 1217 if (!version.isNull()) 1218 document()->setXMLVersion(version, ASSERT_NO_EXCEPTION); 1219 if (standalone != StandaloneUnspecified) 1220 document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION); 1221 if (!encoding.isNull()) 1222 document()->setXMLEncoding(encoding); 1223 document()->setHasXMLDeclaration(true); 1224 } 1225 1226 void XMLDocumentParser::endDocument() 1227 { 1228 exitText(); 1229 } 1230 1231 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID) 1232 { 1233 if (isStopped()) 1234 return; 1235 1236 if (m_parserPaused) { 1237 m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID))); 1238 return; 1239 } 1240 1241 if (document()) 1242 document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID)); 1243 } 1244 1245 static inline XMLDocumentParser* getParser(void* closure) 1246 { 1247 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1248 return static_cast<XMLDocumentParser*>(ctxt->_private); 1249 } 1250 1251 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes) 1252 { 1253 getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes); 1254 } 1255 1256 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*) 1257 { 1258 getParser(closure)->endElementNs(); 1259 } 1260 1261 static void charactersHandler(void* closure, const xmlChar* chars, int length) 1262 { 1263 getParser(closure)->characters(chars, length); 1264 } 1265 1266 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data) 1267 { 1268 getParser(closure)->processingInstruction(toString(target), toString(data)); 1269 } 1270 1271 static void cdataBlockHandler(void* closure, const xmlChar* text, int length) 1272 { 1273 getParser(closure)->cdataBlock(toString(text, length)); 1274 } 1275 1276 static void commentHandler(void* closure, const xmlChar* text) 1277 { 1278 getParser(closure)->comment(toString(text)); 1279 } 1280 1281 WTF_ATTRIBUTE_PRINTF(2, 3) 1282 static void warningHandler(void* closure, const char* message, ...) 1283 { 1284 va_list args; 1285 va_start(args, message); 1286 getParser(closure)->error(XMLErrors::warning, message, args); 1287 va_end(args); 1288 } 1289 1290 WTF_ATTRIBUTE_PRINTF(2, 3) 1291 static void fatalErrorHandler(void* closure, const char* message, ...) 1292 { 1293 va_list args; 1294 va_start(args, message); 1295 getParser(closure)->error(XMLErrors::fatal, message, args); 1296 va_end(args); 1297 } 1298 1299 WTF_ATTRIBUTE_PRINTF(2, 3) 1300 static void normalErrorHandler(void* closure, const char* message, ...) 1301 { 1302 va_list args; 1303 va_start(args, message); 1304 getParser(closure)->error(XMLErrors::nonFatal, message, args); 1305 va_end(args); 1306 } 1307 1308 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is 1309 // a hack to avoid malloc/free. Using a global variable like this could cause trouble 1310 // if libxml implementation details were to change 1311 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 1312 1313 static xmlEntityPtr sharedXHTMLEntity() 1314 { 1315 static xmlEntity entity; 1316 if (!entity.type) { 1317 entity.type = XML_ENTITY_DECL; 1318 entity.orig = sharedXHTMLEntityResult; 1319 entity.content = sharedXHTMLEntityResult; 1320 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY; 1321 } 1322 return &entity; 1323 } 1324 1325 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize) 1326 { 1327 const char* originalTarget = target; 1328 WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity, 1329 utf16Entity + numberOfCodeUnits, &target, target + targetSize); 1330 if (conversionResult != WTF::Unicode::conversionOK) 1331 return 0; 1332 1333 // Even though we must pass the length, libxml expects the entity string to be null terminated. 1334 ASSERT(target > originalTarget + 1); 1335 *target = '\0'; 1336 return target - originalTarget; 1337 } 1338 1339 static xmlEntityPtr getXHTMLEntity(const xmlChar* name) 1340 { 1341 UChar utf16DecodedEntity[4]; 1342 size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity); 1343 if (!numberOfCodeUnits) 1344 return 0; 1345 1346 ASSERT(numberOfCodeUnits <= 4); 1347 size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits, 1348 reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult)); 1349 if (!entityLengthInUTF8) 1350 return 0; 1351 1352 xmlEntityPtr entity = sharedXHTMLEntity(); 1353 entity->length = entityLengthInUTF8; 1354 entity->name = name; 1355 return entity; 1356 } 1357 1358 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name) 1359 { 1360 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1361 xmlEntityPtr ent = xmlGetPredefinedEntity(name); 1362 if (ent) { 1363 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY; 1364 return ent; 1365 } 1366 1367 ent = xmlGetDocEntity(ctxt->myDoc, name); 1368 if (!ent && getParser(closure)->isXHTMLDocument()) { 1369 ent = getXHTMLEntity(name); 1370 if (ent) 1371 ent->etype = XML_INTERNAL_GENERAL_ENTITY; 1372 } 1373 1374 return ent; 1375 } 1376 1377 static void startDocumentHandler(void* closure) 1378 { 1379 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure); 1380 XMLDocumentParser* parser = getParser(closure); 1381 switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk()); 1382 parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone); 1383 xmlSAX2StartDocument(closure); 1384 } 1385 1386 static void endDocumentHandler(void* closure) 1387 { 1388 getParser(closure)->endDocument(); 1389 xmlSAX2EndDocument(closure); 1390 } 1391 1392 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID) 1393 { 1394 getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID)); 1395 xmlSAX2InternalSubset(closure, name, externalID, systemID); 1396 } 1397 1398 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*) 1399 { 1400 String extId = toString(externalId); 1401 if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN") 1402 || (extId == "-//W3C//DTD XHTML 1.1//EN") 1403 || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN") 1404 || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN") 1405 || (extId == "-//W3C//DTD XHTML Basic 1.0//EN") 1406 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN") 1407 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN") 1408 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN") 1409 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") 1410 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN")) 1411 getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not. 1412 } 1413 1414 static void ignorableWhitespaceHandler(void*, const xmlChar*, int) 1415 { 1416 // nothing to do, but we need this to work around a crasher 1417 // http://bugzilla.gnome.org/show_bug.cgi?id=172255 1418 // http://bugs.webkit.org/show_bug.cgi?id=5792 1419 } 1420 1421 void XMLDocumentParser::initializeParserContext(const CString& chunk) 1422 { 1423 xmlSAXHandler sax; 1424 memset(&sax, 0, sizeof(sax)); 1425 1426 sax.error = normalErrorHandler; 1427 sax.fatalError = fatalErrorHandler; 1428 sax.characters = charactersHandler; 1429 sax.processingInstruction = processingInstructionHandler; 1430 sax.cdataBlock = cdataBlockHandler; 1431 sax.comment = commentHandler; 1432 sax.warning = warningHandler; 1433 sax.startElementNs = startElementNsHandler; 1434 sax.endElementNs = endElementNsHandler; 1435 sax.getEntity = getEntityHandler; 1436 sax.startDocument = startDocumentHandler; 1437 sax.endDocument = endDocumentHandler; 1438 sax.internalSubset = internalSubsetHandler; 1439 sax.externalSubset = externalSubsetHandler; 1440 sax.ignorableWhitespace = ignorableWhitespaceHandler; 1441 sax.entityDecl = xmlSAX2EntityDecl; 1442 sax.initialized = XML_SAX2_MAGIC; 1443 DocumentParser::startParsing(); 1444 m_sawError = false; 1445 m_sawCSS = false; 1446 m_sawXSLTransform = false; 1447 m_sawFirstElement = false; 1448 1449 XMLDocumentParserScope scope(document()->fetcher()); 1450 if (m_parsingFragment) 1451 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk); 1452 else { 1453 ASSERT(!chunk.data()); 1454 m_context = XMLParserContext::createStringParser(&sax, this); 1455 } 1456 } 1457 1458 void XMLDocumentParser::doEnd() 1459 { 1460 if (!isStopped()) { 1461 if (m_context) { 1462 // Tell libxml we're done. 1463 { 1464 XMLDocumentParserScope scope(document()->fetcher()); 1465 finishParsing(context()); 1466 } 1467 1468 m_context = 0; 1469 } 1470 } 1471 1472 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document()); 1473 if (xmlViewerMode) { 1474 XMLTreeViewer xmlTreeViewer(document()); 1475 xmlTreeViewer.transformDocumentToTreeView(); 1476 } else if (m_sawXSLTransform) { 1477 xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string()); 1478 document()->setTransformSource(adoptPtr(new TransformSource(doc))); 1479 1480 document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets. 1481 document()->styleResolverChanged(RecalcStyleImmediately); 1482 1483 // styleResolverChanged() call can detach the parser and null out its document. 1484 // In that case, we just bail out. 1485 if (isDetached()) 1486 return; 1487 1488 document()->setParsing(true); 1489 DocumentParser::stopParsing(); 1490 } 1491 } 1492 1493 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url) 1494 { 1495 if (source.isEmpty()) 1496 return 0; 1497 // Parse in a single chunk into an xmlDocPtr 1498 // FIXME: Hook up error handlers so that a failure to parse the main document results in 1499 // good error messages. 1500 XMLDocumentParserScope scope(fetcher, errorFunc, 0); 1501 XMLParserInput input(source); 1502 return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS); 1503 } 1504 1505 OrdinalNumber XMLDocumentParser::lineNumber() const 1506 { 1507 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1); 1508 } 1509 1510 OrdinalNumber XMLDocumentParser::columnNumber() const 1511 { 1512 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1); 1513 } 1514 1515 TextPosition XMLDocumentParser::textPosition() const 1516 { 1517 xmlParserCtxtPtr context = this->context(); 1518 if (!context) 1519 return TextPosition::minimumPosition(); 1520 return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), 1521 OrdinalNumber::fromOneBasedInt(context->input->col)); 1522 } 1523 1524 void XMLDocumentParser::stopParsing() 1525 { 1526 DocumentParser::stopParsing(); 1527 if (context()) 1528 xmlStopParser(context()); 1529 } 1530 1531 void XMLDocumentParser::resumeParsing() 1532 { 1533 ASSERT(!isDetached()); 1534 ASSERT(m_parserPaused); 1535 1536 m_parserPaused = false; 1537 1538 // First, execute any pending callbacks 1539 while (!m_pendingCallbacks.isEmpty()) { 1540 OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst(); 1541 callback->call(this); 1542 1543 // A callback paused the parser 1544 if (m_parserPaused) 1545 return; 1546 } 1547 1548 // Then, write any pending data 1549 SegmentedString rest = m_pendingSrc; 1550 m_pendingSrc.clear(); 1551 // There is normally only one string left, so toString() shouldn't copy. 1552 // In any case, the XML parser runs on the main thread and it's OK if 1553 // the passed string has more than one reference. 1554 append(rest.toString().impl()); 1555 1556 // Finally, if finish() has been called and write() didn't result 1557 // in any further callbacks being queued, call end() 1558 if (m_finishCalled && m_pendingCallbacks.isEmpty()) 1559 end(); 1560 } 1561 1562 bool XMLDocumentParser::appendFragmentSource(const String& chunk) 1563 { 1564 ASSERT(!m_context); 1565 ASSERT(m_parsingFragment); 1566 1567 CString chunkAsUtf8 = chunk.utf8(); 1568 1569 // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB. 1570 if (chunkAsUtf8.length() > INT_MAX) 1571 return false; 1572 1573 initializeParserContext(chunkAsUtf8); 1574 xmlParseContent(context()); 1575 endDocument(); // Close any open text nodes. 1576 1577 // FIXME: If this code is actually needed, it should probably move to finish() 1578 // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). 1579 // Check if all the chunk has been processed. 1580 long bytesProcessed = xmlByteConsumed(context()); 1581 if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) { 1582 // FIXME: I don't believe we can hit this case without also having seen an error or a null byte. 1583 // If we hit this ASSERT, we've found a test case which demonstrates the need for this code. 1584 ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed])); 1585 return false; 1586 } 1587 1588 // No error if the chunk is well formed or it is not but we have no error. 1589 return context()->wellFormed || !xmlCtxtGetLastError(context()); 1590 } 1591 1592 // -------------------------------- 1593 1594 struct AttributeParseState { 1595 HashMap<String, String> attributes; 1596 bool gotAttributes; 1597 }; 1598 1599 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/, 1600 const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/, 1601 int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes) 1602 { 1603 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0) 1604 return; 1605 1606 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1607 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private); 1608 1609 state->gotAttributes = true; 1610 1611 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); 1612 for (int i = 0; i < nbAttributes; i++) { 1613 String attrLocalName = toString(attributes[i].localname); 1614 int valueLength = (int) (attributes[i].end - attributes[i].value); 1615 String attrValue = toString(attributes[i].value, valueLength); 1616 String attrPrefix = toString(attributes[i].prefix); 1617 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName; 1618 1619 state->attributes.set(attrQName, attrValue); 1620 } 1621 } 1622 1623 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK) 1624 { 1625 AttributeParseState state; 1626 state.gotAttributes = false; 1627 1628 xmlSAXHandler sax; 1629 memset(&sax, 0, sizeof(sax)); 1630 sax.startElementNs = attributesStartElementNsHandler; 1631 sax.initialized = XML_SAX2_MAGIC; 1632 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state); 1633 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />"; 1634 parseChunk(parser->context(), parseString); 1635 finishParsing(parser->context()); 1636 attrsOK = state.gotAttributes; 1637 return state.attributes; 1638 } 1639 1640 } // namespace WebCore 1641