1 /* 2 * Copyright (C) 2000 Peter Kelly (pmk (at) post.com) 3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved. 4 * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org) 5 * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org) 6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 7 * Copyright (C) 2008 Holger Hans Peter Freyther 8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 9 * 10 * This library is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Library General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Library General Public License for more details. 19 * 20 * You should have received a copy of the GNU Library General Public License 21 * along with this library; see the file COPYING.LIB. If not, write to 22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 23 * Boston, MA 02110-1301, USA. 
24 */ 25 26 #include "config.h" 27 #include "core/xml/parser/XMLDocumentParser.h" 28 29 #include <libxml/parser.h> 30 #include <libxml/parserInternals.h> 31 #include <libxslt/xslt.h> 32 #include <wtf/StringExtras.h> 33 #include <wtf/text/CString.h> 34 #include <wtf/Threading.h> 35 #include <wtf/unicode/UTF8.h> 36 #include <wtf/UnusedParam.h> 37 #include <wtf/Vector.h> 38 #include "HTMLNames.h" 39 #include "XMLNSNames.h" 40 #include "bindings/v8/ExceptionState.h" 41 #include "bindings/v8/ExceptionStatePlaceholder.h" 42 #include "bindings/v8/ScriptController.h" 43 #include "bindings/v8/ScriptSourceCode.h" 44 #include "core/dom/CDATASection.h" 45 #include "core/dom/Comment.h" 46 #include "core/dom/Document.h" 47 #include "core/dom/DocumentFragment.h" 48 #include "core/dom/DocumentType.h" 49 #include "core/dom/ProcessingInstruction.h" 50 #include "core/dom/ScriptLoader.h" 51 #include "core/dom/TransformSource.h" 52 #include "core/html/HTMLHtmlElement.h" 53 #include "core/html/HTMLTemplateElement.h" 54 #include "core/html/parser/HTMLEntityParser.h" 55 #include "core/loader/FrameLoader.h" 56 #include "core/loader/ImageLoader.h" 57 #include "core/loader/TextResourceDecoder.h" 58 #include "core/loader/cache/ResourceFetcher.h" 59 #include "core/loader/cache/ScriptResource.h" 60 #include "core/page/Frame.h" 61 #include "core/page/UseCounter.h" 62 #include "core/platform/network/ResourceError.h" 63 #include "core/platform/network/ResourceRequest.h" 64 #include "core/platform/network/ResourceResponse.h" 65 #include "core/xml/XMLErrors.h" 66 #include "core/xml/XMLTreeViewer.h" 67 #include "core/xml/parser/XMLDocumentParserScope.h" 68 #include "core/xml/parser/XMLParserInput.h" 69 #include "weborigin/SecurityOrigin.h" 70 #include "wtf/TemporaryChange.h" 71 72 using namespace std; 73 74 namespace WebCore { 75 76 using namespace HTMLNames; 77 78 // FIXME: HTMLConstructionSite has a limit of 512, should these match? 
79 static const unsigned maxXMLTreeDepth = 5000; 80 81 static inline String toString(const xmlChar* string, size_t length) 82 { 83 return String::fromUTF8(reinterpret_cast<const char*>(string), length); 84 } 85 86 static inline String toString(const xmlChar* string) 87 { 88 return String::fromUTF8(reinterpret_cast<const char*>(string)); 89 } 90 91 static inline AtomicString toAtomicString(const xmlChar* string, size_t length) 92 { 93 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length); 94 } 95 96 static inline AtomicString toAtomicString(const xmlChar* string) 97 { 98 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string)); 99 } 100 101 static inline bool hasNoStyleInformation(Document* document) 102 { 103 if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument()) 104 return false; 105 106 if (!document->frame() || !document->frame()->page()) 107 return false; 108 109 if (document->frame()->tree()->parent()) 110 return false; // This document is not in a top frame 111 112 return true; 113 } 114 115 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback { 116 public: 117 PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, 118 int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes) 119 : m_localName(localName) 120 , m_prefix(prefix) 121 , m_uri(uri) 122 , m_namespaceCount(namespaceCount) 123 , m_attributeCount(attributeCount) 124 , m_defaultedCount(defaultedCount) 125 { 126 m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2)); 127 for (int i = 0; i < namespaceCount * 2 ; i++) 128 m_namespaces[i] = xmlStrdup(namespaces[i]); 129 m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5)); 130 for (int i = 0; i < attributeCount; i++) { 131 // Each attribute has 5 elements in the array: 132 // 
name, prefix, uri, value and an end pointer. 133 for (int j = 0; j < 3; j++) 134 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]); 135 int length = attributes[i * 5 + 4] - attributes[i * 5 + 3]; 136 m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length); 137 m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length; 138 } 139 } 140 141 virtual ~PendingStartElementNSCallback() 142 { 143 for (int i = 0; i < m_namespaceCount * 2; i++) 144 xmlFree(m_namespaces[i]); 145 xmlFree(m_namespaces); 146 for (int i = 0; i < m_attributeCount; i++) 147 for (int j = 0; j < 4; j++) 148 xmlFree(m_attributes[i * 5 + j]); 149 xmlFree(m_attributes); 150 } 151 152 virtual void call(XMLDocumentParser* parser) OVERRIDE 153 { 154 parser->startElementNs(m_localName, m_prefix, m_uri, 155 m_namespaceCount, const_cast<const xmlChar**>(m_namespaces), 156 m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes)); 157 } 158 159 private: 160 AtomicString m_localName; 161 AtomicString m_prefix; 162 AtomicString m_uri; 163 int m_namespaceCount; 164 xmlChar** m_namespaces; 165 int m_attributeCount; 166 int m_defaultedCount; 167 xmlChar** m_attributes; 168 }; 169 170 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback { 171 public: 172 virtual void call(XMLDocumentParser* parser) OVERRIDE 173 { 174 parser->endElementNs(); 175 } 176 }; 177 178 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback { 179 public: 180 PendingCharactersCallback(const xmlChar* chars, int length) 181 : m_chars(xmlStrndup(chars, length)) 182 , m_length(length) 183 { 184 } 185 186 virtual ~PendingCharactersCallback() 187 { 188 xmlFree(m_chars); 189 } 190 191 virtual void call(XMLDocumentParser* parser) OVERRIDE 192 { 193 parser->characters(m_chars, m_length); 194 } 195 196 private: 197 xmlChar* m_chars; 198 int m_length; 199 }; 200 201 class PendingProcessingInstructionCallback FINAL : public 
XMLDocumentParser::PendingCallback { 202 public: 203 PendingProcessingInstructionCallback(const String& target, const String& data) 204 : m_target(target) 205 , m_data(data) 206 { 207 } 208 209 virtual void call(XMLDocumentParser* parser) OVERRIDE 210 { 211 parser->processingInstruction(m_target, m_data); 212 } 213 214 private: 215 String m_target; 216 String m_data; 217 }; 218 219 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback { 220 public: 221 explicit PendingCDATABlockCallback(const String& text) : m_text(text) { } 222 223 virtual void call(XMLDocumentParser* parser) OVERRIDE 224 { 225 parser->cdataBlock(m_text); 226 } 227 228 private: 229 String m_text; 230 }; 231 232 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback { 233 public: 234 explicit PendingCommentCallback(const String& text) : m_text(text) { } 235 236 virtual void call(XMLDocumentParser* parser) OVERRIDE 237 { 238 parser->comment(m_text); 239 } 240 241 private: 242 String m_text; 243 }; 244 245 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback { 246 public: 247 PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID) 248 : m_name(name) 249 , m_externalID(externalID) 250 , m_systemID(systemID) 251 { 252 } 253 254 virtual void call(XMLDocumentParser* parser) OVERRIDE 255 { 256 parser->internalSubset(m_name, m_externalID, m_systemID); 257 } 258 259 private: 260 String m_name; 261 String m_externalID; 262 String m_systemID; 263 }; 264 265 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback { 266 public: 267 PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber) 268 : m_type(type) 269 , m_message(xmlStrdup(message)) 270 , m_lineNumber(lineNumber) 271 , m_columnNumber(columnNumber) 272 { 273 } 274 275 virtual ~PendingErrorCallback() 276 { 277 xmlFree(m_message); 278 } 279 
// PendingErrorCallback: replay hook and stored error state.
    // Replays the stored error through XMLDocumentParser::handleError(), rebuilding the
    // TextPosition from the saved line and column.
    virtual void call(XMLDocumentParser* parser) OVERRIDE
    {
        parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
    }

private:
    XMLErrors::ErrorType m_type;
    xmlChar* m_message;
    OrdinalNumber m_lineNumber;
    OrdinalNumber m_columnNumber;
};

// Makes |n| the current insertion node and saves the previous one on m_currentNodeStack.
// Every node other than the document is ref()'d here; popCurrentNode() and
// clearCurrentNodeStack() perform the matching deref(). Once the stack grows past
// maxXMLTreeDepth a fatal "Excessive node nesting." error is reported.
void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
{
    ASSERT(n);
    ASSERT(m_currentNode);
    if (n != document())
        n->ref();
    m_currentNodeStack.append(m_currentNode);
    m_currentNode = n;
    if (m_currentNodeStack.size() > maxXMLTreeDepth)
        handleError(XMLErrors::fatal, "Excessive node nesting.", textPosition());
}

// Restores the previously saved insertion node. Does nothing when there is no current
// node; otherwise releases the reference taken by pushCurrentNode() (the document was
// never ref()'d, so it is not deref()'d either).
void XMLDocumentParser::popCurrentNode()
{
    if (!m_currentNode)
        return;
    ASSERT(m_currentNodeStack.size());

    if (m_currentNode != document())
        m_currentNode->deref();

    m_currentNode = m_currentNodeStack.last();
    m_currentNodeStack.removeLast();
}

// Drops the current node, the leaf text node and every entry still on the node stack,
// releasing the references that pushCurrentNode() took.
void XMLDocumentParser::clearCurrentNodeStack()
{
    if (m_currentNode && m_currentNode != document())
        m_currentNode->deref();
    m_currentNode = 0;
    m_leafTextNode = 0;

    if (m_currentNodeStack.size()) { // Aborted parsing.
        // Entries above index 0 are released unconditionally, walking from the top down.
        for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
            m_currentNodeStack[i]->deref();
        // The bottom entry is only released when it is non-null and is not the document.
        if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
            m_currentNodeStack[0]->deref();
        m_currentNodeStack.clear();
    }
}

// Must never be called on this parser; asserts if it is.
void XMLDocumentParser::insert(const SegmentedString&)
{
    ASSERT_NOT_REACHED();
}

// Feeds one chunk of source text to the parser.
void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
{
    SegmentedString source(inputSource);
    // Keep accumulating the original source while an XSL transform has been seen or the
    // first element has not been seen yet.
    if (m_sawXSLTransform || !m_sawFirstElement)
        m_originalSourceForTransform.append(source);

    if (isStopped() || m_sawXSLTransform)
        return;

    // While paused, the chunk is only queued in m_pendingSrc.
    if (m_parserPaused) {
        m_pendingSrc.append(source);
        return;
    }

    // JavaScript can detach the parser.
Make sure this is not released 353 // before the end of this method. 354 RefPtr<XMLDocumentParser> protect(this); 355 356 doWrite(source.toString()); 357 358 if (isStopped()) 359 return; 360 361 if (document()->frame() && document()->frame()->script()->canExecuteScripts(NotAboutToExecuteScript)) 362 ImageLoader::dispatchPendingBeforeLoadEvents(); 363 } 364 365 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position) 366 { 367 m_xmlErrors.handleError(type, formattedMessage, position); 368 if (type != XMLErrors::warning) 369 m_sawError = true; 370 if (type == XMLErrors::fatal) 371 stopParsing(); 372 } 373 374 void XMLDocumentParser::enterText() 375 { 376 ASSERT(m_bufferedText.size() == 0); 377 ASSERT(!m_leafTextNode); 378 m_leafTextNode = Text::create(m_currentNode->document(), ""); 379 m_currentNode->parserAppendChild(m_leafTextNode.get()); 380 } 381 382 void XMLDocumentParser::exitText() 383 { 384 if (isStopped()) 385 return; 386 387 if (!m_leafTextNode) 388 return; 389 390 m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size())); 391 Vector<xmlChar> empty; 392 m_bufferedText.swap(empty); 393 394 if (m_view && m_leafTextNode->parentNode() && m_leafTextNode->parentNode()->attached() 395 && !m_leafTextNode->attached()) 396 m_leafTextNode->attach(); 397 398 m_leafTextNode = 0; 399 } 400 401 void XMLDocumentParser::detach() 402 { 403 clearCurrentNodeStack(); 404 ScriptableDocumentParser::detach(); 405 } 406 407 void XMLDocumentParser::end() 408 { 409 // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called. 410 // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case. 411 ASSERT(!m_parsingFragment); 412 413 doEnd(); 414 415 // doEnd() call above can detach the parser and null out its document. 416 // In that case, we just bail out. 417 if (isDetached()) 418 return; 419 420 // doEnd() could process a script tag, thus pausing parsing. 
// XMLDocumentParser::end(), after doEnd() has run:
    if (m_parserPaused)
        return;

    // Either show the accumulated errors, or flush pending text and recalculate style.
    if (m_sawError)
        insertErrorMessageBlock();
    else {
        exitText();
        document()->styleResolverChanged(RecalcStyleImmediately);
    }

    if (isParsing())
        prepareToStopParsing();
    document()->setReadyState(Document::Interactive);
    // The node stack is released before the document is told that parsing has finished.
    clearCurrentNodeStack();
    document()->finishedParsing();
}

// Finishes parsing. While the parser is paused this only records that finish was
// requested (m_finishCalled); otherwise it runs end() immediately.
void XMLDocumentParser::finish()
{
    // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
    // makes sense to call any methods on DocumentParser once it's been stopped.
    // However, FrameLoader::stop calls DocumentParser::finish unconditionally.

    if (m_parserPaused)
        m_finishCalled = true;
    else
        end();
}

// Forwards to m_xmlErrors to insert the error message block.
void XMLDocumentParser::insertErrorMessageBlock()
{
    m_xmlErrors.insertErrorMessageBlock();
}

// Resource-client callback for the pending script. The resource passed in is expected to
// be m_pendingScript (asserted).
void XMLDocumentParser::notifyFinished(Resource* unusedResource)
{
    ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
    ASSERT(m_pendingScript->accessCount() > 0);

    // Capture everything needed from the pending script before it is released below.
    ScriptSourceCode sourceCode(m_pendingScript.get());
    bool errorOccurred = m_pendingScript->errorOccurred();
    bool wasCanceled = m_pendingScript->wasCanceled();

    m_pendingScript->removeClient(this);
    m_pendingScript = 0;

    // Take a local reference to the script element and clear the member.
    RefPtr<Element> e = m_scriptElement;
    m_scriptElement = 0;

    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
    ASSERT(scriptLoader);

    // JavaScript can detach this parser, make sure it's kept alive even if detached.
474 RefPtr<XMLDocumentParser> protect(this); 475 476 if (errorOccurred) 477 scriptLoader->dispatchErrorEvent(); 478 else if (!wasCanceled) { 479 scriptLoader->executeScript(sourceCode); 480 scriptLoader->dispatchLoadEvent(); 481 } 482 483 m_scriptElement = 0; 484 485 if (!isDetached() && !m_requestingScript) 486 resumeParsing(); 487 } 488 489 bool XMLDocumentParser::isWaitingForScripts() const 490 { 491 return m_pendingScript; 492 } 493 494 void XMLDocumentParser::pauseParsing() 495 { 496 if (m_parsingFragment) 497 return; 498 499 m_parserPaused = true; 500 } 501 502 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 503 { 504 if (!chunk.length()) 505 return true; 506 507 // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm: 508 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm 509 // For now we have a hack for script/style innerHTML support: 510 if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) { 511 fragment->parserAppendChild(fragment->document()->createTextNode(chunk)); 512 return true; 513 } 514 515 RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy); 516 bool wellFormed = parser->appendFragmentSource(chunk); 517 // Do not call finish(). Current finish() and doEnd() implementations touch the main Document/loader 518 // and can cause crashes in the fragment case. 519 parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction. 520 return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than wellFormed(). 
521 } 522 523 static int globalDescriptor = 0; 524 static ThreadIdentifier libxmlLoaderThread = 0; 525 526 static int matchFunc(const char*) 527 { 528 // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid 529 // interfering with client applications that also use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353 530 return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread; 531 } 532 533 class OffsetBuffer { 534 WTF_MAKE_FAST_ALLOCATED; 535 public: 536 OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { } 537 538 int readOutBytes(char* outputBuffer, unsigned askedToRead) 539 { 540 unsigned bytesLeft = m_buffer.size() - m_currentOffset; 541 unsigned lenToCopy = min(askedToRead, bytesLeft); 542 if (lenToCopy) { 543 memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy); 544 m_currentOffset += lenToCopy; 545 } 546 return lenToCopy; 547 } 548 549 private: 550 Vector<char> m_buffer; 551 unsigned m_currentOffset; 552 }; 553 554 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy) 555 { 556 if (!scriptingContentIsAllowed(parserContentPolicy)) 557 element->stripScriptingAttributes(attributeVector); 558 element->parserSetAttributes(attributeVector); 559 } 560 561 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit) 562 { 563 // Hack around libxml2's lack of encoding overide support by manually 564 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml 565 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks 566 // and switch encodings, causing the parse to fail. 567 if (is8Bit) { 568 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); 569 return; 570 } 571 572 const UChar BOM = 0xFEFF; 573 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); 574 xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? 
XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); 575 } 576 577 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk) 578 { 579 bool is8Bit = chunk.is8Bit(); 580 switchEncoding(ctxt, is8Bit); 581 if (is8Bit) 582 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0); 583 else 584 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0); 585 } 586 587 static void finishParsing(xmlParserCtxtPtr ctxt) 588 { 589 xmlParseChunk(ctxt, 0, 0, 1); 590 } 591 592 #define xmlParseChunk #error "Use parseChunk instead to select the correct encoding." 593 594 static bool shouldAllowExternalLoad(const KURL& url) 595 { 596 String urlString = url.string(); 597 598 // On non-Windows platforms libxml asks for this URL, the 599 // "XML_XML_DEFAULT_CATALOG", on initialization. 600 if (urlString == "file:///etc/xml/catalog") 601 return false; 602 603 // On Windows, libxml computes a URL relative to where its DLL resides. 604 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false)) 605 return false; 606 607 // The most common DTD. There isn't much point in hammering www.w3c.org 608 // by requesting this URL for every XHTML document. 609 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false)) 610 return false; 611 612 // Similarly, there isn't much point in requesting the SVG DTD. 613 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false)) 614 return false; 615 616 // The libxml doesn't give us a lot of context for deciding whether to 617 // allow this request. In the worst case, this load could be for an 618 // external entity and the resulting document could simply read the 619 // retrieved content. If we had more context, we could potentially allow 620 // the parser to load a DTD. As things stand, we take the conservative 621 // route and allow same-origin requests only. 
622 if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) { 623 XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url); 624 return false; 625 } 626 627 return true; 628 } 629 630 static void* openFunc(const char* uri) 631 { 632 ASSERT(XMLDocumentParserScope::currentFetcher); 633 ASSERT(currentThread() == libxmlLoaderThread); 634 635 KURL url(KURL(), uri); 636 637 if (!shouldAllowExternalLoad(url)) 638 return &globalDescriptor; 639 640 ResourceError error; 641 ResourceResponse response; 642 Vector<char> data; 643 644 645 { 646 ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher; 647 XMLDocumentParserScope scope(0); 648 // FIXME: We should restore the original global error handler as well. 649 650 if (fetcher->frame()) 651 fetcher->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data); 652 } 653 654 // We have to check the URL again after the load to catch redirects. 655 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>. 656 if (!shouldAllowExternalLoad(response.url())) 657 return &globalDescriptor; 658 659 return new OffsetBuffer(data); 660 } 661 662 static int readFunc(void* context, char* buffer, int len) 663 { 664 // Do 0-byte reads in case of a null descriptor 665 if (context == &globalDescriptor) 666 return 0; 667 668 OffsetBuffer* data = static_cast<OffsetBuffer*>(context); 669 return data->readOutBytes(buffer, len); 670 } 671 672 static int writeFunc(void*, const char*, int) 673 { 674 // Always just do 0-byte writes 675 return 0; 676 } 677 678 static int closeFunc(void* context) 679 { 680 if (context != &globalDescriptor) { 681 OffsetBuffer* data = static_cast<OffsetBuffer*>(context); 682 delete data; 683 } 684 return 0; 685 } 686 687 static void errorFunc(void*, const char*, ...) 688 { 689 // FIXME: It would be nice to display error messages somewhere. 
690 } 691 692 static bool didInit = false; 693 694 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData) 695 { 696 if (!didInit) { 697 xmlInitParser(); 698 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); 699 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); 700 libxmlLoaderThread = currentThread(); 701 didInit = true; 702 } 703 704 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0); 705 parser->_private = userData; 706 parser->replaceEntities = true; 707 return adoptRef(new XMLParserContext(parser)); 708 } 709 710 // Chunk should be encoded in UTF-8 711 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk) 712 { 713 if (!didInit) { 714 xmlInitParser(); 715 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); 716 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); 717 libxmlLoaderThread = currentThread(); 718 didInit = true; 719 } 720 721 // appendFragmentSource() checks that the length doesn't overflow an int. 722 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length()); 723 724 if (!parser) 725 return 0; 726 727 // Copy the sax handler 728 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler)); 729 730 // Set parser options. 731 // XML_PARSE_NODICT: default dictionary option. 732 // XML_PARSE_NOENT: force entities substitutions. 
733 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT); 734 735 // Internal initialization 736 parser->sax2 = 1; 737 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT 738 parser->depth = 0; 739 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3); 740 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5); 741 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36); 742 parser->_private = userData; 743 744 return adoptRef(new XMLParserContext(parser)); 745 } 746 747 // -------------------------------- 748 749 bool XMLDocumentParser::supportsXMLVersion(const String& version) 750 { 751 return version == "1.0"; 752 } 753 754 XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView) 755 : ScriptableDocumentParser(document) 756 , m_view(frameView) 757 , m_context(0) 758 , m_currentNode(document) 759 , m_isCurrentlyParsing8BitChunk(false) 760 , m_sawError(false) 761 , m_sawCSS(false) 762 , m_sawXSLTransform(false) 763 , m_sawFirstElement(false) 764 , m_isXHTMLDocument(false) 765 , m_parserPaused(false) 766 , m_requestingScript(false) 767 , m_finishCalled(false) 768 , m_xmlErrors(document) 769 , m_pendingScript(0) 770 , m_scriptStartPosition(TextPosition::belowRangePosition()) 771 , m_parsingFragment(false) 772 { 773 // This is XML being used as a document resource. 
// Document-parsing constructor body: count this use of an XML document.
    UseCounter::count(document, UseCounter::XMLDocument);
}

// Constructs a parser that parses into |fragment|. The in-scope default namespace and
// prefix bindings are seeded from the xmlns attributes found on |parentElement| and its
// element ancestors.
XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
    : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
    , m_view(0)
    , m_context(0)
    , m_currentNode(fragment)
    , m_isCurrentlyParsing8BitChunk(false)
    , m_sawError(false)
    , m_sawCSS(false)
    , m_sawXSLTransform(false)
    , m_sawFirstElement(false)
    , m_isXHTMLDocument(false)
    , m_parserPaused(false)
    , m_requestingScript(false)
    , m_finishCalled(false)
    , m_xmlErrors(fragment->document())
    , m_pendingScript(0)
    , m_scriptStartPosition(TextPosition::belowRangePosition())
    , m_parsingFragment(true)
{
    // The fragment is the initial current node; hold a reference to it, as
    // pushCurrentNode() does for pushed nodes.
    fragment->ref();

    // Add namespaces based on the parent node
    // Collect the context element and its element ancestors, innermost first. The walk
    // stops at the first parent that is missing or is not an element, leaving
    // parentElement pointing at the outermost element that was collected.
    Vector<Element*> elemStack;
    while (parentElement) {
        elemStack.append(parentElement);

        ContainerNode* n = parentElement->parentNode();
        if (!n || !n->isElementNode())
            break;
        parentElement = toElement(n);
    }

    if (elemStack.isEmpty())
        return;

    // Process from the outermost ancestor inwards so that declarations on closer
    // elements overwrite those made further out.
    for (; !elemStack.isEmpty(); elemStack.removeLast()) {
        Element* element = elemStack.last();
        if (element->hasAttributes()) {
            for (unsigned i = 0; i < element->attributeCount(); i++) {
                const Attribute* attribute = element->attributeItem(i);
                // xmlns="..." sets the default namespace; xmlns:prefix="..." binds a prefix.
                if (attribute->localName() == xmlnsAtom)
                    m_defaultNamespaceURI = attribute->value();
                else if (attribute->prefix() == xmlnsAtom)
                    m_prefixToNamespaceMap.set(attribute->localName(), attribute->value());
            }
        }
    }

    // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
    // NOTE(review): by this point parentElement has been walked up to the outermost
    // collected ancestor, so this reads that element's state rather than the original
    // context element's - confirm this is intended.
    if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
        m_defaultNamespaceURI = parentElement->namespaceURI();
}

// Frees the libxml2 document attached to the context, if any, then the context itself.
XMLParserContext::~XMLParserContext()
{
    if (m_context->myDoc)
        xmlFreeDoc(m_context->myDoc);
    xmlFreeParserCtxt(m_context);
}

XMLDocumentParser::~XMLDocumentParser()
{
    // The XMLDocumentParser will always be detached before being destroyed.
    ASSERT(m_currentNodeStack.isEmpty());
    ASSERT(!m_currentNode);

    // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
    // Unregister from a still-pending script resource.
    if (m_pendingScript)
        m_pendingScript->removeClient(this);
}

// Feeds |parseString| to libxml2, creating the parser context on first use.
void XMLDocumentParser::doWrite(const String& parseString)
{
    ASSERT(!isDetached());
    if (!m_context)
        initializeParserContext();

    // Protect the libxml context from deletion during a callback
    RefPtr<XMLParserContext> context = m_context;

    // libXML throws an error if you try to switch the encoding for an empty string.
    if (parseString.length()) {
        // JavaScript may cause the parser to detach during parseChunk
        // keep this alive until this function is done.
        RefPtr<XMLDocumentParser> protect(this);

        XMLDocumentParserScope scope(document()->fetcher());
        // m_isCurrentlyParsing8BitChunk reflects this chunk's width only inside this scope.
        TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
        parseChunk(context->context(), parseString);

        // JavaScript (which may be run under the parseChunk callstack) may
        // cause the parser to be stopped or detached.
        if (isStopped())
            return;
    }

    // FIXME: Why is this here? And why is it after we process the passed source?
// XMLDocumentParser::doWrite(): surface decoder errors after the chunk has been handed to libxml2.
    if (document()->decoder() && document()->decoder()->sawError()) {
        // If the decoder saw an error, report it as fatal (stops parsing)
        TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
        handleError(XMLErrors::fatal, "Encoding error", position);
    }
}

// Layout used to view the namespace array handed to startElementNs(): one
// (prefix, uri) pair of pointers per namespace.
struct _xmlSAX2Namespace {
    const xmlChar* prefix;
    const xmlChar* uri;
};
typedef struct _xmlSAX2Namespace xmlSAX2Namespace;

// Appends one xmlns / xmlns:prefix attribute to |prefixedAttributes| for each of the
// |nbNamespaces| entries in |libxmlNamespaces|. Returns early, leaving the failure
// recorded in |es|, as soon as an attribute name cannot be parsed.
static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& es)
{
    xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
    for (int i = 0; i < nbNamespaces; i++) {
        // A declaration without a prefix is plain "xmlns"; otherwise it is "xmlns:<prefix>".
        AtomicString namespaceQName = xmlnsAtom;
        AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
        if (namespaces[i].prefix)
            namespaceQName = "xmlns:" + toString(namespaces[i].prefix);

        QualifiedName parsedName = anyName;
        if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, es))
            return;

        prefixedAttributes.append(Attribute(parsedName, namespaceURI));
    }
}

// Layout used to view the attribute array handed to startElementNs(): five pointers
// per attribute, with the value delimited by [value, end).
struct _xmlSAX2Attributes {
    const xmlChar* localname;
    const xmlChar* prefix;
    const xmlChar* uri;
    const xmlChar* value;
    const xmlChar* end;
};
typedef struct _xmlSAX2Attributes xmlSAX2Attributes;

// Appends one Attribute to |prefixedAttributes| for each of the |nbAttributes| entries in
// |libxmlAttributes|. Returns early, leaving the failure recorded in |es|, as soon as an
// attribute name cannot be parsed.
static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& es)
{
    xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
    for (int i = 0; i < nbAttributes; i++) {
        // The value's length comes from the distance between the value and end pointers.
        int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
        AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
        String attrPrefix = toString(attributes[i].prefix);
        // Attributes without a prefix get a null namespace URI and a bare local name.
        AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
        AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);

        QualifiedName parsedName = anyName;
        if (!Element::parseAttributeName(parsedName, attrURI, attrQName, es))
            return;

        prefixedAttributes.append(Attribute(parsedName, attrValue));
    }
}

// Handles a start-element event: creates the element, sets its attributes, appends it to
// the current node and makes it (or, for <template>, its content) the new current node.
// While the parser is paused the event is queued as a PendingStartElementNSCallback instead.
void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
    const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
{
    if (isStopped())
        return;

    if (m_parserPaused) {
        m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
            nbAttributes, nbDefaulted, libxmlAttributes)));
        return;
    }

    // Flush any open text run before inserting the element.
    exitText();

    // In fragment mode a null URI is resolved from the bindings seeded by the fragment
    // constructor: the prefix map when a prefix is present, else the default namespace.
    AtomicString adjustedURI = uri;
    if (m_parsingFragment && adjustedURI.isNull()) {
        if (!prefix.isNull())
            adjustedURI = m_prefixToNamespaceMap.get(prefix);
        else
            adjustedURI = m_defaultNamespaceURI;
    }

    bool isFirstElement = !m_sawFirstElement;
    m_sawFirstElement = true;

    QualifiedName qName(prefix, localName, adjustedURI);
    RefPtr<Element> newElement = m_currentNode->document()->createElement(qName, true);
    if (!newElement) {
        stopParsing();
        return;
    }

    Vector<Attribute> prefixedAttributes;
    TrackExceptionState es;
    handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, es);
    if (es.hadException()) {
        // Whatever was collected before the failure is still applied before stopping.
        setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
        stopParsing();
        return;
    }

    handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, es);
    setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    if (es.hadException()) {
        stopParsing();
        return;
    }

    newElement->beginParsingChildren();

    // Remember where a script element starts; endElementNs() passes this position on.
    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
    if (scriptLoader)
        m_scriptStartPosition = textPosition();

    m_currentNode->parserAppendChild(newElement.get());

    // Keep the pre-push current node: the attach check below is made against it.
    const ContainerNode* currentNode = m_currentNode;
    if (newElement->hasTagName(HTMLNames::templateTag))
        pushCurrentNode(toHTMLTemplateElement(newElement.get())->content());
    else
        pushCurrentNode(newElement.get());

    if (m_view && currentNode->attached() && !newElement->attached())
        newElement->attach();

    if (isHTMLHtmlElement(newElement.get()))
        toHTMLHtmlElement(newElement.get())->insertedByParser();

    if (!m_parsingFragment && isFirstElement && document()->frame())
        document()->frame()->loader()->dispatchDocumentElementAvailable();
}

// Handles an end-element event for the current node. While the parser is paused the event
// is queued as a PendingEndElementNSCallback instead.
void XMLDocumentParser::endElementNs()
{
    if (isStopped())
        return;

    if (m_parserPaused) {
        m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
        return;
    }

    // JavaScript can detach the parser. Make sure this is not released
    // before the end of this method.
    RefPtr<XMLDocumentParser> protect(this);

    exitText();

    RefPtr<ContainerNode> n = m_currentNode;
    n->finishParsingChildren();

    // When scripting content is not allowed, script elements are removed from the tree again.
    if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n.get()))) {
        popCurrentNode();
        n->remove(IGNORE_EXCEPTION);
        return;
    }

    // Non-elements, and parsers without a view, need nothing beyond popping the node.
    if (!n->isElementNode() || !m_view) {
        popCurrentNode();
        return;
    }

    Element* element = toElement(n.get());

    // The element's parent may have already been removed from document.
    // Parsing continues in this case, but scripts aren't executed.
1037 if (!element->inDocument()) { 1038 popCurrentNode(); 1039 return; 1040 } 1041 1042 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element); 1043 if (!scriptLoader) { 1044 popCurrentNode(); 1045 return; 1046 } 1047 1048 // Don't load external scripts for standalone documents (for now). 1049 ASSERT(!m_pendingScript); 1050 m_requestingScript = true; 1051 1052 if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) { 1053 // FIXME: Script execution should be shared between 1054 // the libxml2 and Qt XMLDocumentParser implementations. 1055 1056 if (scriptLoader->readyToBeParserExecuted()) { 1057 scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition)); 1058 } else if (scriptLoader->willBeParserExecuted()) { 1059 m_pendingScript = scriptLoader->resource(); 1060 m_scriptElement = element; 1061 m_pendingScript->addClient(this); 1062 1063 // m_pendingScript will be 0 if script was already loaded and addClient() executed it. 
1064 if (m_pendingScript) 1065 pauseParsing(); 1066 } else { 1067 m_scriptElement = 0; 1068 } 1069 1070 // JavaScript may have detached the parser 1071 if (isDetached()) 1072 return; 1073 } 1074 m_requestingScript = false; 1075 popCurrentNode(); 1076 } 1077 1078 void XMLDocumentParser::characters(const xmlChar* chars, int length) 1079 { 1080 if (isStopped()) 1081 return; 1082 1083 if (m_parserPaused) { 1084 m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length))); 1085 return; 1086 } 1087 1088 if (!m_leafTextNode) 1089 enterText(); 1090 m_bufferedText.append(chars, length); 1091 } 1092 1093 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args) 1094 { 1095 if (isStopped()) 1096 return; 1097 1098 #if HAVE(VASPRINTF) 1099 char* formattedMessage; 1100 if (vasprintf(&formattedMessage, message, args) == -1) 1101 return; 1102 #else 1103 char formattedMessage[1024]; 1104 vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args); 1105 #endif 1106 1107 if (m_parserPaused) { 1108 m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber()))); 1109 #if HAVE(VASPRINTF) 1110 free(formattedMessage); 1111 #endif 1112 return; 1113 } 1114 1115 handleError(type, formattedMessage, textPosition()); 1116 1117 #if HAVE(VASPRINTF) 1118 free(formattedMessage); 1119 #endif 1120 } 1121 1122 void XMLDocumentParser::processingInstruction(const String& target, const String& data) 1123 { 1124 if (isStopped()) 1125 return; 1126 1127 if (m_parserPaused) { 1128 m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target ,data))); 1129 return; 1130 } 1131 1132 exitText(); 1133 1134 // ### handle exceptions 1135 TrackExceptionState es; 1136 RefPtr<ProcessingInstruction> pi = m_currentNode->document()->createProcessingInstruction(target, data, es); 1137 if (es.hadException()) 1138 return; 1139 1140 
pi->setCreatedByParser(true); 1141 1142 m_currentNode->parserAppendChild(pi.get()); 1143 if (m_view && !pi->attached()) 1144 pi->attach(); 1145 1146 pi->finishParsingChildren(); 1147 1148 if (pi->isCSS()) 1149 m_sawCSS = true; 1150 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL(); 1151 if (m_sawXSLTransform && !document()->transformSourceDocument()) 1152 stopParsing(); 1153 } 1154 1155 void XMLDocumentParser::cdataBlock(const String& text) 1156 { 1157 if (isStopped()) 1158 return; 1159 1160 if (m_parserPaused) { 1161 m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text))); 1162 return; 1163 } 1164 1165 exitText(); 1166 1167 RefPtr<CDATASection> newNode = CDATASection::create(m_currentNode->document(), text); 1168 m_currentNode->parserAppendChild(newNode.get()); 1169 if (m_view && !newNode->attached()) 1170 newNode->attach(); 1171 } 1172 1173 void XMLDocumentParser::comment(const String& text) 1174 { 1175 if (isStopped()) 1176 return; 1177 1178 if (m_parserPaused) { 1179 m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text))); 1180 return; 1181 } 1182 1183 exitText(); 1184 1185 RefPtr<Comment> newNode = Comment::create(m_currentNode->document(), text); 1186 m_currentNode->parserAppendChild(newNode.get()); 1187 if (m_view && !newNode->attached()) 1188 newNode->attach(); 1189 } 1190 1191 enum StandaloneInfo { 1192 StandaloneUnspecified = -2, 1193 NoXMlDeclaration, 1194 StandaloneNo, 1195 StandaloneYes 1196 }; 1197 1198 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone) 1199 { 1200 StandaloneInfo standaloneInfo = (StandaloneInfo)standalone; 1201 if (standaloneInfo == NoXMlDeclaration) { 1202 document()->setHasXMLDeclaration(false); 1203 return; 1204 } 1205 1206 if (!version.isNull()) 1207 document()->setXMLVersion(version, ASSERT_NO_EXCEPTION); 1208 if (standalone != StandaloneUnspecified) 1209 document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION); 1210 if 
(!encoding.isNull()) 1211 document()->setXMLEncoding(encoding); 1212 document()->setHasXMLDeclaration(true); 1213 } 1214 1215 void XMLDocumentParser::endDocument() 1216 { 1217 exitText(); 1218 } 1219 1220 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID) 1221 { 1222 if (isStopped()) 1223 return; 1224 1225 if (m_parserPaused) { 1226 m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID))); 1227 return; 1228 } 1229 1230 if (document()) 1231 document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID)); 1232 } 1233 1234 static inline XMLDocumentParser* getParser(void* closure) 1235 { 1236 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1237 return static_cast<XMLDocumentParser*>(ctxt->_private); 1238 } 1239 1240 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes) 1241 { 1242 getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes); 1243 } 1244 1245 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*) 1246 { 1247 getParser(closure)->endElementNs(); 1248 } 1249 1250 static void charactersHandler(void* closure, const xmlChar* chars, int length) 1251 { 1252 getParser(closure)->characters(chars, length); 1253 } 1254 1255 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data) 1256 { 1257 getParser(closure)->processingInstruction(toString(target), toString(data)); 1258 } 1259 1260 static void cdataBlockHandler(void* closure, const xmlChar* text, int length) 1261 { 1262 getParser(closure)->cdataBlock(toString(text, length)); 1263 } 1264 1265 static 
void commentHandler(void* closure, const xmlChar* text) 1266 { 1267 getParser(closure)->comment(toString(text)); 1268 } 1269 1270 WTF_ATTRIBUTE_PRINTF(2, 3) 1271 static void warningHandler(void* closure, const char* message, ...) 1272 { 1273 va_list args; 1274 va_start(args, message); 1275 getParser(closure)->error(XMLErrors::warning, message, args); 1276 va_end(args); 1277 } 1278 1279 WTF_ATTRIBUTE_PRINTF(2, 3) 1280 static void fatalErrorHandler(void* closure, const char* message, ...) 1281 { 1282 va_list args; 1283 va_start(args, message); 1284 getParser(closure)->error(XMLErrors::fatal, message, args); 1285 va_end(args); 1286 } 1287 1288 WTF_ATTRIBUTE_PRINTF(2, 3) 1289 static void normalErrorHandler(void* closure, const char* message, ...) 1290 { 1291 va_list args; 1292 va_start(args, message); 1293 getParser(closure)->error(XMLErrors::nonFatal, message, args); 1294 va_end(args); 1295 } 1296 1297 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is 1298 // a hack to avoid malloc/free. 
Using a global variable like this could cause trouble 1299 // if libxml implementation details were to change 1300 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 1301 1302 static xmlEntityPtr sharedXHTMLEntity() 1303 { 1304 static xmlEntity entity; 1305 if (!entity.type) { 1306 entity.type = XML_ENTITY_DECL; 1307 entity.orig = sharedXHTMLEntityResult; 1308 entity.content = sharedXHTMLEntityResult; 1309 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY; 1310 } 1311 return &entity; 1312 } 1313 1314 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize) 1315 { 1316 const char* originalTarget = target; 1317 WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity, 1318 utf16Entity + numberOfCodeUnits, &target, target + targetSize); 1319 if (conversionResult != WTF::Unicode::conversionOK) 1320 return 0; 1321 1322 // Even though we must pass the length, libxml expects the entity string to be null terminated. 
1323 ASSERT(target > originalTarget + 1); 1324 *target = '\0'; 1325 return target - originalTarget; 1326 } 1327 1328 static xmlEntityPtr getXHTMLEntity(const xmlChar* name) 1329 { 1330 UChar utf16DecodedEntity[4]; 1331 size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity); 1332 if (!numberOfCodeUnits) 1333 return 0; 1334 1335 ASSERT(numberOfCodeUnits <= 4); 1336 size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits, 1337 reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult)); 1338 if (!entityLengthInUTF8) 1339 return 0; 1340 1341 xmlEntityPtr entity = sharedXHTMLEntity(); 1342 entity->length = entityLengthInUTF8; 1343 entity->name = name; 1344 return entity; 1345 } 1346 1347 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name) 1348 { 1349 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1350 xmlEntityPtr ent = xmlGetPredefinedEntity(name); 1351 if (ent) { 1352 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY; 1353 return ent; 1354 } 1355 1356 ent = xmlGetDocEntity(ctxt->myDoc, name); 1357 if (!ent && getParser(closure)->isXHTMLDocument()) { 1358 ent = getXHTMLEntity(name); 1359 if (ent) 1360 ent->etype = XML_INTERNAL_GENERAL_ENTITY; 1361 } 1362 1363 return ent; 1364 } 1365 1366 static void startDocumentHandler(void* closure) 1367 { 1368 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure); 1369 XMLDocumentParser* parser = getParser(closure); 1370 switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk()); 1371 parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone); 1372 xmlSAX2StartDocument(closure); 1373 } 1374 1375 static void endDocumentHandler(void* closure) 1376 { 1377 getParser(closure)->endDocument(); 1378 xmlSAX2EndDocument(closure); 1379 } 1380 1381 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const 
xmlChar* systemID) 1382 { 1383 getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID)); 1384 xmlSAX2InternalSubset(closure, name, externalID, systemID); 1385 } 1386 1387 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*) 1388 { 1389 String extId = toString(externalId); 1390 if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN") 1391 || (extId == "-//W3C//DTD XHTML 1.1//EN") 1392 || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN") 1393 || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN") 1394 || (extId == "-//W3C//DTD XHTML Basic 1.0//EN") 1395 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN") 1396 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN") 1397 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN") 1398 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") 1399 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN")) 1400 getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not. 
1401 } 1402 1403 static void ignorableWhitespaceHandler(void*, const xmlChar*, int) 1404 { 1405 // nothing to do, but we need this to work around a crasher 1406 // http://bugzilla.gnome.org/show_bug.cgi?id=172255 1407 // http://bugs.webkit.org/show_bug.cgi?id=5792 1408 } 1409 1410 void XMLDocumentParser::initializeParserContext(const CString& chunk) 1411 { 1412 xmlSAXHandler sax; 1413 memset(&sax, 0, sizeof(sax)); 1414 1415 sax.error = normalErrorHandler; 1416 sax.fatalError = fatalErrorHandler; 1417 sax.characters = charactersHandler; 1418 sax.processingInstruction = processingInstructionHandler; 1419 sax.cdataBlock = cdataBlockHandler; 1420 sax.comment = commentHandler; 1421 sax.warning = warningHandler; 1422 sax.startElementNs = startElementNsHandler; 1423 sax.endElementNs = endElementNsHandler; 1424 sax.getEntity = getEntityHandler; 1425 sax.startDocument = startDocumentHandler; 1426 sax.endDocument = endDocumentHandler; 1427 sax.internalSubset = internalSubsetHandler; 1428 sax.externalSubset = externalSubsetHandler; 1429 sax.ignorableWhitespace = ignorableWhitespaceHandler; 1430 sax.entityDecl = xmlSAX2EntityDecl; 1431 sax.initialized = XML_SAX2_MAGIC; 1432 DocumentParser::startParsing(); 1433 m_sawError = false; 1434 m_sawCSS = false; 1435 m_sawXSLTransform = false; 1436 m_sawFirstElement = false; 1437 1438 XMLDocumentParserScope scope(document()->fetcher()); 1439 if (m_parsingFragment) 1440 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk); 1441 else { 1442 ASSERT(!chunk.data()); 1443 m_context = XMLParserContext::createStringParser(&sax, this); 1444 } 1445 } 1446 1447 void XMLDocumentParser::doEnd() 1448 { 1449 if (!isStopped()) { 1450 if (m_context) { 1451 // Tell libxml we're done. 
1452 { 1453 XMLDocumentParserScope scope(document()->fetcher()); 1454 finishParsing(context()); 1455 } 1456 1457 m_context = 0; 1458 } 1459 } 1460 1461 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document()); 1462 if (xmlViewerMode) { 1463 XMLTreeViewer xmlTreeViewer(document()); 1464 xmlTreeViewer.transformDocumentToTreeView(); 1465 } else if (m_sawXSLTransform) { 1466 xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string()); 1467 document()->setTransformSource(adoptPtr(new TransformSource(doc))); 1468 1469 document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets. 1470 document()->styleResolverChanged(RecalcStyleImmediately); 1471 1472 // styleResolverChanged() call can detach the parser and null out its document. 1473 // In that case, we just bail out. 1474 if (isDetached()) 1475 return; 1476 1477 document()->setParsing(true); 1478 DocumentParser::stopParsing(); 1479 } 1480 } 1481 1482 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url) 1483 { 1484 if (source.isEmpty()) 1485 return 0; 1486 // Parse in a single chunk into an xmlDocPtr 1487 // FIXME: Hook up error handlers so that a failure to parse the main document results in 1488 // good error messages. 1489 XMLDocumentParserScope scope(fetcher, errorFunc, 0); 1490 XMLParserInput input(source); 1491 return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS); 1492 } 1493 1494 OrdinalNumber XMLDocumentParser::lineNumber() const 1495 { 1496 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1); 1497 } 1498 1499 OrdinalNumber XMLDocumentParser::columnNumber() const 1500 { 1501 return OrdinalNumber::fromOneBasedInt(context() ? 
context()->input->col : 1); 1502 } 1503 1504 TextPosition XMLDocumentParser::textPosition() const 1505 { 1506 xmlParserCtxtPtr context = this->context(); 1507 if (!context) 1508 return TextPosition::minimumPosition(); 1509 return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), 1510 OrdinalNumber::fromOneBasedInt(context->input->col)); 1511 } 1512 1513 void XMLDocumentParser::stopParsing() 1514 { 1515 DocumentParser::stopParsing(); 1516 if (context()) 1517 xmlStopParser(context()); 1518 } 1519 1520 void XMLDocumentParser::resumeParsing() 1521 { 1522 ASSERT(!isDetached()); 1523 ASSERT(m_parserPaused); 1524 1525 m_parserPaused = false; 1526 1527 // First, execute any pending callbacks 1528 while (!m_pendingCallbacks.isEmpty()) { 1529 OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst(); 1530 callback->call(this); 1531 1532 // A callback paused the parser 1533 if (m_parserPaused) 1534 return; 1535 } 1536 1537 // Then, write any pending data 1538 SegmentedString rest = m_pendingSrc; 1539 m_pendingSrc.clear(); 1540 // There is normally only one string left, so toString() shouldn't copy. 1541 // In any case, the XML parser runs on the main thread and it's OK if 1542 // the passed string has more than one reference. 1543 append(rest.toString().impl()); 1544 1545 // Finally, if finish() has been called and write() didn't result 1546 // in any further callbacks being queued, call end() 1547 if (m_finishCalled && m_pendingCallbacks.isEmpty()) 1548 end(); 1549 } 1550 1551 bool XMLDocumentParser::appendFragmentSource(const String& chunk) 1552 { 1553 ASSERT(!m_context); 1554 ASSERT(m_parsingFragment); 1555 1556 CString chunkAsUtf8 = chunk.utf8(); 1557 1558 // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB. 1559 if (chunkAsUtf8.length() > INT_MAX) 1560 return false; 1561 1562 initializeParserContext(chunkAsUtf8); 1563 xmlParseContent(context()); 1564 endDocument(); // Close any open text nodes. 
1565 1566 // FIXME: If this code is actually needed, it should probably move to finish() 1567 // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). 1568 // Check if all the chunk has been processed. 1569 long bytesProcessed = xmlByteConsumed(context()); 1570 if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) { 1571 // FIXME: I don't believe we can hit this case without also having seen an error or a null byte. 1572 // If we hit this ASSERT, we've found a test case which demonstrates the need for this code. 1573 ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed])); 1574 return false; 1575 } 1576 1577 // No error if the chunk is well formed or it is not but we have no error. 1578 return context()->wellFormed || !xmlCtxtGetLastError(context()); 1579 } 1580 1581 // -------------------------------- 1582 1583 struct AttributeParseState { 1584 HashMap<String, String> attributes; 1585 bool gotAttributes; 1586 }; 1587 1588 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/, 1589 const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/, 1590 int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes) 1591 { 1592 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0) 1593 return; 1594 1595 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1596 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private); 1597 1598 state->gotAttributes = true; 1599 1600 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); 1601 for (int i = 0; i < nbAttributes; i++) { 1602 String attrLocalName = toString(attributes[i].localname); 1603 int valueLength = (int) (attributes[i].end - attributes[i].value); 1604 String attrValue = toString(attributes[i].value, valueLength); 1605 String 
attrPrefix = toString(attributes[i].prefix); 1606 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName; 1607 1608 state->attributes.set(attrQName, attrValue); 1609 } 1610 } 1611 1612 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK) 1613 { 1614 AttributeParseState state; 1615 state.gotAttributes = false; 1616 1617 xmlSAXHandler sax; 1618 memset(&sax, 0, sizeof(sax)); 1619 sax.startElementNs = attributesStartElementNsHandler; 1620 sax.initialized = XML_SAX2_MAGIC; 1621 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state); 1622 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />"; 1623 parseChunk(parser->context(), parseString); 1624 finishParsing(parser->context()); 1625 attrsOK = state.gotAttributes; 1626 return state.attributes; 1627 } 1628 1629 } // namespace WebCore 1630