1 /* 2 * Copyright (C) 2000 Peter Kelly <pmk (at) post.com> 3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved. 4 * Copyright (C) 2006 Alexey Proskuryakov <ap (at) webkit.org> 5 * Copyright (C) 2007 Samuel Weinig <sam (at) webkit.org> 6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 7 * Copyright (C) 2008 Holger Hans Peter Freyther 8 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 9 * Copyright (C) 2010 Patrick Gansterer <paroga (at) paroga.com> 10 * 11 * This library is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU Library General Public 13 * License as published by the Free Software Foundation; either 14 * version 2 of the License, or (at your option) any later version. 15 * 16 * This library is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Library General Public License for more details. 20 * 21 * You should have received a copy of the GNU Library General Public License 22 * along with this library; see the file COPYING.LIB. If not, write to 23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 24 * Boston, MA 02110-1301, USA. 25 */ 26 27 #include "config.h" 28 #include "XMLDocumentParser.h" 29 30 #include "CDATASection.h" 31 #include "CachedScript.h" 32 #include "Comment.h" 33 #include "CachedResourceLoader.h" 34 #include "Document.h" 35 #include "DocumentFragment.h" 36 #include "DocumentType.h" 37 #include "Frame.h" 38 #include "FrameLoader.h" 39 #include "FrameView.h" 40 #include "HTMLEntityParser.h" 41 #include "HTMLHtmlElement.h" 42 #include "HTMLLinkElement.h" 43 #include "HTMLNames.h" 44 #include "HTMLStyleElement.h" 45 #include "ProcessingInstruction.h" 46 #include "ResourceError.h" 47 #include "ResourceHandle.h" 48 #include "ResourceRequest.h" 49 #include "ResourceResponse.h" 50 #include "ScriptElement.h" 51 #include "ScriptSourceCode.h" 52 #include "ScriptValue.h" 53 #include "TextResourceDecoder.h" 54 #include "TransformSource.h" 55 #include "XMLNSNames.h" 56 #include "XMLDocumentParserScope.h" 57 #include <libxml/parser.h> 58 #include <libxml/parserInternals.h> 59 #include <wtf/text/CString.h> 60 #include <wtf/StringExtras.h> 61 #include <wtf/Threading.h> 62 #include <wtf/UnusedParam.h> 63 #include <wtf/Vector.h> 64 65 #if ENABLE(XSLT) 66 #include "XMLTreeViewer.h" 67 #include <libxslt/xslt.h> 68 #endif 69 70 #if ENABLE(XHTMLMP) 71 #include "HTMLScriptElement.h" 72 #endif 73 74 75 using namespace std; 76 77 namespace WebCore { 78 79 class PendingCallbacks { 80 WTF_MAKE_NONCOPYABLE(PendingCallbacks); 81 public: 82 PendingCallbacks() { } 83 ~PendingCallbacks() 84 { 85 deleteAllValues(m_callbacks); 86 } 87 88 void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces, 89 const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes) 90 { 91 PendingStartElementNSCallback* callback = new PendingStartElementNSCallback; 92 93 callback->xmlLocalName = xmlStrdup(xmlLocalName); 94 callback->xmlPrefix = xmlStrdup(xmlPrefix); 95 callback->xmlURI = xmlStrdup(xmlURI); 96 callback->nb_namespaces = nb_namespaces; 97 callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2)); 98 for (int i = 0; i < nb_namespaces * 2 ; i++) 99 callback->namespaces[i] = xmlStrdup(namespaces[i]); 100 callback->nb_attributes = nb_attributes; 101 callback->nb_defaulted = nb_defaulted; 102 callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5)); 103 for (int i = 0; i < nb_attributes; i++) { 104 // Each attribute has 5 elements in the array: 105 // name, prefix, uri, value and an end pointer. 106 107 for (int j = 0; j < 3; j++) 108 callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]); 109 110 int len = attributes[i * 5 + 4] - attributes[i * 5 + 3]; 111 112 callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len); 113 callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len; 114 } 115 116 m_callbacks.append(callback); 117 } 118 119 void appendEndElementNSCallback() 120 { 121 PendingEndElementNSCallback* callback = new PendingEndElementNSCallback; 122 123 m_callbacks.append(callback); 124 } 125 126 void appendCharactersCallback(const xmlChar* s, int len) 127 { 128 PendingCharactersCallback* callback = new PendingCharactersCallback; 129 130 callback->s = xmlStrndup(s, len); 131 callback->len = len; 132 133 m_callbacks.append(callback); 134 } 135 136 void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data) 137 { 138 PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback; 139 140 callback->target = xmlStrdup(target); 141 callback->data = xmlStrdup(data); 142 143 m_callbacks.append(callback); 144 } 145 146 void appendCDATABlockCallback(const xmlChar* s, int len) 147 { 148 PendingCDATABlockCallback* callback = new PendingCDATABlockCallback; 149 150 callback->s = xmlStrndup(s, len); 151 callback->len = len; 152 153 m_callbacks.append(callback); 154 } 155 156 void appendCommentCallback(const xmlChar* s) 157 { 158 PendingCommentCallback* callback = new PendingCommentCallback; 159 160 callback->s = xmlStrdup(s); 161 162 m_callbacks.append(callback); 163 } 164 165 void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID) 166 { 167 PendingInternalSubsetCallback* callback = new PendingInternalSubsetCallback; 168 169 callback->name = xmlStrdup(name); 170 callback->externalID = xmlStrdup(externalID); 171 callback->systemID = xmlStrdup(systemID); 172 173 m_callbacks.append(callback); 174 } 175 176 void appendErrorCallback(XMLDocumentParser::ErrorType type, const xmlChar* message, int lineNumber, int columnNumber) 177 { 178 PendingErrorCallback* callback = new PendingErrorCallback; 179 180 callback->message = xmlStrdup(message); 181 callback->type = type; 182 callback->lineNumber = lineNumber; 183 callback->columnNumber = columnNumber; 184 185 m_callbacks.append(callback); 186 } 187 188 void callAndRemoveFirstCallback(XMLDocumentParser* parser) 189 { 190 OwnPtr<PendingCallback> callback(m_callbacks.takeFirst()); 191 callback->call(parser); 192 } 193 194 bool isEmpty() const { return m_callbacks.isEmpty(); } 195 196 private: 197 struct PendingCallback { 198 virtual ~PendingCallback() { } 199 virtual void call(XMLDocumentParser* parser) = 0; 200 }; 201 202 struct PendingStartElementNSCallback : public PendingCallback { 203 virtual ~PendingStartElementNSCallback() 204 { 205 xmlFree(xmlLocalName); 206 xmlFree(xmlPrefix); 207 xmlFree(xmlURI); 208 for (int i = 0; i < nb_namespaces * 2; i++) 209 xmlFree(namespaces[i]); 210 xmlFree(namespaces); 211 for (int i = 0; i < nb_attributes; i++) 212 for (int j = 0; j < 4; j++) 213 xmlFree(attributes[i * 5 + j]); 214 xmlFree(attributes); 215 } 216 217 virtual void call(XMLDocumentParser* parser) 218 { 219 parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI, 220 nb_namespaces, const_cast<const xmlChar**>(namespaces), 221 nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes)); 222 } 223 224 xmlChar* xmlLocalName; 225 xmlChar* xmlPrefix; 226 xmlChar* xmlURI; 227 int nb_namespaces; 228 xmlChar** namespaces; 229 int nb_attributes; 230 int nb_defaulted; 231 xmlChar** attributes; 232 }; 233 234 struct PendingEndElementNSCallback : public PendingCallback { 235 virtual void call(XMLDocumentParser* parser) 236 { 237 parser->endElementNs(); 238 } 239 }; 240 241 struct PendingCharactersCallback : public PendingCallback { 242 virtual ~PendingCharactersCallback() 243 { 244 xmlFree(s); 245 } 246 247 virtual void call(XMLDocumentParser* parser) 248 { 249 parser->characters(s, len); 250 } 251 252 xmlChar* s; 253 int len; 254 }; 255 256 struct PendingProcessingInstructionCallback : public PendingCallback { 257 virtual ~PendingProcessingInstructionCallback() 258 { 259 xmlFree(target); 260 xmlFree(data); 261 } 262 263 virtual void call(XMLDocumentParser* parser) 264 { 265 parser->processingInstruction(target, data); 266 } 267 268 xmlChar* target; 269 xmlChar* data; 270 }; 271 272 struct PendingCDATABlockCallback : public PendingCallback { 273 virtual ~PendingCDATABlockCallback() 274 { 275 xmlFree(s); 276 } 277 278 virtual void call(XMLDocumentParser* parser) 279 { 280 parser->cdataBlock(s, len); 281 } 282 283 xmlChar* s; 284 int len; 285 }; 286 287 struct PendingCommentCallback : public PendingCallback { 288 virtual ~PendingCommentCallback() 289 { 290 xmlFree(s); 291 } 292 293 virtual void call(XMLDocumentParser* parser) 294 { 295 parser->comment(s); 296 } 297 298 xmlChar* s; 299 }; 300 301 struct PendingInternalSubsetCallback : public PendingCallback { 302 virtual ~PendingInternalSubsetCallback() 303 { 304 xmlFree(name); 305 xmlFree(externalID); 306 xmlFree(systemID); 307 } 308 309 virtual void call(XMLDocumentParser* parser) 310 { 311 parser->internalSubset(name, externalID, systemID); 312 } 313 314 xmlChar* name; 315 xmlChar* externalID; 316 xmlChar* systemID; 317 }; 318 319 struct PendingErrorCallback: public PendingCallback { 320 virtual ~PendingErrorCallback() 321 { 322 xmlFree(message); 323 } 324 325 virtual void call(XMLDocumentParser* parser) 326 { 327 parser->handleError(type, reinterpret_cast<char*>(message), lineNumber, columnNumber); 328 } 329 330 XMLDocumentParser::ErrorType type; 331 xmlChar* message; 332 int lineNumber; 333 int columnNumber; 334 }; 335 336 Deque<PendingCallback*> m_callbacks; 337 }; 338 // -------------------------------- 339 340 static int globalDescriptor = 0; 341 static ThreadIdentifier libxmlLoaderThread = 0; 342 343 static int matchFunc(const char*) 344 { 345 // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid 346 // interfering with client applications that also use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353 347 return XMLDocumentParserScope::currentCachedResourceLoader && currentThread() == libxmlLoaderThread; 348 } 349 350 class OffsetBuffer { 351 public: 352 OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { } 353 354 int readOutBytes(char* outputBuffer, unsigned askedToRead) 355 { 356 unsigned bytesLeft = m_buffer.size() - m_currentOffset; 357 unsigned lenToCopy = min(askedToRead, bytesLeft); 358 if (lenToCopy) { 359 memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy); 360 m_currentOffset += lenToCopy; 361 } 362 return lenToCopy; 363 } 364 365 private: 366 Vector<char> m_buffer; 367 unsigned m_currentOffset; 368 }; 369 370 static void switchToUTF16(xmlParserCtxtPtr ctxt) 371 { 372 // Hack around libxml2's lack of encoding overide support by manually 373 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml 374 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks 375 // and switch encodings, causing the parse to fail. 376 const UChar BOM = 0xFEFF; 377 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); 378 xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); 379 } 380 381 static bool shouldAllowExternalLoad(const KURL& url) 382 { 383 String urlString = url.string(); 384 385 // On non-Windows platforms libxml asks for this URL, the 386 // "XML_XML_DEFAULT_CATALOG", on initialization. 387 if (urlString == "file:///etc/xml/catalog") 388 return false; 389 390 // On Windows, libxml computes a URL relative to where its DLL resides. 391 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false)) 392 return false; 393 394 // The most common DTD. There isn't much point in hammering www.w3c.org 395 // by requesting this URL for every XHTML document. 396 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false)) 397 return false; 398 399 // Similarly, there isn't much point in requesting the SVG DTD. 400 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false)) 401 return false; 402 403 // The libxml doesn't give us a lot of context for deciding whether to 404 // allow this request. In the worst case, this load could be for an 405 // external entity and the resulting document could simply read the 406 // retrieved content. If we had more context, we could potentially allow 407 // the parser to load a DTD. As things stand, we take the conservative 408 // route and allow same-origin requests only. 409 if (!XMLDocumentParserScope::currentCachedResourceLoader->document()->securityOrigin()->canRequest(url)) { 410 XMLDocumentParserScope::currentCachedResourceLoader->printAccessDeniedMessage(url); 411 return false; 412 } 413 414 return true; 415 } 416 417 static void* openFunc(const char* uri) 418 { 419 ASSERT(XMLDocumentParserScope::currentCachedResourceLoader); 420 ASSERT(currentThread() == libxmlLoaderThread); 421 422 KURL url(KURL(), uri); 423 424 if (!shouldAllowExternalLoad(url)) 425 return &globalDescriptor; 426 427 ResourceError error; 428 ResourceResponse response; 429 Vector<char> data; 430 431 432 { 433 CachedResourceLoader* cachedResourceLoader = XMLDocumentParserScope::currentCachedResourceLoader; 434 XMLDocumentParserScope scope(0); 435 // FIXME: We should restore the original global error handler as well. 436 437 if (cachedResourceLoader->frame()) 438 cachedResourceLoader->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data); 439 } 440 441 // We have to check the URL again after the load to catch redirects. 442 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>. 443 if (!shouldAllowExternalLoad(response.url())) 444 return &globalDescriptor; 445 446 return new OffsetBuffer(data); 447 } 448 449 static int readFunc(void* context, char* buffer, int len) 450 { 451 // Do 0-byte reads in case of a null descriptor 452 if (context == &globalDescriptor) 453 return 0; 454 455 OffsetBuffer* data = static_cast<OffsetBuffer*>(context); 456 return data->readOutBytes(buffer, len); 457 } 458 459 static int writeFunc(void*, const char*, int) 460 { 461 // Always just do 0-byte writes 462 return 0; 463 } 464 465 static int closeFunc(void* context) 466 { 467 if (context != &globalDescriptor) { 468 OffsetBuffer* data = static_cast<OffsetBuffer*>(context); 469 delete data; 470 } 471 return 0; 472 } 473 474 #if ENABLE(XSLT) 475 static void errorFunc(void*, const char*, ...) 476 { 477 // FIXME: It would be nice to display error messages somewhere. 478 } 479 #endif 480 481 static bool didInit = false; 482 483 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData) 484 { 485 if (!didInit) { 486 xmlInitParser(); 487 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); 488 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); 489 libxmlLoaderThread = currentThread(); 490 didInit = true; 491 } 492 493 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0); 494 parser->_private = userData; 495 parser->replaceEntities = true; 496 switchToUTF16(parser); 497 498 return adoptRef(new XMLParserContext(parser)); 499 } 500 501 502 // Chunk should be encoded in UTF-8 503 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk) 504 { 505 if (!didInit) { 506 xmlInitParser(); 507 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); 508 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); 509 libxmlLoaderThread = currentThread(); 510 didInit = true; 511 } 512 513 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk, xmlStrlen((const xmlChar*)chunk)); 514 515 if (!parser) 516 return 0; 517 518 // Copy the sax handler 519 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler)); 520 521 // Set parser options. 522 // XML_PARSE_NODICT: default dictionary option. 523 // XML_PARSE_NOENT: force entities substitutions. 524 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT); 525 526 // Internal initialization 527 parser->sax2 = 1; 528 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT 529 parser->depth = 0; 530 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3); 531 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5); 532 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36); 533 parser->_private = userData; 534 535 return adoptRef(new XMLParserContext(parser)); 536 } 537 538 // -------------------------------- 539 540 bool XMLDocumentParser::supportsXMLVersion(const String& version) 541 { 542 return version == "1.0"; 543 } 544 545 XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView) 546 : ScriptableDocumentParser(document) 547 , m_view(frameView) 548 , m_context(0) 549 , m_pendingCallbacks(new PendingCallbacks) 550 , m_currentNode(document) 551 , m_sawError(false) 552 , m_sawCSS(false) 553 , m_sawXSLTransform(false) 554 , m_sawFirstElement(false) 555 , m_isXHTMLDocument(false) 556 #if ENABLE(XHTMLMP) 557 , m_isXHTMLMPDocument(false) 558 , m_hasDocTypeDeclaration(false) 559 #endif 560 , m_parserPaused(false) 561 , m_requestingScript(false) 562 , m_finishCalled(false) 563 , m_errorCount(0) 564 , m_lastErrorPosition(TextPosition1::belowRangePosition()) 565 , m_pendingScript(0) 566 , m_scriptStartPosition(TextPosition1::belowRangePosition()) 567 , m_parsingFragment(false) 568 , m_scriptingPermission(FragmentScriptingAllowed) 569 { 570 } 571 572 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, FragmentScriptingPermission scriptingPermission) 573 : ScriptableDocumentParser(fragment->document()) 574 , m_view(0) 575 , m_context(0) 576 , m_pendingCallbacks(new PendingCallbacks) 577 , m_currentNode(fragment) 578 , m_sawError(false) 579 , m_sawCSS(false) 580 , m_sawXSLTransform(false) 581 , m_sawFirstElement(false) 582 , m_isXHTMLDocument(false) 583 #if ENABLE(XHTMLMP) 584 , m_isXHTMLMPDocument(false) 585 , m_hasDocTypeDeclaration(false) 586 #endif 587 , m_parserPaused(false) 588 , m_requestingScript(false) 589 , m_finishCalled(false) 590 , m_errorCount(0) 591 , m_lastErrorPosition(TextPosition1::belowRangePosition()) 592 , m_pendingScript(0) 593 , m_scriptStartPosition(TextPosition1::belowRangePosition()) 594 , m_parsingFragment(true) 595 , m_scriptingPermission(scriptingPermission) 596 { 597 fragment->ref(); 598 599 // Add namespaces based on the parent node 600 Vector<Element*> elemStack; 601 while (parentElement) { 602 elemStack.append(parentElement); 603 604 ContainerNode* n = parentElement->parentNode(); 605 if (!n || !n->isElementNode()) 606 break; 607 parentElement = static_cast<Element*>(n); 608 } 609 610 if (elemStack.isEmpty()) 611 return; 612 613 for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) { 614 if (NamedNodeMap* attrs = element->attributes()) { 615 for (unsigned i = 0; i < attrs->length(); i++) { 616 Attribute* attr = attrs->attributeItem(i); 617 if (attr->localName() == xmlnsAtom) 618 m_defaultNamespaceURI = attr->value(); 619 else if (attr->prefix() == xmlnsAtom) 620 m_prefixToNamespaceMap.set(attr->localName(), attr->value()); 621 } 622 } 623 } 624 625 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace. 626 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument()) 627 m_defaultNamespaceURI = parentElement->namespaceURI(); 628 } 629 630 XMLParserContext::~XMLParserContext() 631 { 632 if (m_context->myDoc) 633 xmlFreeDoc(m_context->myDoc); 634 xmlFreeParserCtxt(m_context); 635 } 636 637 XMLDocumentParser::~XMLDocumentParser() 638 { 639 // The XMLDocumentParser will always be detached before being destroyed. 640 ASSERT(m_currentNodeStack.isEmpty()); 641 ASSERT(!m_currentNode); 642 643 // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp! 644 if (m_pendingScript) 645 m_pendingScript->removeClient(this); 646 } 647 648 void XMLDocumentParser::doWrite(const String& parseString) 649 { 650 ASSERT(!isDetached()); 651 if (!m_context) 652 initializeParserContext(); 653 654 // Protect the libxml context from deletion during a callback 655 RefPtr<XMLParserContext> context = m_context; 656 657 // libXML throws an error if you try to switch the encoding for an empty string. 658 if (parseString.length()) { 659 // JavaScript may cause the parser to detach during xmlParseChunk 660 // keep this alive until this function is done. 661 RefPtr<XMLDocumentParser> protect(this); 662 663 switchToUTF16(context->context()); 664 XMLDocumentParserScope scope(document()->cachedResourceLoader()); 665 xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0); 666 667 // JavaScript (which may be run under the xmlParseChunk callstack) may 668 // cause the parser to be stopped or detached. 669 if (isStopped()) 670 return; 671 } 672 673 // FIXME: Why is this here? And why is it after we process the passed source? 674 if (document()->decoder() && document()->decoder()->sawError()) { 675 // If the decoder saw an error, report it as fatal (stops parsing) 676 handleError(fatal, "Encoding error", context->context()->input->line, context->context()->input->col); 677 } 678 } 679 680 static inline String toString(const xmlChar* string, size_t size) 681 { 682 return String::fromUTF8(reinterpret_cast<const char*>(string), size); 683 } 684 685 static inline String toString(const xmlChar* string) 686 { 687 return String::fromUTF8(reinterpret_cast<const char*>(string)); 688 } 689 690 static inline AtomicString toAtomicString(const xmlChar* string, size_t size) 691 { 692 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size); 693 } 694 695 static inline AtomicString toAtomicString(const xmlChar* string) 696 { 697 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string)); 698 } 699 700 struct _xmlSAX2Namespace { 701 const xmlChar* prefix; 702 const xmlChar* uri; 703 }; 704 typedef struct _xmlSAX2Namespace xmlSAX2Namespace; 705 706 static inline void handleElementNamespaces(Element* newElement, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission) 707 { 708 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces); 709 for (int i = 0; i < nb_namespaces; i++) { 710 AtomicString namespaceQName = xmlnsAtom; 711 AtomicString namespaceURI = toAtomicString(namespaces[i].uri); 712 if (namespaces[i].prefix) 713 namespaceQName = "xmlns:" + toString(namespaces[i].prefix); 714 newElement->setAttributeNS(XMLNSNames::xmlnsNamespaceURI, namespaceQName, namespaceURI, ec, scriptingPermission); 715 if (ec) // exception setting attributes 716 return; 717 } 718 } 719 720 struct _xmlSAX2Attributes { 721 const xmlChar* localname; 722 const xmlChar* prefix; 723 const xmlChar* uri; 724 const xmlChar* value; 725 const xmlChar* end; 726 }; 727 typedef struct _xmlSAX2Attributes xmlSAX2Attributes; 728 729 static inline void handleElementAttributes(Element* newElement, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission) 730 { 731 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); 732 for (int i = 0; i < nb_attributes; i++) { 733 int valueLength = static_cast<int>(attributes[i].end - attributes[i].value); 734 AtomicString attrValue = toAtomicString(attributes[i].value, valueLength); 735 String attrPrefix = toString(attributes[i].prefix); 736 AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri); 737 AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : AtomicString(attrPrefix + ":" + toString(attributes[i].localname)); 738 739 newElement->setAttributeNS(attrURI, attrQName, attrValue, ec, scriptingPermission); 740 if (ec) // exception setting attributes 741 return; 742 } 743 } 744 745 void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces, 746 const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes) 747 { 748 if (isStopped()) 749 return; 750 751 if (m_parserPaused) { 752 m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces, 753 nb_attributes, nb_defaulted, libxmlAttributes); 754 return; 755 } 756 757 #if ENABLE(XHTMLMP) 758 // check if the DOCTYPE Declaration of XHTMLMP document exists 759 if (!m_hasDocTypeDeclaration && document()->isXHTMLMPDocument()) { 760 handleError(fatal, "DOCTYPE declaration lost.", lineNumber(), columnNumber()); 761 return; 762 } 763 #endif 764 765 exitText(); 766 767 AtomicString localName = toAtomicString(xmlLocalName); 768 AtomicString uri = toAtomicString(xmlURI); 769 AtomicString prefix = toAtomicString(xmlPrefix); 770 771 if (m_parsingFragment && uri.isNull()) { 772 if (!prefix.isNull()) 773 uri = m_prefixToNamespaceMap.get(prefix); 774 else 775 uri = m_defaultNamespaceURI; 776 } 777 778 #if ENABLE(XHTMLMP) 779 if (!m_sawFirstElement && isXHTMLMPDocument()) { 780 // As per the section 7.1 of OMA-WAP-XHTMLMP-V1_1-20061020-A.pdf, 781 // we should make sure that the root element MUST be 'html' and 782 // ensure the name of the default namespace on the root elment 'html' 783 // MUST be 'http://www.w3.org/1999/xhtml' 784 if (localName != HTMLNames::htmlTag.localName()) { 785 handleError(fatal, "XHTMLMP document expects 'html' as root element.", lineNumber(), columnNumber()); 786 return; 787 } 788 789 if (uri.isNull()) { 790 m_defaultNamespaceURI = HTMLNames::xhtmlNamespaceURI; 791 uri = m_defaultNamespaceURI; 792 } 793 } 794 #endif 795 796 bool isFirstElement = !m_sawFirstElement; 797 m_sawFirstElement = true; 798 799 QualifiedName qName(prefix, localName, uri); 800 RefPtr<Element> newElement = document()->createElement(qName, true); 801 if (!newElement) { 802 stopParsing(); 803 return; 804 } 805 806 ExceptionCode ec = 0; 807 handleElementNamespaces(newElement.get(), libxmlNamespaces, nb_namespaces, ec, m_scriptingPermission); 808 if (ec) { 809 stopParsing(); 810 return; 811 } 812 813 handleElementAttributes(newElement.get(), libxmlAttributes, nb_attributes, ec, m_scriptingPermission); 814 if (ec) { 815 stopParsing(); 816 return; 817 } 818 819 newElement->beginParsingChildren(); 820 821 ScriptElement* scriptElement = toScriptElement(newElement.get()); 822 if (scriptElement) 823 m_scriptStartPosition = textPositionOneBased(); 824 825 m_currentNode->deprecatedParserAddChild(newElement.get()); 826 827 pushCurrentNode(newElement.get()); 828 if (m_view && !newElement->attached()) 829 newElement->attach(); 830 831 #if ENABLE(OFFLINE_WEB_APPLICATIONS) 832 if (newElement->hasTagName(HTMLNames::htmlTag)) 833 static_cast<HTMLHtmlElement*>(newElement.get())->insertedByParser(); 834 #endif 835 836 if (!m_parsingFragment && isFirstElement && document()->frame()) 837 document()->frame()->loader()->dispatchDocumentElementAvailable(); 838 } 839 840 void XMLDocumentParser::endElementNs() 841 { 842 if (isStopped()) 843 return; 844 845 if (m_parserPaused) { 846 m_pendingCallbacks->appendEndElementNSCallback(); 847 return; 848 } 849 850 // JavaScript can detach the parser. Make sure this is not released 851 // before the end of this method. 852 RefPtr<XMLDocumentParser> protect(this); 853 854 exitText(); 855 856 RefPtr<Node> n = m_currentNode; 857 n->finishParsingChildren(); 858 859 if (m_scriptingPermission == FragmentScriptingNotAllowed && n->isElementNode() && toScriptElement(static_cast<Element*>(n.get()))) { 860 popCurrentNode(); 861 ExceptionCode ec; 862 n->remove(ec); 863 return; 864 } 865 866 if (!n->isElementNode() || !m_view) { 867 popCurrentNode(); 868 return; 869 } 870 871 Element* element = static_cast<Element*>(n.get()); 872 873 // The element's parent may have already been removed from document. 874 // Parsing continues in this case, but scripts aren't executed. 875 if (!element->inDocument()) { 876 popCurrentNode(); 877 return; 878 } 879 880 ScriptElement* scriptElement = toScriptElement(element); 881 if (!scriptElement) { 882 popCurrentNode(); 883 return; 884 } 885 886 // Don't load external scripts for standalone documents (for now). 887 ASSERT(!m_pendingScript); 888 m_requestingScript = true; 889 890 bool successfullyPrepared = scriptElement->prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute); 891 if (!successfullyPrepared) { 892 #if ENABLE(XHTMLMP) 893 if (!scriptElement->isScriptTypeSupported(ScriptElement::AllowLegacyTypeInTypeAttribute)) 894 document()->setShouldProcessNoscriptElement(true); 895 #endif 896 } else { 897 // FIXME: Script execution should be shared between 898 // the libxml2 and Qt XMLDocumentParser implementations. 899 900 if (scriptElement->readyToBeParserExecuted()) 901 scriptElement->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartPosition)); 902 else if (scriptElement->willBeParserExecuted()) { 903 m_pendingScript = scriptElement->cachedScript(); 904 m_scriptElement = element; 905 m_pendingScript->addClient(this); 906 907 // m_pendingScript will be 0 if script was already loaded and addClient() executed it. 908 if (m_pendingScript) 909 pauseParsing(); 910 } else 911 m_scriptElement = 0; 912 913 // JavaScript may have detached the parser 914 if (isDetached()) 915 return; 916 } 917 m_requestingScript = false; 918 popCurrentNode(); 919 } 920 921 void XMLDocumentParser::characters(const xmlChar* s, int len) 922 { 923 if (isStopped()) 924 return; 925 926 if (m_parserPaused) { 927 m_pendingCallbacks->appendCharactersCallback(s, len); 928 return; 929 } 930 931 if (!m_currentNode->isTextNode()) 932 enterText(); 933 m_bufferedText.append(s, len); 934 } 935 936 void XMLDocumentParser::error(ErrorType type, const char* message, va_list args) 937 { 938 if (isStopped()) 939 return; 940 941 #if COMPILER(MSVC) || COMPILER(RVCT) 942 char m[1024]; 943 vsnprintf(m, sizeof(m) - 1, message, args); 944 #else 945 char* m; 946 if (vasprintf(&m, message, args) == -1) 947 return; 948 #endif 949 950 if (m_parserPaused) 951 m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(m), lineNumber(), columnNumber()); 952 else 953 handleError(type, m, lineNumber(), columnNumber()); 954 955 #if !COMPILER(MSVC) && !COMPILER(RVCT) 956 free(m); 957 #endif 958 } 959 960 void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data) 961 { 962 if (isStopped()) 963 return; 964 965 if (m_parserPaused) { 966 m_pendingCallbacks->appendProcessingInstructionCallback(target, data); 967 return; 968 } 969 970 exitText(); 971 972 // ### handle exceptions 973 ExceptionCode ec = 0; 974 RefPtr<ProcessingInstruction> pi = document()->createProcessingInstruction( 975 toString(target), toString(data), ec); 976 if (ec) 977 return; 978 979 pi->setCreatedByParser(true); 980 981 m_currentNode->deprecatedParserAddChild(pi.get()); 982 if (m_view && !pi->attached()) 983 pi->attach(); 984 985 pi->finishParsingChildren(); 986 987 if (pi->isCSS()) 988 m_sawCSS = true; 989 #if ENABLE(XSLT) 990 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL(); 991 if (m_sawXSLTransform && !document()->transformSourceDocument()) 992 stopParsing(); 993 #endif 994 } 995 996 void XMLDocumentParser::cdataBlock(const xmlChar* s, int len) 997 { 998 if (isStopped()) 999 return; 1000 1001 if (m_parserPaused) { 1002 m_pendingCallbacks->appendCDATABlockCallback(s, len); 1003 return; 1004 } 1005 1006 exitText(); 1007 1008 RefPtr<Node> newNode = CDATASection::create(document(), toString(s, len)); 1009 m_currentNode->deprecatedParserAddChild(newNode.get()); 1010 if (m_view && !newNode->attached()) 1011 newNode->attach(); 1012 } 1013 1014 void XMLDocumentParser::comment(const xmlChar* s) 1015 { 1016 if (isStopped()) 1017 return; 1018 1019 if (m_parserPaused) { 1020 m_pendingCallbacks->appendCommentCallback(s); 1021 return; 1022 } 1023 1024 exitText(); 1025 1026 RefPtr<Node> newNode = Comment::create(document(), toString(s)); 1027 m_currentNode->deprecatedParserAddChild(newNode.get()); 1028 if (m_view && !newNode->attached()) 1029 newNode->attach(); 1030 } 1031 1032 void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone) 1033 { 1034 ExceptionCode ec = 0; 1035 1036 if (version) 1037 document()->setXMLVersion(toString(version), ec); 1038 document()->setXMLStandalone(standalone == 1, ec); // possible values are 0, 1, and -1 1039 if (encoding) 1040 document()->setXMLEncoding(toString(encoding)); 1041 } 1042 1043 void XMLDocumentParser::endDocument() 1044 { 1045 exitText(); 1046 #if ENABLE(XHTMLMP) 1047 m_hasDocTypeDeclaration = false; 1048 #endif 1049 } 1050 1051 void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID) 1052 { 1053 if (isStopped()) 1054 return; 1055 1056 if (m_parserPaused) { 1057 m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID); 1058 return; 1059 } 1060 1061 if (document()) { 1062 #if ENABLE(WML) || ENABLE(XHTMLMP) 1063 String extId = toString(externalID); 1064 #endif 1065 #if ENABLE(WML) 1066 if (isWMLDocument() 1067 && extId != "-//WAPFORUM//DTD WML 1.3//EN" 1068 && extId != "-//WAPFORUM//DTD WML 1.2//EN" 1069 && extId != "-//WAPFORUM//DTD WML 1.1//EN" 1070 && extId != "-//WAPFORUM//DTD WML 1.0//EN") 1071 handleError(fatal, "Invalid DTD Public ID", lineNumber(), columnNumber()); 1072 #endif 1073 #if ENABLE(XHTMLMP) 1074 String dtdName = toString(name); 1075 if (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" 1076 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") { 1077 if (dtdName != HTMLNames::htmlTag.localName()) { 1078 handleError(fatal, "Invalid DOCTYPE declaration, expected 'html' as root element.", lineNumber(), columnNumber()); 1079 return; 1080 } 1081 1082 if (document()->isXHTMLMPDocument()) 1083 setIsXHTMLMPDocument(true); 1084 else 1085 setIsXHTMLDocument(true); 1086 1087 m_hasDocTypeDeclaration = true; 1088 } 1089 #endif 1090 1091 document()->parserAddChild(DocumentType::create(document(), toString(name), toString(externalID), toString(systemID))); 1092 } 1093 } 1094 1095 static inline XMLDocumentParser* getParser(void* closure) 1096 { 1097 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1098 return static_cast<XMLDocumentParser*>(ctxt->_private); 1099 } 1100 1101 // This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219 1102 // Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity. 1103 static inline bool hackAroundLibXMLEntityBug(void* closure) 1104 { 1105 #if LIBXML_VERSION >= 20627 1106 UNUSED_PARAM(closure); 1107 1108 // This bug has been fixed in libxml 2.6.27. 1109 return false; 1110 #else 1111 return static_cast<xmlParserCtxtPtr>(closure)->node; 1112 #endif 1113 } 1114 1115 static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes) 1116 { 1117 if (hackAroundLibXMLEntityBug(closure)) 1118 return; 1119 1120 getParser(closure)->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes); 1121 } 1122 1123 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*) 1124 { 1125 if (hackAroundLibXMLEntityBug(closure)) 1126 return; 1127 1128 getParser(closure)->endElementNs(); 1129 } 1130 1131 static void charactersHandler(void* closure, const xmlChar* s, int len) 1132 { 1133 if (hackAroundLibXMLEntityBug(closure)) 1134 return; 1135 1136 getParser(closure)->characters(s, len); 1137 } 1138 1139 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data) 1140 { 1141 if (hackAroundLibXMLEntityBug(closure)) 1142 return; 1143 1144 getParser(closure)->processingInstruction(target, data); 1145 } 1146 1147 static void cdataBlockHandler(void* closure, const xmlChar* s, int len) 1148 { 1149 if (hackAroundLibXMLEntityBug(closure)) 1150 return; 1151 1152 getParser(closure)->cdataBlock(s, len); 1153 } 1154 1155 static void commentHandler(void* closure, const xmlChar* comment) 1156 { 1157 if (hackAroundLibXMLEntityBug(closure)) 1158 return; 1159 1160 getParser(closure)->comment(comment); 1161 } 1162 1163 WTF_ATTRIBUTE_PRINTF(2, 3) 1164 static void warningHandler(void* closure, const char* message, ...) 1165 { 1166 va_list args; 1167 va_start(args, message); 1168 getParser(closure)->error(XMLDocumentParser::warning, message, args); 1169 va_end(args); 1170 } 1171 1172 WTF_ATTRIBUTE_PRINTF(2, 3) 1173 static void fatalErrorHandler(void* closure, const char* message, ...) 1174 { 1175 va_list args; 1176 va_start(args, message); 1177 getParser(closure)->error(XMLDocumentParser::fatal, message, args); 1178 va_end(args); 1179 } 1180 1181 WTF_ATTRIBUTE_PRINTF(2, 3) 1182 static void normalErrorHandler(void* closure, const char* message, ...) 1183 { 1184 va_list args; 1185 va_start(args, message); 1186 getParser(closure)->error(XMLDocumentParser::nonFatal, message, args); 1187 va_end(args); 1188 } 1189 1190 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is 1191 // a hack to avoid malloc/free. Using a global variable like this could cause trouble 1192 // if libxml implementation details were to change 1193 static xmlChar sharedXHTMLEntityResult[5] = {0, 0, 0, 0, 0}; 1194 1195 static xmlEntityPtr sharedXHTMLEntity() 1196 { 1197 static xmlEntity entity; 1198 if (!entity.type) { 1199 entity.type = XML_ENTITY_DECL; 1200 entity.orig = sharedXHTMLEntityResult; 1201 entity.content = sharedXHTMLEntityResult; 1202 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY; 1203 } 1204 return &entity; 1205 } 1206 1207 static xmlEntityPtr getXHTMLEntity(const xmlChar* name) 1208 { 1209 UChar c = decodeNamedEntity(reinterpret_cast<const char*>(name)); 1210 if (!c) 1211 return 0; 1212 1213 CString value = String(&c, 1).utf8(); 1214 ASSERT(value.length() < 5); 1215 xmlEntityPtr entity = sharedXHTMLEntity(); 1216 entity->length = value.length(); 1217 entity->name = name; 1218 memcpy(sharedXHTMLEntityResult, value.data(), entity->length + 1); 1219 1220 return entity; 1221 } 1222 1223 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name) 1224 { 1225 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1226 xmlEntityPtr ent = xmlGetPredefinedEntity(name); 1227 if (ent) { 1228 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY; 1229 return ent; 1230 } 1231 1232 ent = xmlGetDocEntity(ctxt->myDoc, name); 1233 if (!ent && (getParser(closure)->isXHTMLDocument() 1234 #if ENABLE(XHTMLMP) 1235 || getParser(closure)->isXHTMLMPDocument() 1236 #endif 1237 #if ENABLE(WML) 1238 || getParser(closure)->isWMLDocument() 1239 #endif 1240 )) { 1241 ent = getXHTMLEntity(name); 1242 if (ent) 1243 ent->etype = XML_INTERNAL_GENERAL_ENTITY; 1244 } 1245 1246 return ent; 1247 } 1248 1249 static void startDocumentHandler(void* closure) 1250 { 1251 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure); 1252 switchToUTF16(ctxt); 1253 getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone); 1254 xmlSAX2StartDocument(closure); 1255 } 1256 1257 static void endDocumentHandler(void* closure) 1258 { 1259 getParser(closure)->endDocument(); 1260 xmlSAX2EndDocument(closure); 1261 } 1262 1263 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID) 1264 { 1265 getParser(closure)->internalSubset(name, externalID, systemID); 1266 xmlSAX2InternalSubset(closure, name, externalID, systemID); 1267 } 1268 1269 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*) 1270 { 1271 String extId = toString(externalId); 1272 if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN") 1273 || (extId == "-//W3C//DTD XHTML 1.1//EN") 1274 || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN") 1275 || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN") 1276 || (extId == "-//W3C//DTD XHTML Basic 1.0//EN") 1277 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN") 1278 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN") 1279 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN") 1280 ) 1281 getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not. 1282 } 1283 1284 static void ignorableWhitespaceHandler(void*, const xmlChar*, int) 1285 { 1286 // nothing to do, but we need this to work around a crasher 1287 // http://bugzilla.gnome.org/show_bug.cgi?id=172255 1288 // http://bugs.webkit.org/show_bug.cgi?id=5792 1289 } 1290 1291 void XMLDocumentParser::initializeParserContext(const char* chunk) 1292 { 1293 xmlSAXHandler sax; 1294 memset(&sax, 0, sizeof(sax)); 1295 1296 sax.error = normalErrorHandler; 1297 sax.fatalError = fatalErrorHandler; 1298 sax.characters = charactersHandler; 1299 sax.processingInstruction = processingInstructionHandler; 1300 sax.cdataBlock = cdataBlockHandler; 1301 sax.comment = commentHandler; 1302 sax.warning = warningHandler; 1303 sax.startElementNs = startElementNsHandler; 1304 sax.endElementNs = endElementNsHandler; 1305 sax.getEntity = getEntityHandler; 1306 sax.startDocument = startDocumentHandler; 1307 sax.endDocument = endDocumentHandler; 1308 sax.internalSubset = internalSubsetHandler; 1309 sax.externalSubset = externalSubsetHandler; 1310 sax.ignorableWhitespace = ignorableWhitespaceHandler; 1311 sax.entityDecl = xmlSAX2EntityDecl; 1312 sax.initialized = XML_SAX2_MAGIC; 1313 DocumentParser::startParsing(); 1314 m_sawError = false; 1315 m_sawCSS = false; 1316 m_sawXSLTransform = false; 1317 m_sawFirstElement = false; 1318 1319 XMLDocumentParserScope scope(document()->cachedResourceLoader()); 1320 if (m_parsingFragment) 1321 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk); 1322 else { 1323 ASSERT(!chunk); 1324 m_context = XMLParserContext::createStringParser(&sax, this); 1325 } 1326 } 1327 1328 void XMLDocumentParser::doEnd() 1329 { 1330 if (!isStopped()) { 1331 if (m_context) { 1332 // Tell libxml we're done. 1333 { 1334 XMLDocumentParserScope scope(document()->cachedResourceLoader()); 1335 xmlParseChunk(context(), 0, 0, 1); 1336 } 1337 1338 m_context = 0; 1339 } 1340 } 1341 1342 #if ENABLE(XSLT) 1343 XMLTreeViewer xmlTreeViewer(document()); 1344 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && xmlTreeViewer.hasNoStyleInformation(); 1345 if (xmlViewerMode) 1346 xmlTreeViewer.transformDocumentToTreeView(); 1347 1348 if (m_sawXSLTransform) { 1349 void* doc = xmlDocPtrForString(document()->cachedResourceLoader(), m_originalSourceForTransform, document()->url().string()); 1350 document()->setTransformSource(new TransformSource(doc)); 1351 1352 document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets. 1353 document()->styleSelectorChanged(RecalcStyleImmediately); 1354 document()->setParsing(true); 1355 1356 DocumentParser::stopParsing(); 1357 } 1358 #endif 1359 } 1360 1361 #if ENABLE(XSLT) 1362 void* xmlDocPtrForString(CachedResourceLoader* cachedResourceLoader, const String& source, const String& url) 1363 { 1364 if (source.isEmpty()) 1365 return 0; 1366 1367 // Parse in a single chunk into an xmlDocPtr 1368 // FIXME: Hook up error handlers so that a failure to parse the main document results in 1369 // good error messages. 1370 const UChar BOM = 0xFEFF; 1371 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); 1372 1373 XMLDocumentParserScope scope(cachedResourceLoader, errorFunc, 0); 1374 xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()), 1375 source.length() * sizeof(UChar), 1376 url.latin1().data(), 1377 BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE", 1378 XSLT_PARSE_OPTIONS); 1379 return sourceDoc; 1380 } 1381 #endif 1382 1383 int XMLDocumentParser::lineNumber() const 1384 { 1385 // FIXME: The implementation probably returns 1-based int, but method should return 0-based. 1386 return context() ? context()->input->line : 1; 1387 } 1388 1389 int XMLDocumentParser::columnNumber() const 1390 { 1391 // FIXME: The implementation probably returns 1-based int, but method should return 0-based. 1392 return context() ? context()->input->col : 1; 1393 } 1394 1395 TextPosition0 XMLDocumentParser::textPosition() const 1396 { 1397 xmlParserCtxtPtr context = this->context(); 1398 if (!context) 1399 return TextPosition0::minimumPosition(); 1400 // FIXME: The context probably contains 1-based numbers, but we treat them as 0-based, 1401 // to be consistent with fixme's in lineNumber() and columnNumber 1402 // methods. 1403 return TextPosition0(WTF::ZeroBasedNumber::fromZeroBasedInt(context->input->line), 1404 WTF::ZeroBasedNumber::fromZeroBasedInt(context->input->col)); 1405 } 1406 1407 // This method has a correct implementation, in contrast to textPosition() method. 1408 // It should replace textPosition(). 1409 TextPosition1 XMLDocumentParser::textPositionOneBased() const 1410 { 1411 xmlParserCtxtPtr context = this->context(); 1412 if (!context) 1413 return TextPosition1::minimumPosition(); 1414 return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(context->input->line), 1415 WTF::OneBasedNumber::fromOneBasedInt(context->input->col)); 1416 } 1417 1418 void XMLDocumentParser::stopParsing() 1419 { 1420 DocumentParser::stopParsing(); 1421 if (context()) 1422 xmlStopParser(context()); 1423 } 1424 1425 void XMLDocumentParser::resumeParsing() 1426 { 1427 ASSERT(!isDetached()); 1428 ASSERT(m_parserPaused); 1429 1430 m_parserPaused = false; 1431 1432 // First, execute any pending callbacks 1433 while (!m_pendingCallbacks->isEmpty()) { 1434 m_pendingCallbacks->callAndRemoveFirstCallback(this); 1435 1436 // A callback paused the parser 1437 if (m_parserPaused) 1438 return; 1439 } 1440 1441 // Then, write any pending data 1442 SegmentedString rest = m_pendingSrc; 1443 m_pendingSrc.clear(); 1444 append(rest); 1445 1446 // Finally, if finish() has been called and write() didn't result 1447 // in any further callbacks being queued, call end() 1448 if (m_finishCalled && m_pendingCallbacks->isEmpty()) 1449 end(); 1450 } 1451 1452 bool XMLDocumentParser::appendFragmentSource(const String& chunk) 1453 { 1454 ASSERT(!m_context); 1455 ASSERT(m_parsingFragment); 1456 1457 CString chunkAsUtf8 = chunk.utf8(); 1458 initializeParserContext(chunkAsUtf8.data()); 1459 xmlParseContent(context()); 1460 endDocument(); // Close any open text nodes. 1461 1462 // FIXME: If this code is actually needed, it should probably move to finish() 1463 // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). 1464 // Check if all the chunk has been processed. 1465 long bytesProcessed = xmlByteConsumed(context()); 1466 if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) { 1467 // FIXME: I don't believe we can hit this case without also having seen an error. 1468 // If we hit this ASSERT, we've found a test case which demonstrates the need for this code. 1469 ASSERT(m_sawError); 1470 return false; 1471 } 1472 1473 // No error if the chunk is well formed or it is not but we have no error. 1474 return context()->wellFormed || !xmlCtxtGetLastError(context()); 1475 } 1476 1477 // -------------------------------- 1478 1479 struct AttributeParseState { 1480 HashMap<String, String> attributes; 1481 bool gotAttributes; 1482 }; 1483 1484 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/, 1485 const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/, 1486 int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes) 1487 { 1488 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0) 1489 return; 1490 1491 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); 1492 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private); 1493 1494 state->gotAttributes = true; 1495 1496 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); 1497 for (int i = 0; i < nb_attributes; i++) { 1498 String attrLocalName = toString(attributes[i].localname); 1499 int valueLength = (int) (attributes[i].end - attributes[i].value); 1500 String attrValue = toString(attributes[i].value, valueLength); 1501 String attrPrefix = toString(attributes[i].prefix); 1502 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName; 1503 1504 state->attributes.set(attrQName, attrValue); 1505 } 1506 } 1507 1508 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK) 1509 { 1510 AttributeParseState state; 1511 state.gotAttributes = false; 1512 1513 xmlSAXHandler sax; 1514 memset(&sax, 0, sizeof(sax)); 1515 sax.startElementNs = attributesStartElementNsHandler; 1516 sax.initialized = XML_SAX2_MAGIC; 1517 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state); 1518 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />"; 1519 xmlParseChunk(parser->context(), reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1); 1520 attrsOK = state.gotAttributes; 1521 return state.attributes; 1522 } 1523 1524 } 1525