Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2000 Peter Kelly (pmk (at) post.com)
      3  * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
      4  * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org)
      5  * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org)
      6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
      7  * Copyright (C) 2008 Holger Hans Peter Freyther
      8  * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
      9  *
     10  * This library is free software; you can redistribute it and/or
     11  * modify it under the terms of the GNU Library General Public
     12  * License as published by the Free Software Foundation; either
     13  * version 2 of the License, or (at your option) any later version.
     14  *
     15  * This library is distributed in the hope that it will be useful,
     16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     18  * Library General Public License for more details.
     19  *
     20  * You should have received a copy of the GNU Library General Public License
     21  * along with this library; see the file COPYING.LIB.  If not, write to
     22  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     23  * Boston, MA 02110-1301, USA.
     24  */
     25 
     26 #include "config.h"
     27 #include "core/xml/parser/XMLDocumentParser.h"
     28 
     29 #include <libxml/catalog.h>
     30 #include <libxml/parser.h>
     31 #include <libxml/parserInternals.h>
     32 #include <libxslt/xslt.h>
     33 #include "FetchInitiatorTypeNames.h"
     34 #include "HTMLNames.h"
     35 #include "RuntimeEnabledFeatures.h"
     36 #include "XMLNSNames.h"
     37 #include "bindings/v8/ExceptionState.h"
     38 #include "bindings/v8/ExceptionStatePlaceholder.h"
     39 #include "bindings/v8/ScriptController.h"
     40 #include "bindings/v8/ScriptSourceCode.h"
     41 #include "core/dom/CDATASection.h"
     42 #include "core/dom/Comment.h"
     43 #include "core/dom/Document.h"
     44 #include "core/dom/DocumentFragment.h"
     45 #include "core/dom/DocumentType.h"
     46 #include "core/dom/ProcessingInstruction.h"
     47 #include "core/dom/ScriptLoader.h"
     48 #include "core/dom/TransformSource.h"
     49 #include "core/fetch/ResourceFetcher.h"
     50 #include "core/fetch/ScriptResource.h"
     51 #include "core/fetch/TextResourceDecoder.h"
     52 #include "core/frame/Frame.h"
     53 #include "core/html/HTMLHtmlElement.h"
     54 #include "core/html/HTMLTemplateElement.h"
     55 #include "core/html/parser/HTMLEntityParser.h"
     56 #include "core/loader/FrameLoader.h"
     57 #include "core/loader/ImageLoader.h"
     58 #include "core/frame/UseCounter.h"
     59 #include "core/xml/XMLTreeViewer.h"
     60 #include "core/xml/parser/XMLDocumentParserScope.h"
     61 #include "core/xml/parser/XMLParserInput.h"
     62 #include "platform/SharedBuffer.h"
     63 #include "platform/network/ResourceError.h"
     64 #include "platform/network/ResourceRequest.h"
     65 #include "platform/network/ResourceResponse.h"
     66 #include "platform/weborigin/SecurityOrigin.h"
     67 #include "wtf/StringExtras.h"
     68 #include "wtf/TemporaryChange.h"
     69 #include "wtf/Threading.h"
     70 #include "wtf/Vector.h"
     71 #include "wtf/unicode/UTF8.h"
     72 
     73 using namespace std;
     74 
     75 namespace WebCore {
     76 
     77 using namespace HTMLNames;
     78 
     79 // FIXME: HTMLConstructionSite has a limit of 512, should these match?
     80 static const unsigned maxXMLTreeDepth = 5000;
     81 
     82 static inline String toString(const xmlChar* string, size_t length)
     83 {
     84     return String::fromUTF8(reinterpret_cast<const char*>(string), length);
     85 }
     86 
     87 static inline String toString(const xmlChar* string)
     88 {
     89     return String::fromUTF8(reinterpret_cast<const char*>(string));
     90 }
     91 
     92 static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
     93 {
     94     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
     95 }
     96 
     97 static inline AtomicString toAtomicString(const xmlChar* string)
     98 {
     99     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
    100 }
    101 
    102 static inline bool hasNoStyleInformation(Document* document)
    103 {
    104     if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
    105         return false;
    106 
    107     if (!document->frame() || !document->frame()->page())
    108         return false;
    109 
    110     if (document->frame()->tree().parent())
    111         return false; // This document is not in a top frame
    112 
    113     return true;
    114 }
    115 
    116 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
    117 public:
    118     PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
    119         int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
    120         : m_localName(localName)
    121         , m_prefix(prefix)
    122         , m_uri(uri)
    123         , m_namespaceCount(namespaceCount)
    124         , m_attributeCount(attributeCount)
    125         , m_defaultedCount(defaultedCount)
    126     {
    127         m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
    128         for (int i = 0; i < namespaceCount * 2 ; i++)
    129             m_namespaces[i] = xmlStrdup(namespaces[i]);
    130         m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
    131         for (int i = 0; i < attributeCount; i++) {
    132             // Each attribute has 5 elements in the array:
    133             // name, prefix, uri, value and an end pointer.
    134             for (int j = 0; j < 3; j++)
    135                 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
    136             int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
    137             m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
    138             m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
    139         }
    140     }
    141 
    142     virtual ~PendingStartElementNSCallback()
    143     {
    144         for (int i = 0; i < m_namespaceCount * 2; i++)
    145             xmlFree(m_namespaces[i]);
    146         xmlFree(m_namespaces);
    147         for (int i = 0; i < m_attributeCount; i++)
    148             for (int j = 0; j < 4; j++)
    149                 xmlFree(m_attributes[i * 5 + j]);
    150         xmlFree(m_attributes);
    151     }
    152 
    153     virtual void call(XMLDocumentParser* parser) OVERRIDE
    154     {
    155         parser->startElementNs(m_localName, m_prefix, m_uri,
    156                                   m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
    157                                   m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
    158     }
    159 
    160 private:
    161     AtomicString m_localName;
    162     AtomicString m_prefix;
    163     AtomicString m_uri;
    164     int m_namespaceCount;
    165     xmlChar** m_namespaces;
    166     int m_attributeCount;
    167     int m_defaultedCount;
    168     xmlChar** m_attributes;
    169 };
    170 
// Queued endElementNs event. It carries no data; replaying it simply forwards
// to XMLDocumentParser::endElementNs().
class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
public:
    // Replays the stored event on |parser|.
    virtual void call(XMLDocumentParser* parser) OVERRIDE
    {
        parser->endElementNs();
    }
};
    178 
// Queued characters event. The constructor duplicates |length| bytes of
// |chars| with xmlStrndup; the copy is owned by this object and released
// with xmlFree in the destructor.
class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
public:
    PendingCharactersCallback(const xmlChar* chars, int length)
        : m_chars(xmlStrndup(chars, length))
        , m_length(length)
    {
    }

    virtual ~PendingCharactersCallback()
    {
        xmlFree(m_chars);
    }

    // Replays the stored event on |parser| with the copied data and the
    // original length.
    virtual void call(XMLDocumentParser* parser) OVERRIDE
    {
        parser->characters(m_chars, m_length);
    }

private:
    xmlChar* m_chars; // Owned copy made with xmlStrndup.
    int m_length; // Length passed to the constructor.
};
    201 
// Queued processing-instruction event; stores copies of the target and data
// strings and forwards them to XMLDocumentParser::processingInstruction().
class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
public:
    PendingProcessingInstructionCallback(const String& target, const String& data)
        : m_target(target)
        , m_data(data)
    {
    }

    // Replays the stored event on |parser|.
    virtual void call(XMLDocumentParser* parser) OVERRIDE
    {
        parser->processingInstruction(m_target, m_data);
    }

private:
    String m_target;
    String m_data;
};
    219 
// Queued CDATA-block event; stores the block's text and forwards it to
// XMLDocumentParser::cdataBlock().
class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
public:
    explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }

    // Replays the stored event on |parser|.
    virtual void call(XMLDocumentParser* parser) OVERRIDE
    {
        parser->cdataBlock(m_text);
    }

private:
    String m_text;
};
    232 
// Queued comment event; stores the comment text and forwards it to
// XMLDocumentParser::comment().
class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
public:
    explicit PendingCommentCallback(const String& text) : m_text(text) { }

    // Replays the stored event on |parser|.
    virtual void call(XMLDocumentParser* parser) OVERRIDE
    {
        parser->comment(m_text);
    }

private:
    String m_text;
};
    245 
// Queued internal-subset event; stores the name, external ID and system ID
// and forwards them to XMLDocumentParser::internalSubset().
class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
public:
    PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
        : m_name(name)
        , m_externalID(externalID)
        , m_systemID(systemID)
    {
    }

    // Replays the stored event on |parser|.
    virtual void call(XMLDocumentParser* parser) OVERRIDE
    {
        parser->internalSubset(m_name, m_externalID, m_systemID);
    }

private:
    String m_name;
    String m_externalID;
    String m_systemID;
};
    265 
// Queued error report. The message is duplicated with xmlStrdup and owned by
// this object (released with xmlFree in the destructor); the line and column
// are stored so the original position can be reported on replay.
class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
public:
    PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
        : m_type(type)
        , m_message(xmlStrdup(message))
        , m_lineNumber(lineNumber)
        , m_columnNumber(columnNumber)
    {
    }

    virtual ~PendingErrorCallback()
    {
        xmlFree(m_message);
    }

    // Replays the stored error through XMLDocumentParser::handleError(),
    // rebuilding the TextPosition from the saved line and column.
    virtual void call(XMLDocumentParser* parser) OVERRIDE
    {
        parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
    }

private:
    XMLErrors::ErrorType m_type;
    xmlChar* m_message; // Owned copy made with xmlStrdup.
    OrdinalNumber m_lineNumber;
    OrdinalNumber m_columnNumber;
};
    292 
    293 void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
    294 {
    295     ASSERT(n);
    296     ASSERT(m_currentNode);
    297     if (n != document())
    298         n->ref();
    299     m_currentNodeStack.append(m_currentNode);
    300     m_currentNode = n;
    301     if (m_currentNodeStack.size() > maxXMLTreeDepth)
    302         handleError(XMLErrors::fatal, "Excessive node nesting.", textPosition());
    303 }
    304 
    305 void XMLDocumentParser::popCurrentNode()
    306 {
    307     if (!m_currentNode)
    308         return;
    309     ASSERT(m_currentNodeStack.size());
    310 
    311     if (m_currentNode != document())
    312         m_currentNode->deref();
    313 
    314     m_currentNode = m_currentNodeStack.last();
    315     m_currentNodeStack.removeLast();
    316 }
    317 
// Drops the parser's manual node references and resets the insertion state:
// the current node and every node still on m_currentNodeStack are dereffed
// (the document is never dereffed), then the current node, the leaf text
// node and the stack are cleared.
void XMLDocumentParser::clearCurrentNodeStack()
{
    // Mirrors pushCurrentNode(): the document is the one node that was not ref'd.
    if (m_currentNode && m_currentNode != document())
        m_currentNode->deref();
    m_currentNode = 0;
    m_leafTextNode = 0;

    if (m_currentNodeStack.size()) { // Aborted parsing.
        // Entries above the bottom are dereffed unconditionally; the loop
        // counts down with an unsigned index and stops before index 0.
        for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
            m_currentNodeStack[i]->deref();
        // The bottom entry is handled separately because it may be null or
        // the document, neither of which must be dereffed.
        if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
            m_currentNodeStack[0]->deref();
        m_currentNodeStack.clear();
    }
}
    333 
// Not supported by this parser: the argument is ignored and reaching this
// function trips ASSERT_NOT_REACHED().
void XMLDocumentParser::insert(const SegmentedString&)
{
    ASSERT_NOT_REACHED();
}
    338 
// Feeds a new piece of source text to the parser.
// - While an XSL transform has been seen, or before the first element has been
//   seen, the text is also accumulated in m_originalSourceForTransform.
// - Nothing is parsed when the parser is stopped or an XSL transform was seen.
// - While paused, the text is queued in m_pendingSrc instead of being parsed.
// - Otherwise the text is handed to doWrite().
void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
{
    SegmentedString source(inputSource);
    if (m_sawXSLTransform || !m_sawFirstElement)
        m_originalSourceForTransform.append(source);

    if (isStopped() || m_sawXSLTransform)
        return;

    if (m_parserPaused) {
        m_pendingSrc.append(source);
        return;
    }

    // JavaScript can detach the parser. Make sure this is not released
    // before the end of this method.
    RefPtr<XMLDocumentParser> protect(this);

    doWrite(source.toString());

    // doWrite() may have stopped the parser; do not touch the document then.
    if (isStopped())
        return;

    if (document()->frame() && document()->frame()->script().canExecuteScripts(NotAboutToExecuteScript))
        ImageLoader::dispatchPendingBeforeLoadEvents();
}
    365 
    366 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
    367 {
    368     m_xmlErrors.handleError(type, formattedMessage, position);
    369     if (type != XMLErrors::warning)
    370         m_sawError = true;
    371     if (type == XMLErrors::fatal)
    372         stopParsing();
    373 }
    374 
// Starts a new text run: creates an empty Text node in the current node's
// document, remembers it as m_leafTextNode and appends it to the current
// node. Expects that no text is buffered and no leaf text node is open.
void XMLDocumentParser::enterText()
{
    ASSERT(m_bufferedText.size() == 0);
    ASSERT(!m_leafTextNode);
    m_leafTextNode = Text::create(m_currentNode->document(), "");
    m_currentNode->parserAppendChild(m_leafTextNode.get());
}
    382 
    383 void XMLDocumentParser::exitText()
    384 {
    385     if (isStopped())
    386         return;
    387 
    388     if (!m_leafTextNode)
    389         return;
    390 
    391     m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
    392     m_bufferedText.clear();
    393     m_leafTextNode = 0;
    394 }
    395 
// Releases the parser's node references via clearCurrentNodeStack() before
// delegating to the base class detach().
void XMLDocumentParser::detach()
{
    clearCurrentNodeStack();
    ScriptableDocumentParser::detach();
}
    401 
// Finishes parsing of a full document (never a fragment): runs doEnd(), then,
// unless the parser was detached or paused by doEnd(), either inserts the
// error message block or flushes pending text and triggers an immediate
// style recalculation, and finally moves the document to the Interactive
// ready state and notifies it that parsing finished.
void XMLDocumentParser::end()
{
    // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
    // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
    ASSERT(!m_parsingFragment);

    doEnd();

    // doEnd() call above can detach the parser and null out its document.
    // In that case, we just bail out.
    if (isDetached())
        return;

    // doEnd() could process a script tag, thus pausing parsing.
    if (m_parserPaused)
        return;

    if (m_sawError)
        insertErrorMessageBlock();
    else {
        exitText();
        document()->styleResolverChanged(RecalcStyleImmediately);
    }

    if (isParsing())
        prepareToStopParsing();
    document()->setReadyState(Document::Interactive);
    // Drop the node references before telling the document it is done.
    clearCurrentNodeStack();
    document()->finishedParsing();
}
    432 
    433 void XMLDocumentParser::finish()
    434 {
    435     // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
    436     // makes sense to call any methods on DocumentParser once it's been stopped.
    437     // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
    438 
    439     if (m_parserPaused)
    440         m_finishCalled = true;
    441     else
    442         end();
    443 }
    444 
// Delegates to XMLErrors to insert the error message block.
void XMLDocumentParser::insertErrorMessageBlock()
{
    m_xmlErrors.insertErrorMessageBlock();
}
    449 
// Resource client callback for the pending external script. Detaches from
// the resource, then either dispatches an error event, or (unless the load
// was canceled) executes the script and dispatches a load event on success.
// Parsing is resumed afterwards unless the parser was detached or is in the
// middle of requesting a script.
void XMLDocumentParser::notifyFinished(Resource* unusedResource)
{
    ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
    ASSERT(m_pendingScript->accessCount() > 0);

    // Capture everything needed from the resource before releasing it.
    ScriptSourceCode sourceCode(m_pendingScript.get());
    bool errorOccurred = m_pendingScript->errorOccurred();
    bool wasCanceled = m_pendingScript->wasCanceled();

    m_pendingScript->removeClient(this);
    m_pendingScript = 0;

    // Keep the script element alive locally while clearing the member.
    RefPtr<Element> e = m_scriptElement;
    m_scriptElement = 0;

    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
    ASSERT(scriptLoader);

    // JavaScript can detach this parser, make sure it's kept alive even if detached.
    RefPtr<XMLDocumentParser> protect(this);

    if (errorOccurred)
        scriptLoader->dispatchErrorEvent();
    else if (!wasCanceled) {
        if (scriptLoader->executePotentiallyCrossOriginScript(sourceCode))
            scriptLoader->dispatchLoadEvent();
    }

    // Cleared a second time after the event dispatch / script execution above.
    // NOTE(review): presumably because that code can re-enter the parser and
    // set m_scriptElement again - confirm.
    m_scriptElement = 0;

    if (!isDetached() && !m_requestingScript)
        resumeParsing();
}
    483 
// True while a pending script is set (m_pendingScript is non-null).
bool XMLDocumentParser::isWaitingForScripts() const
{
    return m_pendingScript;
}
    488 
    489 void XMLDocumentParser::pauseParsing()
    490 {
    491     if (m_parsingFragment)
    492         return;
    493 
    494     m_parserPaused = true;
    495 }
    496 
// Parses |chunk| as XML into |fragment|.
// Returns true for an empty chunk, true after the script/style text shortcut,
// and otherwise the well-formedness result of appendFragmentSource().
// |contextElement| may be null; |parserContentPolicy| is passed to the
// fragment parser.
bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
{
    if (!chunk.length())
        return true;

    // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
    // For now we have a hack for script/style innerHTML support:
    // the chunk is inserted verbatim as a single text node, without parsing.
    if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) {
        fragment->parserAppendChild(fragment->document().createTextNode(chunk));
        return true;
    }

    RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
    bool wellFormed = parser->appendFragmentSource(chunk);
    // Do not call finish().  Current finish() and doEnd() implementations touch the main Document/loader
    // and can cause crashes in the fragment case.
    parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
    return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
}
    517 
// Only the address of globalDescriptor is used: openFunc() returns it as a
// sentinel "null descriptor" for loads that are refused, and readFunc() /
// closeFunc() compare against it.
static int globalDescriptor = 0;
// Thread on which initializeLibXMLIfNecessary() ran; matchFunc() only accepts
// loads requested from this thread.
static ThreadIdentifier libxmlLoaderThread = 0;
    520 
    521 static int matchFunc(const char*)
    522 {
    523     // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
    524     // interfering with client applications that also use libxml2.  http://bugs.webkit.org/show_bug.cgi?id=17353
    525     return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
    526 }
    527 
    528 class OffsetBuffer {
    529     WTF_MAKE_FAST_ALLOCATED;
    530 public:
    531     OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
    532 
    533     int readOutBytes(char* outputBuffer, unsigned askedToRead)
    534     {
    535         unsigned bytesLeft = m_buffer.size() - m_currentOffset;
    536         unsigned lenToCopy = min(askedToRead, bytesLeft);
    537         if (lenToCopy) {
    538             memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
    539             m_currentOffset += lenToCopy;
    540         }
    541         return lenToCopy;
    542     }
    543 
    544 private:
    545     Vector<char> m_buffer;
    546     unsigned m_currentOffset;
    547 };
    548 
    549 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
    550 {
    551     if (!scriptingContentIsAllowed(parserContentPolicy))
    552         element->stripScriptingAttributes(attributeVector);
    553     element->parserSetAttributes(attributeVector);
    554 }
    555 
    556 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
    557 {
    558     // Hack around libxml2's lack of encoding overide support by manually
    559     // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
    560     // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
    561     // and switch encodings, causing the parse to fail.
    562     if (is8Bit) {
    563         xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
    564         return;
    565     }
    566 
    567     const UChar BOM = 0xFEFF;
    568     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
    569     xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
    570 }
    571 
    572 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
    573 {
    574     bool is8Bit = chunk.is8Bit();
    575     switchEncoding(ctxt, is8Bit);
    576     if (is8Bit)
    577         xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
    578     else
    579         xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
    580 }
    581 
// Signals the end of input to the libxml push parser: no data is passed and
// the last argument (libxml's "terminate" flag) is set to 1.
static void finishParsing(xmlParserCtxtPtr ctxt)
{
    xmlParseChunk(ctxt, 0, 0, 1);
}
    586 
    587 #define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
    588 
    589 static bool isLibxmlDefaultCatalogFile(const String& urlString)
    590 {
    591     // On non-Windows platforms libxml asks for this URL, the
    592     // "XML_XML_DEFAULT_CATALOG", on initialization.
    593     if (urlString == "file:///etc/xml/catalog")
    594         return true;
    595 
    596     // On Windows, libxml computes a URL relative to where its DLL resides.
    597     if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
    598         return true;
    599     return false;
    600 }
    601 
    602 static bool shouldAllowExternalLoad(const KURL& url)
    603 {
    604     String urlString = url.string();
    605 
    606     // This isn't really necessary now that initializeLibXMLIfNecessary
    607     // disables catalog support in libxml, but keeping it for defense in depth.
    608     if (isLibxmlDefaultCatalogFile(url))
    609         return false;
    610 
    611     // The most common DTD.  There isn't much point in hammering www.w3c.org
    612     // by requesting this URL for every XHTML document.
    613     if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
    614         return false;
    615 
    616     // Similarly, there isn't much point in requesting the SVG DTD.
    617     if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
    618         return false;
    619 
    620     // The libxml doesn't give us a lot of context for deciding whether to
    621     // allow this request.  In the worst case, this load could be for an
    622     // external entity and the resulting document could simply read the
    623     // retrieved content.  If we had more context, we could potentially allow
    624     // the parser to load a DTD.  As things stand, we take the conservative
    625     // route and allow same-origin requests only.
    626     if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
    627         XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
    628         return false;
    629     }
    630 
    631     return true;
    632 }
    633 
// libxml I/O "open" callback. Returns an opaque context for readFunc() /
// closeFunc(): either a heap-allocated OffsetBuffer holding the fetched
// bytes (deleted in closeFunc()), or &globalDescriptor when the load is
// refused before or after the fetch.
static void* openFunc(const char* uri)
{
    ASSERT(XMLDocumentParserScope::currentFetcher);
    ASSERT(currentThread() == libxmlLoaderThread);

    KURL url(KURL(), uri);

    if (!shouldAllowExternalLoad(url))
        return &globalDescriptor;

    KURL finalURL;
    Vector<char> data;

    {
        // Grab the fetcher first: the scope object below is constructed with
        // a null fetcher for the duration of the synchronous fetch.
        ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
        XMLDocumentParserScope scope(0);
        // FIXME: We should restore the original global error handler as well.

        if (fetcher->frame()) {
            FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions());
            ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request);
            if (resource && !resource->errorOccurred()) {
                resource->resourceBuffer()->moveTo(data);
                finalURL = resource->response().url();
            }
        }
    }

    // We have to check the URL again after the load to catch redirects.
    // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
    // Note that finalURL is still default-constructed here when there was no
    // frame or the fetch failed.
    if (!shouldAllowExternalLoad(finalURL))
        return &globalDescriptor;

    return new OffsetBuffer(data);
}
    669 
    670 static int readFunc(void* context, char* buffer, int len)
    671 {
    672     // Do 0-byte reads in case of a null descriptor
    673     if (context == &globalDescriptor)
    674         return 0;
    675 
    676     OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
    677     return data->readOutBytes(buffer, len);
    678 }
    679 
    680 static int writeFunc(void*, const char*, int)
    681 {
    682     // Always just do 0-byte writes
    683     return 0;
    684 }
    685 
    686 static int closeFunc(void* context)
    687 {
    688     if (context != &globalDescriptor) {
    689         OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
    690         delete data;
    691     }
    692     return 0;
    693 }
    694 
// Error callback with a printf-style signature that deliberately does
// nothing: the context, format string and variadic arguments are ignored.
static void errorFunc(void*, const char*, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
    699 
// One-time, process-wide libxml setup: disables catalog loading, initializes
// the parser library, registers this file's input/output callbacks and
// records the calling thread as the libxml loader thread. Subsequent calls
// return immediately.
// NOTE(review): the guard is a plain static bool, so this assumes the first
// call is not raced from several threads - confirm.
static void initializeLibXMLIfNecessary()
{
    static bool didInit = false;
    if (didInit)
        return;

    // We don't want libxml to try and load catalogs.
    // FIXME: It's not nice to set global settings in libxml, embedders of Blink
    // could be trying to use libxml themselves.
    xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
    xmlInitParser();
    xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
    xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
    libxmlLoaderThread = currentThread();
    didInit = true;
}
    716 
    717 
// Creates a libxml push-parser context that reports to |handlers|, stores
// |userData| in the context's _private field and enables entity replacement.
// NOTE(review): unlike createMemoryParser(), the result of
// xmlCreatePushParserCtxt() is dereferenced without a null check.
PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
{
    initializeLibXMLIfNecessary();
    xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
    parser->_private = userData;
    parser->replaceEntities = true;
    return adoptRef(new XMLParserContext(parser));
}
    726 
// Creates a libxml memory-parser context over |chunk| that is set up to parse
// element content rather than a whole document.
// Chunk should be encoded in UTF-8.
// Returns 0 when libxml cannot create the context.
PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
{
    initializeLibXMLIfNecessary();

    // appendFragmentSource() checks that the length doesn't overflow an int.
    xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());

    if (!parser)
        return 0;

    // Copy the sax handler
    memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));

    // Set parser options.
    // XML_PARSE_NODICT: default dictionary option.
    // XML_PARSE_NOENT: force entities substitutions.
    xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);

    // Internal initialization
    parser->sax2 = 1;
    parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
    parser->depth = 0;
    // Intern the well-known names in the context's dictionary; the explicit
    // lengths are the byte lengths of the strings being looked up.
    parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
    parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
    parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
    parser->_private = userData;

    return adoptRef(new XMLParserContext(parser));
}
    757 
    758 // --------------------------------
    759 
// Only XML version "1.0" is supported.
bool XMLDocumentParser::supportsXMLVersion(const String& version)
{
    return version == "1.0";
}
    764 
// Constructs a parser for a whole document. The document itself is the
// initial insertion node, all state flags start out false and
// m_parsingFragment is false.
XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
    : ScriptableDocumentParser(document)
    , m_view(frameView)
    , m_context(0)
    , m_currentNode(document)
    , m_isCurrentlyParsing8BitChunk(false)
    , m_sawError(false)
    , m_sawCSS(false)
    , m_sawXSLTransform(false)
    , m_sawFirstElement(false)
    , m_isXHTMLDocument(false)
    , m_parserPaused(false)
    , m_requestingScript(false)
    , m_finishCalled(false)
    , m_xmlErrors(document)
    , m_pendingScript(0)
    , m_scriptStartPosition(TextPosition::belowRangePosition())
    , m_parsingFragment(false)
{
    // This is XML being used as a document resource.
    UseCounter::count(*document, UseCounter::XMLDocument);
}
    787 
// Constructs a parser for a document fragment. The fragment is the initial
// insertion node and m_parsingFragment is true. Namespace declarations
// (xmlns and xmlns:prefix attributes) found on |parentElement| and its
// element ancestors seed the default namespace and the prefix map.
// |parentElement| may be null, in which case no namespaces are seeded.
XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
    : ScriptableDocumentParser(&fragment->document(), parserContentPolicy)
    , m_view(0)
    , m_context(0)
    , m_currentNode(fragment)
    , m_isCurrentlyParsing8BitChunk(false)
    , m_sawError(false)
    , m_sawCSS(false)
    , m_sawXSLTransform(false)
    , m_sawFirstElement(false)
    , m_isXHTMLDocument(false)
    , m_parserPaused(false)
    , m_requestingScript(false)
    , m_finishCalled(false)
    , m_xmlErrors(&fragment->document())
    , m_pendingScript(0)
    , m_scriptStartPosition(TextPosition::belowRangePosition())
    , m_parsingFragment(true)
{
    // The fragment is the current node and is not the document, so
    // popCurrentNode() / clearCurrentNodeStack() will deref it; take the
    // matching reference here.
    fragment->ref();

    // Add namespaces based on the parent node
    // Collect the context element and its element ancestors, innermost first.
    Vector<Element*> elemStack;
    while (parentElement) {
        elemStack.append(parentElement);

        ContainerNode* n = parentElement->parentNode();
        if (!n || !n->isElementNode())
            break;
        parentElement = toElement(n);
    }

    if (elemStack.isEmpty())
        return;

    // Walk from the outermost ancestor inwards so that declarations on inner
    // elements overwrite those on outer ones.
    for (; !elemStack.isEmpty(); elemStack.removeLast()) {
        Element* element = elemStack.last();
        if (element->hasAttributes()) {
            for (unsigned i = 0; i < element->attributeCount(); i++) {
                const Attribute* attribute = element->attributeItem(i);
                if (attribute->localName() == xmlnsAtom)
                    m_defaultNamespaceURI = attribute->value();
                else if (attribute->prefix() == xmlnsAtom)
                    m_prefixToNamespaceMap.set(attribute->localName(), attribute->value());
            }
        }
    }

    // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
    // NOTE(review): the loop above reassigns parentElement while climbing, so
    // at this point it refers to the outermost element ancestor rather than
    // the element originally passed in - confirm this is intended.
    if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
        m_defaultNamespaceURI = parentElement->namespaceURI();
}
    840 
    841 XMLParserContext::~XMLParserContext()
    842 {
    843     if (m_context->myDoc)
    844         xmlFreeDoc(m_context->myDoc);
    845     xmlFreeParserCtxt(m_context);
    846 }
    847 
    848 XMLDocumentParser::~XMLDocumentParser()
    849 {
    850     // The XMLDocumentParser will always be detached before being destroyed.
    851     ASSERT(m_currentNodeStack.isEmpty());
    852     ASSERT(!m_currentNode);
    853 
    854     // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
    855     if (m_pendingScript)
    856         m_pendingScript->removeClient(this);
    857 }
    858 
    859 void XMLDocumentParser::doWrite(const String& parseString)
    860 {
    861     ASSERT(!isDetached());
    862     if (!m_context)
    863         initializeParserContext();
    864 
    865     // Protect the libxml context from deletion during a callback
    866     RefPtr<XMLParserContext> context = m_context;
    867 
    868     // libXML throws an error if you try to switch the encoding for an empty string.
    869     if (parseString.length()) {
    870         // JavaScript may cause the parser to detach during parseChunk
    871         // keep this alive until this function is done.
    872         RefPtr<XMLDocumentParser> protect(this);
    873 
    874         XMLDocumentParserScope scope(document()->fetcher());
    875         TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
    876         parseChunk(context->context(), parseString);
    877 
    878         // JavaScript (which may be run under the parseChunk callstack) may
    879         // cause the parser to be stopped or detached.
    880         if (isStopped())
    881             return;
    882     }
    883 
    884     // FIXME: Why is this here?  And why is it after we process the passed source?
    885     if (document()->sawDecodingError()) {
    886         // If the decoder saw an error, report it as fatal (stops parsing)
    887         TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
    888         handleError(XMLErrors::fatal, "Encoding error", position);
    889     }
    890 }
    891 
    892 struct _xmlSAX2Namespace {
    893     const xmlChar* prefix;
    894     const xmlChar* uri;
    895 };
    896 typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
    897 
    898 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState)
    899 {
    900     xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
    901     for (int i = 0; i < nbNamespaces; i++) {
    902         AtomicString namespaceQName = xmlnsAtom;
    903         AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
    904         if (namespaces[i].prefix)
    905             namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
    906 
    907         QualifiedName parsedName = anyName;
    908         if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState))
    909             return;
    910 
    911         prefixedAttributes.append(Attribute(parsedName, namespaceURI));
    912     }
    913 }
    914 
    915 struct _xmlSAX2Attributes {
    916     const xmlChar* localname;
    917     const xmlChar* prefix;
    918     const xmlChar* uri;
    919     const xmlChar* value;
    920     const xmlChar* end;
    921 };
    922 typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
    923 
    924 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState)
    925 {
    926     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
    927     for (int i = 0; i < nbAttributes; i++) {
    928         int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
    929         AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
    930         String attrPrefix = toString(attributes[i].prefix);
    931         AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
    932         AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
    933 
    934         QualifiedName parsedName = anyName;
    935         if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState))
    936             return;
    937 
    938         prefixedAttributes.append(Attribute(parsedName, attrValue));
    939     }
    940 }
    941 
    942 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
    943     const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
    944 {
    945     if (isStopped())
    946         return;
    947 
    948     if (m_parserPaused) {
    949         m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
    950             nbAttributes, nbDefaulted, libxmlAttributes)));
    951         return;
    952     }
    953 
    954     exitText();
    955 
    956     AtomicString adjustedURI = uri;
    957     if (m_parsingFragment && adjustedURI.isNull()) {
    958         if (!prefix.isNull())
    959             adjustedURI = m_prefixToNamespaceMap.get(prefix);
    960         else
    961             adjustedURI = m_defaultNamespaceURI;
    962     }
    963 
    964     bool isFirstElement = !m_sawFirstElement;
    965     m_sawFirstElement = true;
    966 
    967     QualifiedName qName(prefix, localName, adjustedURI);
    968     RefPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
    969     if (!newElement) {
    970         stopParsing();
    971         return;
    972     }
    973 
    974     Vector<Attribute> prefixedAttributes;
    975     TrackExceptionState exceptionState;
    976     handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState);
    977     if (exceptionState.hadException()) {
    978         setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    979         stopParsing();
    980         return;
    981     }
    982 
    983     handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState);
    984     setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    985     if (exceptionState.hadException()) {
    986         stopParsing();
    987         return;
    988     }
    989 
    990     newElement->beginParsingChildren();
    991 
    992     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
    993     if (scriptLoader)
    994         m_scriptStartPosition = textPosition();
    995 
    996     m_currentNode->parserAppendChild(newElement.get());
    997 
    998     if (newElement->hasTagName(HTMLNames::templateTag))
    999         pushCurrentNode(toHTMLTemplateElement(newElement.get())->content());
   1000     else
   1001         pushCurrentNode(newElement.get());
   1002 
   1003     if (isHTMLHtmlElement(newElement.get()))
   1004         toHTMLHtmlElement(newElement)->insertedByParser();
   1005 
   1006     if (!m_parsingFragment && isFirstElement && document()->frame())
   1007         document()->frame()->loader().dispatchDocumentElementAvailable();
   1008 }
   1009 
   1010 void XMLDocumentParser::endElementNs()
   1011 {
   1012     if (isStopped())
   1013         return;
   1014 
   1015     if (m_parserPaused) {
   1016         m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
   1017         return;
   1018     }
   1019 
   1020     // JavaScript can detach the parser.  Make sure this is not released
   1021     // before the end of this method.
   1022     RefPtr<XMLDocumentParser> protect(this);
   1023 
   1024     exitText();
   1025 
   1026     RefPtr<ContainerNode> n = m_currentNode;
   1027     n->finishParsingChildren();
   1028 
   1029     if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) {
   1030         popCurrentNode();
   1031         n->remove(IGNORE_EXCEPTION);
   1032         return;
   1033     }
   1034 
   1035     if (!n->isElementNode() || !m_view) {
   1036         popCurrentNode();
   1037         return;
   1038     }
   1039 
   1040     Element* element = toElement(n);
   1041 
   1042     // The element's parent may have already been removed from document.
   1043     // Parsing continues in this case, but scripts aren't executed.
   1044     if (!element->inDocument()) {
   1045         popCurrentNode();
   1046         return;
   1047     }
   1048 
   1049     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
   1050     if (!scriptLoader) {
   1051         popCurrentNode();
   1052         return;
   1053     }
   1054 
   1055     // Don't load external scripts for standalone documents (for now).
   1056     ASSERT(!m_pendingScript);
   1057     m_requestingScript = true;
   1058 
   1059     if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
   1060         // FIXME: Script execution should be shared between
   1061         // the libxml2 and Qt XMLDocumentParser implementations.
   1062 
   1063         if (scriptLoader->readyToBeParserExecuted()) {
   1064             scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
   1065         } else if (scriptLoader->willBeParserExecuted()) {
   1066             m_pendingScript = scriptLoader->resource();
   1067             m_scriptElement = element;
   1068             m_pendingScript->addClient(this);
   1069 
   1070             // m_pendingScript will be 0 if script was already loaded and addClient() executed it.
   1071             if (m_pendingScript)
   1072                 pauseParsing();
   1073         } else {
   1074             m_scriptElement = 0;
   1075         }
   1076 
   1077         // JavaScript may have detached the parser
   1078         if (isDetached())
   1079             return;
   1080     }
   1081     m_requestingScript = false;
   1082     popCurrentNode();
   1083 }
   1084 
   1085 void XMLDocumentParser::characters(const xmlChar* chars, int length)
   1086 {
   1087     if (isStopped())
   1088         return;
   1089 
   1090     if (m_parserPaused) {
   1091         m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
   1092         return;
   1093     }
   1094 
   1095     if (!m_leafTextNode)
   1096         enterText();
   1097     m_bufferedText.append(chars, length);
   1098 }
   1099 
   1100 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
   1101 {
   1102     if (isStopped())
   1103         return;
   1104 
   1105 #if HAVE(VASPRINTF)
   1106     char* formattedMessage;
   1107     if (vasprintf(&formattedMessage, message, args) == -1)
   1108         return;
   1109 #else
   1110     char formattedMessage[1024];
   1111     vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
   1112 #endif
   1113 
   1114     if (m_parserPaused) {
   1115         m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
   1116 #if HAVE(VASPRINTF)
   1117         free(formattedMessage);
   1118 #endif
   1119         return;
   1120     }
   1121 
   1122     handleError(type, formattedMessage, textPosition());
   1123 
   1124 #if HAVE(VASPRINTF)
   1125     free(formattedMessage);
   1126 #endif
   1127 }
   1128 
   1129 void XMLDocumentParser::processingInstruction(const String& target, const String& data)
   1130 {
   1131     if (isStopped())
   1132         return;
   1133 
   1134     if (m_parserPaused) {
   1135         m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target ,data)));
   1136         return;
   1137     }
   1138 
   1139     exitText();
   1140 
   1141     // ### handle exceptions
   1142     TrackExceptionState exceptionState;
   1143     RefPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState);
   1144     if (exceptionState.hadException())
   1145         return;
   1146 
   1147     pi->setCreatedByParser(true);
   1148 
   1149     m_currentNode->parserAppendChild(pi.get());
   1150 
   1151     pi->finishParsingChildren();
   1152 
   1153     if (pi->isCSS())
   1154         m_sawCSS = true;
   1155 
   1156     if (!RuntimeEnabledFeatures::xsltEnabled())
   1157         return;
   1158 
   1159     m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
   1160     if (m_sawXSLTransform && !document()->transformSourceDocument()) {
   1161         // This behavior is very tricky. We call stopParsing() here because we want to stop processing the document
   1162         // until we're ready to apply the transform, but we actually still want to be fed decoded string pieces to
   1163         // accumulate in m_originalSourceForTransform. So, we call stopParsing() here and
   1164         // check isStopped() in element callbacks.
   1165         // FIXME: This contradicts the contract of DocumentParser.
   1166         stopParsing();
   1167     }
   1168 }
   1169 
   1170 void XMLDocumentParser::cdataBlock(const String& text)
   1171 {
   1172     if (isStopped())
   1173         return;
   1174 
   1175     if (m_parserPaused) {
   1176         m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
   1177         return;
   1178     }
   1179 
   1180     exitText();
   1181 
   1182     RefPtr<CDATASection> newNode = CDATASection::create(m_currentNode->document(), text);
   1183     m_currentNode->parserAppendChild(newNode.get());
   1184 }
   1185 
   1186 void XMLDocumentParser::comment(const String& text)
   1187 {
   1188     if (isStopped())
   1189         return;
   1190 
   1191     if (m_parserPaused) {
   1192         m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
   1193         return;
   1194     }
   1195 
   1196     exitText();
   1197 
   1198     RefPtr<Comment> newNode = Comment::create(m_currentNode->document(), text);
   1199     m_currentNode->parserAppendChild(newNode.get());
   1200 }
   1201 
   1202 enum StandaloneInfo {
   1203     StandaloneUnspecified = -2,
   1204     NoXMlDeclaration,
   1205     StandaloneNo,
   1206     StandaloneYes
   1207 };
   1208 
   1209 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
   1210 {
   1211     StandaloneInfo standaloneInfo = (StandaloneInfo)standalone;
   1212     if (standaloneInfo == NoXMlDeclaration) {
   1213         document()->setHasXMLDeclaration(false);
   1214         return;
   1215     }
   1216 
   1217     if (!version.isNull())
   1218         document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
   1219     if (standalone != StandaloneUnspecified)
   1220         document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
   1221     if (!encoding.isNull())
   1222         document()->setXMLEncoding(encoding);
   1223     document()->setHasXMLDeclaration(true);
   1224 }
   1225 
// Called at the end of the document (and after fragment content has been
// parsed) to leave text mode via exitText().
void XMLDocumentParser::endDocument()
{
    exitText();
}
   1230 
   1231 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
   1232 {
   1233     if (isStopped())
   1234         return;
   1235 
   1236     if (m_parserPaused) {
   1237         m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
   1238         return;
   1239     }
   1240 
   1241     if (document())
   1242         document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
   1243 }
   1244 
   1245 static inline XMLDocumentParser* getParser(void* closure)
   1246 {
   1247     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1248     return static_cast<XMLDocumentParser*>(ctxt->_private);
   1249 }
   1250 
   1251 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
   1252 {
   1253     getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
   1254 }
   1255 
   1256 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
   1257 {
   1258     getParser(closure)->endElementNs();
   1259 }
   1260 
   1261 static void charactersHandler(void* closure, const xmlChar* chars, int length)
   1262 {
   1263     getParser(closure)->characters(chars, length);
   1264 }
   1265 
   1266 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
   1267 {
   1268     getParser(closure)->processingInstruction(toString(target), toString(data));
   1269 }
   1270 
   1271 static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
   1272 {
   1273     getParser(closure)->cdataBlock(toString(text, length));
   1274 }
   1275 
   1276 static void commentHandler(void* closure, const xmlChar* text)
   1277 {
   1278     getParser(closure)->comment(toString(text));
   1279 }
   1280 
   1281 WTF_ATTRIBUTE_PRINTF(2, 3)
   1282 static void warningHandler(void* closure, const char* message, ...)
   1283 {
   1284     va_list args;
   1285     va_start(args, message);
   1286     getParser(closure)->error(XMLErrors::warning, message, args);
   1287     va_end(args);
   1288 }
   1289 
   1290 WTF_ATTRIBUTE_PRINTF(2, 3)
   1291 static void fatalErrorHandler(void* closure, const char* message, ...)
   1292 {
   1293     va_list args;
   1294     va_start(args, message);
   1295     getParser(closure)->error(XMLErrors::fatal, message, args);
   1296     va_end(args);
   1297 }
   1298 
   1299 WTF_ATTRIBUTE_PRINTF(2, 3)
   1300 static void normalErrorHandler(void* closure, const char* message, ...)
   1301 {
   1302     va_list args;
   1303     va_start(args, message);
   1304     getParser(closure)->error(XMLErrors::nonFatal, message, args);
   1305     va_end(args);
   1306 }
   1307 
   1308 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
   1309 // a hack to avoid malloc/free. Using a global variable like this could cause trouble
   1310 // if libxml implementation details were to change
   1311 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
   1312 
   1313 static xmlEntityPtr sharedXHTMLEntity()
   1314 {
   1315     static xmlEntity entity;
   1316     if (!entity.type) {
   1317         entity.type = XML_ENTITY_DECL;
   1318         entity.orig = sharedXHTMLEntityResult;
   1319         entity.content = sharedXHTMLEntityResult;
   1320         entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1321     }
   1322     return &entity;
   1323 }
   1324 
   1325 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
   1326 {
   1327     const char* originalTarget = target;
   1328     WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
   1329         utf16Entity + numberOfCodeUnits, &target, target + targetSize);
   1330     if (conversionResult != WTF::Unicode::conversionOK)
   1331         return 0;
   1332 
   1333     // Even though we must pass the length, libxml expects the entity string to be null terminated.
   1334     ASSERT(target > originalTarget + 1);
   1335     *target = '\0';
   1336     return target - originalTarget;
   1337 }
   1338 
   1339 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
   1340 {
   1341     UChar utf16DecodedEntity[4];
   1342     size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
   1343     if (!numberOfCodeUnits)
   1344         return 0;
   1345 
   1346     ASSERT(numberOfCodeUnits <= 4);
   1347     size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
   1348         reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
   1349     if (!entityLengthInUTF8)
   1350         return 0;
   1351 
   1352     xmlEntityPtr entity = sharedXHTMLEntity();
   1353     entity->length = entityLengthInUTF8;
   1354     entity->name = name;
   1355     return entity;
   1356 }
   1357 
   1358 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
   1359 {
   1360     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1361     xmlEntityPtr ent = xmlGetPredefinedEntity(name);
   1362     if (ent) {
   1363         ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1364         return ent;
   1365     }
   1366 
   1367     ent = xmlGetDocEntity(ctxt->myDoc, name);
   1368     if (!ent && getParser(closure)->isXHTMLDocument()) {
   1369         ent = getXHTMLEntity(name);
   1370         if (ent)
   1371             ent->etype = XML_INTERNAL_GENERAL_ENTITY;
   1372     }
   1373 
   1374     return ent;
   1375 }
   1376 
   1377 static void startDocumentHandler(void* closure)
   1378 {
   1379     xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
   1380     XMLDocumentParser* parser = getParser(closure);
   1381     switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
   1382     parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
   1383     xmlSAX2StartDocument(closure);
   1384 }
   1385 
   1386 static void endDocumentHandler(void* closure)
   1387 {
   1388     getParser(closure)->endDocument();
   1389     xmlSAX2EndDocument(closure);
   1390 }
   1391 
   1392 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
   1393 {
   1394     getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
   1395     xmlSAX2InternalSubset(closure, name, externalID, systemID);
   1396 }
   1397 
   1398 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
   1399 {
   1400     String extId = toString(externalId);
   1401     if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
   1402         || (extId == "-//W3C//DTD XHTML 1.1//EN")
   1403         || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
   1404         || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
   1405         || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
   1406         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
   1407         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
   1408         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
   1409         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN")
   1410         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN"))
   1411         getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
   1412 }
   1413 
   1414 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
   1415 {
   1416     // nothing to do, but we need this to work around a crasher
   1417     // http://bugzilla.gnome.org/show_bug.cgi?id=172255
   1418     // http://bugs.webkit.org/show_bug.cgi?id=5792
   1419 }
   1420 
   1421 void XMLDocumentParser::initializeParserContext(const CString& chunk)
   1422 {
   1423     xmlSAXHandler sax;
   1424     memset(&sax, 0, sizeof(sax));
   1425 
   1426     sax.error = normalErrorHandler;
   1427     sax.fatalError = fatalErrorHandler;
   1428     sax.characters = charactersHandler;
   1429     sax.processingInstruction = processingInstructionHandler;
   1430     sax.cdataBlock = cdataBlockHandler;
   1431     sax.comment = commentHandler;
   1432     sax.warning = warningHandler;
   1433     sax.startElementNs = startElementNsHandler;
   1434     sax.endElementNs = endElementNsHandler;
   1435     sax.getEntity = getEntityHandler;
   1436     sax.startDocument = startDocumentHandler;
   1437     sax.endDocument = endDocumentHandler;
   1438     sax.internalSubset = internalSubsetHandler;
   1439     sax.externalSubset = externalSubsetHandler;
   1440     sax.ignorableWhitespace = ignorableWhitespaceHandler;
   1441     sax.entityDecl = xmlSAX2EntityDecl;
   1442     sax.initialized = XML_SAX2_MAGIC;
   1443     DocumentParser::startParsing();
   1444     m_sawError = false;
   1445     m_sawCSS = false;
   1446     m_sawXSLTransform = false;
   1447     m_sawFirstElement = false;
   1448 
   1449     XMLDocumentParserScope scope(document()->fetcher());
   1450     if (m_parsingFragment)
   1451         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
   1452     else {
   1453         ASSERT(!chunk.data());
   1454         m_context = XMLParserContext::createStringParser(&sax, this);
   1455     }
   1456 }
   1457 
   1458 void XMLDocumentParser::doEnd()
   1459 {
   1460     if (!isStopped()) {
   1461         if (m_context) {
   1462             // Tell libxml we're done.
   1463             {
   1464                 XMLDocumentParserScope scope(document()->fetcher());
   1465                 finishParsing(context());
   1466             }
   1467 
   1468             m_context = 0;
   1469         }
   1470     }
   1471 
   1472     bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
   1473     if (xmlViewerMode) {
   1474         XMLTreeViewer xmlTreeViewer(document());
   1475         xmlTreeViewer.transformDocumentToTreeView();
   1476     } else if (m_sawXSLTransform) {
   1477         xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
   1478         document()->setTransformSource(adoptPtr(new TransformSource(doc)));
   1479 
   1480         document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
   1481         document()->styleResolverChanged(RecalcStyleImmediately);
   1482 
   1483         // styleResolverChanged() call can detach the parser and null out its document.
   1484         // In that case, we just bail out.
   1485         if (isDetached())
   1486             return;
   1487 
   1488         document()->setParsing(true);
   1489         DocumentParser::stopParsing();
   1490     }
   1491 }
   1492 
   1493 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
   1494 {
   1495     if (source.isEmpty())
   1496         return 0;
   1497     // Parse in a single chunk into an xmlDocPtr
   1498     // FIXME: Hook up error handlers so that a failure to parse the main document results in
   1499     // good error messages.
   1500     XMLDocumentParserScope scope(fetcher, errorFunc, 0);
   1501     XMLParserInput input(source);
   1502     return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
   1503 }
   1504 
   1505 OrdinalNumber XMLDocumentParser::lineNumber() const
   1506 {
   1507     return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
   1508 }
   1509 
   1510 OrdinalNumber XMLDocumentParser::columnNumber() const
   1511 {
   1512     return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
   1513 }
   1514 
   1515 TextPosition XMLDocumentParser::textPosition() const
   1516 {
   1517     xmlParserCtxtPtr context = this->context();
   1518     if (!context)
   1519         return TextPosition::minimumPosition();
   1520     return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line),
   1521                         OrdinalNumber::fromOneBasedInt(context->input->col));
   1522 }
   1523 
   1524 void XMLDocumentParser::stopParsing()
   1525 {
   1526     DocumentParser::stopParsing();
   1527     if (context())
   1528         xmlStopParser(context());
   1529 }
   1530 
   1531 void XMLDocumentParser::resumeParsing()
   1532 {
   1533     ASSERT(!isDetached());
   1534     ASSERT(m_parserPaused);
   1535 
   1536     m_parserPaused = false;
   1537 
   1538     // First, execute any pending callbacks
   1539     while (!m_pendingCallbacks.isEmpty()) {
   1540         OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
   1541         callback->call(this);
   1542 
   1543         // A callback paused the parser
   1544         if (m_parserPaused)
   1545             return;
   1546     }
   1547 
   1548     // Then, write any pending data
   1549     SegmentedString rest = m_pendingSrc;
   1550     m_pendingSrc.clear();
   1551     // There is normally only one string left, so toString() shouldn't copy.
   1552     // In any case, the XML parser runs on the main thread and it's OK if
   1553     // the passed string has more than one reference.
   1554     append(rest.toString().impl());
   1555 
   1556     // Finally, if finish() has been called and write() didn't result
   1557     // in any further callbacks being queued, call end()
   1558     if (m_finishCalled && m_pendingCallbacks.isEmpty())
   1559         end();
   1560 }
   1561 
   1562 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
   1563 {
   1564     ASSERT(!m_context);
   1565     ASSERT(m_parsingFragment);
   1566 
   1567     CString chunkAsUtf8 = chunk.utf8();
   1568 
   1569     // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB.
   1570     if (chunkAsUtf8.length() > INT_MAX)
   1571         return false;
   1572 
   1573     initializeParserContext(chunkAsUtf8);
   1574     xmlParseContent(context());
   1575     endDocument(); // Close any open text nodes.
   1576 
   1577     // FIXME: If this code is actually needed, it should probably move to finish()
   1578     // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
   1579     // Check if all the chunk has been processed.
   1580     long bytesProcessed = xmlByteConsumed(context());
   1581     if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
   1582         // FIXME: I don't believe we can hit this case without also having seen an error or a null byte.
   1583         // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
   1584         ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
   1585         return false;
   1586     }
   1587 
   1588     // No error if the chunk is well formed or it is not but we have no error.
   1589     return context()->wellFormed || !xmlCtxtGetLastError(context());
   1590 }
   1591 
   1592 // --------------------------------
   1593 
   1594 struct AttributeParseState {
   1595     HashMap<String, String> attributes;
   1596     bool gotAttributes;
   1597 };
   1598 
   1599 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
   1600     const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
   1601     int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
   1602 {
   1603     if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
   1604         return;
   1605 
   1606     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1607     AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
   1608 
   1609     state->gotAttributes = true;
   1610 
   1611     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
   1612     for (int i = 0; i < nbAttributes; i++) {
   1613         String attrLocalName = toString(attributes[i].localname);
   1614         int valueLength = (int) (attributes[i].end - attributes[i].value);
   1615         String attrValue = toString(attributes[i].value, valueLength);
   1616         String attrPrefix = toString(attributes[i].prefix);
   1617         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
   1618 
   1619         state->attributes.set(attrQName, attrValue);
   1620     }
   1621 }
   1622 
   1623 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
   1624 {
   1625     AttributeParseState state;
   1626     state.gotAttributes = false;
   1627 
   1628     xmlSAXHandler sax;
   1629     memset(&sax, 0, sizeof(sax));
   1630     sax.startElementNs = attributesStartElementNsHandler;
   1631     sax.initialized = XML_SAX2_MAGIC;
   1632     RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
   1633     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
   1634     parseChunk(parser->context(), parseString);
   1635     finishParsing(parser->context());
   1636     attrsOK = state.gotAttributes;
   1637     return state.attributes;
   1638 }
   1639 
   1640 } // namespace WebCore
   1641