Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2000 Peter Kelly (pmk (at) post.com)
      3  * Copyright (C) 2005, 2006, 2008, 2014 Apple Inc. All rights reserved.
      4  * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org)
      5  * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org)
      6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
      7  * Copyright (C) 2008 Holger Hans Peter Freyther
      8  * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
      9  *
     10  * This library is free software; you can redistribute it and/or
     11  * modify it under the terms of the GNU Library General Public
     12  * License as published by the Free Software Foundation; either
     13  * version 2 of the License, or (at your option) any later version.
     14  *
     15  * This library is distributed in the hope that it will be useful,
     16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     18  * Library General Public License for more details.
     19  *
     20  * You should have received a copy of the GNU Library General Public License
     21  * along with this library; see the file COPYING.LIB.  If not, write to
     22  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     23  * Boston, MA 02110-1301, USA.
     24  */
     25 
     26 #include "config.h"
     27 #include "core/xml/parser/XMLDocumentParser.h"
     28 
     29 #include "bindings/core/v8/ExceptionState.h"
     30 #include "bindings/core/v8/ExceptionStatePlaceholder.h"
     31 #include "bindings/core/v8/ScriptController.h"
     32 #include "bindings/core/v8/ScriptSourceCode.h"
     33 #include "bindings/core/v8/V8Document.h"
     34 #include "core/FetchInitiatorTypeNames.h"
     35 #include "core/HTMLNames.h"
     36 #include "core/XMLNSNames.h"
     37 #include "core/dom/CDATASection.h"
     38 #include "core/dom/Comment.h"
     39 #include "core/dom/Document.h"
     40 #include "core/dom/DocumentFragment.h"
     41 #include "core/dom/DocumentType.h"
     42 #include "core/dom/ProcessingInstruction.h"
     43 #include "core/dom/ScriptLoader.h"
     44 #include "core/dom/TransformSource.h"
     45 #include "core/fetch/ResourceFetcher.h"
     46 #include "core/fetch/ScriptResource.h"
     47 #include "core/frame/LocalFrame.h"
     48 #include "core/frame/UseCounter.h"
     49 #include "core/html/HTMLHtmlElement.h"
     50 #include "core/html/HTMLTemplateElement.h"
     51 #include "core/html/parser/HTMLEntityParser.h"
     52 #include "core/html/parser/TextResourceDecoder.h"
     53 #include "core/loader/FrameLoader.h"
     54 #include "core/loader/ImageLoader.h"
     55 #include "core/svg/graphics/SVGImage.h"
     56 #include "core/xml/parser/SharedBufferReader.h"
     57 #include "core/xml/parser/XMLDocumentParserScope.h"
     58 #include "core/xml/parser/XMLParserInput.h"
     59 #include "platform/RuntimeEnabledFeatures.h"
     60 #include "platform/SharedBuffer.h"
     61 #include "platform/TraceEvent.h"
     62 #include "platform/network/ResourceError.h"
     63 #include "platform/network/ResourceRequest.h"
     64 #include "platform/network/ResourceResponse.h"
     65 #include "platform/weborigin/SecurityOrigin.h"
     66 #include "wtf/StringExtras.h"
     67 #include "wtf/TemporaryChange.h"
     68 #include "wtf/Threading.h"
     69 #include "wtf/Vector.h"
     70 #include "wtf/unicode/UTF8.h"
     71 #include <libxml/catalog.h>
     72 #include <libxml/parser.h>
     73 #include <libxml/parserInternals.h>
     74 #include <libxslt/xslt.h>
     75 
     76 namespace blink {
     77 
     78 using namespace HTMLNames;
     79 
// Upper bound on the size of the parser's current-node stack;
// pushCurrentNode() reports a fatal "Excessive node nesting." error once the
// stack grows beyond it.
// FIXME: HTMLConstructionSite has a limit of 512, should these match?
static const unsigned maxXMLTreeDepth = 5000;
     82 
     83 static inline String toString(const xmlChar* string, size_t length)
     84 {
     85     return String::fromUTF8(reinterpret_cast<const char*>(string), length);
     86 }
     87 
     88 static inline String toString(const xmlChar* string)
     89 {
     90     return String::fromUTF8(reinterpret_cast<const char*>(string));
     91 }
     92 
     93 static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
     94 {
     95     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
     96 }
     97 
     98 static inline AtomicString toAtomicString(const xmlChar* string)
     99 {
    100     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
    101 }
    102 
    103 static inline bool hasNoStyleInformation(Document* document)
    104 {
    105     if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
    106         return false;
    107 
    108     if (!document->frame() || !document->frame()->page())
    109         return false;
    110 
    111     if (document->frame()->tree().parent())
    112         return false; // This document is not in a top frame
    113 
    114     if (SVGImage::isInSVGImage(document))
    115         return false;
    116 
    117     return true;
    118 }
    119 
    120 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
    121 public:
    122     PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
    123         int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
    124         : m_localName(localName)
    125         , m_prefix(prefix)
    126         , m_uri(uri)
    127         , m_namespaceCount(namespaceCount)
    128         , m_attributeCount(attributeCount)
    129         , m_defaultedCount(defaultedCount)
    130     {
    131         m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
    132         for (int i = 0; i < namespaceCount * 2 ; ++i)
    133             m_namespaces[i] = xmlStrdup(namespaces[i]);
    134         m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
    135         for (int i = 0; i < attributeCount; ++i) {
    136             // Each attribute has 5 elements in the array:
    137             // name, prefix, uri, value and an end pointer.
    138             for (int j = 0; j < 3; ++j)
    139                 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
    140             int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
    141             m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
    142             m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
    143         }
    144     }
    145 
    146     virtual ~PendingStartElementNSCallback()
    147     {
    148         for (int i = 0; i < m_namespaceCount * 2; ++i)
    149             xmlFree(m_namespaces[i]);
    150         xmlFree(m_namespaces);
    151         for (int i = 0; i < m_attributeCount; ++i)
    152             for (int j = 0; j < 4; ++j)
    153                 xmlFree(m_attributes[i * 5 + j]);
    154         xmlFree(m_attributes);
    155     }
    156 
    157     virtual void call(XMLDocumentParser* parser) OVERRIDE
    158     {
    159         parser->startElementNs(m_localName, m_prefix, m_uri,
    160             m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
    161             m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
    162     }
    163 
    164 private:
    165     AtomicString m_localName;
    166     AtomicString m_prefix;
    167     AtomicString m_uri;
    168     int m_namespaceCount;
    169     xmlChar** m_namespaces;
    170     int m_attributeCount;
    171     int m_defaultedCount;
    172     xmlChar** m_attributes;
    173 };
    174 
    175 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
    176 public:
    177     virtual void call(XMLDocumentParser* parser) OVERRIDE
    178     {
    179         parser->endElementNs();
    180     }
    181 };
    182 
    183 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
    184 public:
    185     PendingCharactersCallback(const xmlChar* chars, int length)
    186         : m_chars(xmlStrndup(chars, length))
    187         , m_length(length)
    188     {
    189     }
    190 
    191     virtual ~PendingCharactersCallback()
    192     {
    193         xmlFree(m_chars);
    194     }
    195 
    196     virtual void call(XMLDocumentParser* parser) OVERRIDE
    197     {
    198         parser->characters(m_chars, m_length);
    199     }
    200 
    201 private:
    202     xmlChar* m_chars;
    203     int m_length;
    204 };
    205 
    206 class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
    207 public:
    208     PendingProcessingInstructionCallback(const String& target, const String& data)
    209         : m_target(target)
    210         , m_data(data)
    211     {
    212     }
    213 
    214     virtual void call(XMLDocumentParser* parser) OVERRIDE
    215     {
    216         parser->processingInstruction(m_target, m_data);
    217     }
    218 
    219 private:
    220     String m_target;
    221     String m_data;
    222 };
    223 
    224 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
    225 public:
    226     explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }
    227 
    228     virtual void call(XMLDocumentParser* parser) OVERRIDE
    229     {
    230         parser->cdataBlock(m_text);
    231     }
    232 
    233 private:
    234     String m_text;
    235 };
    236 
    237 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
    238 public:
    239     explicit PendingCommentCallback(const String& text) : m_text(text) { }
    240 
    241     virtual void call(XMLDocumentParser* parser) OVERRIDE
    242     {
    243         parser->comment(m_text);
    244     }
    245 
    246 private:
    247     String m_text;
    248 };
    249 
    250 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
    251 public:
    252     PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
    253         : m_name(name)
    254         , m_externalID(externalID)
    255         , m_systemID(systemID)
    256     {
    257     }
    258 
    259     virtual void call(XMLDocumentParser* parser) OVERRIDE
    260     {
    261         parser->internalSubset(m_name, m_externalID, m_systemID);
    262     }
    263 
    264 private:
    265     String m_name;
    266     String m_externalID;
    267     String m_systemID;
    268 };
    269 
    270 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
    271 public:
    272     PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
    273         : m_type(type)
    274         , m_message(xmlStrdup(message))
    275         , m_lineNumber(lineNumber)
    276         , m_columnNumber(columnNumber)
    277     {
    278     }
    279 
    280     virtual ~PendingErrorCallback()
    281     {
    282         xmlFree(m_message);
    283     }
    284 
    285     virtual void call(XMLDocumentParser* parser) OVERRIDE
    286     {
    287         parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
    288     }
    289 
    290 private:
    291     XMLErrors::ErrorType m_type;
    292     xmlChar* m_message;
    293     OrdinalNumber m_lineNumber;
    294     OrdinalNumber m_columnNumber;
    295 };
    296 
// Makes |n| the current node and saves the previous current node on
// m_currentNodeStack. Reports a fatal error when the stack becomes deeper than
// maxXMLTreeDepth.
void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
{
    ASSERT(n);
    ASSERT(m_currentNode);
#if !ENABLE(OILPAN)
    // Without Oilpan the parser takes a manual reference on every current
    // node except the document itself.
    if (n != document())
        n->ref();
#endif
    m_currentNodeStack.append(m_currentNode);
    m_currentNode = n;
    // The depth check runs after the push, so |n| is already the current node
    // when the error is reported.
    if (m_currentNodeStack.size() > maxXMLTreeDepth)
        handleError(XMLErrors::ErrorTypeFatal, "Excessive node nesting.", textPosition());
}
    310 
// Restores the most recently saved node from m_currentNodeStack as the
// current node. Does nothing when there is no current node.
void XMLDocumentParser::popCurrentNode()
{
    if (!m_currentNode)
        return;
    ASSERT(m_currentNodeStack.size());
#if !ENABLE(OILPAN)
    // Release the reference taken in pushCurrentNode(); the document was
    // never ref'ed there.
    if (m_currentNode != document())
        m_currentNode->deref();
#endif
    m_currentNode = m_currentNodeStack.last();
    m_currentNodeStack.removeLast();
}
    323 
// Resets m_currentNode and m_leafTextNode to null and empties
// m_currentNodeStack, releasing the manual node references when Oilpan is
// disabled.
void XMLDocumentParser::clearCurrentNodeStack()
{
#if !ENABLE(OILPAN)
    if (m_currentNode && m_currentNode != document())
        m_currentNode->deref();
#endif
    m_currentNode = nullptr;
    m_leafTextNode = nullptr;

    if (m_currentNodeStack.size()) { // Aborted parsing.
#if !ENABLE(OILPAN)
        // Entries at index 1 and above are deref'ed unconditionally; the
        // bottom entry only when it is non-null and not the document.
        for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
            m_currentNodeStack[i]->deref();
        if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
            m_currentNodeStack[0]->deref();
#endif
        m_currentNodeStack.clear();
    }
}
    343 
    344 void XMLDocumentParser::insert(const SegmentedString&)
    345 {
    346     ASSERT_NOT_REACHED();
    347 }
    348 
    349 void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
    350 {
    351     SegmentedString source(inputSource);
    352     if (m_sawXSLTransform || !m_sawFirstElement)
    353         m_originalSourceForTransform.append(source);
    354 
    355     if (isStopped() || m_sawXSLTransform)
    356         return;
    357 
    358     if (m_parserPaused) {
    359         m_pendingSrc.append(source);
    360         return;
    361     }
    362 
    363     // JavaScript can detach the parser. Make sure this is not released
    364     // before the end of this method.
    365     RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
    366 
    367     doWrite(source.toString());
    368 }
    369 
    370 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
    371 {
    372     m_xmlErrors.handleError(type, formattedMessage, position);
    373     if (type != XMLErrors::ErrorTypeWarning)
    374         m_sawError = true;
    375     if (type == XMLErrors::ErrorTypeFatal)
    376         stopParsing();
    377 }
    378 
    379 void XMLDocumentParser::enterText()
    380 {
    381     ASSERT(m_bufferedText.size() == 0);
    382     ASSERT(!m_leafTextNode);
    383     m_leafTextNode = Text::create(m_currentNode->document(), "");
    384     m_currentNode->parserAppendChild(m_leafTextNode.get());
    385 }
    386 
    387 void XMLDocumentParser::exitText()
    388 {
    389     if (isStopped())
    390         return;
    391 
    392     if (!m_leafTextNode)
    393         return;
    394 
    395     m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
    396     m_bufferedText.clear();
    397     m_leafTextNode = nullptr;
    398 }
    399 
// Detaches the parser: first drops the parser's own node references, then
// runs the base-class detach.
void XMLDocumentParser::detach()
{
    clearCurrentNodeStack();
    ScriptableDocumentParser::detach();
}
    405 
// Finishes a whole-document parse: runs doEnd(), then (unless the parser got
// detached or paused meanwhile) inserts the error block or flushes pending
// text, and finally tells the document that parsing has finished. Must not be
// used for fragment parsing.
void XMLDocumentParser::end()
{
    TRACE_EVENT0("blink", "XMLDocumentParser::end");
    // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
    // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
    ASSERT(!m_parsingFragment);

    doEnd();

    // doEnd() call above can detach the parser and null out its document.
    // In that case, we just bail out.
    if (isDetached())
        return;

    // doEnd() could process a script tag, thus pausing parsing.
    if (m_parserPaused)
        return;

    if (m_sawError) {
        insertErrorMessageBlock();
    } else {
        // No error: flush any buffered text into the tree before notifying
        // the document about the style change.
        exitText();
        document()->styleResolverChanged();
    }

    if (isParsing())
        prepareToStopParsing();
    document()->setReadyState(Document::Interactive);
    // Drop the parser's node references before the document is told that
    // parsing has finished.
    clearCurrentNodeStack();
    document()->finishedParsing();
}
    437 
    438 void XMLDocumentParser::finish()
    439 {
    440     // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
    441     // makes sense to call any methods on DocumentParser once it's been stopped.
    442     // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
    443 
    444     // flush may ending up executing arbitrary script, and possibly detach the parser.
    445     RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
    446     flush();
    447     if (isDetached())
    448         return;
    449 
    450     if (m_parserPaused)
    451         m_finishCalled = true;
    452     else
    453         end();
    454 }
    455 
    456 void XMLDocumentParser::insertErrorMessageBlock()
    457 {
    458     m_xmlErrors.insertErrorMessageBlock();
    459 }
    460 
// Called when the pending script resource has finished. Dispatches an error
// event or executes the script (unless the load was canceled), then resumes
// parsing if the parser is still attached and is not in the middle of
// requesting a script.
void XMLDocumentParser::notifyFinished(Resource* unusedResource)
{
    ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);

    // Capture everything needed from the pending script before dropping it.
    ScriptSourceCode sourceCode(m_pendingScript.get());
    bool errorOccurred = m_pendingScript->errorOccurred();
    bool wasCanceled = m_pendingScript->wasCanceled();

    m_pendingScript->removeClient(this);
    m_pendingScript = 0;

    // Move the script element into a local so the member is already cleared
    // when the script runs.
    RefPtrWillBeRawPtr<Element> e = m_scriptElement;
    m_scriptElement = nullptr;

    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
    ASSERT(scriptLoader);

    // JavaScript can detach this parser, make sure it's kept alive even if
    // detached.
    RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);

    if (errorOccurred) {
        scriptLoader->dispatchErrorEvent();
    } else if (!wasCanceled) {
        scriptLoader->executeScript(sourceCode);
        scriptLoader->dispatchLoadEvent();
    }

    // Cleared again after the calls above.
    // NOTE(review): presumably because script execution can set it again - confirm.
    m_scriptElement = nullptr;

    if (!isDetached() && !m_requestingScript)
        resumeParsing();
}
    494 
// Returns true while a pending script is set (m_pendingScript is non-null).
bool XMLDocumentParser::isWaitingForScripts() const
{
    return m_pendingScript;
}
    499 
    500 void XMLDocumentParser::pauseParsing()
    501 {
    502     if (!m_parsingFragment)
    503         m_parserPaused = true;
    504 }
    505 
    506 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
    507 {
    508     if (!chunk.length())
    509         return true;
    510 
    511     // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
    512     // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
    513     // For now we have a hack for script/style innerHTML support:
    514     if (contextElement && (contextElement->hasLocalName(scriptTag.localName()) || contextElement->hasLocalName(styleTag.localName()))) {
    515         fragment->parserAppendChild(fragment->document().createTextNode(chunk));
    516         return true;
    517     }
    518 
    519     RefPtrWillBeRawPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
    520     bool wellFormed = parser->appendFragmentSource(chunk);
    521 
    522     // Do not call finish(). Current finish() and doEnd() implementations touch
    523     // the main Document/loader and can cause crashes in the fragment case.
    524 
    525     // Allows ~DocumentParser to assert it was detached before destruction.
    526     parser->detach();
    527     // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
    528     return wellFormed;
    529 }
    530 
// Only the address of this variable is used: openFunc() returns it as the I/O
// context for loads that are not allowed, and readFunc()/closeFunc() compare
// against it.
static int globalDescriptor = 0;
// Thread on which initializeLibXMLIfNecessary() ran; matchFunc() only accepts
// loads issued from this thread.
static ThreadIdentifier libxmlLoaderThread = 0;
    533 
    534 static int matchFunc(const char*)
    535 {
    536     // Only match loads initiated due to uses of libxml2 from within
    537     // XMLDocumentParser to avoid interfering with client applications that also
    538     // use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
    539     return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
    540 }
    541 
    542 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
    543 {
    544     if (!scriptingContentIsAllowed(parserContentPolicy))
    545         element->stripScriptingAttributes(attributeVector);
    546     element->parserSetAttributes(attributeVector);
    547 }
    548 
    549 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
    550 {
    551     // Hack around libxml2's lack of encoding overide support by manually
    552     // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
    553     // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks and
    554     // switch encodings, causing the parse to fail.
    555     if (is8Bit) {
    556         xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
    557         return;
    558     }
    559 
    560     const UChar BOM = 0xFEFF;
    561     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
    562     xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
    563 }
    564 
    565 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
    566 {
    567     bool is8Bit = chunk.is8Bit();
    568     switchEncoding(ctxt, is8Bit);
    569     if (is8Bit)
    570         xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
    571     else
    572         xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
    573 }
    574 
    575 static void finishParsing(xmlParserCtxtPtr ctxt)
    576 {
    577     xmlParseChunk(ctxt, 0, 0, 1);
    578 }
    579 
// From here on, any direct use of xmlParseChunk expands to text that does not
// compile; code below must go through parseChunk() or finishParsing().
#define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
    581 
    582 static bool isLibxmlDefaultCatalogFile(const String& urlString)
    583 {
    584     // On non-Windows platforms libxml asks for this URL, the
    585     // "XML_XML_DEFAULT_CATALOG", on initialization.
    586     if (urlString == "file:///etc/xml/catalog")
    587         return true;
    588 
    589     // On Windows, libxml computes a URL relative to where its DLL resides.
    590     if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
    591         return true;
    592     return false;
    593 }
    594 
    595 static bool shouldAllowExternalLoad(const KURL& url)
    596 {
    597     String urlString = url.string();
    598 
    599     // This isn't really necessary now that initializeLibXMLIfNecessary
    600     // disables catalog support in libxml, but keeping it for defense in depth.
    601     if (isLibxmlDefaultCatalogFile(url))
    602         return false;
    603 
    604     // The most common DTD. There isn't much point in hammering www.w3c.org by
    605     // requesting this URL for every XHTML document.
    606     if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
    607         return false;
    608 
    609     // Similarly, there isn't much point in requesting the SVG DTD.
    610     if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
    611         return false;
    612 
    613     // The libxml doesn't give us a lot of context for deciding whether to allow
    614     // this request. In the worst case, this load could be for an external
    615     // entity and the resulting document could simply read the retrieved
    616     // content. If we had more context, we could potentially allow the parser to
    617     // load a DTD. As things stand, we take the conservative route and allow
    618     // same-origin requests only.
    619     if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
    620         XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
    621         return false;
    622     }
    623 
    624     return true;
    625 }
    626 
// libxml I/O "open" callback. Returns either a heap-allocated
// SharedBufferReader over the fetched data (released by closeFunc()) or the
// address of globalDescriptor when the load is not allowed.
static void* openFunc(const char* uri)
{
    ASSERT(XMLDocumentParserScope::currentFetcher);
    ASSERT(currentThread() == libxmlLoaderThread);

    KURL url(KURL(), uri);

    if (!shouldAllowExternalLoad(url))
        return &globalDescriptor;

    // Both stay empty/null unless the synchronous fetch below succeeds.
    KURL finalURL;
    RefPtr<SharedBuffer> data;

    {
        ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
        // A scope constructed with a null fetcher is active for the duration
        // of the synchronous fetch.
        XMLDocumentParserScope scope(0);
        // FIXME: We should restore the original global error handler as well.

        if (fetcher->frame()) {
            FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions());
            ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request);
            if (resource && !resource->errorOccurred()) {
                data = resource->resourceBuffer();
                finalURL = resource->response().url();
            }
        }
    }

    // We have to check the URL again after the load to catch redirects.
    // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
    // NOTE(review): when the fetch did not happen or failed, finalURL is still
    // the default-constructed KURL here - confirm that this check rejects it.
    if (!shouldAllowExternalLoad(finalURL))
        return &globalDescriptor;

    UseCounter::count(XMLDocumentParserScope::currentFetcher->document(), UseCounter::XMLExternalResourceLoad);

    return new SharedBufferReader(data);
}
    664 
    665 static int readFunc(void* context, char* buffer, int len)
    666 {
    667     // Do 0-byte reads in case of a null descriptor
    668     if (context == &globalDescriptor)
    669         return 0;
    670 
    671     SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
    672     return data->readData(buffer, len);
    673 }
    674 
    675 static int writeFunc(void*, const char*, int)
    676 {
    677     // Always just do 0-byte writes
    678     return 0;
    679 }
    680 
    681 static int closeFunc(void* context)
    682 {
    683     if (context != &globalDescriptor) {
    684         SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
    685         delete data;
    686     }
    687     return 0;
    688 }
    689 
    690 static void errorFunc(void*, const char*, ...)
    691 {
    692     // FIXME: It would be nice to display error messages somewhere.
    693 }
    694 
// Performs the one-time libxml setup: disables catalogs, initializes the
// parser, registers this file's I/O callbacks and records the calling thread
// in libxmlLoaderThread. Later calls return immediately.
static void initializeLibXMLIfNecessary()
{
    static bool didInit = false;
    if (didInit)
        return;

    // We don't want libxml to try and load catalogs.
    // FIXME: It's not nice to set global settings in libxml, embedders of Blink
    // could be trying to use libxml themselves.
    xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
    xmlInitParser();
    // The same match/open/close callbacks serve both input and output; only
    // the read/write callback differs.
    xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
    xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
    libxmlLoaderThread = currentThread();
    didInit = true;
}
    711 
    712 
    713 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
    714 {
    715     initializeLibXMLIfNecessary();
    716     xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
    717     parser->_private = userData;
    718     parser->replaceEntities = true;
    719     return adoptRef(new XMLParserContext(parser));
    720 }
    721 
// Creates a libxml memory-parser context over |chunk|, installs |handlers|
// into it and sets it up to parse content. Returns null when libxml cannot
// create the context.
// Chunk should be encoded in UTF-8
PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
{
    initializeLibXMLIfNecessary();

    // appendFragmentSource() checks that the length doesn't overflow an int.
    xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());

    if (!parser)
        return nullptr;

    // Copy the sax handler
    memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));

    // Set parser options.
    // XML_PARSE_NODICT: default dictionary option.
    // XML_PARSE_NOENT: force entities substitutions.
    xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);

    // Internal initialization
    parser->sax2 = 1;
    parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
    parser->depth = 0;
    // The numeric arguments below are the byte lengths of the strings being
    // looked up.
    parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
    parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
    parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
    parser->_private = userData;

    return adoptRef(new XMLParserContext(parser));
}
    752 
    753 // --------------------------------
    754 
    755 bool XMLDocumentParser::supportsXMLVersion(const String& version)
    756 {
    757     return version == "1.0";
    758 }
    759 
    760 XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
    761     : ScriptableDocumentParser(document)
    762     , m_hasView(frameView)
    763     , m_context(nullptr)
    764     , m_currentNode(&document)
    765     , m_isCurrentlyParsing8BitChunk(false)
    766     , m_sawError(false)
    767     , m_sawCSS(false)
    768     , m_sawXSLTransform(false)
    769     , m_sawFirstElement(false)
    770     , m_isXHTMLDocument(false)
    771     , m_parserPaused(false)
    772     , m_requestingScript(false)
    773     , m_finishCalled(false)
    774     , m_xmlErrors(&document)
    775     , m_pendingScript(0)
    776     , m_scriptStartPosition(TextPosition::belowRangePosition())
    777     , m_parsingFragment(false)
    778 {
    779     // This is XML being used as a document resource.
    780     if (frameView && document.isXMLDocument())
    781         UseCounter::count(document, UseCounter::XMLDocument);
    782 }
    783 
// Constructs a parser for fragment parsing. |fragment| is the initial current
// node, and the namespace declarations found on |parentElement| and its
// ancestors seed the parser's default namespace and prefix map.
XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
    : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
    , m_hasView(false)
    , m_context(nullptr)
    , m_currentNode(fragment)
    , m_isCurrentlyParsing8BitChunk(false)
    , m_sawError(false)
    , m_sawCSS(false)
    , m_sawXSLTransform(false)
    , m_sawFirstElement(false)
    , m_isXHTMLDocument(false)
    , m_parserPaused(false)
    , m_requestingScript(false)
    , m_finishCalled(false)
    , m_xmlErrors(&fragment->document())
    , m_pendingScript(0)
    , m_scriptStartPosition(TextPosition::belowRangePosition())
    , m_parsingFragment(true)
{
#if !ENABLE(OILPAN)
    // The fragment is the initial current node; this reference is the one
    // released when the current node is popped or the stack is cleared.
    fragment->ref();
#endif

    // Add namespaces based on the parent node
    // Collect parentElement and all of its ancestor elements, innermost first.
    WillBeHeapVector<RawPtrWillBeMember<Element> > elemStack;
    while (parentElement) {
        elemStack.append(parentElement);

        Element* grandParentElement = parentElement->parentElement();
        if (!grandParentElement)
            break;
        parentElement = grandParentElement;
    }

    // Empty only when parentElement was null to begin with.
    if (elemStack.isEmpty())
        return;

    // Walk from the outermost ancestor inwards, so declarations on inner
    // elements overwrite those of outer ones.
    for (; !elemStack.isEmpty(); elemStack.removeLast()) {
        Element* element = elemStack.last();
        AttributeCollection attributes = element->attributes();
        AttributeCollection::iterator end = attributes.end();
        for (AttributeCollection::iterator it = attributes.begin(); it != end; ++it) {
            if (it->localName() == xmlnsAtom)
                m_defaultNamespaceURI = it->value();
            else if (it->prefix() == xmlnsAtom)
                m_prefixToNamespaceMap.set(it->localName(), it->value());
        }
    }

    // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
    // NOTE(review): after the walk above, parentElement is the outermost
    // ancestor rather than the element originally passed in - confirm that
    // this is the intended source of the fallback namespace.
    if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
        m_defaultNamespaceURI = parentElement->namespaceURI();
}
    837 
    838 XMLParserContext::~XMLParserContext()
    839 {
    840     if (m_context->myDoc)
    841         xmlFreeDoc(m_context->myDoc);
    842     xmlFreeParserCtxt(m_context);
    843 }
    844 
    845 XMLDocumentParser::~XMLDocumentParser()
    846 {
    847 #if !ENABLE(OILPAN)
    848     // The XMLDocumentParser will always be detached before being destroyed.
    849     ASSERT(m_currentNodeStack.isEmpty());
    850     ASSERT(!m_currentNode);
    851 #endif
    852 
    853     // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
    854     if (m_pendingScript)
    855         m_pendingScript->removeClient(this);
    856 }
    857 
    858 void XMLDocumentParser::trace(Visitor* visitor)
    859 {
    860     visitor->trace(m_currentNode);
    861 #if ENABLE(OILPAN)
    862     visitor->trace(m_currentNodeStack);
    863 #endif
    864     visitor->trace(m_leafTextNode);
    865     visitor->trace(m_xmlErrors);
    866     visitor->trace(m_scriptElement);
    867     ScriptableDocumentParser::trace(visitor);
    868 }
    869 
    870 void XMLDocumentParser::doWrite(const String& parseString)
    871 {
    872     TRACE_EVENT0("blink", "XMLDocumentParser::doWrite");
    873     ASSERT(!isDetached());
    874     if (!m_context)
    875         initializeParserContext();
    876 
    877     // Protect the libxml context from deletion during a callback
    878     RefPtr<XMLParserContext> context = m_context;
    879 
    880     // libXML throws an error if you try to switch the encoding for an empty
    881     // string.
    882     if (parseString.length()) {
    883         // JavaScript may cause the parser to detach during parseChunk
    884         // keep this alive until this function is done.
    885         RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
    886 
    887         XMLDocumentParserScope scope(document()->fetcher());
    888         TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
    889         parseChunk(context->context(), parseString);
    890 
    891         // JavaScript (which may be run under the parseChunk callstack) may
    892         // cause the parser to be stopped or detached.
    893         if (isStopped())
    894             return;
    895     }
    896 
    897     // FIXME: Why is this here? And why is it after we process the passed
    898     // source?
    899     if (document()->sawDecodingError()) {
    900         // If the decoder saw an error, report it as fatal (stops parsing)
    901         TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
    902         handleError(XMLErrors::ErrorTypeFatal, "Encoding error", position);
    903     }
    904 }
    905 
    906 struct xmlSAX2Namespace {
    907     const xmlChar* prefix;
    908     const xmlChar* uri;
    909 };
    910 
    911 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState)
    912 {
    913     xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
    914     for (int i = 0; i < nbNamespaces; ++i) {
    915         AtomicString namespaceQName = xmlnsAtom;
    916         AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
    917         if (namespaces[i].prefix)
    918             namespaceQName = WTF::xmlnsWithColon + namespaces[i].prefix;
    919 
    920         QualifiedName parsedName = anyName;
    921         if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState))
    922             return;
    923 
    924         prefixedAttributes.append(Attribute(parsedName, namespaceURI));
    925     }
    926 }
    927 
    928 struct xmlSAX2Attributes {
    929     const xmlChar* localname;
    930     const xmlChar* prefix;
    931     const xmlChar* uri;
    932     const xmlChar* value;
    933     const xmlChar* end;
    934 };
    935 
    936 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState)
    937 {
    938     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
    939     for (int i = 0; i < nbAttributes; ++i) {
    940         int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
    941         AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
    942         String attrPrefix = toString(attributes[i].prefix);
    943         AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
    944         AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
    945 
    946         QualifiedName parsedName = anyName;
    947         if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState))
    948             return;
    949 
    950         prefixedAttributes.append(Attribute(parsedName, attrValue));
    951     }
    952 }
    953 
    954 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
    955     const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
    956 {
    957     if (isStopped())
    958         return;
    959 
    960     if (m_parserPaused) {
    961         m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
    962             nbAttributes, nbDefaulted, libxmlAttributes)));
    963         return;
    964     }
    965 
    966     exitText();
    967 
    968     AtomicString adjustedURI = uri;
    969     if (m_parsingFragment && adjustedURI.isNull()) {
    970         if (!prefix.isNull())
    971             adjustedURI = m_prefixToNamespaceMap.get(prefix);
    972         else
    973             adjustedURI = m_defaultNamespaceURI;
    974     }
    975 
    976     bool isFirstElement = !m_sawFirstElement;
    977     m_sawFirstElement = true;
    978 
    979     QualifiedName qName(prefix, localName, adjustedURI);
    980     RefPtrWillBeRawPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
    981     if (!newElement) {
    982         stopParsing();
    983         return;
    984     }
    985 
    986     Vector<Attribute> prefixedAttributes;
    987     TrackExceptionState exceptionState;
    988     handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState);
    989     if (exceptionState.hadException()) {
    990         setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    991         stopParsing();
    992         return;
    993     }
    994 
    995     handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState);
    996     setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    997     if (exceptionState.hadException()) {
    998         stopParsing();
    999         return;
   1000     }
   1001 
   1002     newElement->beginParsingChildren();
   1003 
   1004     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
   1005     if (scriptLoader)
   1006         m_scriptStartPosition = textPosition();
   1007 
   1008     m_currentNode->parserAppendChild(newElement.get());
   1009 
   1010     // Event handlers may synchronously trigger removal of the
   1011     // document and cancellation of this parser.
   1012     if (isStopped()) {
   1013         stopParsing();
   1014         return;
   1015     }
   1016 
   1017     if (isHTMLTemplateElement(*newElement))
   1018         pushCurrentNode(toHTMLTemplateElement(*newElement).content());
   1019     else
   1020         pushCurrentNode(newElement.get());
   1021 
   1022     if (isHTMLHtmlElement(*newElement))
   1023         toHTMLHtmlElement(*newElement).insertedByParser();
   1024 
   1025     if (!m_parsingFragment && isFirstElement && document()->frame())
   1026         document()->frame()->loader().dispatchDocumentElementAvailable();
   1027 }
   1028 
   1029 void XMLDocumentParser::endElementNs()
   1030 {
   1031     if (isStopped())
   1032         return;
   1033 
   1034     if (m_parserPaused) {
   1035         m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
   1036         return;
   1037     }
   1038 
   1039     // JavaScript can detach the parser. Make sure this is not released before
   1040     // the end of this method.
   1041     RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
   1042 
   1043     exitText();
   1044 
   1045     RefPtrWillBeRawPtr<ContainerNode> n = m_currentNode;
   1046     if (m_currentNode->isElementNode())
   1047         toElement(n.get())->finishParsingChildren();
   1048 
   1049     if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) {
   1050         popCurrentNode();
   1051         n->remove(IGNORE_EXCEPTION);
   1052         return;
   1053     }
   1054 
   1055     if (!n->isElementNode() || !m_hasView) {
   1056         popCurrentNode();
   1057         return;
   1058     }
   1059 
   1060     Element* element = toElement(n);
   1061 
   1062     // The element's parent may have already been removed from document.
   1063     // Parsing continues in this case, but scripts aren't executed.
   1064     if (!element->inDocument()) {
   1065         popCurrentNode();
   1066         return;
   1067     }
   1068 
   1069     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
   1070     if (!scriptLoader) {
   1071         popCurrentNode();
   1072         return;
   1073     }
   1074 
   1075     // Don't load external scripts for standalone documents (for now).
   1076     ASSERT(!m_pendingScript);
   1077     m_requestingScript = true;
   1078 
   1079     if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
   1080         // FIXME: Script execution should be shared between
   1081         // the libxml2 and Qt XMLDocumentParser implementations.
   1082 
   1083         if (scriptLoader->readyToBeParserExecuted()) {
   1084             scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
   1085         } else if (scriptLoader->willBeParserExecuted()) {
   1086             m_pendingScript = scriptLoader->resource();
   1087             m_scriptElement = element;
   1088             m_pendingScript->addClient(this);
   1089 
   1090             // m_pendingScript will be 0 if script was already loaded and
   1091             // addClient() executed it.
   1092             if (m_pendingScript)
   1093                 pauseParsing();
   1094         } else {
   1095             m_scriptElement = nullptr;
   1096         }
   1097 
   1098         // JavaScript may have detached the parser
   1099         if (isDetached())
   1100             return;
   1101     }
   1102     m_requestingScript = false;
   1103     popCurrentNode();
   1104 }
   1105 
   1106 void XMLDocumentParser::characters(const xmlChar* chars, int length)
   1107 {
   1108     if (isStopped())
   1109         return;
   1110 
   1111     if (m_parserPaused) {
   1112         m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
   1113         return;
   1114     }
   1115 
   1116     if (!m_leafTextNode)
   1117         enterText();
   1118     m_bufferedText.append(chars, length);
   1119 }
   1120 
   1121 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
   1122 {
   1123     if (isStopped())
   1124         return;
   1125 
   1126     char formattedMessage[1024];
   1127     vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
   1128 
   1129     if (m_parserPaused) {
   1130         m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
   1131         return;
   1132     }
   1133 
   1134     handleError(type, formattedMessage, textPosition());
   1135 }
   1136 
   1137 void XMLDocumentParser::processingInstruction(const String& target, const String& data)
   1138 {
   1139     if (isStopped())
   1140         return;
   1141 
   1142     if (m_parserPaused) {
   1143         m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target, data)));
   1144         return;
   1145     }
   1146 
   1147     exitText();
   1148 
   1149     // ### handle exceptions
   1150     TrackExceptionState exceptionState;
   1151     RefPtrWillBeRawPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState);
   1152     if (exceptionState.hadException())
   1153         return;
   1154 
   1155     pi->setCreatedByParser(true);
   1156 
   1157     m_currentNode->parserAppendChild(pi.get());
   1158 
   1159     pi->setCreatedByParser(false);
   1160 
   1161     if (pi->isCSS())
   1162         m_sawCSS = true;
   1163 
   1164     if (!RuntimeEnabledFeatures::xsltEnabled())
   1165         return;
   1166 
   1167     m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
   1168     if (m_sawXSLTransform && !document()->transformSourceDocument()) {
   1169         // This behavior is very tricky. We call stopParsing() here because we
   1170         // want to stop processing the document until we're ready to apply the
   1171         // transform, but we actually still want to be fed decoded string pieces
   1172         // to accumulate in m_originalSourceForTransform. So, we call
   1173         // stopParsing() here and check isStopped() in element callbacks.
   1174         // FIXME: This contradicts the contract of DocumentParser.
   1175         stopParsing();
   1176     }
   1177 }
   1178 
   1179 void XMLDocumentParser::cdataBlock(const String& text)
   1180 {
   1181     if (isStopped())
   1182         return;
   1183 
   1184     if (m_parserPaused) {
   1185         m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
   1186         return;
   1187     }
   1188 
   1189     exitText();
   1190 
   1191     m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), text));
   1192 }
   1193 
   1194 void XMLDocumentParser::comment(const String& text)
   1195 {
   1196     if (isStopped())
   1197         return;
   1198 
   1199     if (m_parserPaused) {
   1200         m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
   1201         return;
   1202     }
   1203 
   1204     exitText();
   1205 
   1206     m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), text));
   1207 }
   1208 
   1209 enum StandaloneInfo {
   1210     StandaloneUnspecified = -2,
   1211     NoXMlDeclaration,
   1212     StandaloneNo,
   1213     StandaloneYes
   1214 };
   1215 
   1216 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
   1217 {
   1218     StandaloneInfo standaloneInfo = static_cast<StandaloneInfo>(standalone);
   1219     if (standaloneInfo == NoXMlDeclaration) {
   1220         document()->setHasXMLDeclaration(false);
   1221         return;
   1222     }
   1223 
   1224     if (!version.isNull())
   1225         document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
   1226     if (standalone != StandaloneUnspecified)
   1227         document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
   1228     if (!encoding.isNull())
   1229         document()->setXMLEncoding(encoding);
   1230     document()->setHasXMLDeclaration(true);
   1231 }
   1232 
   1233 void XMLDocumentParser::endDocument()
   1234 {
   1235     exitText();
   1236 }
   1237 
   1238 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
   1239 {
   1240     if (isStopped())
   1241         return;
   1242 
   1243     if (m_parserPaused) {
   1244         m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
   1245         return;
   1246     }
   1247 
   1248     if (document())
   1249         document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
   1250 }
   1251 
   1252 static inline XMLDocumentParser* getParser(void* closure)
   1253 {
   1254     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1255     return static_cast<XMLDocumentParser*>(ctxt->_private);
   1256 }
   1257 
   1258 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
   1259 {
   1260     getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
   1261 }
   1262 
   1263 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
   1264 {
   1265     getParser(closure)->endElementNs();
   1266 }
   1267 
   1268 static void charactersHandler(void* closure, const xmlChar* chars, int length)
   1269 {
   1270     getParser(closure)->characters(chars, length);
   1271 }
   1272 
   1273 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
   1274 {
   1275     getParser(closure)->processingInstruction(toString(target), toString(data));
   1276 }
   1277 
   1278 static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
   1279 {
   1280     getParser(closure)->cdataBlock(toString(text, length));
   1281 }
   1282 
   1283 static void commentHandler(void* closure, const xmlChar* text)
   1284 {
   1285     getParser(closure)->comment(toString(text));
   1286 }
   1287 
   1288 WTF_ATTRIBUTE_PRINTF(2, 3)
   1289 static void warningHandler(void* closure, const char* message, ...)
   1290 {
   1291     va_list args;
   1292     va_start(args, message);
   1293     getParser(closure)->error(XMLErrors::ErrorTypeWarning, message, args);
   1294     va_end(args);
   1295 }
   1296 
   1297 WTF_ATTRIBUTE_PRINTF(2, 3)
   1298 static void fatalErrorHandler(void* closure, const char* message, ...)
   1299 {
   1300     va_list args;
   1301     va_start(args, message);
   1302     getParser(closure)->error(XMLErrors::ErrorTypeFatal, message, args);
   1303     va_end(args);
   1304 }
   1305 
   1306 WTF_ATTRIBUTE_PRINTF(2, 3)
   1307 static void normalErrorHandler(void* closure, const char* message, ...)
   1308 {
   1309     va_list args;
   1310     va_start(args, message);
   1311     getParser(closure)->error(XMLErrors::ErrorTypeNonFatal, message, args);
   1312     va_end(args);
   1313 }
   1314 
   1315 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is a hack
   1316 // to avoid malloc/free. Using a global variable like this could cause trouble
   1317 // if libxml implementation details were to change
   1318 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
   1319 
   1320 static xmlEntityPtr sharedXHTMLEntity()
   1321 {
   1322     static xmlEntity entity;
   1323     if (!entity.type) {
   1324         entity.type = XML_ENTITY_DECL;
   1325         entity.orig = sharedXHTMLEntityResult;
   1326         entity.content = sharedXHTMLEntityResult;
   1327         entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1328     }
   1329     return &entity;
   1330 }
   1331 
   1332 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
   1333 {
   1334     const char* originalTarget = target;
   1335     WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
   1336         utf16Entity + numberOfCodeUnits, &target, target + targetSize);
   1337     if (conversionResult != WTF::Unicode::conversionOK)
   1338         return 0;
   1339 
   1340     // Even though we must pass the length, libxml expects the entity string to be null terminated.
   1341     ASSERT(target > originalTarget + 1);
   1342     *target = '\0';
   1343     return target - originalTarget;
   1344 }
   1345 
   1346 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
   1347 {
   1348     UChar utf16DecodedEntity[4];
   1349     size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
   1350     if (!numberOfCodeUnits)
   1351         return 0;
   1352 
   1353     ASSERT(numberOfCodeUnits <= 4);
   1354     size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
   1355         reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
   1356     if (!entityLengthInUTF8)
   1357         return 0;
   1358 
   1359     xmlEntityPtr entity = sharedXHTMLEntity();
   1360     entity->length = entityLengthInUTF8;
   1361     entity->name = name;
   1362     return entity;
   1363 }
   1364 
   1365 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
   1366 {
   1367     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1368     xmlEntityPtr ent = xmlGetPredefinedEntity(name);
   1369     if (ent) {
   1370         ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1371         return ent;
   1372     }
   1373 
   1374     ent = xmlGetDocEntity(ctxt->myDoc, name);
   1375     if (!ent && getParser(closure)->isXHTMLDocument()) {
   1376         ent = getXHTMLEntity(name);
   1377         if (ent)
   1378             ent->etype = XML_INTERNAL_GENERAL_ENTITY;
   1379     }
   1380 
   1381     return ent;
   1382 }
   1383 
   1384 static void startDocumentHandler(void* closure)
   1385 {
   1386     xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
   1387     XMLDocumentParser* parser = getParser(closure);
   1388     switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
   1389     parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
   1390     xmlSAX2StartDocument(closure);
   1391 }
   1392 
   1393 static void endDocumentHandler(void* closure)
   1394 {
   1395     getParser(closure)->endDocument();
   1396     xmlSAX2EndDocument(closure);
   1397 }
   1398 
   1399 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
   1400 {
   1401     getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
   1402     xmlSAX2InternalSubset(closure, name, externalID, systemID);
   1403 }
   1404 
   1405 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
   1406 {
   1407     String extId = toString(externalId);
   1408     if (extId == "-//W3C//DTD XHTML 1.0 Transitional//EN"
   1409         || extId == "-//W3C//DTD XHTML 1.1//EN"
   1410         || extId == "-//W3C//DTD XHTML 1.0 Strict//EN"
   1411         || extId == "-//W3C//DTD XHTML 1.0 Frameset//EN"
   1412         || extId == "-//W3C//DTD XHTML Basic 1.0//EN"
   1413         || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"
   1414         || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
   1415         || extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
   1416         || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN"
   1417         || extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN") {
   1418         // Controls if we replace entities or not.
   1419         getParser(closure)->setIsXHTMLDocument(true);
   1420     }
   1421 }
   1422 
   1423 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
   1424 {
   1425     // Nothing to do, but we need this to work around a crasher.
   1426     // http://bugzilla.gnome.org/show_bug.cgi?id=172255
   1427     // http://bugs.webkit.org/show_bug.cgi?id=5792
   1428 }
   1429 
   1430 void XMLDocumentParser::initializeParserContext(const CString& chunk)
   1431 {
   1432     xmlSAXHandler sax;
   1433     memset(&sax, 0, sizeof(sax));
   1434 
   1435     sax.error = normalErrorHandler;
   1436     sax.fatalError = fatalErrorHandler;
   1437     sax.characters = charactersHandler;
   1438     sax.processingInstruction = processingInstructionHandler;
   1439     sax.cdataBlock = cdataBlockHandler;
   1440     sax.comment = commentHandler;
   1441     sax.warning = warningHandler;
   1442     sax.startElementNs = startElementNsHandler;
   1443     sax.endElementNs = endElementNsHandler;
   1444     sax.getEntity = getEntityHandler;
   1445     sax.startDocument = startDocumentHandler;
   1446     sax.endDocument = endDocumentHandler;
   1447     sax.internalSubset = internalSubsetHandler;
   1448     sax.externalSubset = externalSubsetHandler;
   1449     sax.ignorableWhitespace = ignorableWhitespaceHandler;
   1450     sax.entityDecl = xmlSAX2EntityDecl;
   1451     sax.initialized = XML_SAX2_MAGIC;
   1452     m_sawError = false;
   1453     m_sawCSS = false;
   1454     m_sawXSLTransform = false;
   1455     m_sawFirstElement = false;
   1456 
   1457     XMLDocumentParserScope scope(document()->fetcher());
   1458     if (m_parsingFragment) {
   1459         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
   1460     } else {
   1461         ASSERT(!chunk.data());
   1462         m_context = XMLParserContext::createStringParser(&sax, this);
   1463     }
   1464 }
   1465 
   1466 void XMLDocumentParser::doEnd()
   1467 {
   1468     if (!isStopped()) {
   1469         if (m_context) {
   1470             // Tell libxml we're done.
   1471             {
   1472                 XMLDocumentParserScope scope(document()->fetcher());
   1473                 finishParsing(context());
   1474             }
   1475 
   1476             m_context = nullptr;
   1477         }
   1478     }
   1479 
   1480     bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
   1481     if (xmlViewerMode) {
   1482         const char noStyleMessage[] = "This XML file does not appear to have any style information associated with it. The document tree is shown below.";
   1483         document()->setIsViewSource(true);
   1484         V8Document::PrivateScript::transformDocumentToTreeViewMethod(document()->frame(), document(), noStyleMessage);
   1485     } else if (m_sawXSLTransform) {
   1486         xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
   1487         document()->setTransformSource(adoptPtr(new TransformSource(doc)));
   1488         // Make the document think it's done, so it will apply XSL stylesheets.
   1489         document()->setParsing(false);
   1490         document()->styleResolverChanged();
   1491 
   1492         // styleResolverChanged() call can detach the parser and null out its
   1493         // document. In that case, we just bail out.
   1494         if (isDetached())
   1495             return;
   1496 
   1497         document()->setParsing(true);
   1498         DocumentParser::stopParsing();
   1499     }
   1500 }
   1501 
   1502 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
   1503 {
   1504     if (source.isEmpty())
   1505         return 0;
   1506     // Parse in a single chunk into an xmlDocPtr
   1507     // FIXME: Hook up error handlers so that a failure to parse the main
   1508     // document results in good error messages.
   1509     XMLDocumentParserScope scope(fetcher, errorFunc, 0);
   1510     XMLParserInput input(source);
   1511     return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
   1512 }
   1513 
   1514 OrdinalNumber XMLDocumentParser::lineNumber() const
   1515 {
   1516     return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
   1517 }
   1518 
   1519 OrdinalNumber XMLDocumentParser::columnNumber() const
   1520 {
   1521     return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
   1522 }
   1523 
   1524 TextPosition XMLDocumentParser::textPosition() const
   1525 {
   1526     xmlParserCtxtPtr context = this->context();
   1527     if (!context)
   1528         return TextPosition::minimumPosition();
   1529     return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), OrdinalNumber::fromOneBasedInt(context->input->col));
   1530 }
   1531 
   1532 void XMLDocumentParser::stopParsing()
   1533 {
   1534     DocumentParser::stopParsing();
   1535     if (context())
   1536         xmlStopParser(context());
   1537 }
   1538 
   1539 void XMLDocumentParser::resumeParsing()
   1540 {
   1541     ASSERT(!isDetached());
   1542     ASSERT(m_parserPaused);
   1543 
   1544     m_parserPaused = false;
   1545 
   1546     // First, execute any pending callbacks
   1547     while (!m_pendingCallbacks.isEmpty()) {
   1548         OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
   1549         callback->call(this);
   1550 
   1551         // A callback paused the parser
   1552         if (m_parserPaused)
   1553             return;
   1554     }
   1555 
   1556     // Then, write any pending data
   1557     SegmentedString rest = m_pendingSrc;
   1558     m_pendingSrc.clear();
   1559     // There is normally only one string left, so toString() shouldn't copy.
   1560     // In any case, the XML parser runs on the main thread and it's OK if
   1561     // the passed string has more than one reference.
   1562     append(rest.toString().impl());
   1563 
   1564     // Finally, if finish() has been called and write() didn't result
   1565     // in any further callbacks being queued, call end()
   1566     if (m_finishCalled && m_pendingCallbacks.isEmpty())
   1567         end();
   1568 }
   1569 
   1570 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
   1571 {
   1572     ASSERT(!m_context);
   1573     ASSERT(m_parsingFragment);
   1574 
   1575     CString chunkAsUtf8 = chunk.utf8();
   1576 
   1577     // libxml2 takes an int for a length, and therefore can't handle XML chunks
   1578     // larger than 2 GiB.
   1579     if (chunkAsUtf8.length() > INT_MAX)
   1580         return false;
   1581 
   1582     TRACE_EVENT0("blink", "XMLDocumentParser::appendFragmentSource");
   1583     initializeParserContext(chunkAsUtf8);
   1584     xmlParseContent(context());
   1585     endDocument(); // Close any open text nodes.
   1586 
   1587     // FIXME: If this code is actually needed, it should probably move to
   1588     // finish()
   1589     // XMLDocumentParserQt has a similar check (m_stream.error() ==
   1590     // QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). Check if all
   1591     // the chunk has been processed.
   1592     long bytesProcessed = xmlByteConsumed(context());
   1593     if (bytesProcessed == -1 || static_cast<unsigned long>(bytesProcessed) != chunkAsUtf8.length()) {
   1594         // FIXME: I don't believe we can hit this case without also having seen
   1595         // an error or a null byte. If we hit this ASSERT, we've found a test
   1596         // case which demonstrates the need for this code.
   1597         ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
   1598         return false;
   1599     }
   1600 
   1601     // No error if the chunk is well formed or it is not but we have no error.
   1602     return context()->wellFormed || !xmlCtxtGetLastError(context());
   1603 }
   1604 
   1605 // --------------------------------
   1606 
   1607 struct AttributeParseState {
   1608     HashMap<String, String> attributes;
   1609     bool gotAttributes;
   1610 };
   1611 
   1612 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
   1613     const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
   1614     int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
   1615 {
   1616     if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
   1617         return;
   1618 
   1619     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1620     AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
   1621 
   1622     state->gotAttributes = true;
   1623 
   1624     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
   1625     for (int i = 0; i < nbAttributes; ++i) {
   1626         String attrLocalName = toString(attributes[i].localname);
   1627         int valueLength = (int) (attributes[i].end - attributes[i].value);
   1628         String attrValue = toString(attributes[i].value, valueLength);
   1629         String attrPrefix = toString(attributes[i].prefix);
   1630         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
   1631 
   1632         state->attributes.set(attrQName, attrValue);
   1633     }
   1634 }
   1635 
   1636 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
   1637 {
   1638     AttributeParseState state;
   1639     state.gotAttributes = false;
   1640 
   1641     xmlSAXHandler sax;
   1642     memset(&sax, 0, sizeof(sax));
   1643     sax.startElementNs = attributesStartElementNsHandler;
   1644     sax.initialized = XML_SAX2_MAGIC;
   1645     RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
   1646     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
   1647     parseChunk(parser->context(), parseString);
   1648     finishParsing(parser->context());
   1649     attrsOK = state.gotAttributes;
   1650     return state.attributes;
   1651 }
   1652 
   1653 } // namespace blink
   1654