Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2000 Peter Kelly (pmk (at) post.com)
      3  * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
      4  * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org)
      5  * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org)
      6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
      7  * Copyright (C) 2008 Holger Hans Peter Freyther
      8  * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
      9  *
     10  * This library is free software; you can redistribute it and/or
     11  * modify it under the terms of the GNU Library General Public
     12  * License as published by the Free Software Foundation; either
     13  * version 2 of the License, or (at your option) any later version.
     14  *
     15  * This library is distributed in the hope that it will be useful,
     16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     18  * Library General Public License for more details.
     19  *
     20  * You should have received a copy of the GNU Library General Public License
     21  * along with this library; see the file COPYING.LIB.  If not, write to
     22  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     23  * Boston, MA 02110-1301, USA.
     24  */
     25 
     26 #include "config.h"
     27 #include "core/xml/parser/XMLDocumentParser.h"
     28 
     29 #include "bindings/v8/ExceptionState.h"
     30 #include "bindings/v8/ExceptionStatePlaceholder.h"
     31 #include "bindings/v8/ScriptController.h"
     32 #include "bindings/v8/ScriptSourceCode.h"
     33 #include "core/FetchInitiatorTypeNames.h"
     34 #include "core/HTMLNames.h"
     35 #include "core/XMLNSNames.h"
     36 #include "core/dom/CDATASection.h"
     37 #include "core/dom/Comment.h"
     38 #include "core/dom/Document.h"
     39 #include "core/dom/DocumentFragment.h"
     40 #include "core/dom/DocumentType.h"
     41 #include "core/dom/ProcessingInstruction.h"
     42 #include "core/dom/ScriptLoader.h"
     43 #include "core/dom/TransformSource.h"
     44 #include "core/fetch/ResourceFetcher.h"
     45 #include "core/fetch/ScriptResource.h"
     46 #include "core/frame/LocalFrame.h"
     47 #include "core/frame/UseCounter.h"
     48 #include "core/html/HTMLHtmlElement.h"
     49 #include "core/html/HTMLTemplateElement.h"
     50 #include "core/html/parser/HTMLEntityParser.h"
     51 #include "core/html/parser/TextResourceDecoder.h"
     52 #include "core/loader/FrameLoader.h"
     53 #include "core/loader/ImageLoader.h"
     54 #include "core/svg/graphics/SVGImage.h"
     55 #include "core/xml/XMLTreeViewer.h"
     56 #include "core/xml/parser/SharedBufferReader.h"
     57 #include "core/xml/parser/XMLDocumentParserScope.h"
     58 #include "core/xml/parser/XMLParserInput.h"
     59 #include "platform/RuntimeEnabledFeatures.h"
     60 #include "platform/SharedBuffer.h"
     61 #include "platform/network/ResourceError.h"
     62 #include "platform/network/ResourceRequest.h"
     63 #include "platform/network/ResourceResponse.h"
     64 #include "platform/weborigin/SecurityOrigin.h"
     65 #include "wtf/StringExtras.h"
     66 #include "wtf/TemporaryChange.h"
     67 #include "wtf/Threading.h"
     68 #include "wtf/Vector.h"
     69 #include "wtf/unicode/UTF8.h"
     70 #include <libxml/catalog.h>
     71 #include <libxml/parser.h>
     72 #include <libxml/parserInternals.h>
     73 #include <libxslt/xslt.h>
     74 
     75 namespace WebCore {
     76 
     77 using namespace HTMLNames;
     78 
// Upper bound on the size of m_currentNodeStack: pushCurrentNode() reports a
// fatal "Excessive node nesting." error once the stack grows beyond this value.
// FIXME: HTMLConstructionSite has a limit of 512, should these match?
static const unsigned maxXMLTreeDepth = 5000;
     81 
     82 static inline String toString(const xmlChar* string, size_t length)
     83 {
     84     return String::fromUTF8(reinterpret_cast<const char*>(string), length);
     85 }
     86 
     87 static inline String toString(const xmlChar* string)
     88 {
     89     return String::fromUTF8(reinterpret_cast<const char*>(string));
     90 }
     91 
     92 static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
     93 {
     94     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
     95 }
     96 
     97 static inline AtomicString toAtomicString(const xmlChar* string)
     98 {
     99     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
    100 }
    101 
    102 static inline bool hasNoStyleInformation(Document* document)
    103 {
    104     if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
    105         return false;
    106 
    107     if (!document->frame() || !document->frame()->page())
    108         return false;
    109 
    110     if (document->frame()->tree().parent())
    111         return false; // This document is not in a top frame
    112 
    113     if (SVGImage::isInSVGImage(document))
    114         return false;
    115 
    116     return true;
    117 }
    118 
    119 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
    120 public:
    121     PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
    122         int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
    123         : m_localName(localName)
    124         , m_prefix(prefix)
    125         , m_uri(uri)
    126         , m_namespaceCount(namespaceCount)
    127         , m_attributeCount(attributeCount)
    128         , m_defaultedCount(defaultedCount)
    129     {
    130         m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
    131         for (int i = 0; i < namespaceCount * 2 ; ++i)
    132             m_namespaces[i] = xmlStrdup(namespaces[i]);
    133         m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
    134         for (int i = 0; i < attributeCount; ++i) {
    135             // Each attribute has 5 elements in the array:
    136             // name, prefix, uri, value and an end pointer.
    137             for (int j = 0; j < 3; ++j)
    138                 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
    139             int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
    140             m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
    141             m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
    142         }
    143     }
    144 
    145     virtual ~PendingStartElementNSCallback()
    146     {
    147         for (int i = 0; i < m_namespaceCount * 2; ++i)
    148             xmlFree(m_namespaces[i]);
    149         xmlFree(m_namespaces);
    150         for (int i = 0; i < m_attributeCount; ++i)
    151             for (int j = 0; j < 4; ++j)
    152                 xmlFree(m_attributes[i * 5 + j]);
    153         xmlFree(m_attributes);
    154     }
    155 
    156     virtual void call(XMLDocumentParser* parser) OVERRIDE
    157     {
    158         parser->startElementNs(m_localName, m_prefix, m_uri,
    159             m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
    160             m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
    161     }
    162 
    163 private:
    164     AtomicString m_localName;
    165     AtomicString m_prefix;
    166     AtomicString m_uri;
    167     int m_namespaceCount;
    168     xmlChar** m_namespaces;
    169     int m_attributeCount;
    170     int m_defaultedCount;
    171     xmlChar** m_attributes;
    172 };
    173 
    174 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
    175 public:
    176     virtual void call(XMLDocumentParser* parser) OVERRIDE
    177     {
    178         parser->endElementNs();
    179     }
    180 };
    181 
    182 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
    183 public:
    184     PendingCharactersCallback(const xmlChar* chars, int length)
    185         : m_chars(xmlStrndup(chars, length))
    186         , m_length(length)
    187     {
    188     }
    189 
    190     virtual ~PendingCharactersCallback()
    191     {
    192         xmlFree(m_chars);
    193     }
    194 
    195     virtual void call(XMLDocumentParser* parser) OVERRIDE
    196     {
    197         parser->characters(m_chars, m_length);
    198     }
    199 
    200 private:
    201     xmlChar* m_chars;
    202     int m_length;
    203 };
    204 
    205 class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
    206 public:
    207     PendingProcessingInstructionCallback(const String& target, const String& data)
    208         : m_target(target)
    209         , m_data(data)
    210     {
    211     }
    212 
    213     virtual void call(XMLDocumentParser* parser) OVERRIDE
    214     {
    215         parser->processingInstruction(m_target, m_data);
    216     }
    217 
    218 private:
    219     String m_target;
    220     String m_data;
    221 };
    222 
    223 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
    224 public:
    225     explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }
    226 
    227     virtual void call(XMLDocumentParser* parser) OVERRIDE
    228     {
    229         parser->cdataBlock(m_text);
    230     }
    231 
    232 private:
    233     String m_text;
    234 };
    235 
    236 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
    237 public:
    238     explicit PendingCommentCallback(const String& text) : m_text(text) { }
    239 
    240     virtual void call(XMLDocumentParser* parser) OVERRIDE
    241     {
    242         parser->comment(m_text);
    243     }
    244 
    245 private:
    246     String m_text;
    247 };
    248 
    249 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
    250 public:
    251     PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
    252         : m_name(name)
    253         , m_externalID(externalID)
    254         , m_systemID(systemID)
    255     {
    256     }
    257 
    258     virtual void call(XMLDocumentParser* parser) OVERRIDE
    259     {
    260         parser->internalSubset(m_name, m_externalID, m_systemID);
    261     }
    262 
    263 private:
    264     String m_name;
    265     String m_externalID;
    266     String m_systemID;
    267 };
    268 
    269 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
    270 public:
    271     PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
    272         : m_type(type)
    273         , m_message(xmlStrdup(message))
    274         , m_lineNumber(lineNumber)
    275         , m_columnNumber(columnNumber)
    276     {
    277     }
    278 
    279     virtual ~PendingErrorCallback()
    280     {
    281         xmlFree(m_message);
    282     }
    283 
    284     virtual void call(XMLDocumentParser* parser) OVERRIDE
    285     {
    286         parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
    287     }
    288 
    289 private:
    290     XMLErrors::ErrorType m_type;
    291     xmlChar* m_message;
    292     OrdinalNumber m_lineNumber;
    293     OrdinalNumber m_columnNumber;
    294 };
    295 
    296 void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
    297 {
    298     ASSERT(n);
    299     ASSERT(m_currentNode);
    300 #if !ENABLE(OILPAN)
    301     if (n != document())
    302         n->ref();
    303 #endif
    304     m_currentNodeStack.append(m_currentNode);
    305     m_currentNode = n;
    306     if (m_currentNodeStack.size() > maxXMLTreeDepth)
    307         handleError(XMLErrors::ErrorTypeFatal, "Excessive node nesting.", textPosition());
    308 }
    309 
    310 void XMLDocumentParser::popCurrentNode()
    311 {
    312     if (!m_currentNode)
    313         return;
    314     ASSERT(m_currentNodeStack.size());
    315 #if !ENABLE(OILPAN)
    316     if (m_currentNode != document())
    317         m_currentNode->deref();
    318 #endif
    319     m_currentNode = m_currentNodeStack.last();
    320     m_currentNodeStack.removeLast();
    321 }
    322 
    323 void XMLDocumentParser::clearCurrentNodeStack()
    324 {
    325 #if !ENABLE(OILPAN)
    326     if (m_currentNode && m_currentNode != document())
    327         m_currentNode->deref();
    328 #endif
    329     m_currentNode = nullptr;
    330     m_leafTextNode = nullptr;
    331 
    332     if (m_currentNodeStack.size()) { // Aborted parsing.
    333 #if !ENABLE(OILPAN)
    334         for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
    335             m_currentNodeStack[i]->deref();
    336         if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
    337             m_currentNodeStack[0]->deref();
    338 #endif
    339         m_currentNodeStack.clear();
    340     }
    341 }
    342 
    343 void XMLDocumentParser::insert(const SegmentedString&)
    344 {
    345     ASSERT_NOT_REACHED();
    346 }
    347 
    348 void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
    349 {
    350     SegmentedString source(inputSource);
    351     if (m_sawXSLTransform || !m_sawFirstElement)
    352         m_originalSourceForTransform.append(source);
    353 
    354     if (isStopped() || m_sawXSLTransform)
    355         return;
    356 
    357     if (m_parserPaused) {
    358         m_pendingSrc.append(source);
    359         return;
    360     }
    361 
    362     // JavaScript can detach the parser. Make sure this is not released
    363     // before the end of this method.
    364     RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
    365 
    366     doWrite(source.toString());
    367 }
    368 
    369 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
    370 {
    371     m_xmlErrors.handleError(type, formattedMessage, position);
    372     if (type != XMLErrors::ErrorTypeWarning)
    373         m_sawError = true;
    374     if (type == XMLErrors::ErrorTypeFatal)
    375         stopParsing();
    376 }
    377 
    378 void XMLDocumentParser::enterText()
    379 {
    380     ASSERT(m_bufferedText.size() == 0);
    381     ASSERT(!m_leafTextNode);
    382     m_leafTextNode = Text::create(m_currentNode->document(), "");
    383     m_currentNode->parserAppendChild(m_leafTextNode.get());
    384 }
    385 
    386 void XMLDocumentParser::exitText()
    387 {
    388     if (isStopped())
    389         return;
    390 
    391     if (!m_leafTextNode)
    392         return;
    393 
    394     m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
    395     m_bufferedText.clear();
    396     m_leafTextNode = nullptr;
    397 }
    398 
    399 void XMLDocumentParser::detach()
    400 {
    401     clearCurrentNodeStack();
    402     ScriptableDocumentParser::detach();
    403 }
    404 
    405 void XMLDocumentParser::end()
    406 {
    407     // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
    408     // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
    409     ASSERT(!m_parsingFragment);
    410 
    411     doEnd();
    412 
    413     // doEnd() call above can detach the parser and null out its document.
    414     // In that case, we just bail out.
    415     if (isDetached())
    416         return;
    417 
    418     // doEnd() could process a script tag, thus pausing parsing.
    419     if (m_parserPaused)
    420         return;
    421 
    422     if (m_sawError) {
    423         insertErrorMessageBlock();
    424     } else {
    425         exitText();
    426         document()->styleResolverChanged();
    427     }
    428 
    429     if (isParsing())
    430         prepareToStopParsing();
    431     document()->setReadyState(Document::Interactive);
    432     clearCurrentNodeStack();
    433     document()->finishedParsing();
    434 }
    435 
    436 void XMLDocumentParser::finish()
    437 {
    438     // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
    439     // makes sense to call any methods on DocumentParser once it's been stopped.
    440     // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
    441 
    442     if (m_parserPaused)
    443         m_finishCalled = true;
    444     else
    445         end();
    446 }
    447 
    448 void XMLDocumentParser::insertErrorMessageBlock()
    449 {
    450     m_xmlErrors.insertErrorMessageBlock();
    451 }
    452 
    453 void XMLDocumentParser::notifyFinished(Resource* unusedResource)
    454 {
    455     ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
    456 
    457     ScriptSourceCode sourceCode(m_pendingScript.get());
    458     bool errorOccurred = m_pendingScript->errorOccurred();
    459     bool wasCanceled = m_pendingScript->wasCanceled();
    460 
    461     m_pendingScript->removeClient(this);
    462     m_pendingScript = 0;
    463 
    464     RefPtrWillBeRawPtr<Element> e = m_scriptElement;
    465     m_scriptElement = nullptr;
    466 
    467     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
    468     ASSERT(scriptLoader);
    469 
    470     // JavaScript can detach this parser, make sure it's kept alive even if
    471     // detached.
    472     RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
    473 
    474     if (errorOccurred) {
    475         scriptLoader->dispatchErrorEvent();
    476     } else if (!wasCanceled) {
    477         scriptLoader->executeScript(sourceCode);
    478         scriptLoader->dispatchLoadEvent();
    479     }
    480 
    481     m_scriptElement = nullptr;
    482 
    483     if (!isDetached() && !m_requestingScript)
    484         resumeParsing();
    485 }
    486 
    487 bool XMLDocumentParser::isWaitingForScripts() const
    488 {
    489     return m_pendingScript;
    490 }
    491 
    492 void XMLDocumentParser::pauseParsing()
    493 {
    494     if (!m_parsingFragment)
    495         m_parserPaused = true;
    496 }
    497 
    498 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
    499 {
    500     if (!chunk.length())
    501         return true;
    502 
    503     // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
    504     // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
    505     // For now we have a hack for script/style innerHTML support:
    506     if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) {
    507         fragment->parserAppendChild(fragment->document().createTextNode(chunk));
    508         return true;
    509     }
    510 
    511     RefPtrWillBeRawPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
    512     bool wellFormed = parser->appendFragmentSource(chunk);
    513 
    514     // Do not call finish(). Current finish() and doEnd() implementations touch
    515     // the main Document/loader and can cause crashes in the fragment case.
    516 
    517     // Allows ~DocumentParser to assert it was detached before destruction.
    518     parser->detach();
    519     // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
    520     return wellFormed;
    521 }
    522 
// Sentinel "null descriptor": openFunc() returns its address when a load is
// refused, and readFunc()/closeFunc() compare their context against it.
static int globalDescriptor = 0;
// Thread recorded by initializeLibXMLIfNecessary(); matchFunc() only matches
// loads issued from this thread.
static ThreadIdentifier libxmlLoaderThread = 0;
    525 
    526 static int matchFunc(const char*)
    527 {
    528     // Only match loads initiated due to uses of libxml2 from within
    529     // XMLDocumentParser to avoid interfering with client applications that also
    530     // use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
    531     return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
    532 }
    533 
    534 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
    535 {
    536     if (!scriptingContentIsAllowed(parserContentPolicy))
    537         element->stripScriptingAttributes(attributeVector);
    538     element->parserSetAttributes(attributeVector);
    539 }
    540 
    541 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
    542 {
    543     // Hack around libxml2's lack of encoding overide support by manually
    544     // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
    545     // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks and
    546     // switch encodings, causing the parse to fail.
    547     if (is8Bit) {
    548         xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
    549         return;
    550     }
    551 
    552     const UChar BOM = 0xFEFF;
    553     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
    554     xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
    555 }
    556 
    557 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
    558 {
    559     bool is8Bit = chunk.is8Bit();
    560     switchEncoding(ctxt, is8Bit);
    561     if (is8Bit)
    562         xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
    563     else
    564         xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
    565 }
    566 
    567 static void finishParsing(xmlParserCtxtPtr ctxt)
    568 {
    569     xmlParseChunk(ctxt, 0, 0, 1);
    570 }
    571 
// Poison xmlParseChunk for the rest of this file: any later use expands to
// the tokens below and fails to compile, steering callers to parseChunk().
#define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
    573 
    574 static bool isLibxmlDefaultCatalogFile(const String& urlString)
    575 {
    576     // On non-Windows platforms libxml asks for this URL, the
    577     // "XML_XML_DEFAULT_CATALOG", on initialization.
    578     if (urlString == "file:///etc/xml/catalog")
    579         return true;
    580 
    581     // On Windows, libxml computes a URL relative to where its DLL resides.
    582     if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
    583         return true;
    584     return false;
    585 }
    586 
    587 static bool shouldAllowExternalLoad(const KURL& url)
    588 {
    589     String urlString = url.string();
    590 
    591     // This isn't really necessary now that initializeLibXMLIfNecessary
    592     // disables catalog support in libxml, but keeping it for defense in depth.
    593     if (isLibxmlDefaultCatalogFile(url))
    594         return false;
    595 
    596     // The most common DTD. There isn't much point in hammering www.w3c.org by
    597     // requesting this URL for every XHTML document.
    598     if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
    599         return false;
    600 
    601     // Similarly, there isn't much point in requesting the SVG DTD.
    602     if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
    603         return false;
    604 
    605     // The libxml doesn't give us a lot of context for deciding whether to allow
    606     // this request. In the worst case, this load could be for an external
    607     // entity and the resulting document could simply read the retrieved
    608     // content. If we had more context, we could potentially allow the parser to
    609     // load a DTD. As things stand, we take the conservative route and allow
    610     // same-origin requests only.
    611     if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
    612         XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
    613         return false;
    614     }
    615 
    616     return true;
    617 }
    618 
    619 static void* openFunc(const char* uri)
    620 {
    621     ASSERT(XMLDocumentParserScope::currentFetcher);
    622     ASSERT(currentThread() == libxmlLoaderThread);
    623 
    624     KURL url(KURL(), uri);
    625 
    626     if (!shouldAllowExternalLoad(url))
    627         return &globalDescriptor;
    628 
    629     KURL finalURL;
    630     RefPtr<SharedBuffer> data;
    631 
    632     {
    633         ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
    634         XMLDocumentParserScope scope(0);
    635         // FIXME: We should restore the original global error handler as well.
    636 
    637         if (fetcher->frame()) {
    638             FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions());
    639             ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request);
    640             if (resource && !resource->errorOccurred()) {
    641                 data = resource->resourceBuffer();
    642                 finalURL = resource->response().url();
    643             }
    644         }
    645     }
    646 
    647     // We have to check the URL again after the load to catch redirects.
    648     // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
    649     if (!shouldAllowExternalLoad(finalURL))
    650         return &globalDescriptor;
    651 
    652     return new SharedBufferReader(data);
    653 }
    654 
    655 static int readFunc(void* context, char* buffer, int len)
    656 {
    657     // Do 0-byte reads in case of a null descriptor
    658     if (context == &globalDescriptor)
    659         return 0;
    660 
    661     SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
    662     return data->readData(buffer, len);
    663 }
    664 
    665 static int writeFunc(void*, const char*, int)
    666 {
    667     // Always just do 0-byte writes
    668     return 0;
    669 }
    670 
    671 static int closeFunc(void* context)
    672 {
    673     if (context != &globalDescriptor) {
    674         SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
    675         delete data;
    676     }
    677     return 0;
    678 }
    679 
    680 static void errorFunc(void*, const char*, ...)
    681 {
    682     // FIXME: It would be nice to display error messages somewhere.
    683 }
    684 
    685 static void initializeLibXMLIfNecessary()
    686 {
    687     static bool didInit = false;
    688     if (didInit)
    689         return;
    690 
    691     // We don't want libxml to try and load catalogs.
    692     // FIXME: It's not nice to set global settings in libxml, embedders of Blink
    693     // could be trying to use libxml themselves.
    694     xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
    695     xmlInitParser();
    696     xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
    697     xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
    698     libxmlLoaderThread = currentThread();
    699     didInit = true;
    700 }
    701 
    702 
    703 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
    704 {
    705     initializeLibXMLIfNecessary();
    706     xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
    707     parser->_private = userData;
    708     parser->replaceEntities = true;
    709     return adoptRef(new XMLParserContext(parser));
    710 }
    711 
    712 // Chunk should be encoded in UTF-8
    713 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
    714 {
    715     initializeLibXMLIfNecessary();
    716 
    717     // appendFragmentSource() checks that the length doesn't overflow an int.
    718     xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
    719 
    720     if (!parser)
    721         return nullptr;
    722 
    723     // Copy the sax handler
    724     memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
    725 
    726     // Set parser options.
    727     // XML_PARSE_NODICT: default dictionary option.
    728     // XML_PARSE_NOENT: force entities substitutions.
    729     xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
    730 
    731     // Internal initialization
    732     parser->sax2 = 1;
    733     parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
    734     parser->depth = 0;
    735     parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
    736     parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
    737     parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
    738     parser->_private = userData;
    739 
    740     return adoptRef(new XMLParserContext(parser));
    741 }
    742 
    743 // --------------------------------
    744 
    745 bool XMLDocumentParser::supportsXMLVersion(const String& version)
    746 {
    747     return version == "1.0";
    748 }
    749 
    750 XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
    751     : ScriptableDocumentParser(document)
    752     , m_hasView(frameView)
    753     , m_context(nullptr)
    754     , m_currentNode(&document)
    755     , m_isCurrentlyParsing8BitChunk(false)
    756     , m_sawError(false)
    757     , m_sawCSS(false)
    758     , m_sawXSLTransform(false)
    759     , m_sawFirstElement(false)
    760     , m_isXHTMLDocument(false)
    761     , m_parserPaused(false)
    762     , m_requestingScript(false)
    763     , m_finishCalled(false)
    764     , m_xmlErrors(&document)
    765     , m_pendingScript(0)
    766     , m_scriptStartPosition(TextPosition::belowRangePosition())
    767     , m_parsingFragment(false)
    768 {
    769     // This is XML being used as a document resource.
    770     if (frameView && document.isXMLDocument())
    771         UseCounter::count(document, UseCounter::XMLDocument);
    772 }
    773 
    774 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
    775     : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
    776     , m_hasView(false)
    777     , m_context(nullptr)
    778     , m_currentNode(fragment)
    779     , m_isCurrentlyParsing8BitChunk(false)
    780     , m_sawError(false)
    781     , m_sawCSS(false)
    782     , m_sawXSLTransform(false)
    783     , m_sawFirstElement(false)
    784     , m_isXHTMLDocument(false)
    785     , m_parserPaused(false)
    786     , m_requestingScript(false)
    787     , m_finishCalled(false)
    788     , m_xmlErrors(&fragment->document())
    789     , m_pendingScript(0)
    790     , m_scriptStartPosition(TextPosition::belowRangePosition())
    791     , m_parsingFragment(true)
    792 {
    793 #if !ENABLE(OILPAN)
    794     fragment->ref();
    795 #endif
    796 
    797     // Add namespaces based on the parent node
    798     WillBeHeapVector<RawPtrWillBeMember<Element> > elemStack;
    799     while (parentElement) {
    800         elemStack.append(parentElement);
    801 
    802         ContainerNode* n = parentElement->parentNode();
    803         if (!n || !n->isElementNode())
    804             break;
    805         parentElement = toElement(n);
    806     }
    807 
    808     if (elemStack.isEmpty())
    809         return;
    810 
    811     for (; !elemStack.isEmpty(); elemStack.removeLast()) {
    812         Element* element = elemStack.last();
    813         if (element->hasAttributes()) {
    814             AttributeCollection attributes = element->attributes();
    815             AttributeCollection::const_iterator end = attributes.end();
    816             for (AttributeCollection::const_iterator it = attributes.begin(); it != end; ++it) {
    817                 if (it->localName() == xmlnsAtom)
    818                     m_defaultNamespaceURI = it->value();
    819                 else if (it->prefix() == xmlnsAtom)
    820                     m_prefixToNamespaceMap.set(it->localName(), it->value());
    821             }
    822         }
    823     }
    824 
    825     // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
    826     if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
    827         m_defaultNamespaceURI = parentElement->namespaceURI();
    828 }
    829 
    830 XMLParserContext::~XMLParserContext()
    831 {
    832     if (m_context->myDoc)
    833         xmlFreeDoc(m_context->myDoc);
    834     xmlFreeParserCtxt(m_context);
    835 }
    836 
    837 XMLDocumentParser::~XMLDocumentParser()
    838 {
    839 #if !ENABLE(OILPAN)
    840     // The XMLDocumentParser will always be detached before being destroyed.
    841     ASSERT(m_currentNodeStack.isEmpty());
    842     ASSERT(!m_currentNode);
    843 #endif
    844 
    845     // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
    846     if (m_pendingScript)
    847         m_pendingScript->removeClient(this);
    848 }
    849 
    850 void XMLDocumentParser::trace(Visitor* visitor)
    851 {
    852     visitor->trace(m_currentNode);
    853 #if ENABLE(OILPAN)
    854     visitor->trace(m_currentNodeStack);
    855 #endif
    856     visitor->trace(m_leafTextNode);
    857     visitor->trace(m_xmlErrors);
    858     visitor->trace(m_scriptElement);
    859     ScriptableDocumentParser::trace(visitor);
    860 }
    861 
    862 void XMLDocumentParser::doWrite(const String& parseString)
    863 {
    864     ASSERT(!isDetached());
    865     if (!m_context)
    866         initializeParserContext();
    867 
    868     // Protect the libxml context from deletion during a callback
    869     RefPtr<XMLParserContext> context = m_context;
    870 
    871     // libXML throws an error if you try to switch the encoding for an empty
    872     // string.
    873     if (parseString.length()) {
    874         // JavaScript may cause the parser to detach during parseChunk
    875         // keep this alive until this function is done.
    876         RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
    877 
    878         XMLDocumentParserScope scope(document()->fetcher());
    879         TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
    880         parseChunk(context->context(), parseString);
    881 
    882         // JavaScript (which may be run under the parseChunk callstack) may
    883         // cause the parser to be stopped or detached.
    884         if (isStopped())
    885             return;
    886     }
    887 
    888     // FIXME: Why is this here? And why is it after we process the passed
    889     // source?
    890     if (document()->sawDecodingError()) {
    891         // If the decoder saw an error, report it as fatal (stops parsing)
    892         TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
    893         handleError(XMLErrors::ErrorTypeFatal, "Encoding error", position);
    894     }
    895 }
    896 
    897 struct xmlSAX2Namespace {
    898     const xmlChar* prefix;
    899     const xmlChar* uri;
    900 };
    901 
    902 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState)
    903 {
    904     xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
    905     for (int i = 0; i < nbNamespaces; ++i) {
    906         AtomicString namespaceQName = xmlnsAtom;
    907         AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
    908         if (namespaces[i].prefix)
    909             namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
    910 
    911         QualifiedName parsedName = anyName;
    912         if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState))
    913             return;
    914 
    915         prefixedAttributes.append(Attribute(parsedName, namespaceURI));
    916     }
    917 }
    918 
    919 struct xmlSAX2Attributes {
    920     const xmlChar* localname;
    921     const xmlChar* prefix;
    922     const xmlChar* uri;
    923     const xmlChar* value;
    924     const xmlChar* end;
    925 };
    926 
    927 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState)
    928 {
    929     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
    930     for (int i = 0; i < nbAttributes; ++i) {
    931         int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
    932         AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
    933         String attrPrefix = toString(attributes[i].prefix);
    934         AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
    935         AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
    936 
    937         QualifiedName parsedName = anyName;
    938         if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState))
    939             return;
    940 
    941         prefixedAttributes.append(Attribute(parsedName, attrValue));
    942     }
    943 }
    944 
    945 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
    946     const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
    947 {
    948     if (isStopped())
    949         return;
    950 
    951     if (m_parserPaused) {
    952         m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
    953             nbAttributes, nbDefaulted, libxmlAttributes)));
    954         return;
    955     }
    956 
    957     exitText();
    958 
    959     AtomicString adjustedURI = uri;
    960     if (m_parsingFragment && adjustedURI.isNull()) {
    961         if (!prefix.isNull())
    962             adjustedURI = m_prefixToNamespaceMap.get(prefix);
    963         else
    964             adjustedURI = m_defaultNamespaceURI;
    965     }
    966 
    967     bool isFirstElement = !m_sawFirstElement;
    968     m_sawFirstElement = true;
    969 
    970     QualifiedName qName(prefix, localName, adjustedURI);
    971     RefPtrWillBeRawPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
    972     if (!newElement) {
    973         stopParsing();
    974         return;
    975     }
    976 
    977     Vector<Attribute> prefixedAttributes;
    978     TrackExceptionState exceptionState;
    979     handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState);
    980     if (exceptionState.hadException()) {
    981         setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    982         stopParsing();
    983         return;
    984     }
    985 
    986     handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState);
    987     setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    988     if (exceptionState.hadException()) {
    989         stopParsing();
    990         return;
    991     }
    992 
    993     newElement->beginParsingChildren();
    994 
    995     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
    996     if (scriptLoader)
    997         m_scriptStartPosition = textPosition();
    998 
    999     m_currentNode->parserAppendChild(newElement.get());
   1000 
   1001     if (isHTMLTemplateElement(*newElement))
   1002         pushCurrentNode(toHTMLTemplateElement(*newElement).content());
   1003     else
   1004         pushCurrentNode(newElement.get());
   1005 
   1006     if (isHTMLHtmlElement(*newElement))
   1007         toHTMLHtmlElement(*newElement).insertedByParser();
   1008 
   1009     if (!m_parsingFragment && isFirstElement && document()->frame())
   1010         document()->frame()->loader().dispatchDocumentElementAvailable();
   1011 }
   1012 
   1013 void XMLDocumentParser::endElementNs()
   1014 {
   1015     if (isStopped())
   1016         return;
   1017 
   1018     if (m_parserPaused) {
   1019         m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
   1020         return;
   1021     }
   1022 
   1023     // JavaScript can detach the parser. Make sure this is not released before
   1024     // the end of this method.
   1025     RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
   1026 
   1027     exitText();
   1028 
   1029     RefPtrWillBeRawPtr<ContainerNode> n = m_currentNode;
   1030     if (m_currentNode->isElementNode())
   1031         toElement(n.get())->finishParsingChildren();
   1032 
   1033     if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) {
   1034         popCurrentNode();
   1035         n->remove(IGNORE_EXCEPTION);
   1036         return;
   1037     }
   1038 
   1039     if (!n->isElementNode() || !m_hasView) {
   1040         popCurrentNode();
   1041         return;
   1042     }
   1043 
   1044     Element* element = toElement(n);
   1045 
   1046     // The element's parent may have already been removed from document.
   1047     // Parsing continues in this case, but scripts aren't executed.
   1048     if (!element->inDocument()) {
   1049         popCurrentNode();
   1050         return;
   1051     }
   1052 
   1053     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
   1054     if (!scriptLoader) {
   1055         popCurrentNode();
   1056         return;
   1057     }
   1058 
   1059     // Don't load external scripts for standalone documents (for now).
   1060     ASSERT(!m_pendingScript);
   1061     m_requestingScript = true;
   1062 
   1063     if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
   1064         // FIXME: Script execution should be shared between
   1065         // the libxml2 and Qt XMLDocumentParser implementations.
   1066 
   1067         if (scriptLoader->readyToBeParserExecuted()) {
   1068             scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
   1069         } else if (scriptLoader->willBeParserExecuted()) {
   1070             m_pendingScript = scriptLoader->resource();
   1071             m_scriptElement = element;
   1072             m_pendingScript->addClient(this);
   1073 
   1074             // m_pendingScript will be 0 if script was already loaded and
   1075             // addClient() executed it.
   1076             if (m_pendingScript)
   1077                 pauseParsing();
   1078         } else {
   1079             m_scriptElement = nullptr;
   1080         }
   1081 
   1082         // JavaScript may have detached the parser
   1083         if (isDetached())
   1084             return;
   1085     }
   1086     m_requestingScript = false;
   1087     popCurrentNode();
   1088 }
   1089 
   1090 void XMLDocumentParser::characters(const xmlChar* chars, int length)
   1091 {
   1092     if (isStopped())
   1093         return;
   1094 
   1095     if (m_parserPaused) {
   1096         m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
   1097         return;
   1098     }
   1099 
   1100     if (!m_leafTextNode)
   1101         enterText();
   1102     m_bufferedText.append(chars, length);
   1103 }
   1104 
   1105 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
   1106 {
   1107     if (isStopped())
   1108         return;
   1109 
   1110     char formattedMessage[1024];
   1111     vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
   1112 
   1113     if (m_parserPaused) {
   1114         m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
   1115         return;
   1116     }
   1117 
   1118     handleError(type, formattedMessage, textPosition());
   1119 }
   1120 
   1121 void XMLDocumentParser::processingInstruction(const String& target, const String& data)
   1122 {
   1123     if (isStopped())
   1124         return;
   1125 
   1126     if (m_parserPaused) {
   1127         m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target, data)));
   1128         return;
   1129     }
   1130 
   1131     exitText();
   1132 
   1133     // ### handle exceptions
   1134     TrackExceptionState exceptionState;
   1135     RefPtrWillBeRawPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState);
   1136     if (exceptionState.hadException())
   1137         return;
   1138 
   1139     pi->setCreatedByParser(true);
   1140 
   1141     m_currentNode->parserAppendChild(pi.get());
   1142 
   1143     pi->setCreatedByParser(false);
   1144 
   1145     if (pi->isCSS())
   1146         m_sawCSS = true;
   1147 
   1148     if (!RuntimeEnabledFeatures::xsltEnabled())
   1149         return;
   1150 
   1151     m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
   1152     if (m_sawXSLTransform && !document()->transformSourceDocument()) {
   1153         // This behavior is very tricky. We call stopParsing() here because we
   1154         // want to stop processing the document until we're ready to apply the
   1155         // transform, but we actually still want to be fed decoded string pieces
   1156         // to accumulate in m_originalSourceForTransform. So, we call
   1157         // stopParsing() here and check isStopped() in element callbacks.
   1158         // FIXME: This contradicts the contract of DocumentParser.
   1159         stopParsing();
   1160     }
   1161 }
   1162 
   1163 void XMLDocumentParser::cdataBlock(const String& text)
   1164 {
   1165     if (isStopped())
   1166         return;
   1167 
   1168     if (m_parserPaused) {
   1169         m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
   1170         return;
   1171     }
   1172 
   1173     exitText();
   1174 
   1175     m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), text));
   1176 }
   1177 
   1178 void XMLDocumentParser::comment(const String& text)
   1179 {
   1180     if (isStopped())
   1181         return;
   1182 
   1183     if (m_parserPaused) {
   1184         m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
   1185         return;
   1186     }
   1187 
   1188     exitText();
   1189 
   1190     m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), text));
   1191 }
   1192 
   1193 enum StandaloneInfo {
   1194     StandaloneUnspecified = -2,
   1195     NoXMlDeclaration,
   1196     StandaloneNo,
   1197     StandaloneYes
   1198 };
   1199 
   1200 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
   1201 {
   1202     StandaloneInfo standaloneInfo = static_cast<StandaloneInfo>(standalone);
   1203     if (standaloneInfo == NoXMlDeclaration) {
   1204         document()->setHasXMLDeclaration(false);
   1205         return;
   1206     }
   1207 
   1208     if (!version.isNull())
   1209         document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
   1210     if (standalone != StandaloneUnspecified)
   1211         document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
   1212     if (!encoding.isNull())
   1213         document()->setXMLEncoding(encoding);
   1214     document()->setHasXMLDeclaration(true);
   1215 }
   1216 
   1217 void XMLDocumentParser::endDocument()
   1218 {
   1219     exitText();
   1220 }
   1221 
   1222 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
   1223 {
   1224     if (isStopped())
   1225         return;
   1226 
   1227     if (m_parserPaused) {
   1228         m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
   1229         return;
   1230     }
   1231 
   1232     if (document())
   1233         document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
   1234 }
   1235 
   1236 static inline XMLDocumentParser* getParser(void* closure)
   1237 {
   1238     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1239     return static_cast<XMLDocumentParser*>(ctxt->_private);
   1240 }
   1241 
   1242 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
   1243 {
   1244     getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
   1245 }
   1246 
   1247 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
   1248 {
   1249     getParser(closure)->endElementNs();
   1250 }
   1251 
   1252 static void charactersHandler(void* closure, const xmlChar* chars, int length)
   1253 {
   1254     getParser(closure)->characters(chars, length);
   1255 }
   1256 
   1257 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
   1258 {
   1259     getParser(closure)->processingInstruction(toString(target), toString(data));
   1260 }
   1261 
   1262 static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
   1263 {
   1264     getParser(closure)->cdataBlock(toString(text, length));
   1265 }
   1266 
   1267 static void commentHandler(void* closure, const xmlChar* text)
   1268 {
   1269     getParser(closure)->comment(toString(text));
   1270 }
   1271 
   1272 WTF_ATTRIBUTE_PRINTF(2, 3)
   1273 static void warningHandler(void* closure, const char* message, ...)
   1274 {
   1275     va_list args;
   1276     va_start(args, message);
   1277     getParser(closure)->error(XMLErrors::ErrorTypeWarning, message, args);
   1278     va_end(args);
   1279 }
   1280 
   1281 WTF_ATTRIBUTE_PRINTF(2, 3)
   1282 static void fatalErrorHandler(void* closure, const char* message, ...)
   1283 {
   1284     va_list args;
   1285     va_start(args, message);
   1286     getParser(closure)->error(XMLErrors::ErrorTypeFatal, message, args);
   1287     va_end(args);
   1288 }
   1289 
   1290 WTF_ATTRIBUTE_PRINTF(2, 3)
   1291 static void normalErrorHandler(void* closure, const char* message, ...)
   1292 {
   1293     va_list args;
   1294     va_start(args, message);
   1295     getParser(closure)->error(XMLErrors::ErrorTypeNonFatal, message, args);
   1296     va_end(args);
   1297 }
   1298 
   1299 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is a hack
   1300 // to avoid malloc/free. Using a global variable like this could cause trouble
   1301 // if libxml implementation details were to change
   1302 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
   1303 
   1304 static xmlEntityPtr sharedXHTMLEntity()
   1305 {
   1306     static xmlEntity entity;
   1307     if (!entity.type) {
   1308         entity.type = XML_ENTITY_DECL;
   1309         entity.orig = sharedXHTMLEntityResult;
   1310         entity.content = sharedXHTMLEntityResult;
   1311         entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1312     }
   1313     return &entity;
   1314 }
   1315 
   1316 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
   1317 {
   1318     const char* originalTarget = target;
   1319     WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
   1320         utf16Entity + numberOfCodeUnits, &target, target + targetSize);
   1321     if (conversionResult != WTF::Unicode::conversionOK)
   1322         return 0;
   1323 
   1324     // Even though we must pass the length, libxml expects the entity string to be null terminated.
   1325     ASSERT(target > originalTarget + 1);
   1326     *target = '\0';
   1327     return target - originalTarget;
   1328 }
   1329 
   1330 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
   1331 {
   1332     UChar utf16DecodedEntity[4];
   1333     size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
   1334     if (!numberOfCodeUnits)
   1335         return 0;
   1336 
   1337     ASSERT(numberOfCodeUnits <= 4);
   1338     size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
   1339         reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
   1340     if (!entityLengthInUTF8)
   1341         return 0;
   1342 
   1343     xmlEntityPtr entity = sharedXHTMLEntity();
   1344     entity->length = entityLengthInUTF8;
   1345     entity->name = name;
   1346     return entity;
   1347 }
   1348 
   1349 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
   1350 {
   1351     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1352     xmlEntityPtr ent = xmlGetPredefinedEntity(name);
   1353     if (ent) {
   1354         ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1355         return ent;
   1356     }
   1357 
   1358     ent = xmlGetDocEntity(ctxt->myDoc, name);
   1359     if (!ent && getParser(closure)->isXHTMLDocument()) {
   1360         ent = getXHTMLEntity(name);
   1361         if (ent)
   1362             ent->etype = XML_INTERNAL_GENERAL_ENTITY;
   1363     }
   1364 
   1365     return ent;
   1366 }
   1367 
   1368 static void startDocumentHandler(void* closure)
   1369 {
   1370     xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
   1371     XMLDocumentParser* parser = getParser(closure);
   1372     switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
   1373     parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
   1374     xmlSAX2StartDocument(closure);
   1375 }
   1376 
   1377 static void endDocumentHandler(void* closure)
   1378 {
   1379     getParser(closure)->endDocument();
   1380     xmlSAX2EndDocument(closure);
   1381 }
   1382 
   1383 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
   1384 {
   1385     getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
   1386     xmlSAX2InternalSubset(closure, name, externalID, systemID);
   1387 }
   1388 
   1389 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
   1390 {
   1391     String extId = toString(externalId);
   1392     if (extId == "-//W3C//DTD XHTML 1.0 Transitional//EN"
   1393         || extId == "-//W3C//DTD XHTML 1.1//EN"
   1394         || extId == "-//W3C//DTD XHTML 1.0 Strict//EN"
   1395         || extId == "-//W3C//DTD XHTML 1.0 Frameset//EN"
   1396         || extId == "-//W3C//DTD XHTML Basic 1.0//EN"
   1397         || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"
   1398         || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
   1399         || extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
   1400         || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN"
   1401         || extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN") {
   1402         // Controls if we replace entities or not.
   1403         getParser(closure)->setIsXHTMLDocument(true);
   1404     }
   1405 }
   1406 
   1407 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
   1408 {
   1409     // Nothing to do, but we need this to work around a crasher.
   1410     // http://bugzilla.gnome.org/show_bug.cgi?id=172255
   1411     // http://bugs.webkit.org/show_bug.cgi?id=5792
   1412 }
   1413 
   1414 void XMLDocumentParser::initializeParserContext(const CString& chunk)
   1415 {
   1416     xmlSAXHandler sax;
   1417     memset(&sax, 0, sizeof(sax));
   1418 
   1419     sax.error = normalErrorHandler;
   1420     sax.fatalError = fatalErrorHandler;
   1421     sax.characters = charactersHandler;
   1422     sax.processingInstruction = processingInstructionHandler;
   1423     sax.cdataBlock = cdataBlockHandler;
   1424     sax.comment = commentHandler;
   1425     sax.warning = warningHandler;
   1426     sax.startElementNs = startElementNsHandler;
   1427     sax.endElementNs = endElementNsHandler;
   1428     sax.getEntity = getEntityHandler;
   1429     sax.startDocument = startDocumentHandler;
   1430     sax.endDocument = endDocumentHandler;
   1431     sax.internalSubset = internalSubsetHandler;
   1432     sax.externalSubset = externalSubsetHandler;
   1433     sax.ignorableWhitespace = ignorableWhitespaceHandler;
   1434     sax.entityDecl = xmlSAX2EntityDecl;
   1435     sax.initialized = XML_SAX2_MAGIC;
   1436     DocumentParser::startParsing();
   1437     m_sawError = false;
   1438     m_sawCSS = false;
   1439     m_sawXSLTransform = false;
   1440     m_sawFirstElement = false;
   1441 
   1442     XMLDocumentParserScope scope(document()->fetcher());
   1443     if (m_parsingFragment) {
   1444         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
   1445     } else {
   1446         ASSERT(!chunk.data());
   1447         m_context = XMLParserContext::createStringParser(&sax, this);
   1448     }
   1449 }
   1450 
   1451 void XMLDocumentParser::doEnd()
   1452 {
   1453     if (!isStopped()) {
   1454         if (m_context) {
   1455             // Tell libxml we're done.
   1456             {
   1457                 XMLDocumentParserScope scope(document()->fetcher());
   1458                 finishParsing(context());
   1459             }
   1460 
   1461             m_context = nullptr;
   1462         }
   1463     }
   1464 
   1465     bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
   1466     if (xmlViewerMode) {
   1467         XMLTreeViewer xmlTreeViewer(document());
   1468         xmlTreeViewer.transformDocumentToTreeView();
   1469     } else if (m_sawXSLTransform) {
   1470         xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
   1471         document()->setTransformSource(adoptPtr(new TransformSource(doc)));
   1472         // Make the document think it's done, so it will apply XSL stylesheets.
   1473         document()->setParsing(false);
   1474         document()->styleResolverChanged();
   1475 
   1476         // styleResolverChanged() call can detach the parser and null out its
   1477         // document. In that case, we just bail out.
   1478         if (isDetached())
   1479             return;
   1480 
   1481         document()->setParsing(true);
   1482         DocumentParser::stopParsing();
   1483     }
   1484 }
   1485 
   1486 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
   1487 {
   1488     if (source.isEmpty())
   1489         return 0;
   1490     // Parse in a single chunk into an xmlDocPtr
   1491     // FIXME: Hook up error handlers so that a failure to parse the main
   1492     // document results in good error messages.
   1493     XMLDocumentParserScope scope(fetcher, errorFunc, 0);
   1494     XMLParserInput input(source);
   1495     return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
   1496 }
   1497 
   1498 OrdinalNumber XMLDocumentParser::lineNumber() const
   1499 {
   1500     return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
   1501 }
   1502 
   1503 OrdinalNumber XMLDocumentParser::columnNumber() const
   1504 {
   1505     return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
   1506 }
   1507 
   1508 TextPosition XMLDocumentParser::textPosition() const
   1509 {
   1510     xmlParserCtxtPtr context = this->context();
   1511     if (!context)
   1512         return TextPosition::minimumPosition();
   1513     return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), OrdinalNumber::fromOneBasedInt(context->input->col));
   1514 }
   1515 
   1516 void XMLDocumentParser::stopParsing()
   1517 {
   1518     DocumentParser::stopParsing();
   1519     if (context())
   1520         xmlStopParser(context());
   1521 }
   1522 
   1523 void XMLDocumentParser::resumeParsing()
   1524 {
   1525     ASSERT(!isDetached());
   1526     ASSERT(m_parserPaused);
   1527 
   1528     m_parserPaused = false;
   1529 
   1530     // First, execute any pending callbacks
   1531     while (!m_pendingCallbacks.isEmpty()) {
   1532         OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
   1533         callback->call(this);
   1534 
   1535         // A callback paused the parser
   1536         if (m_parserPaused)
   1537             return;
   1538     }
   1539 
   1540     // Then, write any pending data
   1541     SegmentedString rest = m_pendingSrc;
   1542     m_pendingSrc.clear();
   1543     // There is normally only one string left, so toString() shouldn't copy.
   1544     // In any case, the XML parser runs on the main thread and it's OK if
   1545     // the passed string has more than one reference.
   1546     append(rest.toString().impl());
   1547 
   1548     // Finally, if finish() has been called and write() didn't result
   1549     // in any further callbacks being queued, call end()
   1550     if (m_finishCalled && m_pendingCallbacks.isEmpty())
   1551         end();
   1552 }
   1553 
   1554 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
   1555 {
   1556     ASSERT(!m_context);
   1557     ASSERT(m_parsingFragment);
   1558 
   1559     CString chunkAsUtf8 = chunk.utf8();
   1560 
   1561     // libxml2 takes an int for a length, and therefore can't handle XML chunks
   1562     // larger than 2 GiB.
   1563     if (chunkAsUtf8.length() > INT_MAX)
   1564         return false;
   1565 
   1566     initializeParserContext(chunkAsUtf8);
   1567     xmlParseContent(context());
   1568     endDocument(); // Close any open text nodes.
   1569 
   1570     // FIXME: If this code is actually needed, it should probably move to
   1571     // finish()
   1572     // XMLDocumentParserQt has a similar check (m_stream.error() ==
   1573     // QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). Check if all
   1574     // the chunk has been processed.
   1575     long bytesProcessed = xmlByteConsumed(context());
   1576     if (bytesProcessed == -1 || static_cast<unsigned long>(bytesProcessed) != chunkAsUtf8.length()) {
   1577         // FIXME: I don't believe we can hit this case without also having seen
   1578         // an error or a null byte. If we hit this ASSERT, we've found a test
   1579         // case which demonstrates the need for this code.
   1580         ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
   1581         return false;
   1582     }
   1583 
   1584     // No error if the chunk is well formed or it is not but we have no error.
   1585     return context()->wellFormed || !xmlCtxtGetLastError(context());
   1586 }
   1587 
   1588 // --------------------------------
   1589 
   1590 struct AttributeParseState {
   1591     HashMap<String, String> attributes;
   1592     bool gotAttributes;
   1593 };
   1594 
   1595 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
   1596     const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
   1597     int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
   1598 {
   1599     if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
   1600         return;
   1601 
   1602     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1603     AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
   1604 
   1605     state->gotAttributes = true;
   1606 
   1607     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
   1608     for (int i = 0; i < nbAttributes; ++i) {
   1609         String attrLocalName = toString(attributes[i].localname);
   1610         int valueLength = (int) (attributes[i].end - attributes[i].value);
   1611         String attrValue = toString(attributes[i].value, valueLength);
   1612         String attrPrefix = toString(attributes[i].prefix);
   1613         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
   1614 
   1615         state->attributes.set(attrQName, attrValue);
   1616     }
   1617 }
   1618 
   1619 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
   1620 {
   1621     AttributeParseState state;
   1622     state.gotAttributes = false;
   1623 
   1624     xmlSAXHandler sax;
   1625     memset(&sax, 0, sizeof(sax));
   1626     sax.startElementNs = attributesStartElementNsHandler;
   1627     sax.initialized = XML_SAX2_MAGIC;
   1628     RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
   1629     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
   1630     parseChunk(parser->context(), parseString);
   1631     finishParsing(parser->context());
   1632     attrsOK = state.gotAttributes;
   1633     return state.attributes;
   1634 }
   1635 
   1636 } // namespace WebCore
   1637