Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2000 Peter Kelly (pmk (at) post.com)
      3  * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
      4  * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org)
      5  * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org)
      6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
      7  * Copyright (C) 2008 Holger Hans Peter Freyther
      8  * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
      9  *
     10  * This library is free software; you can redistribute it and/or
     11  * modify it under the terms of the GNU Library General Public
     12  * License as published by the Free Software Foundation; either
     13  * version 2 of the License, or (at your option) any later version.
     14  *
     15  * This library is distributed in the hope that it will be useful,
     16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     18  * Library General Public License for more details.
     19  *
     20  * You should have received a copy of the GNU Library General Public License
     21  * along with this library; see the file COPYING.LIB.  If not, write to
     22  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     23  * Boston, MA 02110-1301, USA.
     24  */
     25 
     26 #include "config.h"
     27 #include "core/xml/parser/XMLDocumentParser.h"
     28 
     29 #include <libxml/parser.h>
     30 #include <libxml/parserInternals.h>
     31 #include <libxslt/xslt.h>
     32 #include <wtf/StringExtras.h>
     33 #include <wtf/text/CString.h>
     34 #include <wtf/Threading.h>
     35 #include <wtf/unicode/UTF8.h>
     36 #include <wtf/UnusedParam.h>
     37 #include <wtf/Vector.h>
     38 #include "HTMLNames.h"
     39 #include "XMLNSNames.h"
     40 #include "bindings/v8/ExceptionState.h"
     41 #include "bindings/v8/ExceptionStatePlaceholder.h"
     42 #include "bindings/v8/ScriptController.h"
     43 #include "bindings/v8/ScriptSourceCode.h"
     44 #include "core/dom/CDATASection.h"
     45 #include "core/dom/Comment.h"
     46 #include "core/dom/Document.h"
     47 #include "core/dom/DocumentFragment.h"
     48 #include "core/dom/DocumentType.h"
     49 #include "core/dom/ProcessingInstruction.h"
     50 #include "core/dom/ScriptLoader.h"
     51 #include "core/dom/TransformSource.h"
     52 #include "core/html/HTMLHtmlElement.h"
     53 #include "core/html/HTMLTemplateElement.h"
     54 #include "core/html/parser/HTMLEntityParser.h"
     55 #include "core/loader/FrameLoader.h"
     56 #include "core/loader/ImageLoader.h"
     57 #include "core/loader/TextResourceDecoder.h"
     58 #include "core/loader/cache/ResourceFetcher.h"
     59 #include "core/loader/cache/ScriptResource.h"
     60 #include "core/page/Frame.h"
     61 #include "core/page/UseCounter.h"
     62 #include "core/platform/network/ResourceError.h"
     63 #include "core/platform/network/ResourceRequest.h"
     64 #include "core/platform/network/ResourceResponse.h"
     65 #include "core/xml/XMLErrors.h"
     66 #include "core/xml/XMLTreeViewer.h"
     67 #include "core/xml/parser/XMLDocumentParserScope.h"
     68 #include "core/xml/parser/XMLParserInput.h"
     69 #include "weborigin/SecurityOrigin.h"
     70 #include "wtf/TemporaryChange.h"
     71 
     72 using namespace std;
     73 
     74 namespace WebCore {
     75 
     76 using namespace HTMLNames;
     77 
// Upper bound on the size of the current-node stack; pushCurrentNode() raises
// a fatal "Excessive node nesting." error once the stack grows past it.
// FIXME: HTMLConstructionSite has a limit of 512, should these match?
static const unsigned maxXMLTreeDepth = 5000;
     80 
     81 static inline String toString(const xmlChar* string, size_t length)
     82 {
     83     return String::fromUTF8(reinterpret_cast<const char*>(string), length);
     84 }
     85 
     86 static inline String toString(const xmlChar* string)
     87 {
     88     return String::fromUTF8(reinterpret_cast<const char*>(string));
     89 }
     90 
     91 static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
     92 {
     93     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
     94 }
     95 
     96 static inline AtomicString toAtomicString(const xmlChar* string)
     97 {
     98     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
     99 }
    100 
    101 static inline bool hasNoStyleInformation(Document* document)
    102 {
    103     if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
    104         return false;
    105 
    106     if (!document->frame() || !document->frame()->page())
    107         return false;
    108 
    109     if (document->frame()->tree()->parent())
    110         return false; // This document is not in a top frame
    111 
    112     return true;
    113 }
    114 
    115 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
    116 public:
    117     PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
    118         int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
    119         : m_localName(localName)
    120         , m_prefix(prefix)
    121         , m_uri(uri)
    122         , m_namespaceCount(namespaceCount)
    123         , m_attributeCount(attributeCount)
    124         , m_defaultedCount(defaultedCount)
    125     {
    126         m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
    127         for (int i = 0; i < namespaceCount * 2 ; i++)
    128             m_namespaces[i] = xmlStrdup(namespaces[i]);
    129         m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
    130         for (int i = 0; i < attributeCount; i++) {
    131             // Each attribute has 5 elements in the array:
    132             // name, prefix, uri, value and an end pointer.
    133             for (int j = 0; j < 3; j++)
    134                 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
    135             int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
    136             m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
    137             m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
    138         }
    139     }
    140 
    141     virtual ~PendingStartElementNSCallback()
    142     {
    143         for (int i = 0; i < m_namespaceCount * 2; i++)
    144             xmlFree(m_namespaces[i]);
    145         xmlFree(m_namespaces);
    146         for (int i = 0; i < m_attributeCount; i++)
    147             for (int j = 0; j < 4; j++)
    148                 xmlFree(m_attributes[i * 5 + j]);
    149         xmlFree(m_attributes);
    150     }
    151 
    152     virtual void call(XMLDocumentParser* parser) OVERRIDE
    153     {
    154         parser->startElementNs(m_localName, m_prefix, m_uri,
    155                                   m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
    156                                   m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
    157     }
    158 
    159 private:
    160     AtomicString m_localName;
    161     AtomicString m_prefix;
    162     AtomicString m_uri;
    163     int m_namespaceCount;
    164     xmlChar** m_namespaces;
    165     int m_attributeCount;
    166     int m_defaultedCount;
    167     xmlChar** m_attributes;
    168 };
    169 
    170 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
    171 public:
    172     virtual void call(XMLDocumentParser* parser) OVERRIDE
    173     {
    174         parser->endElementNs();
    175     }
    176 };
    177 
    178 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
    179 public:
    180     PendingCharactersCallback(const xmlChar* chars, int length)
    181         : m_chars(xmlStrndup(chars, length))
    182         , m_length(length)
    183     {
    184     }
    185 
    186     virtual ~PendingCharactersCallback()
    187     {
    188         xmlFree(m_chars);
    189     }
    190 
    191     virtual void call(XMLDocumentParser* parser) OVERRIDE
    192     {
    193         parser->characters(m_chars, m_length);
    194     }
    195 
    196 private:
    197     xmlChar* m_chars;
    198     int m_length;
    199 };
    200 
    201 class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
    202 public:
    203     PendingProcessingInstructionCallback(const String& target, const String& data)
    204         : m_target(target)
    205         , m_data(data)
    206     {
    207     }
    208 
    209     virtual void call(XMLDocumentParser* parser) OVERRIDE
    210     {
    211         parser->processingInstruction(m_target, m_data);
    212     }
    213 
    214 private:
    215     String m_target;
    216     String m_data;
    217 };
    218 
    219 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
    220 public:
    221     explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }
    222 
    223     virtual void call(XMLDocumentParser* parser) OVERRIDE
    224     {
    225         parser->cdataBlock(m_text);
    226     }
    227 
    228 private:
    229     String m_text;
    230 };
    231 
    232 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
    233 public:
    234     explicit PendingCommentCallback(const String& text) : m_text(text) { }
    235 
    236     virtual void call(XMLDocumentParser* parser) OVERRIDE
    237     {
    238         parser->comment(m_text);
    239     }
    240 
    241 private:
    242     String m_text;
    243 };
    244 
    245 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
    246 public:
    247     PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
    248         : m_name(name)
    249         , m_externalID(externalID)
    250         , m_systemID(systemID)
    251     {
    252     }
    253 
    254     virtual void call(XMLDocumentParser* parser) OVERRIDE
    255     {
    256         parser->internalSubset(m_name, m_externalID, m_systemID);
    257     }
    258 
    259 private:
    260     String m_name;
    261     String m_externalID;
    262     String m_systemID;
    263 };
    264 
    265 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
    266 public:
    267     PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
    268         : m_type(type)
    269         , m_message(xmlStrdup(message))
    270         , m_lineNumber(lineNumber)
    271         , m_columnNumber(columnNumber)
    272     {
    273     }
    274 
    275     virtual ~PendingErrorCallback()
    276     {
    277         xmlFree(m_message);
    278     }
    279 
    280     virtual void call(XMLDocumentParser* parser) OVERRIDE
    281     {
    282         parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
    283     }
    284 
    285 private:
    286     XMLErrors::ErrorType m_type;
    287     xmlChar* m_message;
    288     OrdinalNumber m_lineNumber;
    289     OrdinalNumber m_columnNumber;
    290 };
    291 
    292 void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
    293 {
    294     ASSERT(n);
    295     ASSERT(m_currentNode);
    296     if (n != document())
    297         n->ref();
    298     m_currentNodeStack.append(m_currentNode);
    299     m_currentNode = n;
    300     if (m_currentNodeStack.size() > maxXMLTreeDepth)
    301         handleError(XMLErrors::fatal, "Excessive node nesting.", textPosition());
    302 }
    303 
    304 void XMLDocumentParser::popCurrentNode()
    305 {
    306     if (!m_currentNode)
    307         return;
    308     ASSERT(m_currentNodeStack.size());
    309 
    310     if (m_currentNode != document())
    311         m_currentNode->deref();
    312 
    313     m_currentNode = m_currentNodeStack.last();
    314     m_currentNodeStack.removeLast();
    315 }
    316 
    317 void XMLDocumentParser::clearCurrentNodeStack()
    318 {
    319     if (m_currentNode && m_currentNode != document())
    320         m_currentNode->deref();
    321     m_currentNode = 0;
    322     m_leafTextNode = 0;
    323 
    324     if (m_currentNodeStack.size()) { // Aborted parsing.
    325         for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
    326             m_currentNodeStack[i]->deref();
    327         if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
    328             m_currentNodeStack[0]->deref();
    329         m_currentNodeStack.clear();
    330     }
    331 }
    332 
    333 void XMLDocumentParser::insert(const SegmentedString&)
    334 {
    335     ASSERT_NOT_REACHED();
    336 }
    337 
    338 void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
    339 {
    340     SegmentedString source(inputSource);
    341     if (m_sawXSLTransform || !m_sawFirstElement)
    342         m_originalSourceForTransform.append(source);
    343 
    344     if (isStopped() || m_sawXSLTransform)
    345         return;
    346 
    347     if (m_parserPaused) {
    348         m_pendingSrc.append(source);
    349         return;
    350     }
    351 
    352     // JavaScript can detach the parser. Make sure this is not released
    353     // before the end of this method.
    354     RefPtr<XMLDocumentParser> protect(this);
    355 
    356     doWrite(source.toString());
    357 
    358     if (isStopped())
    359         return;
    360 
    361     if (document()->frame() && document()->frame()->script()->canExecuteScripts(NotAboutToExecuteScript))
    362         ImageLoader::dispatchPendingBeforeLoadEvents();
    363 }
    364 
    365 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
    366 {
    367     m_xmlErrors.handleError(type, formattedMessage, position);
    368     if (type != XMLErrors::warning)
    369         m_sawError = true;
    370     if (type == XMLErrors::fatal)
    371         stopParsing();
    372 }
    373 
    374 void XMLDocumentParser::enterText()
    375 {
    376     ASSERT(m_bufferedText.size() == 0);
    377     ASSERT(!m_leafTextNode);
    378     m_leafTextNode = Text::create(m_currentNode->document(), "");
    379     m_currentNode->parserAppendChild(m_leafTextNode.get());
    380 }
    381 
    382 void XMLDocumentParser::exitText()
    383 {
    384     if (isStopped())
    385         return;
    386 
    387     if (!m_leafTextNode)
    388         return;
    389 
    390     m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
    391     Vector<xmlChar> empty;
    392     m_bufferedText.swap(empty);
    393 
    394     if (m_view && m_leafTextNode->parentNode() && m_leafTextNode->parentNode()->attached()
    395         && !m_leafTextNode->attached())
    396         m_leafTextNode->attach();
    397 
    398     m_leafTextNode = 0;
    399 }
    400 
    401 void XMLDocumentParser::detach()
    402 {
    403     clearCurrentNodeStack();
    404     ScriptableDocumentParser::detach();
    405 }
    406 
    407 void XMLDocumentParser::end()
    408 {
    409     // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
    410     // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
    411     ASSERT(!m_parsingFragment);
    412 
    413     doEnd();
    414 
    415     // doEnd() call above can detach the parser and null out its document.
    416     // In that case, we just bail out.
    417     if (isDetached())
    418         return;
    419 
    420     // doEnd() could process a script tag, thus pausing parsing.
    421     if (m_parserPaused)
    422         return;
    423 
    424     if (m_sawError)
    425         insertErrorMessageBlock();
    426     else {
    427         exitText();
    428         document()->styleResolverChanged(RecalcStyleImmediately);
    429     }
    430 
    431     if (isParsing())
    432         prepareToStopParsing();
    433     document()->setReadyState(Document::Interactive);
    434     clearCurrentNodeStack();
    435     document()->finishedParsing();
    436 }
    437 
    438 void XMLDocumentParser::finish()
    439 {
    440     // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
    441     // makes sense to call any methods on DocumentParser once it's been stopped.
    442     // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
    443 
    444     if (m_parserPaused)
    445         m_finishCalled = true;
    446     else
    447         end();
    448 }
    449 
    450 void XMLDocumentParser::insertErrorMessageBlock()
    451 {
    452     m_xmlErrors.insertErrorMessageBlock();
    453 }
    454 
    455 void XMLDocumentParser::notifyFinished(Resource* unusedResource)
    456 {
    457     ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
    458     ASSERT(m_pendingScript->accessCount() > 0);
    459 
    460     ScriptSourceCode sourceCode(m_pendingScript.get());
    461     bool errorOccurred = m_pendingScript->errorOccurred();
    462     bool wasCanceled = m_pendingScript->wasCanceled();
    463 
    464     m_pendingScript->removeClient(this);
    465     m_pendingScript = 0;
    466 
    467     RefPtr<Element> e = m_scriptElement;
    468     m_scriptElement = 0;
    469 
    470     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
    471     ASSERT(scriptLoader);
    472 
    473     // JavaScript can detach this parser, make sure it's kept alive even if detached.
    474     RefPtr<XMLDocumentParser> protect(this);
    475 
    476     if (errorOccurred)
    477         scriptLoader->dispatchErrorEvent();
    478     else if (!wasCanceled) {
    479         scriptLoader->executeScript(sourceCode);
    480         scriptLoader->dispatchLoadEvent();
    481     }
    482 
    483     m_scriptElement = 0;
    484 
    485     if (!isDetached() && !m_requestingScript)
    486         resumeParsing();
    487 }
    488 
    489 bool XMLDocumentParser::isWaitingForScripts() const
    490 {
    491     return m_pendingScript;
    492 }
    493 
    494 void XMLDocumentParser::pauseParsing()
    495 {
    496     if (m_parsingFragment)
    497         return;
    498 
    499     m_parserPaused = true;
    500 }
    501 
    502 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
    503 {
    504     if (!chunk.length())
    505         return true;
    506 
    507     // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
    508     // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
    509     // For now we have a hack for script/style innerHTML support:
    510     if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) {
    511         fragment->parserAppendChild(fragment->document()->createTextNode(chunk));
    512         return true;
    513     }
    514 
    515     RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
    516     bool wellFormed = parser->appendFragmentSource(chunk);
    517     // Do not call finish().  Current finish() and doEnd() implementations touch the main Document/loader
    518     // and can cause crashes in the fragment case.
    519     parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
    520     return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
    521 }
    522 
    523 static int globalDescriptor = 0;
    524 static ThreadIdentifier libxmlLoaderThread = 0;
    525 
    526 static int matchFunc(const char*)
    527 {
    528     // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
    529     // interfering with client applications that also use libxml2.  http://bugs.webkit.org/show_bug.cgi?id=17353
    530     return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
    531 }
    532 
    533 class OffsetBuffer {
    534     WTF_MAKE_FAST_ALLOCATED;
    535 public:
    536     OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
    537 
    538     int readOutBytes(char* outputBuffer, unsigned askedToRead)
    539     {
    540         unsigned bytesLeft = m_buffer.size() - m_currentOffset;
    541         unsigned lenToCopy = min(askedToRead, bytesLeft);
    542         if (lenToCopy) {
    543             memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
    544             m_currentOffset += lenToCopy;
    545         }
    546         return lenToCopy;
    547     }
    548 
    549 private:
    550     Vector<char> m_buffer;
    551     unsigned m_currentOffset;
    552 };
    553 
    554 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
    555 {
    556     if (!scriptingContentIsAllowed(parserContentPolicy))
    557         element->stripScriptingAttributes(attributeVector);
    558     element->parserSetAttributes(attributeVector);
    559 }
    560 
    561 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
    562 {
    563     // Hack around libxml2's lack of encoding overide support by manually
    564     // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
    565     // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
    566     // and switch encodings, causing the parse to fail.
    567     if (is8Bit) {
    568         xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
    569         return;
    570     }
    571 
    572     const UChar BOM = 0xFEFF;
    573     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
    574     xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
    575 }
    576 
    577 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
    578 {
    579     bool is8Bit = chunk.is8Bit();
    580     switchEncoding(ctxt, is8Bit);
    581     if (is8Bit)
    582         xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
    583     else
    584         xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
    585 }
    586 
    587 static void finishParsing(xmlParserCtxtPtr ctxt)
    588 {
    589     xmlParseChunk(ctxt, 0, 0, 1);
    590 }
    591 
// Poison direct use of xmlParseChunk from here on: any later use expands to
// tokens that do not compile, steering callers to parseChunk()/finishParsing().
#define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
    593 
    594 static bool shouldAllowExternalLoad(const KURL& url)
    595 {
    596     String urlString = url.string();
    597 
    598     // On non-Windows platforms libxml asks for this URL, the
    599     // "XML_XML_DEFAULT_CATALOG", on initialization.
    600     if (urlString == "file:///etc/xml/catalog")
    601         return false;
    602 
    603     // On Windows, libxml computes a URL relative to where its DLL resides.
    604     if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
    605         return false;
    606 
    607     // The most common DTD.  There isn't much point in hammering www.w3c.org
    608     // by requesting this URL for every XHTML document.
    609     if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
    610         return false;
    611 
    612     // Similarly, there isn't much point in requesting the SVG DTD.
    613     if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
    614         return false;
    615 
    616     // The libxml doesn't give us a lot of context for deciding whether to
    617     // allow this request.  In the worst case, this load could be for an
    618     // external entity and the resulting document could simply read the
    619     // retrieved content.  If we had more context, we could potentially allow
    620     // the parser to load a DTD.  As things stand, we take the conservative
    621     // route and allow same-origin requests only.
    622     if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
    623         XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
    624         return false;
    625     }
    626 
    627     return true;
    628 }
    629 
    630 static void* openFunc(const char* uri)
    631 {
    632     ASSERT(XMLDocumentParserScope::currentFetcher);
    633     ASSERT(currentThread() == libxmlLoaderThread);
    634 
    635     KURL url(KURL(), uri);
    636 
    637     if (!shouldAllowExternalLoad(url))
    638         return &globalDescriptor;
    639 
    640     ResourceError error;
    641     ResourceResponse response;
    642     Vector<char> data;
    643 
    644 
    645     {
    646         ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
    647         XMLDocumentParserScope scope(0);
    648         // FIXME: We should restore the original global error handler as well.
    649 
    650         if (fetcher->frame())
    651             fetcher->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data);
    652     }
    653 
    654     // We have to check the URL again after the load to catch redirects.
    655     // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
    656     if (!shouldAllowExternalLoad(response.url()))
    657         return &globalDescriptor;
    658 
    659     return new OffsetBuffer(data);
    660 }
    661 
    662 static int readFunc(void* context, char* buffer, int len)
    663 {
    664     // Do 0-byte reads in case of a null descriptor
    665     if (context == &globalDescriptor)
    666         return 0;
    667 
    668     OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
    669     return data->readOutBytes(buffer, len);
    670 }
    671 
    672 static int writeFunc(void*, const char*, int)
    673 {
    674     // Always just do 0-byte writes
    675     return 0;
    676 }
    677 
    678 static int closeFunc(void* context)
    679 {
    680     if (context != &globalDescriptor) {
    681         OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
    682         delete data;
    683     }
    684     return 0;
    685 }
    686 
    687 static void errorFunc(void*, const char*, ...)
    688 {
    689     // FIXME: It would be nice to display error messages somewhere.
    690 }
    691 
    692 static bool didInit = false;
    693 
    694 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
    695 {
    696     if (!didInit) {
    697         xmlInitParser();
    698         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
    699         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
    700         libxmlLoaderThread = currentThread();
    701         didInit = true;
    702     }
    703 
    704     xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
    705     parser->_private = userData;
    706     parser->replaceEntities = true;
    707     return adoptRef(new XMLParserContext(parser));
    708 }
    709 
    710 // Chunk should be encoded in UTF-8
    711 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
    712 {
    713     if (!didInit) {
    714         xmlInitParser();
    715         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
    716         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
    717         libxmlLoaderThread = currentThread();
    718         didInit = true;
    719     }
    720 
    721     // appendFragmentSource() checks that the length doesn't overflow an int.
    722     xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
    723 
    724     if (!parser)
    725         return 0;
    726 
    727     // Copy the sax handler
    728     memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
    729 
    730     // Set parser options.
    731     // XML_PARSE_NODICT: default dictionary option.
    732     // XML_PARSE_NOENT: force entities substitutions.
    733     xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
    734 
    735     // Internal initialization
    736     parser->sax2 = 1;
    737     parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
    738     parser->depth = 0;
    739     parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
    740     parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
    741     parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
    742     parser->_private = userData;
    743 
    744     return adoptRef(new XMLParserContext(parser));
    745 }
    746 
    747 // --------------------------------
    748 
    749 bool XMLDocumentParser::supportsXMLVersion(const String& version)
    750 {
    751     return version == "1.0";
    752 }
    753 
    754 XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
    755     : ScriptableDocumentParser(document)
    756     , m_view(frameView)
    757     , m_context(0)
    758     , m_currentNode(document)
    759     , m_isCurrentlyParsing8BitChunk(false)
    760     , m_sawError(false)
    761     , m_sawCSS(false)
    762     , m_sawXSLTransform(false)
    763     , m_sawFirstElement(false)
    764     , m_isXHTMLDocument(false)
    765     , m_parserPaused(false)
    766     , m_requestingScript(false)
    767     , m_finishCalled(false)
    768     , m_xmlErrors(document)
    769     , m_pendingScript(0)
    770     , m_scriptStartPosition(TextPosition::belowRangePosition())
    771     , m_parsingFragment(false)
    772 {
    773     // This is XML being used as a document resource.
    774     UseCounter::count(document, UseCounter::XMLDocument);
    775 }
    776 
    777 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
    778     : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
    779     , m_view(0)
    780     , m_context(0)
    781     , m_currentNode(fragment)
    782     , m_isCurrentlyParsing8BitChunk(false)
    783     , m_sawError(false)
    784     , m_sawCSS(false)
    785     , m_sawXSLTransform(false)
    786     , m_sawFirstElement(false)
    787     , m_isXHTMLDocument(false)
    788     , m_parserPaused(false)
    789     , m_requestingScript(false)
    790     , m_finishCalled(false)
    791     , m_xmlErrors(fragment->document())
    792     , m_pendingScript(0)
    793     , m_scriptStartPosition(TextPosition::belowRangePosition())
    794     , m_parsingFragment(true)
    795 {
    796     fragment->ref();
    797 
    798     // Add namespaces based on the parent node
    799     Vector<Element*> elemStack;
    800     while (parentElement) {
    801         elemStack.append(parentElement);
    802 
    803         ContainerNode* n = parentElement->parentNode();
    804         if (!n || !n->isElementNode())
    805             break;
    806         parentElement = toElement(n);
    807     }
    808 
    809     if (elemStack.isEmpty())
    810         return;
    811 
    812     for (; !elemStack.isEmpty(); elemStack.removeLast()) {
    813         Element* element = elemStack.last();
    814         if (element->hasAttributes()) {
    815             for (unsigned i = 0; i < element->attributeCount(); i++) {
    816                 const Attribute* attribute = element->attributeItem(i);
    817                 if (attribute->localName() == xmlnsAtom)
    818                     m_defaultNamespaceURI = attribute->value();
    819                 else if (attribute->prefix() == xmlnsAtom)
    820                     m_prefixToNamespaceMap.set(attribute->localName(), attribute->value());
    821             }
    822         }
    823     }
    824 
    825     // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
    826     if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
    827         m_defaultNamespaceURI = parentElement->namespaceURI();
    828 }
    829 
    830 XMLParserContext::~XMLParserContext()
    831 {
    832     if (m_context->myDoc)
    833         xmlFreeDoc(m_context->myDoc);
    834     xmlFreeParserCtxt(m_context);
    835 }
    836 
    837 XMLDocumentParser::~XMLDocumentParser()
    838 {
    839     // The XMLDocumentParser will always be detached before being destroyed.
    840     ASSERT(m_currentNodeStack.isEmpty());
    841     ASSERT(!m_currentNode);
    842 
    843     // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
    844     if (m_pendingScript)
    845         m_pendingScript->removeClient(this);
    846 }
    847 
    848 void XMLDocumentParser::doWrite(const String& parseString)
    849 {
    850     ASSERT(!isDetached());
    851     if (!m_context)
    852         initializeParserContext();
    853 
    854     // Protect the libxml context from deletion during a callback
    855     RefPtr<XMLParserContext> context = m_context;
    856 
    857     // libXML throws an error if you try to switch the encoding for an empty string.
    858     if (parseString.length()) {
    859         // JavaScript may cause the parser to detach during parseChunk
    860         // keep this alive until this function is done.
    861         RefPtr<XMLDocumentParser> protect(this);
    862 
    863         XMLDocumentParserScope scope(document()->fetcher());
    864         TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
    865         parseChunk(context->context(), parseString);
    866 
    867         // JavaScript (which may be run under the parseChunk callstack) may
    868         // cause the parser to be stopped or detached.
    869         if (isStopped())
    870             return;
    871     }
    872 
    873     // FIXME: Why is this here?  And why is it after we process the passed source?
    874     if (document()->decoder() && document()->decoder()->sawError()) {
    875         // If the decoder saw an error, report it as fatal (stops parsing)
    876         TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
    877         handleError(XMLErrors::fatal, "Encoding error", position);
    878     }
    879 }
    880 
    881 struct _xmlSAX2Namespace {
    882     const xmlChar* prefix;
    883     const xmlChar* uri;
    884 };
    885 typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
    886 
    887 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& es)
    888 {
    889     xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
    890     for (int i = 0; i < nbNamespaces; i++) {
    891         AtomicString namespaceQName = xmlnsAtom;
    892         AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
    893         if (namespaces[i].prefix)
    894             namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
    895 
    896         QualifiedName parsedName = anyName;
    897         if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, es))
    898             return;
    899 
    900         prefixedAttributes.append(Attribute(parsedName, namespaceURI));
    901     }
    902 }
    903 
    904 struct _xmlSAX2Attributes {
    905     const xmlChar* localname;
    906     const xmlChar* prefix;
    907     const xmlChar* uri;
    908     const xmlChar* value;
    909     const xmlChar* end;
    910 };
    911 typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
    912 
    913 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& es)
    914 {
    915     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
    916     for (int i = 0; i < nbAttributes; i++) {
    917         int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
    918         AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
    919         String attrPrefix = toString(attributes[i].prefix);
    920         AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
    921         AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
    922 
    923         QualifiedName parsedName = anyName;
    924         if (!Element::parseAttributeName(parsedName, attrURI, attrQName, es))
    925             return;
    926 
    927         prefixedAttributes.append(Attribute(parsedName, attrValue));
    928     }
    929 }
    930 
    931 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
    932     const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
    933 {
    934     if (isStopped())
    935         return;
    936 
    937     if (m_parserPaused) {
    938         m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
    939             nbAttributes, nbDefaulted, libxmlAttributes)));
    940         return;
    941     }
    942 
    943     exitText();
    944 
    945     AtomicString adjustedURI = uri;
    946     if (m_parsingFragment && adjustedURI.isNull()) {
    947         if (!prefix.isNull())
    948             adjustedURI = m_prefixToNamespaceMap.get(prefix);
    949         else
    950             adjustedURI = m_defaultNamespaceURI;
    951     }
    952 
    953     bool isFirstElement = !m_sawFirstElement;
    954     m_sawFirstElement = true;
    955 
    956     QualifiedName qName(prefix, localName, adjustedURI);
    957     RefPtr<Element> newElement = m_currentNode->document()->createElement(qName, true);
    958     if (!newElement) {
    959         stopParsing();
    960         return;
    961     }
    962 
    963     Vector<Attribute> prefixedAttributes;
    964     TrackExceptionState es;
    965     handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, es);
    966     if (es.hadException()) {
    967         setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    968         stopParsing();
    969         return;
    970     }
    971 
    972     handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, es);
    973     setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    974     if (es.hadException()) {
    975         stopParsing();
    976         return;
    977     }
    978 
    979     newElement->beginParsingChildren();
    980 
    981     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
    982     if (scriptLoader)
    983         m_scriptStartPosition = textPosition();
    984 
    985     m_currentNode->parserAppendChild(newElement.get());
    986 
    987     const ContainerNode* currentNode = m_currentNode;
    988     if (newElement->hasTagName(HTMLNames::templateTag))
    989         pushCurrentNode(toHTMLTemplateElement(newElement.get())->content());
    990     else
    991         pushCurrentNode(newElement.get());
    992 
    993     if (m_view && currentNode->attached() && !newElement->attached())
    994         newElement->attach();
    995 
    996     if (isHTMLHtmlElement(newElement.get()))
    997         toHTMLHtmlElement(newElement.get())->insertedByParser();
    998 
    999     if (!m_parsingFragment && isFirstElement && document()->frame())
   1000         document()->frame()->loader()->dispatchDocumentElementAvailable();
   1001 }
   1002 
   1003 void XMLDocumentParser::endElementNs()
   1004 {
   1005     if (isStopped())
   1006         return;
   1007 
   1008     if (m_parserPaused) {
   1009         m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
   1010         return;
   1011     }
   1012 
   1013     // JavaScript can detach the parser.  Make sure this is not released
   1014     // before the end of this method.
   1015     RefPtr<XMLDocumentParser> protect(this);
   1016 
   1017     exitText();
   1018 
   1019     RefPtr<ContainerNode> n = m_currentNode;
   1020     n->finishParsingChildren();
   1021 
   1022     if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n.get()))) {
   1023         popCurrentNode();
   1024         n->remove(IGNORE_EXCEPTION);
   1025         return;
   1026     }
   1027 
   1028     if (!n->isElementNode() || !m_view) {
   1029         popCurrentNode();
   1030         return;
   1031     }
   1032 
   1033     Element* element = toElement(n.get());
   1034 
   1035     // The element's parent may have already been removed from document.
   1036     // Parsing continues in this case, but scripts aren't executed.
   1037     if (!element->inDocument()) {
   1038         popCurrentNode();
   1039         return;
   1040     }
   1041 
   1042     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
   1043     if (!scriptLoader) {
   1044         popCurrentNode();
   1045         return;
   1046     }
   1047 
   1048     // Don't load external scripts for standalone documents (for now).
   1049     ASSERT(!m_pendingScript);
   1050     m_requestingScript = true;
   1051 
   1052     if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
   1053         // FIXME: Script execution should be shared between
   1054         // the libxml2 and Qt XMLDocumentParser implementations.
   1055 
   1056         if (scriptLoader->readyToBeParserExecuted()) {
   1057             scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
   1058         } else if (scriptLoader->willBeParserExecuted()) {
   1059             m_pendingScript = scriptLoader->resource();
   1060             m_scriptElement = element;
   1061             m_pendingScript->addClient(this);
   1062 
   1063             // m_pendingScript will be 0 if script was already loaded and addClient() executed it.
   1064             if (m_pendingScript)
   1065                 pauseParsing();
   1066         } else {
   1067             m_scriptElement = 0;
   1068         }
   1069 
   1070         // JavaScript may have detached the parser
   1071         if (isDetached())
   1072             return;
   1073     }
   1074     m_requestingScript = false;
   1075     popCurrentNode();
   1076 }
   1077 
   1078 void XMLDocumentParser::characters(const xmlChar* chars, int length)
   1079 {
   1080     if (isStopped())
   1081         return;
   1082 
   1083     if (m_parserPaused) {
   1084         m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
   1085         return;
   1086     }
   1087 
   1088     if (!m_leafTextNode)
   1089         enterText();
   1090     m_bufferedText.append(chars, length);
   1091 }
   1092 
   1093 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
   1094 {
   1095     if (isStopped())
   1096         return;
   1097 
   1098 #if HAVE(VASPRINTF)
   1099     char* formattedMessage;
   1100     if (vasprintf(&formattedMessage, message, args) == -1)
   1101         return;
   1102 #else
   1103     char formattedMessage[1024];
   1104     vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
   1105 #endif
   1106 
   1107     if (m_parserPaused) {
   1108         m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
   1109 #if HAVE(VASPRINTF)
   1110         free(formattedMessage);
   1111 #endif
   1112         return;
   1113     }
   1114 
   1115     handleError(type, formattedMessage, textPosition());
   1116 
   1117 #if HAVE(VASPRINTF)
   1118     free(formattedMessage);
   1119 #endif
   1120 }
   1121 
   1122 void XMLDocumentParser::processingInstruction(const String& target, const String& data)
   1123 {
   1124     if (isStopped())
   1125         return;
   1126 
   1127     if (m_parserPaused) {
   1128         m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target ,data)));
   1129         return;
   1130     }
   1131 
   1132     exitText();
   1133 
   1134     // ### handle exceptions
   1135     TrackExceptionState es;
   1136     RefPtr<ProcessingInstruction> pi = m_currentNode->document()->createProcessingInstruction(target, data, es);
   1137     if (es.hadException())
   1138         return;
   1139 
   1140     pi->setCreatedByParser(true);
   1141 
   1142     m_currentNode->parserAppendChild(pi.get());
   1143     if (m_view && !pi->attached())
   1144         pi->attach();
   1145 
   1146     pi->finishParsingChildren();
   1147 
   1148     if (pi->isCSS())
   1149         m_sawCSS = true;
   1150     m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
   1151     if (m_sawXSLTransform && !document()->transformSourceDocument())
   1152         stopParsing();
   1153 }
   1154 
   1155 void XMLDocumentParser::cdataBlock(const String& text)
   1156 {
   1157     if (isStopped())
   1158         return;
   1159 
   1160     if (m_parserPaused) {
   1161         m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
   1162         return;
   1163     }
   1164 
   1165     exitText();
   1166 
   1167     RefPtr<CDATASection> newNode = CDATASection::create(m_currentNode->document(), text);
   1168     m_currentNode->parserAppendChild(newNode.get());
   1169     if (m_view && !newNode->attached())
   1170         newNode->attach();
   1171 }
   1172 
   1173 void XMLDocumentParser::comment(const String& text)
   1174 {
   1175     if (isStopped())
   1176         return;
   1177 
   1178     if (m_parserPaused) {
   1179         m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
   1180         return;
   1181     }
   1182 
   1183     exitText();
   1184 
   1185     RefPtr<Comment> newNode = Comment::create(m_currentNode->document(), text);
   1186     m_currentNode->parserAppendChild(newNode.get());
   1187     if (m_view && !newNode->attached())
   1188         newNode->attach();
   1189 }
   1190 
   1191 enum StandaloneInfo {
   1192     StandaloneUnspecified = -2,
   1193     NoXMlDeclaration,
   1194     StandaloneNo,
   1195     StandaloneYes
   1196 };
   1197 
   1198 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
   1199 {
   1200     StandaloneInfo standaloneInfo = (StandaloneInfo)standalone;
   1201     if (standaloneInfo == NoXMlDeclaration) {
   1202         document()->setHasXMLDeclaration(false);
   1203         return;
   1204     }
   1205 
   1206     if (!version.isNull())
   1207         document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
   1208     if (standalone != StandaloneUnspecified)
   1209         document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
   1210     if (!encoding.isNull())
   1211         document()->setXMLEncoding(encoding);
   1212     document()->setHasXMLDeclaration(true);
   1213 }
   1214 
   1215 void XMLDocumentParser::endDocument()
   1216 {
   1217     exitText();
   1218 }
   1219 
   1220 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
   1221 {
   1222     if (isStopped())
   1223         return;
   1224 
   1225     if (m_parserPaused) {
   1226         m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
   1227         return;
   1228     }
   1229 
   1230     if (document())
   1231         document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
   1232 }
   1233 
   1234 static inline XMLDocumentParser* getParser(void* closure)
   1235 {
   1236     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1237     return static_cast<XMLDocumentParser*>(ctxt->_private);
   1238 }
   1239 
   1240 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
   1241 {
   1242     getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
   1243 }
   1244 
   1245 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
   1246 {
   1247     getParser(closure)->endElementNs();
   1248 }
   1249 
   1250 static void charactersHandler(void* closure, const xmlChar* chars, int length)
   1251 {
   1252     getParser(closure)->characters(chars, length);
   1253 }
   1254 
   1255 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
   1256 {
   1257     getParser(closure)->processingInstruction(toString(target), toString(data));
   1258 }
   1259 
   1260 static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
   1261 {
   1262     getParser(closure)->cdataBlock(toString(text, length));
   1263 }
   1264 
   1265 static void commentHandler(void* closure, const xmlChar* text)
   1266 {
   1267     getParser(closure)->comment(toString(text));
   1268 }
   1269 
   1270 WTF_ATTRIBUTE_PRINTF(2, 3)
   1271 static void warningHandler(void* closure, const char* message, ...)
   1272 {
   1273     va_list args;
   1274     va_start(args, message);
   1275     getParser(closure)->error(XMLErrors::warning, message, args);
   1276     va_end(args);
   1277 }
   1278 
   1279 WTF_ATTRIBUTE_PRINTF(2, 3)
   1280 static void fatalErrorHandler(void* closure, const char* message, ...)
   1281 {
   1282     va_list args;
   1283     va_start(args, message);
   1284     getParser(closure)->error(XMLErrors::fatal, message, args);
   1285     va_end(args);
   1286 }
   1287 
   1288 WTF_ATTRIBUTE_PRINTF(2, 3)
   1289 static void normalErrorHandler(void* closure, const char* message, ...)
   1290 {
   1291     va_list args;
   1292     va_start(args, message);
   1293     getParser(closure)->error(XMLErrors::nonFatal, message, args);
   1294     va_end(args);
   1295 }
   1296 
   1297 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
   1298 // a hack to avoid malloc/free. Using a global variable like this could cause trouble
   1299 // if libxml implementation details were to change
   1300 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
   1301 
   1302 static xmlEntityPtr sharedXHTMLEntity()
   1303 {
   1304     static xmlEntity entity;
   1305     if (!entity.type) {
   1306         entity.type = XML_ENTITY_DECL;
   1307         entity.orig = sharedXHTMLEntityResult;
   1308         entity.content = sharedXHTMLEntityResult;
   1309         entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1310     }
   1311     return &entity;
   1312 }
   1313 
   1314 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
   1315 {
   1316     const char* originalTarget = target;
   1317     WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
   1318         utf16Entity + numberOfCodeUnits, &target, target + targetSize);
   1319     if (conversionResult != WTF::Unicode::conversionOK)
   1320         return 0;
   1321 
   1322     // Even though we must pass the length, libxml expects the entity string to be null terminated.
   1323     ASSERT(target > originalTarget + 1);
   1324     *target = '\0';
   1325     return target - originalTarget;
   1326 }
   1327 
   1328 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
   1329 {
   1330     UChar utf16DecodedEntity[4];
   1331     size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
   1332     if (!numberOfCodeUnits)
   1333         return 0;
   1334 
   1335     ASSERT(numberOfCodeUnits <= 4);
   1336     size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
   1337         reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
   1338     if (!entityLengthInUTF8)
   1339         return 0;
   1340 
   1341     xmlEntityPtr entity = sharedXHTMLEntity();
   1342     entity->length = entityLengthInUTF8;
   1343     entity->name = name;
   1344     return entity;
   1345 }
   1346 
   1347 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
   1348 {
   1349     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1350     xmlEntityPtr ent = xmlGetPredefinedEntity(name);
   1351     if (ent) {
   1352         ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1353         return ent;
   1354     }
   1355 
   1356     ent = xmlGetDocEntity(ctxt->myDoc, name);
   1357     if (!ent && getParser(closure)->isXHTMLDocument()) {
   1358         ent = getXHTMLEntity(name);
   1359         if (ent)
   1360             ent->etype = XML_INTERNAL_GENERAL_ENTITY;
   1361     }
   1362 
   1363     return ent;
   1364 }
   1365 
   1366 static void startDocumentHandler(void* closure)
   1367 {
   1368     xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
   1369     XMLDocumentParser* parser = getParser(closure);
   1370     switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
   1371     parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
   1372     xmlSAX2StartDocument(closure);
   1373 }
   1374 
   1375 static void endDocumentHandler(void* closure)
   1376 {
   1377     getParser(closure)->endDocument();
   1378     xmlSAX2EndDocument(closure);
   1379 }
   1380 
   1381 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
   1382 {
   1383     getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
   1384     xmlSAX2InternalSubset(closure, name, externalID, systemID);
   1385 }
   1386 
   1387 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
   1388 {
   1389     String extId = toString(externalId);
   1390     if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
   1391         || (extId == "-//W3C//DTD XHTML 1.1//EN")
   1392         || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
   1393         || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
   1394         || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
   1395         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
   1396         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
   1397         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
   1398         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN")
   1399         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN"))
   1400         getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
   1401 }
   1402 
   1403 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
   1404 {
   1405     // nothing to do, but we need this to work around a crasher
   1406     // http://bugzilla.gnome.org/show_bug.cgi?id=172255
   1407     // http://bugs.webkit.org/show_bug.cgi?id=5792
   1408 }
   1409 
   1410 void XMLDocumentParser::initializeParserContext(const CString& chunk)
   1411 {
   1412     xmlSAXHandler sax;
   1413     memset(&sax, 0, sizeof(sax));
   1414 
   1415     sax.error = normalErrorHandler;
   1416     sax.fatalError = fatalErrorHandler;
   1417     sax.characters = charactersHandler;
   1418     sax.processingInstruction = processingInstructionHandler;
   1419     sax.cdataBlock = cdataBlockHandler;
   1420     sax.comment = commentHandler;
   1421     sax.warning = warningHandler;
   1422     sax.startElementNs = startElementNsHandler;
   1423     sax.endElementNs = endElementNsHandler;
   1424     sax.getEntity = getEntityHandler;
   1425     sax.startDocument = startDocumentHandler;
   1426     sax.endDocument = endDocumentHandler;
   1427     sax.internalSubset = internalSubsetHandler;
   1428     sax.externalSubset = externalSubsetHandler;
   1429     sax.ignorableWhitespace = ignorableWhitespaceHandler;
   1430     sax.entityDecl = xmlSAX2EntityDecl;
   1431     sax.initialized = XML_SAX2_MAGIC;
   1432     DocumentParser::startParsing();
   1433     m_sawError = false;
   1434     m_sawCSS = false;
   1435     m_sawXSLTransform = false;
   1436     m_sawFirstElement = false;
   1437 
   1438     XMLDocumentParserScope scope(document()->fetcher());
   1439     if (m_parsingFragment)
   1440         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
   1441     else {
   1442         ASSERT(!chunk.data());
   1443         m_context = XMLParserContext::createStringParser(&sax, this);
   1444     }
   1445 }
   1446 
   1447 void XMLDocumentParser::doEnd()
   1448 {
   1449     if (!isStopped()) {
   1450         if (m_context) {
   1451             // Tell libxml we're done.
   1452             {
   1453                 XMLDocumentParserScope scope(document()->fetcher());
   1454                 finishParsing(context());
   1455             }
   1456 
   1457             m_context = 0;
   1458         }
   1459     }
   1460 
   1461     bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
   1462     if (xmlViewerMode) {
   1463         XMLTreeViewer xmlTreeViewer(document());
   1464         xmlTreeViewer.transformDocumentToTreeView();
   1465     } else if (m_sawXSLTransform) {
   1466         xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
   1467         document()->setTransformSource(adoptPtr(new TransformSource(doc)));
   1468 
   1469         document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
   1470         document()->styleResolverChanged(RecalcStyleImmediately);
   1471 
   1472         // styleResolverChanged() call can detach the parser and null out its document.
   1473         // In that case, we just bail out.
   1474         if (isDetached())
   1475             return;
   1476 
   1477         document()->setParsing(true);
   1478         DocumentParser::stopParsing();
   1479     }
   1480 }
   1481 
   1482 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
   1483 {
   1484     if (source.isEmpty())
   1485         return 0;
   1486     // Parse in a single chunk into an xmlDocPtr
   1487     // FIXME: Hook up error handlers so that a failure to parse the main document results in
   1488     // good error messages.
   1489     XMLDocumentParserScope scope(fetcher, errorFunc, 0);
   1490     XMLParserInput input(source);
   1491     return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
   1492 }
   1493 
   1494 OrdinalNumber XMLDocumentParser::lineNumber() const
   1495 {
   1496     return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
   1497 }
   1498 
   1499 OrdinalNumber XMLDocumentParser::columnNumber() const
   1500 {
   1501     return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
   1502 }
   1503 
   1504 TextPosition XMLDocumentParser::textPosition() const
   1505 {
   1506     xmlParserCtxtPtr context = this->context();
   1507     if (!context)
   1508         return TextPosition::minimumPosition();
   1509     return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line),
   1510                         OrdinalNumber::fromOneBasedInt(context->input->col));
   1511 }
   1512 
   1513 void XMLDocumentParser::stopParsing()
   1514 {
   1515     DocumentParser::stopParsing();
   1516     if (context())
   1517         xmlStopParser(context());
   1518 }
   1519 
   1520 void XMLDocumentParser::resumeParsing()
   1521 {
   1522     ASSERT(!isDetached());
   1523     ASSERT(m_parserPaused);
   1524 
   1525     m_parserPaused = false;
   1526 
   1527     // First, execute any pending callbacks
   1528     while (!m_pendingCallbacks.isEmpty()) {
   1529         OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
   1530         callback->call(this);
   1531 
   1532         // A callback paused the parser
   1533         if (m_parserPaused)
   1534             return;
   1535     }
   1536 
   1537     // Then, write any pending data
   1538     SegmentedString rest = m_pendingSrc;
   1539     m_pendingSrc.clear();
   1540     // There is normally only one string left, so toString() shouldn't copy.
   1541     // In any case, the XML parser runs on the main thread and it's OK if
   1542     // the passed string has more than one reference.
   1543     append(rest.toString().impl());
   1544 
   1545     // Finally, if finish() has been called and write() didn't result
   1546     // in any further callbacks being queued, call end()
   1547     if (m_finishCalled && m_pendingCallbacks.isEmpty())
   1548         end();
   1549 }
   1550 
   1551 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
   1552 {
   1553     ASSERT(!m_context);
   1554     ASSERT(m_parsingFragment);
   1555 
   1556     CString chunkAsUtf8 = chunk.utf8();
   1557 
   1558     // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB.
   1559     if (chunkAsUtf8.length() > INT_MAX)
   1560         return false;
   1561 
   1562     initializeParserContext(chunkAsUtf8);
   1563     xmlParseContent(context());
   1564     endDocument(); // Close any open text nodes.
   1565 
   1566     // FIXME: If this code is actually needed, it should probably move to finish()
   1567     // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
   1568     // Check if all the chunk has been processed.
   1569     long bytesProcessed = xmlByteConsumed(context());
   1570     if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
   1571         // FIXME: I don't believe we can hit this case without also having seen an error or a null byte.
   1572         // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
   1573         ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
   1574         return false;
   1575     }
   1576 
   1577     // No error if the chunk is well formed or it is not but we have no error.
   1578     return context()->wellFormed || !xmlCtxtGetLastError(context());
   1579 }
   1580 
   1581 // --------------------------------
   1582 
   1583 struct AttributeParseState {
   1584     HashMap<String, String> attributes;
   1585     bool gotAttributes;
   1586 };
   1587 
   1588 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
   1589     const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
   1590     int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
   1591 {
   1592     if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
   1593         return;
   1594 
   1595     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1596     AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
   1597 
   1598     state->gotAttributes = true;
   1599 
   1600     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
   1601     for (int i = 0; i < nbAttributes; i++) {
   1602         String attrLocalName = toString(attributes[i].localname);
   1603         int valueLength = (int) (attributes[i].end - attributes[i].value);
   1604         String attrValue = toString(attributes[i].value, valueLength);
   1605         String attrPrefix = toString(attributes[i].prefix);
   1606         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
   1607 
   1608         state->attributes.set(attrQName, attrValue);
   1609     }
   1610 }
   1611 
   1612 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
   1613 {
   1614     AttributeParseState state;
   1615     state.gotAttributes = false;
   1616 
   1617     xmlSAXHandler sax;
   1618     memset(&sax, 0, sizeof(sax));
   1619     sax.startElementNs = attributesStartElementNsHandler;
   1620     sax.initialized = XML_SAX2_MAGIC;
   1621     RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
   1622     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
   1623     parseChunk(parser->context(), parseString);
   1624     finishParsing(parser->context());
   1625     attrsOK = state.gotAttributes;
   1626     return state.attributes;
   1627 }
   1628 
   1629 } // namespace WebCore
   1630