Home | History | Annotate | Download | only in dom
      1 /*
      2  * Copyright (C) 2000 Peter Kelly <pmk (at) post.com>
      3  * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
      4  * Copyright (C) 2006 Alexey Proskuryakov <ap (at) webkit.org>
      5  * Copyright (C) 2007 Samuel Weinig <sam (at) webkit.org>
      6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
      7  * Copyright (C) 2008 Holger Hans Peter Freyther
      8  * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
      9  * Copyright (C) 2010 Patrick Gansterer <paroga (at) paroga.com>
     10  *
     11  * This library is free software; you can redistribute it and/or
     12  * modify it under the terms of the GNU Library General Public
     13  * License as published by the Free Software Foundation; either
     14  * version 2 of the License, or (at your option) any later version.
     15  *
     16  * This library is distributed in the hope that it will be useful,
     17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     19  * Library General Public License for more details.
     20  *
     21  * You should have received a copy of the GNU Library General Public License
     22  * along with this library; see the file COPYING.LIB.  If not, write to
     23  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     24  * Boston, MA 02110-1301, USA.
     25  */
     26 
     27 #include "config.h"
     28 #include "XMLDocumentParser.h"
     29 
     30 #include "CDATASection.h"
     31 #include "CachedScript.h"
     32 #include "Comment.h"
     33 #include "CachedResourceLoader.h"
     34 #include "Document.h"
     35 #include "DocumentFragment.h"
     36 #include "DocumentType.h"
     37 #include "Frame.h"
     38 #include "FrameLoader.h"
     39 #include "FrameView.h"
     40 #include "HTMLEntityParser.h"
     41 #include "HTMLHtmlElement.h"
     42 #include "HTMLLinkElement.h"
     43 #include "HTMLNames.h"
     44 #include "HTMLStyleElement.h"
     45 #include "ProcessingInstruction.h"
     46 #include "ResourceError.h"
     47 #include "ResourceHandle.h"
     48 #include "ResourceRequest.h"
     49 #include "ResourceResponse.h"
     50 #include "ScriptElement.h"
     51 #include "ScriptSourceCode.h"
     52 #include "ScriptValue.h"
     53 #include "TextResourceDecoder.h"
     54 #include "TransformSource.h"
     55 #include "XMLNSNames.h"
     56 #include "XMLDocumentParserScope.h"
     57 #include <libxml/parser.h>
     58 #include <libxml/parserInternals.h>
     59 #include <wtf/text/CString.h>
     60 #include <wtf/StringExtras.h>
     61 #include <wtf/Threading.h>
     62 #include <wtf/UnusedParam.h>
     63 #include <wtf/Vector.h>
     64 
     65 #if ENABLE(XSLT)
     66 #include "XMLTreeViewer.h"
     67 #include <libxslt/xslt.h>
     68 #endif
     69 
     70 #if ENABLE(XHTMLMP)
     71 #include "HTMLScriptElement.h"
     72 #endif
     73 
     74 
     75 using namespace std;
     76 
     77 namespace WebCore {
     78 
     79 class PendingCallbacks {
     80     WTF_MAKE_NONCOPYABLE(PendingCallbacks);
     81 public:
     82     PendingCallbacks() { }
     83     ~PendingCallbacks()
     84     {
     85         deleteAllValues(m_callbacks);
     86     }
     87 
     88     void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
     89                                       const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes)
     90     {
     91         PendingStartElementNSCallback* callback = new PendingStartElementNSCallback;
     92 
     93         callback->xmlLocalName = xmlStrdup(xmlLocalName);
     94         callback->xmlPrefix = xmlStrdup(xmlPrefix);
     95         callback->xmlURI = xmlStrdup(xmlURI);
     96         callback->nb_namespaces = nb_namespaces;
     97         callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2));
     98         for (int i = 0; i < nb_namespaces * 2 ; i++)
     99             callback->namespaces[i] = xmlStrdup(namespaces[i]);
    100         callback->nb_attributes = nb_attributes;
    101         callback->nb_defaulted = nb_defaulted;
    102         callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5));
    103         for (int i = 0; i < nb_attributes; i++) {
    104             // Each attribute has 5 elements in the array:
    105             // name, prefix, uri, value and an end pointer.
    106 
    107             for (int j = 0; j < 3; j++)
    108                 callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
    109 
    110             int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
    111 
    112             callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
    113             callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
    114         }
    115 
    116         m_callbacks.append(callback);
    117     }
    118 
    119     void appendEndElementNSCallback()
    120     {
    121         PendingEndElementNSCallback* callback = new PendingEndElementNSCallback;
    122 
    123         m_callbacks.append(callback);
    124     }
    125 
    126     void appendCharactersCallback(const xmlChar* s, int len)
    127     {
    128         PendingCharactersCallback* callback = new PendingCharactersCallback;
    129 
    130         callback->s = xmlStrndup(s, len);
    131         callback->len = len;
    132 
    133         m_callbacks.append(callback);
    134     }
    135 
    136     void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
    137     {
    138         PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback;
    139 
    140         callback->target = xmlStrdup(target);
    141         callback->data = xmlStrdup(data);
    142 
    143         m_callbacks.append(callback);
    144     }
    145 
    146     void appendCDATABlockCallback(const xmlChar* s, int len)
    147     {
    148         PendingCDATABlockCallback* callback = new PendingCDATABlockCallback;
    149 
    150         callback->s = xmlStrndup(s, len);
    151         callback->len = len;
    152 
    153         m_callbacks.append(callback);
    154     }
    155 
    156     void appendCommentCallback(const xmlChar* s)
    157     {
    158         PendingCommentCallback* callback = new PendingCommentCallback;
    159 
    160         callback->s = xmlStrdup(s);
    161 
    162         m_callbacks.append(callback);
    163     }
    164 
    165     void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
    166     {
    167         PendingInternalSubsetCallback* callback = new PendingInternalSubsetCallback;
    168 
    169         callback->name = xmlStrdup(name);
    170         callback->externalID = xmlStrdup(externalID);
    171         callback->systemID = xmlStrdup(systemID);
    172 
    173         m_callbacks.append(callback);
    174     }
    175 
    176     void appendErrorCallback(XMLDocumentParser::ErrorType type, const xmlChar* message, int lineNumber, int columnNumber)
    177     {
    178         PendingErrorCallback* callback = new PendingErrorCallback;
    179 
    180         callback->message = xmlStrdup(message);
    181         callback->type = type;
    182         callback->lineNumber = lineNumber;
    183         callback->columnNumber = columnNumber;
    184 
    185         m_callbacks.append(callback);
    186     }
    187 
    188     void callAndRemoveFirstCallback(XMLDocumentParser* parser)
    189     {
    190         OwnPtr<PendingCallback> callback(m_callbacks.takeFirst());
    191         callback->call(parser);
    192     }
    193 
    194     bool isEmpty() const { return m_callbacks.isEmpty(); }
    195 
    196 private:
    197     struct PendingCallback {
    198         virtual ~PendingCallback() { }
    199         virtual void call(XMLDocumentParser* parser) = 0;
    200     };
    201 
    202     struct PendingStartElementNSCallback : public PendingCallback {
    203         virtual ~PendingStartElementNSCallback()
    204         {
    205             xmlFree(xmlLocalName);
    206             xmlFree(xmlPrefix);
    207             xmlFree(xmlURI);
    208             for (int i = 0; i < nb_namespaces * 2; i++)
    209                 xmlFree(namespaces[i]);
    210             xmlFree(namespaces);
    211             for (int i = 0; i < nb_attributes; i++)
    212                 for (int j = 0; j < 4; j++)
    213                     xmlFree(attributes[i * 5 + j]);
    214             xmlFree(attributes);
    215         }
    216 
    217         virtual void call(XMLDocumentParser* parser)
    218         {
    219             parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI,
    220                                       nb_namespaces, const_cast<const xmlChar**>(namespaces),
    221                                       nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes));
    222         }
    223 
    224         xmlChar* xmlLocalName;
    225         xmlChar* xmlPrefix;
    226         xmlChar* xmlURI;
    227         int nb_namespaces;
    228         xmlChar** namespaces;
    229         int nb_attributes;
    230         int nb_defaulted;
    231         xmlChar** attributes;
    232     };
    233 
    234     struct PendingEndElementNSCallback : public PendingCallback {
    235         virtual void call(XMLDocumentParser* parser)
    236         {
    237             parser->endElementNs();
    238         }
    239     };
    240 
    241     struct PendingCharactersCallback : public PendingCallback {
    242         virtual ~PendingCharactersCallback()
    243         {
    244             xmlFree(s);
    245         }
    246 
    247         virtual void call(XMLDocumentParser* parser)
    248         {
    249             parser->characters(s, len);
    250         }
    251 
    252         xmlChar* s;
    253         int len;
    254     };
    255 
    256     struct PendingProcessingInstructionCallback : public PendingCallback {
    257         virtual ~PendingProcessingInstructionCallback()
    258         {
    259             xmlFree(target);
    260             xmlFree(data);
    261         }
    262 
    263         virtual void call(XMLDocumentParser* parser)
    264         {
    265             parser->processingInstruction(target, data);
    266         }
    267 
    268         xmlChar* target;
    269         xmlChar* data;
    270     };
    271 
    272     struct PendingCDATABlockCallback : public PendingCallback {
    273         virtual ~PendingCDATABlockCallback()
    274         {
    275             xmlFree(s);
    276         }
    277 
    278         virtual void call(XMLDocumentParser* parser)
    279         {
    280             parser->cdataBlock(s, len);
    281         }
    282 
    283         xmlChar* s;
    284         int len;
    285     };
    286 
    287     struct PendingCommentCallback : public PendingCallback {
    288         virtual ~PendingCommentCallback()
    289         {
    290             xmlFree(s);
    291         }
    292 
    293         virtual void call(XMLDocumentParser* parser)
    294         {
    295             parser->comment(s);
    296         }
    297 
    298         xmlChar* s;
    299     };
    300 
    301     struct PendingInternalSubsetCallback : public PendingCallback {
    302         virtual ~PendingInternalSubsetCallback()
    303         {
    304             xmlFree(name);
    305             xmlFree(externalID);
    306             xmlFree(systemID);
    307         }
    308 
    309         virtual void call(XMLDocumentParser* parser)
    310         {
    311             parser->internalSubset(name, externalID, systemID);
    312         }
    313 
    314         xmlChar* name;
    315         xmlChar* externalID;
    316         xmlChar* systemID;
    317     };
    318 
    319     struct PendingErrorCallback: public PendingCallback {
    320         virtual ~PendingErrorCallback()
    321         {
    322             xmlFree(message);
    323         }
    324 
    325         virtual void call(XMLDocumentParser* parser)
    326         {
    327             parser->handleError(type, reinterpret_cast<char*>(message), lineNumber, columnNumber);
    328         }
    329 
    330         XMLDocumentParser::ErrorType type;
    331         xmlChar* message;
    332         int lineNumber;
    333         int columnNumber;
    334     };
    335 
    336     Deque<PendingCallback*> m_callbacks;
    337 };
    338 // --------------------------------
    339 
// Sentinel I/O context. openFunc() returns the address of this variable when
// a load is refused; readFunc() and closeFunc() recognise that address and
// neither read from nor delete it.
static int globalDescriptor = 0;
// Thread on which the libxml2 I/O callbacks were registered. matchFunc() only
// claims loads made from this thread.
static ThreadIdentifier libxmlLoaderThread = 0;
    342 
    343 static int matchFunc(const char*)
    344 {
    345     // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
    346     // interfering with client applications that also use libxml2.  http://bugs.webkit.org/show_bug.cgi?id=17353
    347     return XMLDocumentParserScope::currentCachedResourceLoader && currentThread() == libxmlLoaderThread;
    348 }
    349 
    350 class OffsetBuffer {
    351 public:
    352     OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
    353 
    354     int readOutBytes(char* outputBuffer, unsigned askedToRead)
    355     {
    356         unsigned bytesLeft = m_buffer.size() - m_currentOffset;
    357         unsigned lenToCopy = min(askedToRead, bytesLeft);
    358         if (lenToCopy) {
    359             memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
    360             m_currentOffset += lenToCopy;
    361         }
    362         return lenToCopy;
    363     }
    364 
    365 private:
    366     Vector<char> m_buffer;
    367     unsigned m_currentOffset;
    368 };
    369 
    370 static void switchToUTF16(xmlParserCtxtPtr ctxt)
    371 {
    372     // Hack around libxml2's lack of encoding overide support by manually
    373     // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
    374     // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
    375     // and switch encodings, causing the parse to fail.
    376     const UChar BOM = 0xFEFF;
    377     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
    378     xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
    379 }
    380 
    381 static bool shouldAllowExternalLoad(const KURL& url)
    382 {
    383     String urlString = url.string();
    384 
    385     // On non-Windows platforms libxml asks for this URL, the
    386     // "XML_XML_DEFAULT_CATALOG", on initialization.
    387     if (urlString == "file:///etc/xml/catalog")
    388         return false;
    389 
    390     // On Windows, libxml computes a URL relative to where its DLL resides.
    391     if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
    392         return false;
    393 
    394     // The most common DTD.  There isn't much point in hammering www.w3c.org
    395     // by requesting this URL for every XHTML document.
    396     if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
    397         return false;
    398 
    399     // Similarly, there isn't much point in requesting the SVG DTD.
    400     if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
    401         return false;
    402 
    403     // The libxml doesn't give us a lot of context for deciding whether to
    404     // allow this request.  In the worst case, this load could be for an
    405     // external entity and the resulting document could simply read the
    406     // retrieved content.  If we had more context, we could potentially allow
    407     // the parser to load a DTD.  As things stand, we take the conservative
    408     // route and allow same-origin requests only.
    409     if (!XMLDocumentParserScope::currentCachedResourceLoader->document()->securityOrigin()->canRequest(url)) {
    410         XMLDocumentParserScope::currentCachedResourceLoader->printAccessDeniedMessage(url);
    411         return false;
    412     }
    413 
    414     return true;
    415 }
    416 
    417 static void* openFunc(const char* uri)
    418 {
    419     ASSERT(XMLDocumentParserScope::currentCachedResourceLoader);
    420     ASSERT(currentThread() == libxmlLoaderThread);
    421 
    422     KURL url(KURL(), uri);
    423 
    424     if (!shouldAllowExternalLoad(url))
    425         return &globalDescriptor;
    426 
    427     ResourceError error;
    428     ResourceResponse response;
    429     Vector<char> data;
    430 
    431 
    432     {
    433         CachedResourceLoader* cachedResourceLoader = XMLDocumentParserScope::currentCachedResourceLoader;
    434         XMLDocumentParserScope scope(0);
    435         // FIXME: We should restore the original global error handler as well.
    436 
    437         if (cachedResourceLoader->frame())
    438             cachedResourceLoader->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data);
    439     }
    440 
    441     // We have to check the URL again after the load to catch redirects.
    442     // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
    443     if (!shouldAllowExternalLoad(response.url()))
    444         return &globalDescriptor;
    445 
    446     return new OffsetBuffer(data);
    447 }
    448 
    449 static int readFunc(void* context, char* buffer, int len)
    450 {
    451     // Do 0-byte reads in case of a null descriptor
    452     if (context == &globalDescriptor)
    453         return 0;
    454 
    455     OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
    456     return data->readOutBytes(buffer, len);
    457 }
    458 
    459 static int writeFunc(void*, const char*, int)
    460 {
    461     // Always just do 0-byte writes
    462     return 0;
    463 }
    464 
    465 static int closeFunc(void* context)
    466 {
    467     if (context != &globalDescriptor) {
    468         OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
    469         delete data;
    470     }
    471     return 0;
    472 }
    473 
#if ENABLE(XSLT)
// printf-style error callback that ignores every argument, so any message
// routed through it is dropped. Only compiled when XSLT support is enabled.
static void errorFunc(void*, const char*, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
#endif
    480 
    481 static bool didInit = false;
    482 
    483 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
    484 {
    485     if (!didInit) {
    486         xmlInitParser();
    487         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
    488         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
    489         libxmlLoaderThread = currentThread();
    490         didInit = true;
    491     }
    492 
    493     xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
    494     parser->_private = userData;
    495     parser->replaceEntities = true;
    496     switchToUTF16(parser);
    497 
    498     return adoptRef(new XMLParserContext(parser));
    499 }
    500 
    501 
    502 // Chunk should be encoded in UTF-8
    503 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk)
    504 {
    505     if (!didInit) {
    506         xmlInitParser();
    507         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
    508         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
    509         libxmlLoaderThread = currentThread();
    510         didInit = true;
    511     }
    512 
    513     xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk, xmlStrlen((const xmlChar*)chunk));
    514 
    515     if (!parser)
    516         return 0;
    517 
    518     // Copy the sax handler
    519     memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
    520 
    521     // Set parser options.
    522     // XML_PARSE_NODICT: default dictionary option.
    523     // XML_PARSE_NOENT: force entities substitutions.
    524     xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
    525 
    526     // Internal initialization
    527     parser->sax2 = 1;
    528     parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
    529     parser->depth = 0;
    530     parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
    531     parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
    532     parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
    533     parser->_private = userData;
    534 
    535     return adoptRef(new XMLParserContext(parser));
    536 }
    537 
    538 // --------------------------------
    539 
    540 bool XMLDocumentParser::supportsXMLVersion(const String& version)
    541 {
    542     return version == "1.0";
    543 }
    544 
// Constructs a parser for a whole document. New nodes are initially inserted
// under the document itself (m_currentNode), the libxml2 context is left
// unset here (m_context is 0), and the parser is marked as not parsing a
// fragment, with scripting permission FragmentScriptingAllowed.
XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
    : ScriptableDocumentParser(document)
    , m_view(frameView)
    , m_context(0)
    , m_pendingCallbacks(new PendingCallbacks)
    , m_currentNode(document)
    , m_sawError(false)
    , m_sawCSS(false)
    , m_sawXSLTransform(false)
    , m_sawFirstElement(false)
    , m_isXHTMLDocument(false)
#if ENABLE(XHTMLMP)
    , m_isXHTMLMPDocument(false)
    , m_hasDocTypeDeclaration(false)
#endif
    , m_parserPaused(false)
    , m_requestingScript(false)
    , m_finishCalled(false)
    , m_errorCount(0)
    , m_lastErrorPosition(TextPosition1::belowRangePosition())
    , m_pendingScript(0)
    , m_scriptStartPosition(TextPosition1::belowRangePosition())
    , m_parsingFragment(false)
    , m_scriptingPermission(FragmentScriptingAllowed)
{
}
    571 
    572 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, FragmentScriptingPermission scriptingPermission)
    573     : ScriptableDocumentParser(fragment->document())
    574     , m_view(0)
    575     , m_context(0)
    576     , m_pendingCallbacks(new PendingCallbacks)
    577     , m_currentNode(fragment)
    578     , m_sawError(false)
    579     , m_sawCSS(false)
    580     , m_sawXSLTransform(false)
    581     , m_sawFirstElement(false)
    582     , m_isXHTMLDocument(false)
    583 #if ENABLE(XHTMLMP)
    584     , m_isXHTMLMPDocument(false)
    585     , m_hasDocTypeDeclaration(false)
    586 #endif
    587     , m_parserPaused(false)
    588     , m_requestingScript(false)
    589     , m_finishCalled(false)
    590     , m_errorCount(0)
    591     , m_lastErrorPosition(TextPosition1::belowRangePosition())
    592     , m_pendingScript(0)
    593     , m_scriptStartPosition(TextPosition1::belowRangePosition())
    594     , m_parsingFragment(true)
    595     , m_scriptingPermission(scriptingPermission)
    596 {
    597     fragment->ref();
    598 
    599     // Add namespaces based on the parent node
    600     Vector<Element*> elemStack;
    601     while (parentElement) {
    602         elemStack.append(parentElement);
    603 
    604         ContainerNode* n = parentElement->parentNode();
    605         if (!n || !n->isElementNode())
    606             break;
    607         parentElement = static_cast<Element*>(n);
    608     }
    609 
    610     if (elemStack.isEmpty())
    611         return;
    612 
    613     for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
    614         if (NamedNodeMap* attrs = element->attributes()) {
    615             for (unsigned i = 0; i < attrs->length(); i++) {
    616                 Attribute* attr = attrs->attributeItem(i);
    617                 if (attr->localName() == xmlnsAtom)
    618                     m_defaultNamespaceURI = attr->value();
    619                 else if (attr->prefix() == xmlnsAtom)
    620                     m_prefixToNamespaceMap.set(attr->localName(), attr->value());
    621             }
    622         }
    623     }
    624 
    625     // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
    626     if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
    627         m_defaultNamespaceURI = parentElement->namespaceURI();
    628 }
    629 
    630 XMLParserContext::~XMLParserContext()
    631 {
    632     if (m_context->myDoc)
    633         xmlFreeDoc(m_context->myDoc);
    634     xmlFreeParserCtxt(m_context);
    635 }
    636 
    637 XMLDocumentParser::~XMLDocumentParser()
    638 {
    639     // The XMLDocumentParser will always be detached before being destroyed.
    640     ASSERT(m_currentNodeStack.isEmpty());
    641     ASSERT(!m_currentNode);
    642 
    643     // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
    644     if (m_pendingScript)
    645         m_pendingScript->removeClient(this);
    646 }
    647 
// Feeds one chunk of source text to libxml2, creating the parser context on
// first use. After the chunk has been parsed (and provided the parser was not
// stopped meanwhile) a decoder error on the document is reported as a fatal
// "Encoding error".
void XMLDocumentParser::doWrite(const String& parseString)
{
    ASSERT(!isDetached());
    if (!m_context)
        initializeParserContext();

    // Protect the libxml context from deletion during a callback
    RefPtr<XMLParserContext> context = m_context;

    // libXML throws an error if you try to switch the encoding for an empty string.
    if (parseString.length()) {
        // JavaScript may cause the parser to detach during xmlParseChunk;
        // keep this parser alive until the end of this block.
        RefPtr<XMLDocumentParser> protect(this);

        switchToUTF16(context->context());
        // Makes the document's CachedResourceLoader the current one for the
        // lifetime of this scope object, i.e. while xmlParseChunk runs.
        XMLDocumentParserScope scope(document()->cachedResourceLoader());
        // The string's UChar buffer is handed over as raw bytes, so the size
        // is given in bytes; the final 0 is the "terminate" argument.
        xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);

        // JavaScript (which may be run under the xmlParseChunk callstack) may
        // cause the parser to be stopped or detached.
        if (isStopped())
            return;
    }

    // FIXME: Why is this here?  And why is it after we process the passed source?
    if (document()->decoder() && document()->decoder()->sawError()) {
        // If the decoder saw an error, report it as fatal (stops parsing)
        handleError(fatal, "Encoding error", context->context()->input->line, context->context()->input->col);
    }
}
    679 
    680 static inline String toString(const xmlChar* string, size_t size)
    681 {
    682     return String::fromUTF8(reinterpret_cast<const char*>(string), size);
    683 }
    684 
    685 static inline String toString(const xmlChar* string)
    686 {
    687     return String::fromUTF8(reinterpret_cast<const char*>(string));
    688 }
    689 
    690 static inline AtomicString toAtomicString(const xmlChar* string, size_t size)
    691 {
    692     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size);
    693 }
    694 
    695 static inline AtomicString toAtomicString(const xmlChar* string)
    696 {
    697     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
    698 }
    699 
// Typed view of one namespace entry. handleElementNamespaces() casts the flat
// const xmlChar** namespace array it receives to an array of these, so the
// field order must match the order of the pointers in that array.
struct _xmlSAX2Namespace {
    const xmlChar* prefix; // Null when the entry has no prefix (plain "xmlns").
    const xmlChar* uri;
};
typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
    705 
    706 static inline void handleElementNamespaces(Element* newElement, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
    707 {
    708     xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
    709     for (int i = 0; i < nb_namespaces; i++) {
    710         AtomicString namespaceQName = xmlnsAtom;
    711         AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
    712         if (namespaces[i].prefix)
    713             namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
    714         newElement->setAttributeNS(XMLNSNames::xmlnsNamespaceURI, namespaceQName, namespaceURI, ec, scriptingPermission);
    715         if (ec) // exception setting attributes
    716             return;
    717     }
    718 }
    719 
// Typed view of one attribute entry. handleElementAttributes() casts the flat
// const xmlChar** attribute array it receives to an array of these, so the
// field order must match the order of the five pointers per attribute.
struct _xmlSAX2Attributes {
    const xmlChar* localname;
    const xmlChar* prefix;
    const xmlChar* uri;
    const xmlChar* value; // Start of the value; its length is end - value.
    const xmlChar* end;
};
typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
    728 
    729 static inline void handleElementAttributes(Element* newElement, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
    730 {
    731     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
    732     for (int i = 0; i < nb_attributes; i++) {
    733         int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
    734         AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
    735         String attrPrefix = toString(attributes[i].prefix);
    736         AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
    737         AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : AtomicString(attrPrefix + ":" + toString(attributes[i].localname));
    738 
    739         newElement->setAttributeNS(attrURI, attrQName, attrValue, ec, scriptingPermission);
    740         if (ec) // exception setting attributes
    741             return;
    742     }
    743 }
    744 
    745 void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
    746                                   const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
    747 {
    748     if (isStopped())
    749         return;
    750 
    751     if (m_parserPaused) {
    752         m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces,
    753                                                          nb_attributes, nb_defaulted, libxmlAttributes);
    754         return;
    755     }
    756 
    757 #if ENABLE(XHTMLMP)
    758     // check if the DOCTYPE Declaration of XHTMLMP document exists
    759     if (!m_hasDocTypeDeclaration && document()->isXHTMLMPDocument()) {
    760         handleError(fatal, "DOCTYPE declaration lost.", lineNumber(), columnNumber());
    761         return;
    762     }
    763 #endif
    764 
    765     exitText();
    766 
    767     AtomicString localName = toAtomicString(xmlLocalName);
    768     AtomicString uri = toAtomicString(xmlURI);
    769     AtomicString prefix = toAtomicString(xmlPrefix);
    770 
    771     if (m_parsingFragment && uri.isNull()) {
    772         if (!prefix.isNull())
    773             uri = m_prefixToNamespaceMap.get(prefix);
    774         else
    775             uri = m_defaultNamespaceURI;
    776     }
    777 
    778 #if ENABLE(XHTMLMP)
    779     if (!m_sawFirstElement && isXHTMLMPDocument()) {
    780         // As per the section 7.1 of OMA-WAP-XHTMLMP-V1_1-20061020-A.pdf,
    781         // we should make sure that the root element MUST be 'html' and
    782         // ensure the name of the default namespace on the root elment 'html'
    783         // MUST be 'http://www.w3.org/1999/xhtml'
    784         if (localName != HTMLNames::htmlTag.localName()) {
    785             handleError(fatal, "XHTMLMP document expects 'html' as root element.", lineNumber(), columnNumber());
    786             return;
    787         }
    788 
    789         if (uri.isNull()) {
    790             m_defaultNamespaceURI = HTMLNames::xhtmlNamespaceURI;
    791             uri = m_defaultNamespaceURI;
    792         }
    793     }
    794 #endif
    795 
    796     bool isFirstElement = !m_sawFirstElement;
    797     m_sawFirstElement = true;
    798 
    799     QualifiedName qName(prefix, localName, uri);
    800     RefPtr<Element> newElement = document()->createElement(qName, true);
    801     if (!newElement) {
    802         stopParsing();
    803         return;
    804     }
    805 
    806     ExceptionCode ec = 0;
    807     handleElementNamespaces(newElement.get(), libxmlNamespaces, nb_namespaces, ec, m_scriptingPermission);
    808     if (ec) {
    809         stopParsing();
    810         return;
    811     }
    812 
    813     handleElementAttributes(newElement.get(), libxmlAttributes, nb_attributes, ec, m_scriptingPermission);
    814     if (ec) {
    815         stopParsing();
    816         return;
    817     }
    818 
    819     newElement->beginParsingChildren();
    820 
    821     ScriptElement* scriptElement = toScriptElement(newElement.get());
    822     if (scriptElement)
    823         m_scriptStartPosition = textPositionOneBased();
    824 
    825     m_currentNode->deprecatedParserAddChild(newElement.get());
    826 
    827     pushCurrentNode(newElement.get());
    828     if (m_view && !newElement->attached())
    829         newElement->attach();
    830 
    831 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
    832     if (newElement->hasTagName(HTMLNames::htmlTag))
    833         static_cast<HTMLHtmlElement*>(newElement.get())->insertedByParser();
    834 #endif
    835 
    836     if (!m_parsingFragment && isFirstElement && document()->frame())
    837         document()->frame()->loader()->dispatchDocumentElementAvailable();
    838 }
    839 
    840 void XMLDocumentParser::endElementNs()
    841 {
    842     if (isStopped())
    843         return;
    844 
    845     if (m_parserPaused) {
    846         m_pendingCallbacks->appendEndElementNSCallback();
    847         return;
    848     }
    849 
    850     // JavaScript can detach the parser.  Make sure this is not released
    851     // before the end of this method.
    852     RefPtr<XMLDocumentParser> protect(this);
    853 
    854     exitText();
    855 
    856     RefPtr<Node> n = m_currentNode;
    857     n->finishParsingChildren();
    858 
    859     if (m_scriptingPermission == FragmentScriptingNotAllowed && n->isElementNode() && toScriptElement(static_cast<Element*>(n.get()))) {
    860         popCurrentNode();
    861         ExceptionCode ec;
    862         n->remove(ec);
    863         return;
    864     }
    865 
    866     if (!n->isElementNode() || !m_view) {
    867         popCurrentNode();
    868         return;
    869     }
    870 
    871     Element* element = static_cast<Element*>(n.get());
    872 
    873     // The element's parent may have already been removed from document.
    874     // Parsing continues in this case, but scripts aren't executed.
    875     if (!element->inDocument()) {
    876         popCurrentNode();
    877         return;
    878     }
    879 
    880     ScriptElement* scriptElement = toScriptElement(element);
    881     if (!scriptElement) {
    882         popCurrentNode();
    883         return;
    884     }
    885 
    886     // Don't load external scripts for standalone documents (for now).
    887     ASSERT(!m_pendingScript);
    888     m_requestingScript = true;
    889 
    890     bool successfullyPrepared = scriptElement->prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute);
    891     if (!successfullyPrepared) {
    892 #if ENABLE(XHTMLMP)
    893         if (!scriptElement->isScriptTypeSupported(ScriptElement::AllowLegacyTypeInTypeAttribute))
    894             document()->setShouldProcessNoscriptElement(true);
    895 #endif
    896     } else {
    897         // FIXME: Script execution should be shared between
    898         // the libxml2 and Qt XMLDocumentParser implementations.
    899 
    900         if (scriptElement->readyToBeParserExecuted())
    901             scriptElement->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartPosition));
    902         else if (scriptElement->willBeParserExecuted()) {
    903             m_pendingScript = scriptElement->cachedScript();
    904             m_scriptElement = element;
    905             m_pendingScript->addClient(this);
    906 
    907             // m_pendingScript will be 0 if script was already loaded and addClient() executed it.
    908             if (m_pendingScript)
    909                 pauseParsing();
    910         } else
    911             m_scriptElement = 0;
    912 
    913         // JavaScript may have detached the parser
    914         if (isDetached())
    915             return;
    916     }
    917     m_requestingScript = false;
    918     popCurrentNode();
    919 }
    920 
    921 void XMLDocumentParser::characters(const xmlChar* s, int len)
    922 {
    923     if (isStopped())
    924         return;
    925 
    926     if (m_parserPaused) {
    927         m_pendingCallbacks->appendCharactersCallback(s, len);
    928         return;
    929     }
    930 
    931     if (!m_currentNode->isTextNode())
    932         enterText();
    933     m_bufferedText.append(s, len);
    934 }
    935 
    936 void XMLDocumentParser::error(ErrorType type, const char* message, va_list args)
    937 {
    938     if (isStopped())
    939         return;
    940 
    941 #if COMPILER(MSVC) || COMPILER(RVCT)
    942     char m[1024];
    943     vsnprintf(m, sizeof(m) - 1, message, args);
    944 #else
    945     char* m;
    946     if (vasprintf(&m, message, args) == -1)
    947         return;
    948 #endif
    949 
    950     if (m_parserPaused)
    951         m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(m), lineNumber(), columnNumber());
    952     else
    953         handleError(type, m, lineNumber(), columnNumber());
    954 
    955 #if !COMPILER(MSVC) && !COMPILER(RVCT)
    956     free(m);
    957 #endif
    958 }
    959 
    960 void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data)
    961 {
    962     if (isStopped())
    963         return;
    964 
    965     if (m_parserPaused) {
    966         m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
    967         return;
    968     }
    969 
    970     exitText();
    971 
    972     // ### handle exceptions
    973     ExceptionCode ec = 0;
    974     RefPtr<ProcessingInstruction> pi = document()->createProcessingInstruction(
    975         toString(target), toString(data), ec);
    976     if (ec)
    977         return;
    978 
    979     pi->setCreatedByParser(true);
    980 
    981     m_currentNode->deprecatedParserAddChild(pi.get());
    982     if (m_view && !pi->attached())
    983         pi->attach();
    984 
    985     pi->finishParsingChildren();
    986 
    987     if (pi->isCSS())
    988         m_sawCSS = true;
    989 #if ENABLE(XSLT)
    990     m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
    991     if (m_sawXSLTransform && !document()->transformSourceDocument())
    992         stopParsing();
    993 #endif
    994 }
    995 
    996 void XMLDocumentParser::cdataBlock(const xmlChar* s, int len)
    997 {
    998     if (isStopped())
    999         return;
   1000 
   1001     if (m_parserPaused) {
   1002         m_pendingCallbacks->appendCDATABlockCallback(s, len);
   1003         return;
   1004     }
   1005 
   1006     exitText();
   1007 
   1008     RefPtr<Node> newNode = CDATASection::create(document(), toString(s, len));
   1009     m_currentNode->deprecatedParserAddChild(newNode.get());
   1010     if (m_view && !newNode->attached())
   1011         newNode->attach();
   1012 }
   1013 
   1014 void XMLDocumentParser::comment(const xmlChar* s)
   1015 {
   1016     if (isStopped())
   1017         return;
   1018 
   1019     if (m_parserPaused) {
   1020         m_pendingCallbacks->appendCommentCallback(s);
   1021         return;
   1022     }
   1023 
   1024     exitText();
   1025 
   1026     RefPtr<Node> newNode = Comment::create(document(), toString(s));
   1027     m_currentNode->deprecatedParserAddChild(newNode.get());
   1028     if (m_view && !newNode->attached())
   1029         newNode->attach();
   1030 }
   1031 
   1032 void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
   1033 {
   1034     ExceptionCode ec = 0;
   1035 
   1036     if (version)
   1037         document()->setXMLVersion(toString(version), ec);
   1038     document()->setXMLStandalone(standalone == 1, ec); // possible values are 0, 1, and -1
   1039     if (encoding)
   1040         document()->setXMLEncoding(toString(encoding));
   1041 }
   1042 
   1043 void XMLDocumentParser::endDocument()
   1044 {
   1045     exitText();
   1046 #if ENABLE(XHTMLMP)
   1047     m_hasDocTypeDeclaration = false;
   1048 #endif
   1049 }
   1050 
   1051 void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
   1052 {
   1053     if (isStopped())
   1054         return;
   1055 
   1056     if (m_parserPaused) {
   1057         m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
   1058         return;
   1059     }
   1060 
   1061     if (document()) {
   1062 #if ENABLE(WML) || ENABLE(XHTMLMP)
   1063         String extId = toString(externalID);
   1064 #endif
   1065 #if ENABLE(WML)
   1066         if (isWMLDocument()
   1067             && extId != "-//WAPFORUM//DTD WML 1.3//EN"
   1068             && extId != "-//WAPFORUM//DTD WML 1.2//EN"
   1069             && extId != "-//WAPFORUM//DTD WML 1.1//EN"
   1070             && extId != "-//WAPFORUM//DTD WML 1.0//EN")
   1071             handleError(fatal, "Invalid DTD Public ID", lineNumber(), columnNumber());
   1072 #endif
   1073 #if ENABLE(XHTMLMP)
   1074         String dtdName = toString(name);
   1075         if (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
   1076             || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") {
   1077             if (dtdName != HTMLNames::htmlTag.localName()) {
   1078                 handleError(fatal, "Invalid DOCTYPE declaration, expected 'html' as root element.", lineNumber(), columnNumber());
   1079                 return;
   1080             }
   1081 
   1082             if (document()->isXHTMLMPDocument())
   1083                 setIsXHTMLMPDocument(true);
   1084             else
   1085                 setIsXHTMLDocument(true);
   1086 
   1087             m_hasDocTypeDeclaration = true;
   1088         }
   1089 #endif
   1090 
   1091         document()->parserAddChild(DocumentType::create(document(), toString(name), toString(externalID), toString(systemID)));
   1092     }
   1093 }
   1094 
   1095 static inline XMLDocumentParser* getParser(void* closure)
   1096 {
   1097     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1098     return static_cast<XMLDocumentParser*>(ctxt->_private);
   1099 }
   1100 
   1101 // This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
   1102 // Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
   1103 static inline bool hackAroundLibXMLEntityBug(void* closure)
   1104 {
   1105 #if LIBXML_VERSION >= 20627
   1106     UNUSED_PARAM(closure);
   1107 
   1108     // This bug has been fixed in libxml 2.6.27.
   1109     return false;
   1110 #else
   1111     return static_cast<xmlParserCtxtPtr>(closure)->node;
   1112 #endif
   1113 }
   1114 
   1115 static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
   1116 {
   1117     if (hackAroundLibXMLEntityBug(closure))
   1118         return;
   1119 
   1120     getParser(closure)->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes);
   1121 }
   1122 
   1123 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
   1124 {
   1125     if (hackAroundLibXMLEntityBug(closure))
   1126         return;
   1127 
   1128     getParser(closure)->endElementNs();
   1129 }
   1130 
   1131 static void charactersHandler(void* closure, const xmlChar* s, int len)
   1132 {
   1133     if (hackAroundLibXMLEntityBug(closure))
   1134         return;
   1135 
   1136     getParser(closure)->characters(s, len);
   1137 }
   1138 
   1139 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
   1140 {
   1141     if (hackAroundLibXMLEntityBug(closure))
   1142         return;
   1143 
   1144     getParser(closure)->processingInstruction(target, data);
   1145 }
   1146 
   1147 static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
   1148 {
   1149     if (hackAroundLibXMLEntityBug(closure))
   1150         return;
   1151 
   1152     getParser(closure)->cdataBlock(s, len);
   1153 }
   1154 
   1155 static void commentHandler(void* closure, const xmlChar* comment)
   1156 {
   1157     if (hackAroundLibXMLEntityBug(closure))
   1158         return;
   1159 
   1160     getParser(closure)->comment(comment);
   1161 }
   1162 
   1163 WTF_ATTRIBUTE_PRINTF(2, 3)
   1164 static void warningHandler(void* closure, const char* message, ...)
   1165 {
   1166     va_list args;
   1167     va_start(args, message);
   1168     getParser(closure)->error(XMLDocumentParser::warning, message, args);
   1169     va_end(args);
   1170 }
   1171 
   1172 WTF_ATTRIBUTE_PRINTF(2, 3)
   1173 static void fatalErrorHandler(void* closure, const char* message, ...)
   1174 {
   1175     va_list args;
   1176     va_start(args, message);
   1177     getParser(closure)->error(XMLDocumentParser::fatal, message, args);
   1178     va_end(args);
   1179 }
   1180 
   1181 WTF_ATTRIBUTE_PRINTF(2, 3)
   1182 static void normalErrorHandler(void* closure, const char* message, ...)
   1183 {
   1184     va_list args;
   1185     va_start(args, message);
   1186     getParser(closure)->error(XMLDocumentParser::nonFatal, message, args);
   1187     va_end(args);
   1188 }
   1189 
   1190 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
   1191 // a hack to avoid malloc/free. Using a global variable like this could cause trouble
   1192 // if libxml implementation details were to change
   1193 static xmlChar sharedXHTMLEntityResult[5] = {0, 0, 0, 0, 0};
   1194 
   1195 static xmlEntityPtr sharedXHTMLEntity()
   1196 {
   1197     static xmlEntity entity;
   1198     if (!entity.type) {
   1199         entity.type = XML_ENTITY_DECL;
   1200         entity.orig = sharedXHTMLEntityResult;
   1201         entity.content = sharedXHTMLEntityResult;
   1202         entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1203     }
   1204     return &entity;
   1205 }
   1206 
   1207 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
   1208 {
   1209     UChar c = decodeNamedEntity(reinterpret_cast<const char*>(name));
   1210     if (!c)
   1211         return 0;
   1212 
   1213     CString value = String(&c, 1).utf8();
   1214     ASSERT(value.length() < 5);
   1215     xmlEntityPtr entity = sharedXHTMLEntity();
   1216     entity->length = value.length();
   1217     entity->name = name;
   1218     memcpy(sharedXHTMLEntityResult, value.data(), entity->length + 1);
   1219 
   1220     return entity;
   1221 }
   1222 
   1223 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
   1224 {
   1225     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1226     xmlEntityPtr ent = xmlGetPredefinedEntity(name);
   1227     if (ent) {
   1228         ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1229         return ent;
   1230     }
   1231 
   1232     ent = xmlGetDocEntity(ctxt->myDoc, name);
   1233     if (!ent && (getParser(closure)->isXHTMLDocument()
   1234 #if ENABLE(XHTMLMP)
   1235                  || getParser(closure)->isXHTMLMPDocument()
   1236 #endif
   1237 #if ENABLE(WML)
   1238                  || getParser(closure)->isWMLDocument()
   1239 #endif
   1240        )) {
   1241         ent = getXHTMLEntity(name);
   1242         if (ent)
   1243             ent->etype = XML_INTERNAL_GENERAL_ENTITY;
   1244     }
   1245 
   1246     return ent;
   1247 }
   1248 
   1249 static void startDocumentHandler(void* closure)
   1250 {
   1251     xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
   1252     switchToUTF16(ctxt);
   1253     getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
   1254     xmlSAX2StartDocument(closure);
   1255 }
   1256 
   1257 static void endDocumentHandler(void* closure)
   1258 {
   1259     getParser(closure)->endDocument();
   1260     xmlSAX2EndDocument(closure);
   1261 }
   1262 
   1263 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
   1264 {
   1265     getParser(closure)->internalSubset(name, externalID, systemID);
   1266     xmlSAX2InternalSubset(closure, name, externalID, systemID);
   1267 }
   1268 
   1269 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
   1270 {
   1271     String extId = toString(externalId);
   1272     if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
   1273         || (extId == "-//W3C//DTD XHTML 1.1//EN")
   1274         || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
   1275         || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
   1276         || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
   1277         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
   1278         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
   1279         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
   1280        )
   1281         getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
   1282 }
   1283 
   1284 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
   1285 {
   1286     // nothing to do, but we need this to work around a crasher
   1287     // http://bugzilla.gnome.org/show_bug.cgi?id=172255
   1288     // http://bugs.webkit.org/show_bug.cgi?id=5792
   1289 }
   1290 
   1291 void XMLDocumentParser::initializeParserContext(const char* chunk)
   1292 {
   1293     xmlSAXHandler sax;
   1294     memset(&sax, 0, sizeof(sax));
   1295 
   1296     sax.error = normalErrorHandler;
   1297     sax.fatalError = fatalErrorHandler;
   1298     sax.characters = charactersHandler;
   1299     sax.processingInstruction = processingInstructionHandler;
   1300     sax.cdataBlock = cdataBlockHandler;
   1301     sax.comment = commentHandler;
   1302     sax.warning = warningHandler;
   1303     sax.startElementNs = startElementNsHandler;
   1304     sax.endElementNs = endElementNsHandler;
   1305     sax.getEntity = getEntityHandler;
   1306     sax.startDocument = startDocumentHandler;
   1307     sax.endDocument = endDocumentHandler;
   1308     sax.internalSubset = internalSubsetHandler;
   1309     sax.externalSubset = externalSubsetHandler;
   1310     sax.ignorableWhitespace = ignorableWhitespaceHandler;
   1311     sax.entityDecl = xmlSAX2EntityDecl;
   1312     sax.initialized = XML_SAX2_MAGIC;
   1313     DocumentParser::startParsing();
   1314     m_sawError = false;
   1315     m_sawCSS = false;
   1316     m_sawXSLTransform = false;
   1317     m_sawFirstElement = false;
   1318 
   1319     XMLDocumentParserScope scope(document()->cachedResourceLoader());
   1320     if (m_parsingFragment)
   1321         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
   1322     else {
   1323         ASSERT(!chunk);
   1324         m_context = XMLParserContext::createStringParser(&sax, this);
   1325     }
   1326 }
   1327 
   1328 void XMLDocumentParser::doEnd()
   1329 {
   1330     if (!isStopped()) {
   1331         if (m_context) {
   1332             // Tell libxml we're done.
   1333             {
   1334                 XMLDocumentParserScope scope(document()->cachedResourceLoader());
   1335                 xmlParseChunk(context(), 0, 0, 1);
   1336             }
   1337 
   1338             m_context = 0;
   1339         }
   1340     }
   1341 
   1342 #if ENABLE(XSLT)
   1343     XMLTreeViewer xmlTreeViewer(document());
   1344     bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && xmlTreeViewer.hasNoStyleInformation();
   1345     if (xmlViewerMode)
   1346         xmlTreeViewer.transformDocumentToTreeView();
   1347 
   1348     if (m_sawXSLTransform) {
   1349         void* doc = xmlDocPtrForString(document()->cachedResourceLoader(), m_originalSourceForTransform, document()->url().string());
   1350         document()->setTransformSource(new TransformSource(doc));
   1351 
   1352         document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
   1353         document()->styleSelectorChanged(RecalcStyleImmediately);
   1354         document()->setParsing(true);
   1355 
   1356         DocumentParser::stopParsing();
   1357     }
   1358 #endif
   1359 }
   1360 
   1361 #if ENABLE(XSLT)
   1362 void* xmlDocPtrForString(CachedResourceLoader* cachedResourceLoader, const String& source, const String& url)
   1363 {
   1364     if (source.isEmpty())
   1365         return 0;
   1366 
   1367     // Parse in a single chunk into an xmlDocPtr
   1368     // FIXME: Hook up error handlers so that a failure to parse the main document results in
   1369     // good error messages.
   1370     const UChar BOM = 0xFEFF;
   1371     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
   1372 
   1373     XMLDocumentParserScope scope(cachedResourceLoader, errorFunc, 0);
   1374     xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()),
   1375                                         source.length() * sizeof(UChar),
   1376                                         url.latin1().data(),
   1377                                         BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE",
   1378                                         XSLT_PARSE_OPTIONS);
   1379     return sourceDoc;
   1380 }
   1381 #endif
   1382 
   1383 int XMLDocumentParser::lineNumber() const
   1384 {
   1385     // FIXME: The implementation probably returns 1-based int, but method should return 0-based.
   1386     return context() ? context()->input->line : 1;
   1387 }
   1388 
   1389 int XMLDocumentParser::columnNumber() const
   1390 {
   1391     // FIXME: The implementation probably returns 1-based int, but method should return 0-based.
   1392     return context() ? context()->input->col : 1;
   1393 }
   1394 
   1395 TextPosition0 XMLDocumentParser::textPosition() const
   1396 {
   1397     xmlParserCtxtPtr context = this->context();
   1398     if (!context)
   1399         return TextPosition0::minimumPosition();
   1400     // FIXME: The context probably contains 1-based numbers, but we treat them as 0-based,
   1401     //        to be consistent with fixme's in lineNumber() and columnNumber
   1402     //        methods.
   1403     return TextPosition0(WTF::ZeroBasedNumber::fromZeroBasedInt(context->input->line),
   1404         WTF::ZeroBasedNumber::fromZeroBasedInt(context->input->col));
   1405 }
   1406 
   1407 // This method has a correct implementation, in contrast to textPosition() method.
   1408 // It should replace textPosition().
   1409 TextPosition1 XMLDocumentParser::textPositionOneBased() const
   1410 {
   1411     xmlParserCtxtPtr context = this->context();
   1412     if (!context)
   1413         return TextPosition1::minimumPosition();
   1414     return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(context->input->line),
   1415         WTF::OneBasedNumber::fromOneBasedInt(context->input->col));
   1416 }
   1417 
   1418 void XMLDocumentParser::stopParsing()
   1419 {
   1420     DocumentParser::stopParsing();
   1421     if (context())
   1422         xmlStopParser(context());
   1423 }
   1424 
   1425 void XMLDocumentParser::resumeParsing()
   1426 {
   1427     ASSERT(!isDetached());
   1428     ASSERT(m_parserPaused);
   1429 
   1430     m_parserPaused = false;
   1431 
   1432     // First, execute any pending callbacks
   1433     while (!m_pendingCallbacks->isEmpty()) {
   1434         m_pendingCallbacks->callAndRemoveFirstCallback(this);
   1435 
   1436         // A callback paused the parser
   1437         if (m_parserPaused)
   1438             return;
   1439     }
   1440 
   1441     // Then, write any pending data
   1442     SegmentedString rest = m_pendingSrc;
   1443     m_pendingSrc.clear();
   1444     append(rest);
   1445 
   1446     // Finally, if finish() has been called and write() didn't result
   1447     // in any further callbacks being queued, call end()
   1448     if (m_finishCalled && m_pendingCallbacks->isEmpty())
   1449         end();
   1450 }
   1451 
   1452 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
   1453 {
   1454     ASSERT(!m_context);
   1455     ASSERT(m_parsingFragment);
   1456 
   1457     CString chunkAsUtf8 = chunk.utf8();
   1458     initializeParserContext(chunkAsUtf8.data());
   1459     xmlParseContent(context());
   1460     endDocument(); // Close any open text nodes.
   1461 
   1462     // FIXME: If this code is actually needed, it should probably move to finish()
   1463     // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
   1464     // Check if all the chunk has been processed.
   1465     long bytesProcessed = xmlByteConsumed(context());
   1466     if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
   1467         // FIXME: I don't believe we can hit this case without also having seen an error.
   1468         // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
   1469         ASSERT(m_sawError);
   1470         return false;
   1471     }
   1472 
   1473     // No error if the chunk is well formed or it is not but we have no error.
   1474     return context()->wellFormed || !xmlCtxtGetLastError(context());
   1475 }
   1476 
   1477 // --------------------------------
   1478 
   1479 struct AttributeParseState {
   1480     HashMap<String, String> attributes;
   1481     bool gotAttributes;
   1482 };
   1483 
   1484 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
   1485                                             const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/,
   1486                                             int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes)
   1487 {
   1488     if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
   1489         return;
   1490 
   1491     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1492     AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
   1493 
   1494     state->gotAttributes = true;
   1495 
   1496     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
   1497     for (int i = 0; i < nb_attributes; i++) {
   1498         String attrLocalName = toString(attributes[i].localname);
   1499         int valueLength = (int) (attributes[i].end - attributes[i].value);
   1500         String attrValue = toString(attributes[i].value, valueLength);
   1501         String attrPrefix = toString(attributes[i].prefix);
   1502         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
   1503 
   1504         state->attributes.set(attrQName, attrValue);
   1505     }
   1506 }
   1507 
   1508 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
   1509 {
   1510     AttributeParseState state;
   1511     state.gotAttributes = false;
   1512 
   1513     xmlSAXHandler sax;
   1514     memset(&sax, 0, sizeof(sax));
   1515     sax.startElementNs = attributesStartElementNsHandler;
   1516     sax.initialized = XML_SAX2_MAGIC;
   1517     RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
   1518     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
   1519     xmlParseChunk(parser->context(), reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1);
   1520     attrsOK = state.gotAttributes;
   1521     return state.attributes;
   1522 }
   1523 
   1524 }
   1525