Home | History | Annotate | Download | only in dom
      1 /*
      2  * Copyright (C) 2000 Peter Kelly (pmk (at) post.com)
      3  * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
      4  * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org)
      5  * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org)
      6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
      7  * Copyright (C) 2008 Holger Hans Peter Freyther
      8  * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
      9  *
     10  * This library is free software; you can redistribute it and/or
     11  * modify it under the terms of the GNU Library General Public
     12  * License as published by the Free Software Foundation; either
     13  * version 2 of the License, or (at your option) any later version.
     14  *
     15  * This library is distributed in the hope that it will be useful,
     16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     18  * Library General Public License for more details.
     19  *
     20  * You should have received a copy of the GNU Library General Public License
     21  * along with this library; see the file COPYING.LIB.  If not, write to
     22  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     23  * Boston, MA 02110-1301, USA.
     24  */
     25 
     26 #include "config.h"
     27 #include "XMLTokenizer.h"
     28 
     29 #include "CDATASection.h"
     30 #include "CString.h"
     31 #include "CachedScript.h"
     32 #include "Comment.h"
     33 #include "DocLoader.h"
     34 #include "Document.h"
     35 #include "DocumentFragment.h"
     36 #include "DocumentType.h"
     37 #include "Frame.h"
     38 #include "FrameLoader.h"
     39 #include "FrameView.h"
     40 #include "HTMLLinkElement.h"
     41 #include "HTMLStyleElement.h"
     42 #include "HTMLTokenizer.h" // for decodeNamedEntity
     43 #include "ProcessingInstruction.h"
     44 #include "ResourceError.h"
     45 #include "ResourceHandle.h"
     46 #include "ResourceRequest.h"
     47 #include "ResourceResponse.h"
     48 #include "ScriptController.h"
     49 #include "ScriptElement.h"
     50 #include "ScriptSourceCode.h"
     51 #include "ScriptValue.h"
     52 #include "TextResourceDecoder.h"
     53 #include "TransformSource.h"
     54 #include "XMLNSNames.h"
     55 #include "XMLTokenizerScope.h"
     56 #include <libxml/parser.h>
     57 #include <libxml/parserInternals.h>
     58 #include <wtf/Platform.h>
     59 #include <wtf/StringExtras.h>
     60 #include <wtf/Threading.h>
     61 #include <wtf/UnusedParam.h>
     62 #include <wtf/Vector.h>
     63 
     64 #if ENABLE(XSLT)
     65 #include <libxslt/xslt.h>
     66 #endif
     67 
     68 #if ENABLE(XHTMLMP)
     69 #include "HTMLNames.h"
     70 #include "HTMLScriptElement.h"
     71 #endif
     72 
     73 using namespace std;
     74 
     75 namespace WebCore {
     76 
     77 class PendingCallbacks : public Noncopyable {
     78 public:
     79     ~PendingCallbacks()
     80     {
     81         deleteAllValues(m_callbacks);
     82     }
     83 
     84     void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
     85                                       const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes)
     86     {
     87         PendingStartElementNSCallback* callback = new PendingStartElementNSCallback;
     88 
     89         callback->xmlLocalName = xmlStrdup(xmlLocalName);
     90         callback->xmlPrefix = xmlStrdup(xmlPrefix);
     91         callback->xmlURI = xmlStrdup(xmlURI);
     92         callback->nb_namespaces = nb_namespaces;
     93         callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2));
     94         for (int i = 0; i < nb_namespaces * 2 ; i++)
     95             callback->namespaces[i] = xmlStrdup(namespaces[i]);
     96         callback->nb_attributes = nb_attributes;
     97         callback->nb_defaulted = nb_defaulted;
     98         callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5));
     99         for (int i = 0; i < nb_attributes; i++) {
    100             // Each attribute has 5 elements in the array:
    101             // name, prefix, uri, value and an end pointer.
    102 
    103             for (int j = 0; j < 3; j++)
    104                 callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
    105 
    106             int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
    107 
    108             callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
    109             callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
    110         }
    111 
    112         m_callbacks.append(callback);
    113     }
    114 
    115     void appendEndElementNSCallback()
    116     {
    117         PendingEndElementNSCallback* callback = new PendingEndElementNSCallback;
    118 
    119         m_callbacks.append(callback);
    120     }
    121 
    122     void appendCharactersCallback(const xmlChar* s, int len)
    123     {
    124         PendingCharactersCallback* callback = new PendingCharactersCallback;
    125 
    126         callback->s = xmlStrndup(s, len);
    127         callback->len = len;
    128 
    129         m_callbacks.append(callback);
    130     }
    131 
    132     void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
    133     {
    134         PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback;
    135 
    136         callback->target = xmlStrdup(target);
    137         callback->data = xmlStrdup(data);
    138 
    139         m_callbacks.append(callback);
    140     }
    141 
    142     void appendCDATABlockCallback(const xmlChar* s, int len)
    143     {
    144         PendingCDATABlockCallback* callback = new PendingCDATABlockCallback;
    145 
    146         callback->s = xmlStrndup(s, len);
    147         callback->len = len;
    148 
    149         m_callbacks.append(callback);
    150     }
    151 
    152     void appendCommentCallback(const xmlChar* s)
    153     {
    154         PendingCommentCallback* callback = new PendingCommentCallback;
    155 
    156         callback->s = xmlStrdup(s);
    157 
    158         m_callbacks.append(callback);
    159     }
    160 
    161     void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
    162     {
    163         PendingInternalSubsetCallback* callback = new PendingInternalSubsetCallback;
    164 
    165         callback->name = xmlStrdup(name);
    166         callback->externalID = xmlStrdup(externalID);
    167         callback->systemID = xmlStrdup(systemID);
    168 
    169         m_callbacks.append(callback);
    170     }
    171 
    172     void appendErrorCallback(XMLTokenizer::ErrorType type, const xmlChar* message, int lineNumber, int columnNumber)
    173     {
    174         PendingErrorCallback* callback = new PendingErrorCallback;
    175 
    176         callback->message = xmlStrdup(message);
    177         callback->type = type;
    178         callback->lineNumber = lineNumber;
    179         callback->columnNumber = columnNumber;
    180 
    181         m_callbacks.append(callback);
    182     }
    183 
    184     void callAndRemoveFirstCallback(XMLTokenizer* tokenizer)
    185     {
    186         OwnPtr<PendingCallback> callback(m_callbacks.first());
    187         m_callbacks.removeFirst();
    188         callback->call(tokenizer);
    189     }
    190 
    191     bool isEmpty() const { return m_callbacks.isEmpty(); }
    192 
    193 private:
    194     struct PendingCallback {
    195         virtual ~PendingCallback() { }
    196         virtual void call(XMLTokenizer* tokenizer) = 0;
    197     };
    198 
    199     struct PendingStartElementNSCallback : public PendingCallback {
    200         virtual ~PendingStartElementNSCallback()
    201         {
    202             xmlFree(xmlLocalName);
    203             xmlFree(xmlPrefix);
    204             xmlFree(xmlURI);
    205             for (int i = 0; i < nb_namespaces * 2; i++)
    206                 xmlFree(namespaces[i]);
    207             xmlFree(namespaces);
    208             for (int i = 0; i < nb_attributes; i++)
    209                 for (int j = 0; j < 4; j++)
    210                     xmlFree(attributes[i * 5 + j]);
    211             xmlFree(attributes);
    212         }
    213 
    214         virtual void call(XMLTokenizer* tokenizer)
    215         {
    216             tokenizer->startElementNs(xmlLocalName, xmlPrefix, xmlURI,
    217                                       nb_namespaces, const_cast<const xmlChar**>(namespaces),
    218                                       nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes));
    219         }
    220 
    221         xmlChar* xmlLocalName;
    222         xmlChar* xmlPrefix;
    223         xmlChar* xmlURI;
    224         int nb_namespaces;
    225         xmlChar** namespaces;
    226         int nb_attributes;
    227         int nb_defaulted;
    228         xmlChar** attributes;
    229     };
    230 
    231     struct PendingEndElementNSCallback : public PendingCallback {
    232         virtual void call(XMLTokenizer* tokenizer)
    233         {
    234             tokenizer->endElementNs();
    235         }
    236     };
    237 
    238     struct PendingCharactersCallback : public PendingCallback {
    239         virtual ~PendingCharactersCallback()
    240         {
    241             xmlFree(s);
    242         }
    243 
    244         virtual void call(XMLTokenizer* tokenizer)
    245         {
    246             tokenizer->characters(s, len);
    247         }
    248 
    249         xmlChar* s;
    250         int len;
    251     };
    252 
    253     struct PendingProcessingInstructionCallback : public PendingCallback {
    254         virtual ~PendingProcessingInstructionCallback()
    255         {
    256             xmlFree(target);
    257             xmlFree(data);
    258         }
    259 
    260         virtual void call(XMLTokenizer* tokenizer)
    261         {
    262             tokenizer->processingInstruction(target, data);
    263         }
    264 
    265         xmlChar* target;
    266         xmlChar* data;
    267     };
    268 
    269     struct PendingCDATABlockCallback : public PendingCallback {
    270         virtual ~PendingCDATABlockCallback()
    271         {
    272             xmlFree(s);
    273         }
    274 
    275         virtual void call(XMLTokenizer* tokenizer)
    276         {
    277             tokenizer->cdataBlock(s, len);
    278         }
    279 
    280         xmlChar* s;
    281         int len;
    282     };
    283 
    284     struct PendingCommentCallback : public PendingCallback {
    285         virtual ~PendingCommentCallback()
    286         {
    287             xmlFree(s);
    288         }
    289 
    290         virtual void call(XMLTokenizer* tokenizer)
    291         {
    292             tokenizer->comment(s);
    293         }
    294 
    295         xmlChar* s;
    296     };
    297 
    298     struct PendingInternalSubsetCallback : public PendingCallback {
    299         virtual ~PendingInternalSubsetCallback()
    300         {
    301             xmlFree(name);
    302             xmlFree(externalID);
    303             xmlFree(systemID);
    304         }
    305 
    306         virtual void call(XMLTokenizer* tokenizer)
    307         {
    308             tokenizer->internalSubset(name, externalID, systemID);
    309         }
    310 
    311         xmlChar* name;
    312         xmlChar* externalID;
    313         xmlChar* systemID;
    314     };
    315 
    316     struct PendingErrorCallback: public PendingCallback {
    317         virtual ~PendingErrorCallback()
    318         {
    319             xmlFree(message);
    320         }
    321 
    322         virtual void call(XMLTokenizer* tokenizer)
    323         {
    324             tokenizer->handleError(type, reinterpret_cast<char*>(message), lineNumber, columnNumber);
    325         }
    326 
    327         XMLTokenizer::ErrorType type;
    328         xmlChar* message;
    329         int lineNumber;
    330         int columnNumber;
    331     };
    332 
    333     Deque<PendingCallback*> m_callbacks;
    334 };
    335 // --------------------------------
    336 
    337 static int globalDescriptor = 0;
    338 static ThreadIdentifier libxmlLoaderThread = 0;
    339 
    340 static int matchFunc(const char*)
    341 {
    342     // Only match loads initiated due to uses of libxml2 from within XMLTokenizer to avoid
    343     // interfering with client applications that also use libxml2.  http://bugs.webkit.org/show_bug.cgi?id=17353
    344     return XMLTokenizerScope::currentDocLoader && currentThread() == libxmlLoaderThread;
    345 }
    346 
    347 class OffsetBuffer {
    348 public:
    349     OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
    350 
    351     int readOutBytes(char* outputBuffer, unsigned askedToRead)
    352     {
    353         unsigned bytesLeft = m_buffer.size() - m_currentOffset;
    354         unsigned lenToCopy = min(askedToRead, bytesLeft);
    355         if (lenToCopy) {
    356             memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
    357             m_currentOffset += lenToCopy;
    358         }
    359         return lenToCopy;
    360     }
    361 
    362 private:
    363     Vector<char> m_buffer;
    364     unsigned m_currentOffset;
    365 };
    366 
    367 static bool shouldAllowExternalLoad(const KURL& url)
    368 {
    369     String urlString = url.string();
    370 
    371     // On non-Windows platforms libxml asks for this URL, the
    372     // "XML_XML_DEFAULT_CATALOG", on initialization.
    373     if (urlString == "file:///etc/xml/catalog")
    374         return false;
    375 
    376     // On Windows, libxml computes a URL relative to where its DLL resides.
    377     if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
    378         return false;
    379 
    380     // The most common DTD.  There isn't much point in hammering www.w3c.org
    381     // by requesting this URL for every XHTML document.
    382     if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
    383         return false;
    384 
    385     // Similarly, there isn't much point in requesting the SVG DTD.
    386     if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
    387         return false;
    388 
    389     // The libxml doesn't give us a lot of context for deciding whether to
    390     // allow this request.  In the worst case, this load could be for an
    391     // external entity and the resulting document could simply read the
    392     // retrieved content.  If we had more context, we could potentially allow
    393     // the parser to load a DTD.  As things stand, we take the conservative
    394     // route and allow same-origin requests only.
    395     if (!XMLTokenizerScope::currentDocLoader->doc()->securityOrigin()->canRequest(url)) {
    396         XMLTokenizerScope::currentDocLoader->printAccessDeniedMessage(url);
    397         return false;
    398     }
    399 
    400     return true;
    401 }
    402 
    403 static void* openFunc(const char* uri)
    404 {
    405     ASSERT(XMLTokenizerScope::currentDocLoader);
    406     ASSERT(currentThread() == libxmlLoaderThread);
    407 
    408     KURL url(KURL(), uri);
    409 
    410     if (!shouldAllowExternalLoad(url))
    411         return &globalDescriptor;
    412 
    413     ResourceError error;
    414     ResourceResponse response;
    415     Vector<char> data;
    416 
    417 
    418     {
    419         DocLoader* docLoader = XMLTokenizerScope::currentDocLoader;
    420         XMLTokenizerScope scope(0);
    421         // FIXME: We should restore the original global error handler as well.
    422 
    423         if (docLoader->frame())
    424             docLoader->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data);
    425     }
    426 
    427     // We have to check the URL again after the load to catch redirects.
    428     // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
    429     if (!shouldAllowExternalLoad(response.url()))
    430         return &globalDescriptor;
    431 
    432     return new OffsetBuffer(data);
    433 }
    434 
    435 static int readFunc(void* context, char* buffer, int len)
    436 {
    437     // Do 0-byte reads in case of a null descriptor
    438     if (context == &globalDescriptor)
    439         return 0;
    440 
    441     OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
    442     return data->readOutBytes(buffer, len);
    443 }
    444 
    // libxml2 I/O "write" callback: ignores all of its arguments.
    static int writeFunc(void* /* context */, const char* /* buffer */, int /* len */)
    {
        // Always just do 0-byte writes
        const int bytesWritten = 0;
        return bytesWritten;
    }
    450 
    451 static int closeFunc(void* context)
    452 {
    453     if (context != &globalDescriptor) {
    454         OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
    455         delete data;
    456     }
    457     return 0;
    458 }
    459 
    460 #if ENABLE(XSLT)
    461 static void errorFunc(void*, const char*, ...)
    462 {
    463     // FIXME: It would be nice to display error messages somewhere.
    464 }
    465 #endif
    466 
    467 static bool didInit = false;
    468 
    469 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
    470 {
    471     if (!didInit) {
    472         xmlInitParser();
    473         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
    474         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
    475         libxmlLoaderThread = currentThread();
    476         didInit = true;
    477     }
    478 
    479     xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
    480     parser->_private = userData;
    481     parser->replaceEntities = true;
    482     const UChar BOM = 0xFEFF;
    483     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
    484     xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
    485 
    486     return adoptRef(new XMLParserContext(parser));
    487 }
    488 
    489 
    490 // Chunk should be encoded in UTF-8
    491 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk)
    492 {
    493     if (!didInit) {
    494         xmlInitParser();
    495         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
    496         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
    497         libxmlLoaderThread = currentThread();
    498         didInit = true;
    499     }
    500 
    501     xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk, xmlStrlen((const xmlChar*)chunk));
    502 
    503     if (!parser)
    504         return 0;
    505 
    506     // Copy the sax handler
    507     memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
    508 
    509     // Set parser options.
    510     // XML_PARSE_NODICT: default dictionary option.
    511     // XML_PARSE_NOENT: force entities substitutions.
    512     xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
    513 
    514     // Internal initialization
    515     parser->sax2 = 1;
    516     parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
    517     parser->depth = 0;
    518     parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
    519     parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
    520     parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
    521     parser->_private = userData;
    522 
    523     return adoptRef(new XMLParserContext(parser));
    524 }
    525 
    526 // --------------------------------
    527 
    528 XMLTokenizer::XMLTokenizer(Document* _doc, FrameView* _view)
    529     : m_doc(_doc)
    530     , m_view(_view)
    531     , m_context(0)
    532     , m_pendingCallbacks(new PendingCallbacks)
    533     , m_currentNode(_doc)
    534     , m_sawError(false)
    535     , m_sawXSLTransform(false)
    536     , m_sawFirstElement(false)
    537     , m_isXHTMLDocument(false)
    538 #if ENABLE(XHTMLMP)
    539     , m_isXHTMLMPDocument(false)
    540     , m_hasDocTypeDeclaration(false)
    541 #endif
    542     , m_parserPaused(false)
    543     , m_requestingScript(false)
    544     , m_finishCalled(false)
    545     , m_errorCount(0)
    546     , m_lastErrorLine(0)
    547     , m_lastErrorColumn(0)
    548     , m_pendingScript(0)
    549     , m_scriptStartLine(0)
    550     , m_parsingFragment(false)
    551     , m_scriptingPermission(FragmentScriptingAllowed)
    552 {
    553 }
    554 
    555 XMLTokenizer::XMLTokenizer(DocumentFragment* fragment, Element* parentElement, FragmentScriptingPermission scriptingPermission)
    556     : m_doc(fragment->document())
    557     , m_view(0)
    558     , m_context(0)
    559     , m_pendingCallbacks(new PendingCallbacks)
    560     , m_currentNode(fragment)
    561     , m_sawError(false)
    562     , m_sawXSLTransform(false)
    563     , m_sawFirstElement(false)
    564     , m_isXHTMLDocument(false)
    565 #if ENABLE(XHTMLMP)
    566     , m_isXHTMLMPDocument(false)
    567     , m_hasDocTypeDeclaration(false)
    568 #endif
    569     , m_parserPaused(false)
    570     , m_requestingScript(false)
    571     , m_finishCalled(false)
    572     , m_errorCount(0)
    573     , m_lastErrorLine(0)
    574     , m_lastErrorColumn(0)
    575     , m_pendingScript(0)
    576     , m_scriptStartLine(0)
    577     , m_parsingFragment(true)
    578     , m_scriptingPermission(scriptingPermission)
    579 {
    580     fragment->ref();
    581     if (m_doc)
    582         m_doc->ref();
    583 
    584     // Add namespaces based on the parent node
    585     Vector<Element*> elemStack;
    586     while (parentElement) {
    587         elemStack.append(parentElement);
    588 
    589         Node* n = parentElement->parentNode();
    590         if (!n || !n->isElementNode())
    591             break;
    592         parentElement = static_cast<Element*>(n);
    593     }
    594 
    595     if (elemStack.isEmpty())
    596         return;
    597 
    598     for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
    599         if (NamedNodeMap* attrs = element->attributes()) {
    600             for (unsigned i = 0; i < attrs->length(); i++) {
    601                 Attribute* attr = attrs->attributeItem(i);
    602                 if (attr->localName() == xmlnsAtom)
    603                     m_defaultNamespaceURI = attr->value();
    604                 else if (attr->prefix() == xmlnsAtom)
    605                     m_prefixToNamespaceMap.set(attr->localName(), attr->value());
    606             }
    607         }
    608     }
    609 
    610     // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
    611     if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
    612         m_defaultNamespaceURI = parentElement->namespaceURI();
    613 }
    614 
    615 XMLParserContext::~XMLParserContext()
    616 {
    617     if (m_context->myDoc)
    618         xmlFreeDoc(m_context->myDoc);
    619     xmlFreeParserCtxt(m_context);
    620 }
    621 
    622 XMLTokenizer::~XMLTokenizer()
    623 {
    624     clearCurrentNodeStack();
    625     if (m_parsingFragment && m_doc)
    626         m_doc->deref();
    627     if (m_pendingScript)
    628         m_pendingScript->removeClient(this);
    629 }
    630 
    631 void XMLTokenizer::doWrite(const String& parseString)
    632 {
    633     if (!m_context)
    634         initializeParserContext();
    635 
    636     // Protect the libxml context from deletion during a callback
    637     RefPtr<XMLParserContext> context = m_context;
    638 
    639     // libXML throws an error if you try to switch the encoding for an empty string.
    640     if (parseString.length()) {
    641         // Hack around libxml2's lack of encoding overide support by manually
    642         // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
    643         // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
    644         // and switch encodings, causing the parse to fail.
    645         const UChar BOM = 0xFEFF;
    646         const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
    647         xmlSwitchEncoding(context->context(), BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
    648 
    649         XMLTokenizerScope scope(m_doc->docLoader());
    650         xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
    651     }
    652 
    653     if (m_doc->decoder() && m_doc->decoder()->sawError()) {
    654         // If the decoder saw an error, report it as fatal (stops parsing)
    655         handleError(fatal, "Encoding error", context->context()->input->line, context->context()->input->col);
    656     }
    657 
    658     return;
    659 }
    660 
    661 static inline String toString(const xmlChar* str, unsigned len)
    662 {
    663     return UTF8Encoding().decode(reinterpret_cast<const char*>(str), len);
    664 }
    665 
    666 static inline String toString(const xmlChar* str)
    667 {
    668     if (!str)
    669         return String();
    670 
    671     return UTF8Encoding().decode(reinterpret_cast<const char*>(str), strlen(reinterpret_cast<const char*>(str)));
    672 }
    673 
    674 struct _xmlSAX2Namespace {
    675     const xmlChar* prefix;
    676     const xmlChar* uri;
    677 };
    678 typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
    679 
    680 static inline void handleElementNamespaces(Element* newElement, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
    681 {
    682     xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
    683     for (int i = 0; i < nb_namespaces; i++) {
    684         AtomicString namespaceQName = xmlnsAtom;
    685         String namespaceURI = toString(namespaces[i].uri);
    686         if (namespaces[i].prefix)
    687             namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
    688         newElement->setAttributeNS(XMLNSNames::xmlnsNamespaceURI, namespaceQName, namespaceURI, ec, scriptingPermission);
    689         if (ec) // exception setting attributes
    690             return;
    691     }
    692 }
    693 
    694 struct _xmlSAX2Attributes {
    695     const xmlChar* localname;
    696     const xmlChar* prefix;
    697     const xmlChar* uri;
    698     const xmlChar* value;
    699     const xmlChar* end;
    700 };
    701 typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
    702 
    703 static inline void handleElementAttributes(Element* newElement, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
    704 {
    705     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
    706     for (int i = 0; i < nb_attributes; i++) {
    707         String attrLocalName = toString(attributes[i].localname);
    708         int valueLength = (int) (attributes[i].end - attributes[i].value);
    709         String attrValue = toString(attributes[i].value, valueLength);
    710         String attrPrefix = toString(attributes[i].prefix);
    711         String attrURI = attrPrefix.isEmpty() ? String() : toString(attributes[i].uri);
    712         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
    713 
    714         newElement->setAttributeNS(attrURI, attrQName, attrValue, ec, scriptingPermission);
    715         if (ec) // exception setting attributes
    716             return;
    717     }
    718 }
    719 
    720 void XMLTokenizer::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
    721                                   const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
    722 {
    723     if (m_parserStopped)
    724         return;
    725 
    726     if (m_parserPaused) {
    727         m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces,
    728                                                          nb_attributes, nb_defaulted, libxmlAttributes);
    729         return;
    730     }
    731 
    732 #if ENABLE(XHTMLMP)
    733     // check if the DOCTYPE Declaration of XHTMLMP document exists
    734     if (!m_hasDocTypeDeclaration && m_doc->isXHTMLMPDocument()) {
    735         handleError(fatal, "DOCTYPE declaration lost.", lineNumber(), columnNumber());
    736         return;
    737     }
    738 #endif
    739 
    740     exitText();
    741 
    742     String localName = toString(xmlLocalName);
    743     String uri = toString(xmlURI);
    744     String prefix = toString(xmlPrefix);
    745 
    746     if (m_parsingFragment && uri.isNull()) {
    747         if (!prefix.isNull())
    748             uri = m_prefixToNamespaceMap.get(prefix);
    749         else
    750             uri = m_defaultNamespaceURI;
    751     }
    752 
    753 #if ENABLE(XHTMLMP)
    754     if (!m_sawFirstElement && isXHTMLMPDocument()) {
    755         // As per the section 7.1 of OMA-WAP-XHTMLMP-V1_1-20061020-A.pdf,
    756         // we should make sure that the root element MUST be 'html' and
    757         // ensure the name of the default namespace on the root elment 'html'
    758         // MUST be 'http://www.w3.org/1999/xhtml'
    759         if (localName != HTMLNames::htmlTag.localName()) {
    760             handleError(fatal, "XHTMLMP document expects 'html' as root element.", lineNumber(), columnNumber());
    761             return;
    762         }
    763 
    764         if (uri.isNull()) {
    765             m_defaultNamespaceURI = HTMLNames::xhtmlNamespaceURI;
    766             uri = m_defaultNamespaceURI;
    767         }
    768     }
    769 #endif
    770 
    771     bool isFirstElement = !m_sawFirstElement;
    772     m_sawFirstElement = true;
    773 
    774     QualifiedName qName(prefix, localName, uri);
    775     RefPtr<Element> newElement = m_doc->createElement(qName, true);
    776     if (!newElement) {
    777         stopParsing();
    778         return;
    779     }
    780 
    781     ExceptionCode ec = 0;
    782     handleElementNamespaces(newElement.get(), libxmlNamespaces, nb_namespaces, ec, m_scriptingPermission);
    783     if (ec) {
    784         stopParsing();
    785         return;
    786     }
    787 
    788     ScriptController* jsProxy = m_doc->frame() ? m_doc->frame()->script() : 0;
    789     if (jsProxy && m_doc->frame()->script()->canExecuteScripts())
    790         jsProxy->setEventHandlerLineNumber(lineNumber());
    791 
    792     handleElementAttributes(newElement.get(), libxmlAttributes, nb_attributes, ec, m_scriptingPermission);
    793     if (ec) {
    794         stopParsing();
    795         return;
    796     }
    797 
    798     if (jsProxy)
    799         jsProxy->setEventHandlerLineNumber(0);
    800 
    801     newElement->beginParsingChildren();
    802 
    803     ScriptElement* scriptElement = toScriptElement(newElement.get());
    804     if (scriptElement)
    805         m_scriptStartLine = lineNumber();
    806 
    807     if (!m_currentNode->addChild(newElement.get())) {
    808         stopParsing();
    809         return;
    810     }
    811 
    812     pushCurrentNode(newElement.get());
    813     if (m_view && !newElement->attached())
    814         newElement->attach();
    815 
    816     if (!m_parsingFragment && isFirstElement && m_doc->frame())
    817         m_doc->frame()->loader()->dispatchDocumentElementAvailable();
    818 }
    819 
    820 void XMLTokenizer::endElementNs()
    821 {
            // Handles the end of an element: finishes the current node and, when it
            // is a script element, either requests its external source or runs its
            // inline content. The current node is popped on every path that gets
            // past the stopped/paused checks.
    822     if (m_parserStopped)
    823         return;
    824 
            // While paused, queue the callback on m_pendingCallbacks instead of handling it now.
    825     if (m_parserPaused) {
    826         m_pendingCallbacks->appendEndElementNSCallback();
    827         return;
    828     }
    829 
    830     exitText();
    831 
    832     Node* n = m_currentNode;
    833     n->finishParsingChildren();
    834 
            // When fragment scripting is not allowed, a script element is removed
            // from the tree as soon as it is closed.
    835     if (m_scriptingPermission == FragmentScriptingNotAllowed && n->isElementNode() && toScriptElement(static_cast<Element*>(n))) {
    836         popCurrentNode();
                // NOTE(review): ec is not initialized here and its value is never checked.
    837         ExceptionCode ec;
    838         n->remove(ec);
    839         return;
    840     }
    841 
            // Non-element nodes, or parsing without a view, need no script handling.
    842     if (!n->isElementNode() || !m_view) {
    843         popCurrentNode();
    844         return;
    845     }
    846 
    847     Element* element = static_cast<Element*>(n);
    848 
    849     // The element's parent may have already been removed from document.
    850     // Parsing continues in this case, but scripts aren't executed.
    851     if (!element->inDocument()) {
    852         popCurrentNode();
    853         return;
    854     }
    855 
            // Only script elements are processed further.
    856     ScriptElement* scriptElement = toScriptElement(element);
    857     if (!scriptElement) {
    858         popCurrentNode();
    859         return;
    860     }
    861 
    862     // Don't load external scripts for standalone documents (for now).
    863     ASSERT(!m_pendingScript);
            // m_requestingScript stays true while the script is requested or executed
            // below and is cleared again before the node is popped.
    864     m_requestingScript = true;
    865 
    866 #if ENABLE(XHTMLMP)
            // A script that should not execute as JavaScript turns on noscript
            // processing; otherwise the 'else' falls into the block below.
    867     if (!scriptElement->shouldExecuteAsJavaScript())
    868         m_doc->setShouldProcessNoscriptElement(true);
    869     else
    870 #endif
    871     {
    872         String scriptHref = scriptElement->sourceAttributeValue();
    873         if (!scriptHref.isEmpty()) {
    874             // we have a src attribute
    875             String scriptCharset = scriptElement->scriptCharset();
                    // The request is only made if dispatchBeforeLoadEvent() returned true;
                    // m_pendingScript receives the result of requestScript().
    876             if (element->dispatchBeforeLoadEvent(scriptHref) &&
    877                 (m_pendingScript = m_doc->docLoader()->requestScript(scriptHref, scriptCharset))) {
    878                 m_scriptElement = element;
    879                 m_pendingScript->addClient(this);
    880 
    881                 // m_pendingScript will be 0 if script was already loaded and ref() executed it
    882                 if (m_pendingScript)
    883                     pauseParsing();
    884             } else
    885                 m_scriptElement = 0;
    886         } else
                    // No src attribute: execute the inline content, passing the document
                    // URL and m_scriptStartLine along with it.
    887             m_view->frame()->script()->executeScript(ScriptSourceCode(scriptElement->scriptContent(), m_doc->url(), m_scriptStartLine));
    888     }
    889     m_requestingScript = false;
    890     popCurrentNode();
    891 }
    892 
    893 void XMLTokenizer::characters(const xmlChar* s, int len)
    894 {
            // Buffers character data into m_bufferedText when the current node is a
            // text node or enterText() succeeds. Ignored once the parser has stopped;
            // queued on m_pendingCallbacks while it is paused.
    895     if (m_parserStopped)
    896         return;
    897 
    898     if (!m_parserPaused) {
    899         if (m_currentNode->isTextNode() || enterText())
    900             m_bufferedText.append(s, len);
    901         return;
    902     }
    903 
    904     m_pendingCallbacks->appendCharactersCallback(s, len);
    905 }
    906 
    907 void XMLTokenizer::error(ErrorType type, const char* message, va_list args)
    908 {
            // Formats a printf-style diagnostic (message + args) and either reports
            // it through handleError() or, while the parser is paused, queues it on
            // m_pendingCallbacks. Nothing is reported once the parser has stopped.
    909     if (m_parserStopped)
    910         return;
    911 
    912 #if COMPILER(MSVC) || COMPILER(RVCT)
    913     char m[1024];
    914     vsnprintf(m, sizeof(m) - 1, message, args);
            // Some vsnprintf implementations (e.g. MSVC's _vsnprintf) do not write a
            // terminator when the output is truncated, and the buffer is not
            // initialized, so terminate it explicitly.
            m[sizeof(m) - 1] = '\0';
    915 #else
    916     char* m;
            // If formatting fails there is no message to report.
    917     if (vasprintf(&m, message, args) == -1)
    918         return;
    919 #endif
    920 
    921     if (m_parserPaused)
    922         m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(m), lineNumber(), columnNumber());
    923     else
    924         handleError(type, m, lineNumber(), columnNumber());
    925 
    926 #if !COMPILER(MSVC) && !COMPILER(RVCT)
            // The vasprintf() buffer is heap-allocated and owned by this function.
    927     free(m);
    928 #endif
    929 }
    930 
    931 void XMLTokenizer::processingInstruction(const xmlChar* target, const xmlChar* data)
    932 {
            // Creates a ProcessingInstruction node for target/data and appends it to
            // the current node. With XSLT enabled, an XSL instruction seen before the
            // first element may stop parsing (see the end of this function).
    933     if (m_parserStopped)
    934         return;
    935 
            // While paused, queue the callback on m_pendingCallbacks instead of handling it now.
    936     if (m_parserPaused) {
    937         m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
    938         return;
    939     }
    940 
    941     exitText();
    942 
    943     // ### handle exceptions
    944     int exception = 0;
    945     RefPtr<ProcessingInstruction> pi = m_doc->createProcessingInstruction(
    946         toString(target), toString(data), exception);
            // A creation failure is not reported; the instruction is simply skipped.
    947     if (exception)
    948         return;
    949 
    950     pi->setCreatedByParser(true);
    951 
    952     if (!m_currentNode->addChild(pi.get()))
    953         return;
    954     if (m_view && !pi->attached())
    955         pi->attach();
    956 
    957     pi->finishParsingChildren();
    958 
    959 #if ENABLE(XSLT)
            // The instruction only counts as an XSL transform if no element has been
            // seen yet. Parsing stops when transformSourceDocument() then returns false.
    960     m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
    961     if (m_sawXSLTransform && !m_doc->transformSourceDocument())
    962         stopParsing();
    963 #endif
    964 }
    965 
    966 void XMLTokenizer::cdataBlock(const xmlChar* s, int len)
    967 {
            // Appends a CDATASection node built from the given bytes to the current
            // node, attaching it when a view exists and it is not attached yet.
            // Ignored once stopped; queued on m_pendingCallbacks while paused.
    968     if (m_parserStopped)
    969         return;
    970 
    971     if (m_parserPaused) {
    972         m_pendingCallbacks->appendCDATABlockCallback(s, len);
    973         return;
    974     }
    975 
    976     exitText();
    977 
    978     RefPtr<Node> cdataNode = CDATASection::create(m_doc, toString(s, len));
    979     const bool added = m_currentNode->addChild(cdataNode.get());
    980     const bool needsAttach = added && m_view && !cdataNode->attached();
    981     if (needsAttach)
    982         cdataNode->attach();
    983 }
    984 
    985 void XMLTokenizer::comment(const xmlChar* s)
    986 {
            // Appends a Comment node built from s to the current node, attaching it
            // when a view exists and it is not attached yet. Ignored once stopped;
            // queued on m_pendingCallbacks while paused.
    987     if (m_parserStopped)
    988         return;
    989 
    990     if (m_parserPaused) {
    991         m_pendingCallbacks->appendCommentCallback(s);
    992         return;
    993     }
    994 
    995     exitText();
    996 
    997     RefPtr<Node> newNode = Comment::create(m_doc, toString(s));
            // Do not attach a node that was not inserted; this matches cdataBlock()
            // and processingInstruction().
    998     if (!m_currentNode->addChild(newNode.get()))
                return;
    999     if (m_view && !newNode->attached())
   1000         newNode->attach();
   1001 }
   1002 
   1003 void XMLTokenizer::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
   1004 {
            // Copies the XML declaration (version, standalone flag, encoding) onto
            // the document. version and encoding are only applied when non-null.
            // The exception code from the setters is not inspected.
   1005     ExceptionCode ignoredException = 0;
   1006     if (version)
   1007         m_doc->setXMLVersion(toString(version), ignoredException);
            // standalone can be 0, 1 or -1; only 1 counts as standalone.
   1008     const bool isStandalone = standalone == 1;
   1009     m_doc->setXMLStandalone(isStandalone, ignoredException);
   1010     if (encoding)
   1011         m_doc->setXMLEncoding(toString(encoding));
   1012 }
   1013 
   1014 void XMLTokenizer::endDocument()
   1015 {
            // Calls exitText() and, with XHTMLMP enabled, clears the flag that
            // internalSubset() sets when it sees an XHTML Mobile DOCTYPE.
   1016     exitText();
   1017 #if ENABLE(XHTMLMP)
   1018     m_hasDocTypeDeclaration = false;
   1019 #endif
   1020 }
   1021 
   1022 void XMLTokenizer::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
   1023 {
            // Handles a DOCTYPE declaration: validates the public ID for WML/XHTMLMP
            // builds and adds a DocumentType node (name, external ID, system ID) to
            // the document. Ignored once stopped; queued while paused.
   1024     if (m_parserStopped)
   1025         return;
   1026 
   1027     if (m_parserPaused) {
   1028         m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
   1029         return;
   1030     }
   1031 
   1032     if (m_doc) {
   1033 #if ENABLE(WML) || ENABLE(XHTMLMP)
   1034         String extId = toString(externalID);
   1035 #endif
   1036 #if ENABLE(WML)
                // WML documents must use one of the four WML public IDs below.
                // NOTE(review): after reporting the fatal error this path does not
                // return, so the DocumentType node is still added below - confirm intended.
   1037         if (isWMLDocument()
   1038             && extId != "-//WAPFORUM//DTD WML 1.3//EN"
   1039             && extId != "-//WAPFORUM//DTD WML 1.2//EN"
   1040             && extId != "-//WAPFORUM//DTD WML 1.1//EN"
   1041             && extId != "-//WAPFORUM//DTD WML 1.0//EN")
   1042             handleError(fatal, "Invalid DTD Public ID", lineNumber(), columnNumber());
   1043 #endif
   1044 #if ENABLE(XHTMLMP)
   1045         String dtdName = toString(name);
                // For the XHTML Mobile 1.0/1.1 public IDs the DOCTYPE name must be
                // 'html'; otherwise a fatal error is reported and no DocumentType is added.
   1046         if (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
   1047             || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") {
   1048             if (dtdName != HTMLNames::htmlTag.localName()) {
   1049                 handleError(fatal, "Invalid DOCTYPE declaration, expected 'html' as root element.", lineNumber(), columnNumber());
   1050                 return;
   1051             }
   1052 
                    // Mirror the document's own XHTMLMP-vs-XHTML classification on the tokenizer.
   1053             if (m_doc->isXHTMLMPDocument())
   1054                 setIsXHTMLMPDocument(true);
   1055             else
   1056                 setIsXHTMLDocument(true);
   1057 
   1058             m_hasDocTypeDeclaration = true;
   1059         }
   1060 #endif
   1061 
                // The three branches build the same DocumentType node; they differ only
                // in reusing the strings already converted above.
   1062 #if ENABLE(XHTMLMP)
   1063         m_doc->addChild(DocumentType::create(m_doc, dtdName, extId, toString(systemID)));
   1064 #elif ENABLE(WML)
   1065         m_doc->addChild(DocumentType::create(m_doc, toString(name), extId, toString(systemID)));
   1066 #else
   1067         m_doc->addChild(DocumentType::create(m_doc, toString(name), toString(externalID), toString(systemID)));
   1068 #endif
   1069     }
   1070 }
   1071 
   1072 static inline XMLTokenizer* getTokenizer(void* closure)
   1073 {
            // closure is the libxml parser context; its _private field holds the tokenizer.
   1074     void* owner = static_cast<xmlParserCtxtPtr>(closure)->_private;
   1075     return static_cast<XMLTokenizer*>(owner);
   1076 }
   1077 
   1078 // This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
   1079 // Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
   1080 static inline bool hackAroundLibXMLEntityBug(void* closure)
   1081 {
            // Returns true when the SAX wrapper callbacks in this file should ignore
            // the current callback.
   1082 #if LIBXML_VERSION >= 20627
   1083     UNUSED_PARAM(closure);
   1084 
   1085     // This bug has been fixed in libxml 2.6.27.
   1086     return false;
   1087 #else
            // Older libxml: skip the callback whenever the parser context's node field is non-null.
   1088     return static_cast<xmlParserCtxtPtr>(closure)->node;
   1089 #endif
   1090 }
   1091 
   1092 static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
   1093 {
            // SAX startElementNs callback: forwards to the tokenizer unless the
            // libxml entity-bug workaround asks to skip this callback.
   1094     if (!hackAroundLibXMLEntityBug(closure)) {
   1095         XMLTokenizer* tokenizer = getTokenizer(closure);
   1096         tokenizer->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes);
   1097     }
   1098 }
   1099 
   1100 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
   1101 {
            // SAX endElementNs callback: the name arguments are unused; forwards to
            // the tokenizer unless the entity-bug workaround asks to skip.
   1102     if (!hackAroundLibXMLEntityBug(closure)) {
   1103         XMLTokenizer* tokenizer = getTokenizer(closure);
   1104         tokenizer->endElementNs();
   1105     }
   1106 }
   1107 
   1108 static void charactersHandler(void* closure, const xmlChar* s, int len)
   1109 {
            // SAX characters callback: forwards the text run to the tokenizer unless
            // the entity-bug workaround asks to skip.
   1110     if (!hackAroundLibXMLEntityBug(closure)) {
   1111         XMLTokenizer* tokenizer = getTokenizer(closure);
   1112         tokenizer->characters(s, len);
   1113     }
   1114 }
   1115 
   1116 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
   1117 {
            // SAX processingInstruction callback: forwards target/data to the
            // tokenizer unless the entity-bug workaround asks to skip.
   1118     if (!hackAroundLibXMLEntityBug(closure)) {
   1119         XMLTokenizer* tokenizer = getTokenizer(closure);
   1120         tokenizer->processingInstruction(target, data);
   1121     }
   1122 }
   1123 
   1124 static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
   1125 {
            // SAX cdataBlock callback: forwards the CDATA bytes to the tokenizer
            // unless the entity-bug workaround asks to skip.
   1126     if (!hackAroundLibXMLEntityBug(closure)) {
   1127         XMLTokenizer* tokenizer = getTokenizer(closure);
   1128         tokenizer->cdataBlock(s, len);
   1129     }
   1130 }
   1131 
   1132 static void commentHandler(void* closure, const xmlChar* comment)
   1133 {
            // SAX comment callback: forwards the comment text to the tokenizer
            // unless the entity-bug workaround asks to skip.
   1134     if (!hackAroundLibXMLEntityBug(closure)) {
   1135         XMLTokenizer* tokenizer = getTokenizer(closure);
   1136         tokenizer->comment(comment);
   1137     }
   1138 }
   1139 
   1140 WTF_ATTRIBUTE_PRINTF(2, 3)
   1141 static void warningHandler(void* closure, const char* message, ...)
   1142 {
            // SAX warning callback: packs the printf-style arguments into a va_list
            // and reports them to the tokenizer as XMLTokenizer::warning.
   1143     va_list args;
   1144     va_start(args, message);
   1145     getTokenizer(closure)->error(XMLTokenizer::warning, message, args);
   1146     va_end(args);
   1147 }
   1148 
   1149 WTF_ATTRIBUTE_PRINTF(2, 3)
   1150 static void fatalErrorHandler(void* closure, const char* message, ...)
   1151 {
            // SAX fatalError callback: packs the printf-style arguments into a
            // va_list and reports them to the tokenizer as XMLTokenizer::fatal.
   1152     va_list args;
   1153     va_start(args, message);
   1154     getTokenizer(closure)->error(XMLTokenizer::fatal, message, args);
   1155     va_end(args);
   1156 }
   1157 
   1158 WTF_ATTRIBUTE_PRINTF(2, 3)
   1159 static void normalErrorHandler(void* closure, const char* message, ...)
   1160 {
            // SAX error callback: packs the printf-style arguments into a va_list
            // and reports them to the tokenizer as XMLTokenizer::nonFatal.
   1161     va_list args;
   1162     va_start(args, message);
   1163     getTokenizer(closure)->error(XMLTokenizer::nonFatal, message, args);
   1164     va_end(args);
   1165 }
   1166 
   1167 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
   1168 // a hack to avoid malloc/free. Using a global variable like this could cause trouble
   1169 // if libxml implementation details were to change.
        // This buffer holds the UTF-8 bytes (plus terminator) of the most recently
        // resolved entity; getXHTMLEntity() overwrites it on every successful lookup.
   1170 static xmlChar sharedXHTMLEntityResult[5] = {0, 0, 0, 0, 0};
   1171 
   1172 static xmlEntityPtr sharedXHTMLEntity()
   1173 {
            // Returns the single shared xmlEntity, filling in its fixed fields on
            // first use. A zero type field marks it as not yet set up.
   1174     static xmlEntity sharedEntity;
   1175     if (sharedEntity.type)
   1176         return &sharedEntity;
   1177     sharedEntity.type = XML_ENTITY_DECL;
   1178     sharedEntity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1179     sharedEntity.orig = sharedXHTMLEntityResult;
   1180     sharedEntity.content = sharedXHTMLEntityResult;
   1181     return &sharedEntity;
   1182 }
   1183 
   1184 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
   1185 {
            // Resolves a named entity through decodeNamedEntity() and returns the
            // shared entity object filled with its UTF-8 value, or 0 when the name
            // does not decode to a character.
   1186     const char* entityName = reinterpret_cast<const char*>(name);
   1187     UChar decoded = decodeNamedEntity(entityName);
   1188     if (!decoded)
   1189         return 0;
   1190 
   1191     CString utf8Value = String(&decoded, 1).utf8();
   1192     ASSERT(utf8Value.length() < 5);
   1193     xmlEntityPtr shared = sharedXHTMLEntity();
   1194     shared->name = name;
   1195     shared->length = utf8Value.length();
            // The copy is length + 1 bytes, i.e. it includes the byte after the value.
   1196     memcpy(sharedXHTMLEntityResult, utf8Value.data(), shared->length + 1);
   1197     return shared;
   1198 }
   1199 
   1200 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
   1201 {
            // SAX getEntity callback. Lookup order: predefined XML entities, then
            // entities known to the libxml document, then (for XHTML-family
            // documents only) named entities resolved via getXHTMLEntity().
   1202     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
   1203     xmlEntityPtr ent = xmlGetPredefinedEntity(name);
   1204     if (ent) {
   1205         ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
   1206         return ent;
   1207     }
   1208 
   1209     ent = xmlGetDocEntity(ctxt->myDoc, name);
            // The fallback applies to XHTML documents, plus XHTMLMP and WML documents
            // when those features are compiled in.
   1210     if (!ent && (getTokenizer(closure)->isXHTMLDocument()
   1211 #if ENABLE(XHTMLMP)
   1212                  || getTokenizer(closure)->isXHTMLMPDocument()
   1213 #endif
   1214 #if ENABLE(WML)
   1215                  || getTokenizer(closure)->isWMLDocument()
   1216 #endif
   1217        )) {
   1218         ent = getXHTMLEntity(name);
                // The shared entity's etype is overwritten with
                // XML_INTERNAL_GENERAL_ENTITY before it is handed back to libxml.
   1219         if (ent)
   1220             ent->etype = XML_INTERNAL_GENERAL_ENTITY;
   1221     }
   1222 
   1223     return ent;
   1224 }
   1225 
   1226 static void startDocumentHandler(void* closure)
   1227 {
            // SAX startDocument callback: passes the declaration fields from the
            // parser context to the tokenizer, then runs libxml's default handler.
   1228     xmlParserCtxtPtr parserContext = static_cast<xmlParserCtxtPtr>(closure);
   1229     getTokenizer(closure)->startDocument(parserContext->version, parserContext->encoding, parserContext->standalone);
   1230     xmlSAX2StartDocument(closure);
   1231 }
   1232 
   1233 static void endDocumentHandler(void* closure)
   1234 {
            // SAX endDocument callback: notifies the tokenizer, then runs libxml's default handler.
   1235     getTokenizer(closure)->endDocument();
   1236     xmlSAX2EndDocument(closure);
   1237 }
   1238 
   1239 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
   1240 {
            // SAX internalSubset callback: notifies the tokenizer, then runs
            // libxml's default handler with the same arguments.
   1241     getTokenizer(closure)->internalSubset(name, externalID, systemID);
   1242     xmlSAX2InternalSubset(closure, name, externalID, systemID);
   1243 }
   1244 
   1245 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
   1246 {
            // SAX externalSubset callback: marks the tokenizer as parsing an XHTML
            // document when the public ID is one of the DTDs listed below. The
            // XHTML Mobile 1.0 ID is only included when XHTMLMP support is disabled.
   1247     String extId = toString(externalId);
   1248     if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
   1249         || (extId == "-//W3C//DTD XHTML 1.1//EN")
   1250         || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
   1251         || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
   1252         || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
   1253         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
   1254         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
   1255 #if !ENABLE(XHTMLMP)
   1256         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
   1257 #endif
   1258        )
   1259         getTokenizer(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
   1260 }
   1261 
   1262 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
   1263 {
            // Intentionally empty SAX ignorableWhitespace callback; all arguments are unused.
   1264     // nothing to do, but we need this to work around a crasher
   1265     // http://bugzilla.gnome.org/show_bug.cgi?id=172255
   1266     // http://bugs.webkit.org/show_bug.cgi?id=5792
   1267 }
   1268 
   1269 void XMLTokenizer::initializeParserContext(const char* chunk)
   1270 {
            // Builds the SAX handler table, resets the per-parse state flags and
            // creates m_context: a memory parser over chunk when parsing a fragment,
            // otherwise a string parser (chunk is not used in that case).
   1271     xmlSAXHandler sax;
   1272     memset(&sax, 0, sizeof(sax));
   1273 
            // Route libxml callbacks to the static handlers in this file; entity
            // declarations use libxml's own xmlSAX2EntityDecl.
   1274     sax.error = normalErrorHandler;
   1275     sax.fatalError = fatalErrorHandler;
   1276     sax.characters = charactersHandler;
   1277     sax.processingInstruction = processingInstructionHandler;
   1278     sax.cdataBlock = cdataBlockHandler;
   1279     sax.comment = commentHandler;
   1280     sax.warning = warningHandler;
   1281     sax.startElementNs = startElementNsHandler;
   1282     sax.endElementNs = endElementNsHandler;
   1283     sax.getEntity = getEntityHandler;
   1284     sax.startDocument = startDocumentHandler;
   1285     sax.endDocument = endDocumentHandler;
   1286     sax.internalSubset = internalSubsetHandler;
   1287     sax.externalSubset = externalSubsetHandler;
   1288     sax.ignorableWhitespace = ignorableWhitespaceHandler;
   1289     sax.entityDecl = xmlSAX2EntityDecl;
   1290     sax.initialized = XML_SAX2_MAGIC;
            // Reset the state flags for the new parse.
   1291     m_parserStopped = false;
   1292     m_sawError = false;
   1293     m_sawXSLTransform = false;
   1294     m_sawFirstElement = false;
   1295 
            // NOTE(review): XMLTokenizerScope is presumably a scoped guard tied to the
            // document's DocLoader for the duration of context creation - confirm.
   1296     XMLTokenizerScope scope(m_doc->docLoader());
   1297     if (m_parsingFragment)
   1298         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
   1299     else
   1300         m_context = XMLParserContext::createStringParser(&sax, this);
   1301 }
   1302 
   1303 void XMLTokenizer::doEnd()
   1304 {
            // Finishes parsing: with XSLT enabled, hands the original source to the
            // document as its transform source if an XSL transform was seen; then
            // tells libxml the input is complete and drops the parser context.
   1305 #if ENABLE(XSLT)
   1306     if (m_sawXSLTransform) {
                // Re-parse the saved original source into a libxml document for the transform.
   1307         void* doc = xmlDocPtrForString(m_doc->docLoader(), m_originalSourceForTransform, m_doc->url().string());
   1308         m_doc->setTransformSource(new TransformSource(doc));
   1309 
   1310         m_doc->setParsing(false); // Make the doc think it's done, so it will apply xsl sheets.
   1311         m_doc->updateStyleSelector();
   1312         m_doc->setParsing(true);
   1313         m_parserStopped = true;
   1314     }
   1315 #endif
   1316 
   1317     if (m_context) {
   1318         // Tell libxml we're done.
   1319         {
                    // The inner braces limit the lifetime of scope to the xmlParseChunk() call.
   1320             XMLTokenizerScope scope(m_doc->docLoader());
   1321             xmlParseChunk(context(), 0, 0, 1);
   1322         }
   1323 
   1324         m_context = 0;
   1325     }
   1326 }
   1327 
   1328 #if ENABLE(XSLT)
   1329 void* xmlDocPtrForString(DocLoader* docLoader, const String& source, const String& url)
   1330 {
            // Parses source in one go with xmlReadMemory() and returns the resulting
            // xmlDocPtr as a void*, or 0 when source is empty. url is passed to libxml
            // in Latin-1 form.
   1331     if (source.isEmpty())
   1332         return 0;
   1333 
   1334     // Parse in a single chunk into an xmlDocPtr
   1335     // FIXME: Hook up error handlers so that a failure to parse the main document results in
   1336     // good error messages.
            // Inspect the first byte in memory of U+FEFF to pick the encoding name:
            // 0xFF first selects "UTF-16LE", otherwise "UTF-16BE".
   1337     const UChar BOM = 0xFEFF;
   1338     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
   1339 
   1340     XMLTokenizerScope scope(docLoader, errorFunc, 0);
            // The UChar buffer is handed over as raw bytes, so the size is length * sizeof(UChar).
   1341     xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()),
   1342                                         source.length() * sizeof(UChar),
   1343                                         url.latin1().data(),
   1344                                         BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE",
   1345                                         XSLT_PARSE_OPTIONS);
   1346     return sourceDoc;
   1347 }
   1348 #endif
   1349 
   1350 int XMLTokenizer::lineNumber() const
   1351 {
            // Line of the parser's current input, or 1 when there is no parser context.
            xmlParserCtxtPtr activeContext = context();
   1352     return activeContext ? activeContext->input->line : 1;
   1353 }
   1354 
   1355 int XMLTokenizer::columnNumber() const
   1356 {
            // Column of the parser's current input, or 1 when there is no parser context.
            xmlParserCtxtPtr activeContext = context();
   1357     return activeContext ? activeContext->input->col : 1;
   1358 }
   1359 
   1360 void XMLTokenizer::stopParsing()
   1361 {
            // Runs the base-class stop logic, then stops the libxml parser if a
            // parser context exists.
   1362     Tokenizer::stopParsing();
   1363     if (xmlParserCtxtPtr activeContext = context())
   1364         xmlStopParser(activeContext);
   1365 }
   1366 
   1367 void XMLTokenizer::resumeParsing()
   1368 {
            // Resumes a paused parse in three steps: replay the queued callbacks,
            // feed the source that arrived while paused, and call end() if finishing
            // was already requested. Must only be called while paused.
   1369     ASSERT(m_parserPaused);
   1370 
   1371     m_parserPaused = false;
   1372 
   1373     // First, execute any pending callbacks
   1374     while (!m_pendingCallbacks->isEmpty()) {
   1375         m_pendingCallbacks->callAndRemoveFirstCallback(this);
   1376 
   1377         // A callback paused the parser
   1378         if (m_parserPaused)
   1379             return;
   1380     }
   1381 
   1382     // Then, write any pending data
            // m_pendingSrc is copied and cleared before write() is called.
   1383     SegmentedString rest = m_pendingSrc;
   1384     m_pendingSrc.clear();
   1385     write(rest, false);
   1386 
   1387     // Finally, if finish() has been called and write() didn't result
   1388     // in any further callbacks being queued, call end()
   1389     if (m_finishCalled && m_pendingCallbacks->isEmpty())
   1390         end();
   1391 }
   1392 
   1393 bool parseXMLDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* parent, FragmentScriptingPermission scriptingPermission)
   1394 {
            // Parses chunk as XML content into fragment, using a temporary tokenizer
            // built from fragment, parent and scriptingPermission. Returns true for
            // an empty chunk, false when libxml did not consume the whole chunk, and
            // otherwise whether the content was well formed or produced no error.
   1395     if (!chunk.length())
   1396         return true;
   1397 
   1398     XMLTokenizer fragmentTokenizer(fragment, parent, scriptingPermission);
   1399 
   1400     CString utf8Chunk = chunk.utf8();
   1401     fragmentTokenizer.initializeParserContext(utf8Chunk.data());
   1402 
   1403     xmlParseContent(fragmentTokenizer.context());
   1404 
   1405     fragmentTokenizer.endDocument();
   1406 
   1407     // The parse only counts if every byte of the UTF-8 chunk was consumed.
   1408     const long consumed = xmlByteConsumed(fragmentTokenizer.context());
   1409     const bool consumedWholeChunk = consumed != -1 && static_cast<unsigned long>(consumed) == utf8Chunk.length();
   1410     if (!consumedWholeChunk)
   1411         return false;
   1412 
   1413     // Success if the chunk is well formed, or if it is not but libxml recorded no error.
   1414     return fragmentTokenizer.context()->wellFormed || !xmlCtxtGetLastError(fragmentTokenizer.context());
   1415 }
   1415 
   1416 // --------------------------------
   1417 
   1418 struct AttributeParseState {
            // Result of parseAttributes(): qualified attribute name -> value, filled
            // in by attributesStartElementNsHandler().
   1419     HashMap<String, String> attributes;
            // Set to true once the synthetic "attrs" element has been seen.
   1420     bool gotAttributes;
   1421 };
   1422 
   1423 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
   1424                                             const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/,
   1425                                             int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes)
   1426 {
            // SAX startElementNs callback used by parseAttributes(): for the "attrs"
            // element, records every attribute in the AttributeParseState stored in
            // the parser context's _private field. Other elements are ignored.
   1427     const char* elementName = reinterpret_cast<const char*>(xmlLocalName);
   1428     if (strcmp(elementName, "attrs"))
   1429         return;
   1430 
   1431     void* userData = static_cast<xmlParserCtxtPtr>(closure)->_private;
   1432     AttributeParseState* parseState = static_cast<AttributeParseState*>(userData);
   1433     parseState->gotAttributes = true;
   1434 
   1435     xmlSAX2Attributes* current = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
   1436     for (int remaining = nb_attributes; remaining > 0; --remaining, ++current) {
   1437         String localName = toString(current->localname);
   1438         String prefix = toString(current->prefix);
                // The value is delimited by the value/end pointers rather than by a terminator.
   1439         String value = toString(current->value, static_cast<int>(current->end - current->value));
                // Attributes are keyed by "prefix:localName", or just localName without a prefix.
   1440         String qualifiedName = localName;
   1441         if (!prefix.isEmpty())
   1442             qualifiedName = prefix + ":" + localName;
   1443         parseState->attributes.set(qualifiedName, value);
   1444     }
   1445 }
   1446 
   1447 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
   1448 {
            // Parses an attribute list by wrapping it in a synthetic <attrs .../>
            // element and running it through libxml. Returns the collected
            // name -> value map; attrsOK is set to whether the attrs element was seen.
   1449     xmlSAXHandler handlers;
   1450     memset(&handlers, 0, sizeof(handlers));
   1451     handlers.initialized = XML_SAX2_MAGIC;
   1452     handlers.startElementNs = attributesStartElementNsHandler;
   1453 
   1454     AttributeParseState parseState;
   1455     parseState.gotAttributes = false;
   1456     RefPtr<XMLParserContext> parserContext = XMLParserContext::createStringParser(&handlers, &parseState);
   1457     String wrappedSource = "<?xml version=\"1.0\"?><attrs " + string + " />";
            // The UChar buffer is fed as raw bytes in a single, terminating chunk.
   1458     xmlParseChunk(parserContext->context(), reinterpret_cast<const char*>(wrappedSource.characters()), wrappedSource.length() * sizeof(UChar), 1);
   1459     attrsOK = parseState.gotAttributes;
   1460     return parseState.attributes;
   1461 }
   1462 
   1463 }
   1464