Home | History | Annotate | Download | only in dom
      1 /*
      2  * Copyright (C) 2000 Peter Kelly (pmk (at) post.com)
      3  * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
      4  * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org)
      5  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
      6  * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
      7  *
      8  * This library is free software; you can redistribute it and/or
      9  * modify it under the terms of the GNU Library General Public
     10  * License as published by the Free Software Foundation; either
     11  * version 2 of the License, or (at your option) any later version.
     12  *
     13  * This library is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16  * Library General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU Library General Public License
     19  * along with this library; see the file COPYING.LIB.  If not, write to
     20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21  * Boston, MA 02110-1301, USA.
     22  *
     23  */
     24 
     25 #ifndef XMLTokenizer_h
     26 #define XMLTokenizer_h
     27 
     28 #if USE(EXPAT)
     29 
     30 #include "CachedResourceClient.h"
     31 #include "SegmentedString.h"
     32 #include "StringHash.h"
     33 #include "Tokenizer.h"
     34 #include <libexpat/expat.h>
     35 #include <wtf/HashMap.h>
     36 #include <wtf/OwnPtr.h>
     37 
     38 namespace WebCore {
     39 
     40     class Node;
     41     class CachedScript;
     42     class DocumentFragment;
     43     class Document;
     44     class Element;
     45     class FrameView;
     46     class PendingCallbacks;
     47 
     48     class XMLTokenizer : public Tokenizer, public CachedResourceClient {
            // XML tokenizer declaration used when USE(EXPAT) is enabled. It derives from
            // Tokenizer and CachedResourceClient, declares the callbacks driven by the expat
            // parser, and stores the XML_Parser handle together with the parsing state.
            // Apart from the inline accessors, members are only declared in this header.
     49     public:
                // The FrameView argument is optional and defaults to 0.
     50         XMLTokenizer(Document *, FrameView * = 0);
                // NOTE(review): presumably the fragment-parsing constructor (see m_parsingFragment);
                // confirm against the definition.
     51         XMLTokenizer(DocumentFragment *, Element *);
     52         ~XMLTokenizer();
     53 
                // Error classification accepted by error().
     54         enum ErrorType { warning, nonFatal, fatal };
     55 
     56         // from Tokenizer
     57         virtual bool write(const SegmentedString &str, bool);
     58         virtual void finish();
     59         virtual bool isWaitingForScripts() const;
     60         virtual void stopParsing();
                // True while m_sawError is still false.
     61         virtual bool wellFormed() const { return !m_sawError; }
     62         virtual int lineNumber() const;
     63         virtual int columnNumber() const;
     64 
     65         // from CachedResourceClient
     66         virtual void notifyFinished(CachedResource *finishedObj);
     67 
     68         // callbacks from the expat parser
     69         void startElementNs(const XML_Char *name, const XML_Char **atts);
     70         void endElementNs();
     71         void characters(const XML_Char *s, int len);
     72         void processingInstruction(const XML_Char *target, const XML_Char *data);
     73         void comment(const XML_Char *s);
     74         void startCdata();
     75         void endCdata();
     76 
                // Reports an error of the given type with a message and its line/column position.
     77         void error(ErrorType type, const char* m, int lineNumber, int columnNumber);
     78 
     79         // utilities
                // Read and replace the stored expat parser handle (m_parser).
     80         XML_Parser getXMLParser() const { return m_parser; }
     81         void setXMLParser(XML_Parser parser) { m_parser = parser; }
     82 
     83     private:
     84         void setCurrentNode(Node*);
     85 
     86         void end();
     87 
     88         void pauseParsing();
     89         void resumeParsing();
     90 
     91         void reportError();
     92         void insertErrorMessageBlock();
     93 
     94         bool enterText();
     95         void exitText();
     96 
                // Raw pointers; ownership is not expressed in this header.
     97         Document *m_doc;
     98         FrameView *m_view;
     99 
                // expat parser handle exposed through getXMLParser()/setXMLParser().
    100         XML_Parser m_parser;
    101 
    102         Node *m_currentNode;
    103         bool m_currentNodeIsReferenced;
    104 
                // m_sawError is the flag inverted by wellFormed().
    105         bool m_sawError;
    106         bool m_sawXSLTransform;
    107         bool m_sawFirstElement;
    108 
    109         bool m_parserPaused;
    110         bool m_requestingScript;
    111         bool m_finishCalled;
    112 
    113         int m_errorCount;
    114         String m_errorMessages;
    115 
                // NOTE(review): m_pendingScript is a raw CachedScript pointer here, whereas the
                // non-expat declaration later in this file holds it in a CachedResourceHandle.
    116         CachedScript *m_pendingScript;
    117         RefPtr<Element> m_scriptElement;
    118         int m_scriptStartLine;
    119 
    120         bool m_parsingFragment;
    121         String m_defaultNamespaceURI;
    122 
                // NOTE(review): the typedef says "PrefixForNamespace" while the member says
                // "prefixToNamespace"; confirm which way the map is keyed at its use sites.
    123         typedef HashMap<String, String> PrefixForNamespaceMap;
    124         PrefixForNamespaceMap m_prefixToNamespaceMap;
    125 
    126         OwnPtr<PendingCallbacks> m_pendingCallbacks;
    127         SegmentedString m_pendingSrc;
    128     };
    129 
    130 HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
        // parseAttributes (declared above) returns a String-to-String map and takes attrsOK by
        // non-const reference. NOTE(review): attrsOK is presumably set to report whether the
        // input parsed cleanly; confirm at the definition, which is not part of this header.
        // parseXMLDocumentFragment (declared below) takes a source String, a DocumentFragment
        // and an optional parent Element that defaults to 0; the meaning of its bool result
        // is not visible from this declaration.
    131 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0);
    132 
    133 } // namespace WebCore
    134 
    135 #else   // USE(EXPAT)
    136 
    137 #include "CachedResourceClient.h"
    138 #include "CachedResourceHandle.h"
    139 #include "MappedAttributeEntry.h"
    140 #include "SegmentedString.h"
    141 #include "StringHash.h"
    142 #include "Tokenizer.h"
    143 #include <wtf/HashMap.h>
    144 #include <wtf/OwnPtr.h>
    145 
    146 #if USE(QXMLSTREAM)
    147 #include <qxmlstream.h>
    148 #else
    149 #include <libxml/tree.h>
    150 #include <libxml/xmlstring.h>
    151 #endif
    152 
    153 namespace WebCore {
    154 
    155     class Node;
    156     class CachedScript;
    157     class DocLoader;
    158     class DocumentFragment;
    159     class Document;
    160     class Element;
    161     class FrameView;
    162     class PendingCallbacks;
    163     class ScriptElement;
    164 
    165 #if !USE(QXMLSTREAM)
    166     class XMLParserContext : public RefCounted<XMLParserContext> {
            // Ref-counted holder for a libxml xmlParserCtxtPtr; only declared when
            // USE(QXMLSTREAM) is false. The constructor is private, so instances come from
            // the two static create functions, whose definitions are not in this header.
    167     public:
                // Both factories take a SAX handler and an opaque void* argument; the memory
                // variant additionally takes a const char* (NOTE(review): presumably the
                // initial chunk of input; confirm at the definition).
    168         static PassRefPtr<XMLParserContext> createMemoryParser(xmlSAXHandlerPtr, void*, const char*);
    169         static PassRefPtr<XMLParserContext> createStringParser(xmlSAXHandlerPtr, void*);
                // NOTE(review): presumably releases m_context; the definition is not visible here.
    170         ~XMLParserContext();
                // Returns the stored libxml context pointer unchanged.
    171         xmlParserCtxtPtr context() const { return m_context; }
    172 
    173     private:
                // Stores the given context pointer; performs no other work.
    174         XMLParserContext(xmlParserCtxtPtr context)
    175             : m_context(context)
    176         {
    177         }
    178         xmlParserCtxtPtr m_context;
    179     };
    180 #endif
    181 
    182     class XMLTokenizer : public Tokenizer, public CachedResourceClient {
            // XML tokenizer declaration used when USE(EXPAT) is false. The parsing backend is
            // selected at compile time: a QXmlStreamReader when USE(QXMLSTREAM) is set,
            // otherwise a libxml context held through XMLParserContext. Apart from the inline
            // accessors, members are only declared in this header.
    183     public:
                // The FrameView argument is optional and defaults to 0.
    184         XMLTokenizer(Document*, FrameView* = 0);
                // NOTE(review): presumably the fragment-parsing constructor (see m_parsingFragment
                // and m_scriptingPermission); confirm against the definition.
    185         XMLTokenizer(DocumentFragment*, Element*, FragmentScriptingPermission);
    186         ~XMLTokenizer();
    187 
                // Error classification accepted by handleError() and error().
    188         enum ErrorType { warning, nonFatal, fatal };
    189 
    190         // from Tokenizer
    191         virtual void write(const SegmentedString&, bool appendData);
    192         virtual void finish();
    193         virtual bool isWaitingForScripts() const;
    194         virtual void stopParsing();
    195 
    196         void end();
    197 
    198         void pauseParsing();
    199         void resumeParsing();
    200 
                // Plain accessors for the m_isXHTMLDocument flag.
    201         void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }
    202         bool isXHTMLDocument() const { return m_isXHTMLDocument; }
    203 #if ENABLE(XHTMLMP)
                // Plain accessors for the m_isXHTMLMPDocument flag.
    204         void setIsXHTMLMPDocument(bool isXHTML) { m_isXHTMLMPDocument = isXHTML; }
    205         bool isXHTMLMPDocument() const { return m_isXHTMLMPDocument; }
    206 #endif
    207 #if ENABLE(WML)
    208         bool isWMLDocument() const;
    209 #endif
    210 
    211         // from CachedResourceClient
    212         virtual void notifyFinished(CachedResource* finishedObj);
    213 
    214 
                // Reports an error of the given type with a message and its line/column position.
    215         void handleError(ErrorType type, const char* m, int lineNumber, int columnNumber);
    216 
                // True while m_sawError is still false.
    217         virtual bool wellFormed() const { return !m_sawError; }
    218 
    219         int lineNumber() const;
    220         int columnNumber() const;
    221 
    222 #if USE(QXMLSTREAM)
    223 private:
                // Parsing steps for the QXmlStreamReader backend (m_stream).
    224         void parse();
    225         void startDocument();
    226         void parseStartElement();
    227         void parseEndElement();
    228         void parseCharacters();
    229         void parseProcessingInstruction();
    230         void parseCdata();
    231         void parseComment();
    232         void endDocument();
    233         void parseDtd();
    234         bool hasError() const;
    235 #else
    236 public:
    237         // callbacks from parser SAX
                // NOTE(review): WTF_ATTRIBUTE_PRINTF(3, 0) presumably marks `message` as a
                // printf-style format string consumed with a va_list; see the macro definition.
    238         void error(ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0);
    239         void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
    240                             const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes);
    241         void endElementNs();
    242         void characters(const xmlChar* s, int len);
    243         void processingInstruction(const xmlChar* target, const xmlChar* data);
    244         void cdataBlock(const xmlChar* s, int len);
    245         void comment(const xmlChar* s);
    246         void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone);
    247         void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID);
    248         void endDocument();
    249 #endif
    250     private:
                // Grants the free function parseXMLDocumentFragment access to the private members.
    251         friend bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element*, FragmentScriptingPermission);
    252 
                // The chunk argument is optional and defaults to 0.
    253         void initializeParserContext(const char* chunk = 0);
    254 
                // Maintain m_currentNode / m_currentNodeStack.
    255         void pushCurrentNode(Node*);
    256         void popCurrentNode();
    257         void clearCurrentNodeStack();
    258 
    259         void insertErrorMessageBlock();
    260 
    261         bool enterText();
    262         void exitText();
    263 
    264         void doWrite(const String&);
    265         void doEnd();
    266 
                // Raw pointers; ownership is not expressed in this header.
    267         Document* m_doc;
    268         FrameView* m_view;
    269 
    270         String m_originalSourceForTransform;
    271 
    272 #if USE(QXMLSTREAM)
    273         QXmlStreamReader m_stream;
    274         bool m_wroteText;
    275 #else
                // Returns the raw libxml context, or 0 while m_context is null.
    276         xmlParserCtxtPtr context() const { return m_context ? m_context->context() : 0; }
    277         RefPtr<XMLParserContext> m_context;
    278         OwnPtr<PendingCallbacks> m_pendingCallbacks;
    279         Vector<xmlChar> m_bufferedText;
    280 #endif
    281         Node* m_currentNode;
    282         Vector<Node*> m_currentNodeStack;
    283 
                // m_sawError is the flag inverted by wellFormed().
    284         bool m_sawError;
    285         bool m_sawXSLTransform;
    286         bool m_sawFirstElement;
    287         bool m_isXHTMLDocument;
    288 #if ENABLE(XHTMLMP)
    289         bool m_isXHTMLMPDocument;
    290         bool m_hasDocTypeDeclaration;
    291 #endif
    292 
    293         bool m_parserPaused;
    294         bool m_requestingScript;
    295         bool m_finishCalled;
    296 
    297         int m_errorCount;
    298         int m_lastErrorLine;
    299         int m_lastErrorColumn;
    300         String m_errorMessages;
    301 
    302         CachedResourceHandle<CachedScript> m_pendingScript;
    303         RefPtr<Element> m_scriptElement;
    304         int m_scriptStartLine;
    305 
    306         bool m_parsingFragment;
    307         String m_defaultNamespaceURI;
    308 
                // NOTE(review): the typedef says "PrefixForNamespace" while the member says
                // "prefixToNamespace"; confirm which way the map is keyed at its use sites.
    309         typedef HashMap<String, String> PrefixForNamespaceMap;
    310         PrefixForNamespaceMap m_prefixToNamespaceMap;
    311         SegmentedString m_pendingSrc;
    312         FragmentScriptingPermission m_scriptingPermission;
    313     };
    314 
    315 #if ENABLE(XSLT)
        // Only declared when XSLT support is enabled. Returns an untyped void*.
        // NOTE(review): the name suggests the result is a libxml xmlDocPtr built from `source`;
        // confirm at the definition before casting.
    316 void* xmlDocPtrForString(DocLoader*, const String& source, const String& url);
    317 #endif
    318 
        // parseAttributes returns a String-to-String map and takes attrsOK by non-const
        // reference. NOTE(review): attrsOK is presumably set to report whether the input
        // parsed cleanly; confirm at the definition, which is not part of this header.
    319 HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
        // The parent Element defaults to 0 and the scripting permission defaults to
        // FragmentScriptingAllowed; the meaning of the bool result is not visible here.
    320 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0, FragmentScriptingPermission = FragmentScriptingAllowed);
    321 
    322 } // namespace WebCore
    323 
    324 #endif // USE(EXPAT)
    325 
    326 #endif // XMLTokenizer_h
    327