Home | History | Annotate | Download | only in dom
      1 /*
      2  * Copyright (C) 2000 Peter Kelly (pmk (at) post.com)
      3  * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
      4  * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org)
      5  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
      6  * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
      7  *
      8  * This library is free software; you can redistribute it and/or
      9  * modify it under the terms of the GNU Library General Public
     10  * License as published by the Free Software Foundation; either
     11  * version 2 of the License, or (at your option) any later version.
     12  *
     13  * This library is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16  * Library General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU Library General Public License
     19  * along with this library; see the file COPYING.LIB.  If not, write to
     20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21  * Boston, MA 02110-1301, USA.
     22  *
     23  */
     24 
     25 #ifndef XMLDocumentParser_h
     26 #define XMLDocumentParser_h
     27 
     28 #if USE(EXPAT)
     29 
     30 #include "CachedResourceClient.h"
     31 #include "SegmentedString.h"
     32 #include "StringHash.h"
     33 #include "Tokenizer.h"
     34 #include <libexpat/expat.h>
     35 #include <wtf/HashMap.h>
     36 #include <wtf/OwnPtr.h>
     37 
     38 namespace WebCore {
     39 
     40     class Node;
     41     class CachedScript;
     42     class DocumentFragment;
     43     class Document;
     44     class Element;
     45     class FrameView;
     46     class PendingCallbacks;
     47 
     48     class XMLTokenizer : public Tokenizer, public CachedResourceClient {
     49     public:
     50         XMLTokenizer(Document *, FrameView * = 0);
     51         XMLTokenizer(DocumentFragment *, Element *);
     52         ~XMLTokenizer();
     53 
     54         enum ErrorType { warning, nonFatal, fatal };
     55 
     56         // from Tokenizer
     57         virtual bool write(const SegmentedString &str, bool);
     58         virtual void finish();
     59         virtual bool isWaitingForScripts() const;
     60         virtual void stopParsing();
     61         virtual bool wellFormed() const { return !m_sawError; }
     62         virtual int lineNumber() const;
     63         virtual int columnNumber() const;
     64 
     65         // from CachedObjectClient
     66         virtual void notifyFinished(CachedResource *finishedObj);
     67 
     68         // callbacks from parser expat
     69         void startElementNs(const XML_Char *name, const XML_Char **atts);
     70         void endElementNs();
     71         void characters(const XML_Char *s, int len);
     72         void processingInstruction(const XML_Char *target, const XML_Char *data);
     73         void comment(const XML_Char *s);
     74         void startCdata();
     75         void endCdata();
     76 
     77         void error(ErrorType type, const char* m, int lineNumber, int columnNumber);
     78 
     79         // utilities
     80         XML_Parser getXMLParser() const { return m_parser; }
     81         void setXMLParser(XML_Parser parser) { m_parser = parser; }
     82 
     83     private:
     84         void setCurrentNode(Node*);
     85 
     86         void end();
     87 
     88         void pauseParsing();
     89         void resumeParsing();
     90 
     91         void reportError();
     92         void insertErrorMessageBlock();
     93 
     94         bool enterText();
     95         void exitText();
     96 
     97         Document *m_doc;
     98         FrameView *m_view;
     99 
    100         XML_Parser m_parser;
    101 
    102         Node *m_currentNode;
    103         bool m_currentNodeIsReferenced;
    104 
    105         bool m_sawError;
    106         bool m_sawXSLTransform;
    107         bool m_sawFirstElement;
    108 
    109         bool m_parserPaused;
    110         bool m_requestingScript;
    111         bool m_finishCalled;
    112 
    113         int m_errorCount;
    114         String m_errorMessages;
    115 
    116         CachedScript *m_pendingScript;
    117         RefPtr<Element> m_scriptElement;
    118         int m_scriptStartLine;
    119 
    120         bool m_parsingFragment;
    121         String m_defaultNamespaceURI;
    122 
    123         typedef HashMap<String, String> PrefixForNamespaceMap;
    124         PrefixForNamespaceMap m_prefixToNamespaceMap;
    125 
    126         OwnPtr<PendingCallbacks> m_pendingCallbacks;
    127         SegmentedString m_pendingSrc;
    128     };
    129 
    130 HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
    131 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0);
    132 
    133 } // namespace WebCore
    134 
    135 #else   // USE(EXPAT)
    136 
    137 #include "CachedResourceClient.h"
    138 #include "CachedResourceHandle.h"
    139 #include "FragmentScriptingPermission.h"
    140 #include "ScriptableDocumentParser.h"
    141 #include "SegmentedString.h"
    142 #include <wtf/HashMap.h>
    143 #include <wtf/OwnPtr.h>
    144 #include <wtf/text/StringHash.h>
    145 
    146 #if USE(QXMLSTREAM)
    147 #include <qxmlstream.h>
    148 #else
    149 #include <libxml/tree.h>
    150 #include <libxml/xmlstring.h>
    151 #endif
    152 
    153 namespace WebCore {
    154 
    // Forward declarations, kept in alphabetical order. The declarations below
    // only use these types through pointers, handles and smart-pointer members.
    class CachedResourceLoader;
    class CachedScript;
    class Document;
    class DocumentFragment;
    class Element;
    class FrameView;
    class Node;
    class PendingCallbacks;
    class ScriptElement;
    164 
    165 #if !USE(QXMLSTREAM)
    166     class XMLParserContext : public RefCounted<XMLParserContext> {
    167     public:
    168         static PassRefPtr<XMLParserContext> createMemoryParser(xmlSAXHandlerPtr, void*, const char*);
    169         static PassRefPtr<XMLParserContext> createStringParser(xmlSAXHandlerPtr, void*);
    170         ~XMLParserContext();
    171         xmlParserCtxtPtr context() const { return m_context; }
    172 
    173     private:
    174         XMLParserContext(xmlParserCtxtPtr context)
    175             : m_context(context)
    176         {
    177         }
    178         xmlParserCtxtPtr m_context;
    179     };
    180 #endif
    181 
    182     class XMLDocumentParser : public ScriptableDocumentParser, public CachedResourceClient {
    183         WTF_MAKE_FAST_ALLOCATED;
    184     public:
    185         static PassRefPtr<XMLDocumentParser> create(Document* document, FrameView* view)
    186         {
    187             return adoptRef(new XMLDocumentParser(document, view));
    188         }
    189         static PassRefPtr<XMLDocumentParser> create(DocumentFragment* fragment, Element* element, FragmentScriptingPermission permission)
    190         {
    191             return adoptRef(new XMLDocumentParser(fragment, element, permission));
    192         }
    193 
    194         ~XMLDocumentParser();
    195 
    196         // Exposed for callbacks:
    197         enum ErrorType { warning, nonFatal, fatal };
    198         void handleError(ErrorType, const char* message, int lineNumber, int columnNumber);
    199         void handleError(ErrorType, const char* message, TextPosition1);
    200 
    201         void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }
    202         bool isXHTMLDocument() const { return m_isXHTMLDocument; }
    203 #if ENABLE(XHTMLMP)
    204         void setIsXHTMLMPDocument(bool isXHTML) { m_isXHTMLMPDocument = isXHTML; }
    205         bool isXHTMLMPDocument() const { return m_isXHTMLMPDocument; }
    206 #endif
    207 #if ENABLE(WML)
    208         bool isWMLDocument() const;
    209 #endif
    210 
    211         static bool parseDocumentFragment(const String&, DocumentFragment*, Element* parent = 0, FragmentScriptingPermission = FragmentScriptingAllowed);
    212 
    213         // WMLErrorHandling uses these functions.
    214         virtual bool wellFormed() const { return !m_sawError; }
    215 
    216         TextPosition0 textPosition() const;
    217         TextPosition1 textPositionOneBased() const;
    218 
    219         static bool supportsXMLVersion(const String&);
    220 
    221     private:
    222         XMLDocumentParser(Document*, FrameView* = 0);
    223         XMLDocumentParser(DocumentFragment*, Element*, FragmentScriptingPermission);
    224 
    225         // From DocumentParser
    226         virtual void insert(const SegmentedString&);
    227         virtual void append(const SegmentedString&);
    228         virtual void finish();
    229         virtual bool finishWasCalled();
    230         virtual bool isWaitingForScripts() const;
    231         virtual void stopParsing();
    232         virtual void detach();
    233         virtual int lineNumber() const;
    234         int columnNumber() const;
    235 
    236         // from CachedResourceClient
    237         virtual void notifyFinished(CachedResource*);
    238 
    239         void end();
    240 
    241         void pauseParsing();
    242         void resumeParsing();
    243 
    244         bool appendFragmentSource(const String&);
    245 
    246 #if USE(QXMLSTREAM)
    247 private:
    248         void parse();
    249         void startDocument();
    250         void parseStartElement();
    251         void parseEndElement();
    252         void parseCharacters();
    253         void parseProcessingInstruction();
    254         void parseCdata();
    255         void parseComment();
    256         void endDocument();
    257         void parseDtd();
    258         bool hasError() const;
    259 #else
    260 public:
    261         // callbacks from parser SAX
    262         void error(ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0);
    263         void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
    264                             const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes);
    265         void endElementNs();
    266         void characters(const xmlChar* s, int len);
    267         void processingInstruction(const xmlChar* target, const xmlChar* data);
    268         void cdataBlock(const xmlChar* s, int len);
    269         void comment(const xmlChar* s);
    270         void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone);
    271         void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID);
    272         void endDocument();
    273 #endif
    274     private:
    275         void initializeParserContext(const char* chunk = 0);
    276 
    277         void pushCurrentNode(Node*);
    278         void popCurrentNode();
    279         void clearCurrentNodeStack();
    280 
    281         void insertErrorMessageBlock();
    282 
    283         void enterText();
    284         void exitText();
    285 
    286         void doWrite(const String&);
    287         void doEnd();
    288 
    289         FrameView* m_view;
    290 
    291         String m_originalSourceForTransform;
    292 
    293 #if USE(QXMLSTREAM)
    294         QXmlStreamReader m_stream;
    295         bool m_wroteText;
    296 #else
    297         xmlParserCtxtPtr context() const { return m_context ? m_context->context() : 0; };
    298         RefPtr<XMLParserContext> m_context;
    299         OwnPtr<PendingCallbacks> m_pendingCallbacks;
    300         Vector<xmlChar> m_bufferedText;
    301 #endif
    302         Node* m_currentNode;
    303         Vector<Node*> m_currentNodeStack;
    304 
    305         bool m_sawError;
    306         bool m_sawCSS;
    307         bool m_sawXSLTransform;
    308         bool m_sawFirstElement;
    309         bool m_isXHTMLDocument;
    310 #if ENABLE(XHTMLMP)
    311         bool m_isXHTMLMPDocument;
    312         bool m_hasDocTypeDeclaration;
    313 #endif
    314 
    315         bool m_parserPaused;
    316         bool m_requestingScript;
    317         bool m_finishCalled;
    318 
    319         int m_errorCount;
    320         TextPosition1 m_lastErrorPosition;
    321         String m_errorMessages;
    322 
    323         CachedResourceHandle<CachedScript> m_pendingScript;
    324         RefPtr<Element> m_scriptElement;
    325         TextPosition1 m_scriptStartPosition;
    326 
    327         bool m_parsingFragment;
    328         AtomicString m_defaultNamespaceURI;
    329 
    330         typedef HashMap<AtomicString, AtomicString> PrefixForNamespaceMap;
    331         PrefixForNamespaceMap m_prefixToNamespaceMap;
    332         SegmentedString m_pendingSrc;
    333         FragmentScriptingPermission m_scriptingPermission;
    334     };
    335 
    336 #if ENABLE(XSLT)
    337 void* xmlDocPtrForString(CachedResourceLoader*, const String& source, const String& url);
    338 #endif
    339 
    340 HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
    341 
    342 } // namespace WebCore
    343 
    344 #endif // USE(EXPAT)
    345 
    346 #endif // XMLDocumentParser_h
    347