1 /* 2 * Copyright (C) 2000 Peter Kelly (pmk (at) post.com) 3 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved. 4 * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org) 5 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 6 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 * 23 */ 24 25 #ifndef XMLDocumentParser_h 26 #define XMLDocumentParser_h 27 28 #if USE(EXPAT) 29 30 #include "CachedResourceClient.h" 31 #include "SegmentedString.h" 32 #include "StringHash.h" 33 #include "Tokenizer.h" 34 #include <libexpat/expat.h> 35 #include <wtf/HashMap.h> 36 #include <wtf/OwnPtr.h> 37 38 namespace WebCore { 39 40 class Node; 41 class CachedScript; 42 class DocumentFragment; 43 class Document; 44 class Element; 45 class FrameView; 46 class PendingCallbacks; 47 48 class XMLTokenizer : public Tokenizer, public CachedResourceClient { 49 public: 50 XMLTokenizer(Document *, FrameView * = 0); 51 XMLTokenizer(DocumentFragment *, Element *); 52 ~XMLTokenizer(); 53 54 enum ErrorType { warning, nonFatal, fatal }; 55 56 // from Tokenizer 57 virtual bool write(const SegmentedString &str, bool); 58 virtual void finish(); 59 virtual bool isWaitingForScripts() const; 60 virtual void stopParsing(); 61 virtual bool wellFormed() const { return !m_sawError; } 62 virtual int lineNumber() const; 63 virtual int columnNumber() const; 64 65 // from CachedObjectClient 66 virtual void notifyFinished(CachedResource *finishedObj); 67 68 // callbacks from parser expat 69 void startElementNs(const XML_Char *name, const XML_Char **atts); 70 void endElementNs(); 71 void characters(const XML_Char *s, int len); 72 void processingInstruction(const XML_Char *target, const XML_Char *data); 73 void comment(const XML_Char *s); 74 void startCdata(); 75 void endCdata(); 76 77 void error(ErrorType type, const char* m, int lineNumber, int columnNumber); 78 79 // utilities 80 XML_Parser getXMLParser() const { return m_parser; } 81 void setXMLParser(XML_Parser parser) { m_parser = parser; } 82 83 private: 84 void setCurrentNode(Node*); 85 86 void end(); 87 88 void pauseParsing(); 89 void resumeParsing(); 90 91 void reportError(); 92 void insertErrorMessageBlock(); 93 94 bool enterText(); 95 void exitText(); 96 97 Document *m_doc; 98 FrameView *m_view; 99 100 XML_Parser m_parser; 101 102 Node *m_currentNode; 103 bool m_currentNodeIsReferenced; 104 105 bool m_sawError; 106 bool m_sawXSLTransform; 107 bool m_sawFirstElement; 108 109 bool m_parserPaused; 110 bool m_requestingScript; 111 bool m_finishCalled; 112 113 int m_errorCount; 114 String m_errorMessages; 115 116 CachedScript *m_pendingScript; 117 RefPtr<Element> m_scriptElement; 118 int m_scriptStartLine; 119 120 bool m_parsingFragment; 121 String m_defaultNamespaceURI; 122 123 typedef HashMap<String, String> PrefixForNamespaceMap; 124 PrefixForNamespaceMap m_prefixToNamespaceMap; 125 126 OwnPtr<PendingCallbacks> m_pendingCallbacks; 127 SegmentedString m_pendingSrc; 128 }; 129 130 HashMap<String, String> parseAttributes(const String&, bool& attrsOK); 131 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0); 132 133 } // namespace WebCore 134 135 #else // USE(EXPAT) 136 137 #include "CachedResourceClient.h" 138 #include "CachedResourceHandle.h" 139 #include "FragmentScriptingPermission.h" 140 #include "ScriptableDocumentParser.h" 141 #include "SegmentedString.h" 142 #include <wtf/HashMap.h> 143 #include <wtf/OwnPtr.h> 144 #include <wtf/text/StringHash.h> 145 146 #if USE(QXMLSTREAM) 147 #include <qxmlstream.h> 148 #else 149 #include <libxml/tree.h> 150 #include <libxml/xmlstring.h> 151 #endif 152 153 namespace WebCore { 154 155 class Node; 156 class CachedScript; 157 class CachedResourceLoader; 158 class DocumentFragment; 159 class Document; 160 class Element; 161 class FrameView; 162 class PendingCallbacks; 163 class ScriptElement; 164 165 #if !USE(QXMLSTREAM) 166 class XMLParserContext : public RefCounted<XMLParserContext> { 167 public: 168 static PassRefPtr<XMLParserContext> createMemoryParser(xmlSAXHandlerPtr, void*, const char*); 169 static PassRefPtr<XMLParserContext> createStringParser(xmlSAXHandlerPtr, void*); 170 ~XMLParserContext(); 171 xmlParserCtxtPtr context() const { return m_context; } 172 173 private: 174 XMLParserContext(xmlParserCtxtPtr context) 175 : m_context(context) 176 { 177 } 178 xmlParserCtxtPtr m_context; 179 }; 180 #endif 181 182 class XMLDocumentParser : public ScriptableDocumentParser, public CachedResourceClient { 183 WTF_MAKE_FAST_ALLOCATED; 184 public: 185 static PassRefPtr<XMLDocumentParser> create(Document* document, FrameView* view) 186 { 187 return adoptRef(new XMLDocumentParser(document, view)); 188 } 189 static PassRefPtr<XMLDocumentParser> create(DocumentFragment* fragment, Element* element, FragmentScriptingPermission permission) 190 { 191 return adoptRef(new XMLDocumentParser(fragment, element, permission)); 192 } 193 194 ~XMLDocumentParser(); 195 196 // Exposed for callbacks: 197 enum ErrorType { warning, nonFatal, fatal }; 198 void handleError(ErrorType, const char* message, int lineNumber, int columnNumber); 199 void handleError(ErrorType, const char* message, TextPosition1); 200 201 void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; } 202 bool isXHTMLDocument() const { return m_isXHTMLDocument; } 203 #if ENABLE(XHTMLMP) 204 void setIsXHTMLMPDocument(bool isXHTML) { m_isXHTMLMPDocument = isXHTML; } 205 bool isXHTMLMPDocument() const { return m_isXHTMLMPDocument; } 206 #endif 207 #if ENABLE(WML) 208 bool isWMLDocument() const; 209 #endif 210 211 static bool parseDocumentFragment(const String&, DocumentFragment*, Element* parent = 0, FragmentScriptingPermission = FragmentScriptingAllowed); 212 213 // WMLErrorHandling uses these functions. 214 virtual bool wellFormed() const { return !m_sawError; } 215 216 TextPosition0 textPosition() const; 217 TextPosition1 textPositionOneBased() const; 218 219 static bool supportsXMLVersion(const String&); 220 221 private: 222 XMLDocumentParser(Document*, FrameView* = 0); 223 XMLDocumentParser(DocumentFragment*, Element*, FragmentScriptingPermission); 224 225 // From DocumentParser 226 virtual void insert(const SegmentedString&); 227 virtual void append(const SegmentedString&); 228 virtual void finish(); 229 virtual bool finishWasCalled(); 230 virtual bool isWaitingForScripts() const; 231 virtual void stopParsing(); 232 virtual void detach(); 233 virtual int lineNumber() const; 234 int columnNumber() const; 235 236 // from CachedResourceClient 237 virtual void notifyFinished(CachedResource*); 238 239 void end(); 240 241 void pauseParsing(); 242 void resumeParsing(); 243 244 bool appendFragmentSource(const String&); 245 246 #if USE(QXMLSTREAM) 247 private: 248 void parse(); 249 void startDocument(); 250 void parseStartElement(); 251 void parseEndElement(); 252 void parseCharacters(); 253 void parseProcessingInstruction(); 254 void parseCdata(); 255 void parseComment(); 256 void endDocument(); 257 void parseDtd(); 258 bool hasError() const; 259 #else 260 public: 261 // callbacks from parser SAX 262 void error(ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0); 263 void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces, 264 const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes); 265 void endElementNs(); 266 void characters(const xmlChar* s, int len); 267 void processingInstruction(const xmlChar* target, const xmlChar* data); 268 void cdataBlock(const xmlChar* s, int len); 269 void comment(const xmlChar* s); 270 void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone); 271 void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID); 272 void endDocument(); 273 #endif 274 private: 275 void initializeParserContext(const char* chunk = 0); 276 277 void pushCurrentNode(Node*); 278 void popCurrentNode(); 279 void clearCurrentNodeStack(); 280 281 void insertErrorMessageBlock(); 282 283 void enterText(); 284 void exitText(); 285 286 void doWrite(const String&); 287 void doEnd(); 288 289 FrameView* m_view; 290 291 String m_originalSourceForTransform; 292 293 #if USE(QXMLSTREAM) 294 QXmlStreamReader m_stream; 295 bool m_wroteText; 296 #else 297 xmlParserCtxtPtr context() const { return m_context ? m_context->context() : 0; }; 298 RefPtr<XMLParserContext> m_context; 299 OwnPtr<PendingCallbacks> m_pendingCallbacks; 300 Vector<xmlChar> m_bufferedText; 301 #endif 302 Node* m_currentNode; 303 Vector<Node*> m_currentNodeStack; 304 305 bool m_sawError; 306 bool m_sawCSS; 307 bool m_sawXSLTransform; 308 bool m_sawFirstElement; 309 bool m_isXHTMLDocument; 310 #if ENABLE(XHTMLMP) 311 bool m_isXHTMLMPDocument; 312 bool m_hasDocTypeDeclaration; 313 #endif 314 315 bool m_parserPaused; 316 bool m_requestingScript; 317 bool m_finishCalled; 318 319 int m_errorCount; 320 TextPosition1 m_lastErrorPosition; 321 String m_errorMessages; 322 323 CachedResourceHandle<CachedScript> m_pendingScript; 324 RefPtr<Element> m_scriptElement; 325 TextPosition1 m_scriptStartPosition; 326 327 bool m_parsingFragment; 328 AtomicString m_defaultNamespaceURI; 329 330 typedef HashMap<AtomicString, AtomicString> PrefixForNamespaceMap; 331 PrefixForNamespaceMap m_prefixToNamespaceMap; 332 SegmentedString m_pendingSrc; 333 FragmentScriptingPermission m_scriptingPermission; 334 }; 335 336 #if ENABLE(XSLT) 337 void* xmlDocPtrForString(CachedResourceLoader*, const String& source, const String& url); 338 #endif 339 340 HashMap<String, String> parseAttributes(const String&, bool& attrsOK); 341 342 } // namespace WebCore 343 344 #endif // USE(EXPAT) 345 346 #endif // XMLDocumentParser_h 347