1 /* 2 Copyright (C) 1997 Martin Jones (mjones (at) kde.org) 3 (C) 1997 Torben Weis (weis (at) kde.org) 4 (C) 1998 Waldo Bastian (bastian (at) kde.org) 5 (C) 1999 Lars Knoll (knoll (at) kde.org) 6 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 7 8 This library is free software; you can redistribute it and/or 9 modify it under the terms of the GNU Library General Public 10 License as published by the Free Software Foundation; either 11 version 2 of the License, or (at your option) any later version. 12 13 This library is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 Library General Public License for more details. 17 18 You should have received a copy of the GNU Library General Public License 19 along with this library; see the file COPYING.LIB. If not, write to 20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 Boston, MA 02110-1301, USA. 22 */ 23 24 #ifndef HTMLParser_h 25 #define HTMLParser_h 26 27 #include "QualifiedName.h" 28 #include <wtf/Forward.h> 29 #include <wtf/OwnPtr.h> 30 #include <wtf/RefPtr.h> 31 #include "HTMLParserErrorCodes.h" 32 #include "MappedAttributeEntry.h" 33 34 namespace WebCore { 35 36 class DoctypeToken; 37 class Document; 38 class DocumentFragment; 39 class HTMLDocument; 40 class HTMLFormElement; 41 class HTMLHeadElement; 42 class HTMLMapElement; 43 class HTMLParserQuirks; 44 class Node; 45 46 struct HTMLStackElem; 47 struct Token; 48 49 /** 50 * The parser for HTML. It receives a stream of tokens from the HTMLTokenizer, and 51 * builds up the Document structure from it. 52 */ 53 class HTMLParser : public Noncopyable { 54 public: 55 HTMLParser(HTMLDocument*, bool reportErrors); 56 HTMLParser(DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed); 57 virtual ~HTMLParser(); 58 59 /** 60 * parses one token delivered by the tokenizer 61 */ 62 PassRefPtr<Node> parseToken(Token*); 63 64 // Parses a doctype token. 65 void parseDoctypeToken(DoctypeToken*); 66 67 /** 68 * tokenizer says it's not going to be sending us any more tokens 69 */ 70 void finished(); 71 72 /** 73 * resets the parser 74 */ 75 void reset(); 76 77 bool skipMode() const { return !m_skipModeTag.isNull(); } 78 bool isHandlingResidualStyleAcrossBlocks() const { return m_handlingResidualStyleAcrossBlocks; } 79 80 private: 81 void setCurrent(Node*); 82 void derefCurrent(); 83 void setSkipMode(const QualifiedName& qName) { m_skipModeTag = qName.localName(); } 84 85 PassRefPtr<Node> getNode(Token*); 86 bool bodyCreateErrorCheck(Token*, RefPtr<Node>&); 87 bool canvasCreateErrorCheck(Token*, RefPtr<Node>&); 88 bool commentCreateErrorCheck(Token*, RefPtr<Node>&); 89 bool ddCreateErrorCheck(Token*, RefPtr<Node>&); 90 bool dtCreateErrorCheck(Token*, RefPtr<Node>&); 91 bool formCreateErrorCheck(Token*, RefPtr<Node>&); 92 bool framesetCreateErrorCheck(Token*, RefPtr<Node>&); 93 bool headCreateErrorCheck(Token*, RefPtr<Node>&); 94 bool iframeCreateErrorCheck(Token*, RefPtr<Node>&); 95 bool isindexCreateErrorCheck(Token*, RefPtr<Node>&); 96 bool mapCreateErrorCheck(Token*, RefPtr<Node>&); 97 bool nestedCreateErrorCheck(Token*, RefPtr<Node>&); 98 bool nestedPCloserCreateErrorCheck(Token*, RefPtr<Node>&); 99 bool nestedStyleCreateErrorCheck(Token*, RefPtr<Node>&); 100 bool noembedCreateErrorCheck(Token*, RefPtr<Node>&); 101 bool noframesCreateErrorCheck(Token*, RefPtr<Node>&); 102 bool nolayerCreateErrorCheck(Token*, RefPtr<Node>&); 103 bool noscriptCreateErrorCheck(Token*, RefPtr<Node>&); 104 bool pCloserCreateErrorCheck(Token*, RefPtr<Node>&); 105 bool pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&); 106 bool rpCreateErrorCheck(Token*, RefPtr<Node>&); 107 bool rtCreateErrorCheck(Token*, RefPtr<Node>&); 108 bool selectCreateErrorCheck(Token*, RefPtr<Node>&); 109 bool tableCellCreateErrorCheck(Token*, RefPtr<Node>&); 110 bool tableSectionCreateErrorCheck(Token*, RefPtr<Node>&); 111 bool textCreateErrorCheck(Token*, RefPtr<Node>&); 112 113 void processCloseTag(Token*); 114 115 void limitBlockDepth(int tagPriority); 116 117 bool insertNodeAfterLimitBlockDepth(Node*, bool flat = false); 118 bool insertNode(Node*, bool flat = false); 119 bool handleError(Node*, bool flat, const AtomicString& localName, int tagPriority); 120 121 void pushBlock(const AtomicString& tagName, int level); 122 void popBlock(const AtomicString& tagName, bool reportErrors = false); 123 void popBlock(const QualifiedName& qName, bool reportErrors = false) { return popBlock(qName.localName(), reportErrors); } // Convenience function for readability. 124 void popOneBlock(); 125 void moveOneBlockToStack(HTMLStackElem*& head); 126 inline HTMLStackElem* popOneBlockCommon(); 127 void popInlineBlocks(); 128 129 void freeBlock(); 130 131 void createHead(); 132 133 static bool isResidualStyleTag(const AtomicString& tagName); 134 static bool isAffectedByResidualStyle(const AtomicString& tagName); 135 void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem*); 136 void reopenResidualStyleTags(HTMLStackElem*, Node* malformedTableParent); 137 138 bool allowNestedRedundantTag(const AtomicString& tagName); 139 140 static bool isHeadingTag(const AtomicString& tagName); 141 142 bool isInline(Node*) const; 143 144 void startBody(); // inserts the isindex element 145 PassRefPtr<Node> handleIsindex(Token*); 146 147 void checkIfHasPElementInScope(); 148 bool hasPElementInScope() 149 { 150 if (m_hasPElementInScope == Unknown) 151 checkIfHasPElementInScope(); 152 return m_hasPElementInScope == InScope; 153 } 154 155 void reportError(HTMLParserErrorCode errorCode, const AtomicString* tagName1 = 0, const AtomicString* tagName2 = 0, bool closeTags = false) 156 { if (!m_reportErrors) return; reportErrorToConsole(errorCode, tagName1, tagName2, closeTags); } 157 158 void reportErrorToConsole(HTMLParserErrorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags); 159 160 Document* m_document; 161 162 // The currently active element (the one new elements will be added to). Can be a document fragment, a document or an element. 163 Node* m_current; 164 // We can't ref a document, but we don't want to constantly check if a node is a document just to decide whether to deref. 165 bool m_didRefCurrent; 166 167 HTMLStackElem* m_blockStack; 168 169 // The number of tags with priority minBlockLevelTagPriority or higher 170 // currently in m_blockStack. The parser enforces a cap on this value by 171 // adding such new elements as siblings instead of children once it is reached. 172 size_t m_blocksInStack; 173 174 enum ElementInScopeState { NotInScope, InScope, Unknown }; 175 ElementInScopeState m_hasPElementInScope; 176 177 RefPtr<HTMLFormElement> m_currentFormElement; // currently active form 178 RefPtr<HTMLMapElement> m_currentMapElement; // current map 179 RefPtr<HTMLHeadElement> m_head; // head element; needed for HTML which defines <base> after </head> 180 RefPtr<Node> m_isindexElement; // a possible <isindex> element in the head 181 182 bool m_inBody; 183 bool m_haveContent; 184 bool m_haveFrameSet; 185 186 AtomicString m_skipModeTag; // tells the parser to discard all tags until it reaches the one specified 187 188 bool m_isParsingFragment; 189 bool m_reportErrors; 190 bool m_handlingResidualStyleAcrossBlocks; 191 int m_inStrayTableContent; 192 FragmentScriptingPermission m_scriptingPermission; 193 194 OwnPtr<HTMLParserQuirks> m_parserQuirks; 195 }; 196 197 #if defined(BUILDING_ON_LEOPARD) || defined(BUILDING_ON_TIGER) 198 bool shouldCreateImplicitHead(Document*); 199 #else 200 inline bool shouldCreateImplicitHead(Document*) { return true; } 201 #endif 202 203 } 204 205 #endif // HTMLParser_h 206