Home | History | Annotate | Download | only in html
      1 /*
      2     Copyright (C) 1997 Martin Jones (mjones (at) kde.org)
      3               (C) 1997 Torben Weis (weis (at) kde.org)
      4               (C) 1998 Waldo Bastian (bastian (at) kde.org)
      5               (C) 1999 Lars Knoll (knoll (at) kde.org)
      6     Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      7 
      8     This library is free software; you can redistribute it and/or
      9     modify it under the terms of the GNU Library General Public
     10     License as published by the Free Software Foundation; either
     11     version 2 of the License, or (at your option) any later version.
     12 
     13     This library is distributed in the hope that it will be useful,
     14     but WITHOUT ANY WARRANTY; without even the implied warranty of
     15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16     Library General Public License for more details.
     17 
     18     You should have received a copy of the GNU Library General Public License
     19     along with this library; see the file COPYING.LIB.  If not, write to
     20     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21     Boston, MA 02110-1301, USA.
     22 */
     23 
     24 #ifndef HTMLParser_h
     25 #define HTMLParser_h
     26 
     27 #include "QualifiedName.h"
     28 #include <wtf/Forward.h>
     29 #include <wtf/OwnPtr.h>
     30 #include <wtf/RefPtr.h>
     31 #include "HTMLParserErrorCodes.h"
     32 #include "MappedAttributeEntry.h"
     33 
     34 namespace WebCore {
     35 
     36 class DoctypeToken;
     37 class Document;
     38 class DocumentFragment;
     39 class HTMLDocument;
     40 class HTMLFormElement;
     41 class HTMLHeadElement;
     42 class HTMLMapElement;
     43 class HTMLParserQuirks;
     44 class Node;
     45 
     46 struct HTMLStackElem;
     47 struct Token;
     48 
     49 /**
     50  * The parser for HTML. It receives a stream of tokens from the HTMLTokenizer, and
     51  * builds up the Document structure from it.
     52  */
     53 class HTMLParser : public Noncopyable {
     54 public:
     55     HTMLParser(HTMLDocument*, bool reportErrors);
     56     HTMLParser(DocumentFragment*, FragmentScriptingPermission = FragmentScriptingAllowed);
     57     virtual ~HTMLParser();
     58 
     59     /**
     60      * parses one token delivered by the tokenizer
     61      */
     62     PassRefPtr<Node> parseToken(Token*);
     63 
     64     // Parses a doctype token.
     65     void parseDoctypeToken(DoctypeToken*);
     66 
     67     /**
     68      * tokenizer says it's not going to be sending us any more tokens
     69      */
     70     void finished();
     71 
     72     /**
     73      * resets the parser
     74      */
     75     void reset();
     76 
     77     bool skipMode() const { return !m_skipModeTag.isNull(); }
     78     bool isHandlingResidualStyleAcrossBlocks() const { return m_handlingResidualStyleAcrossBlocks; }
     79 
     80 private:
     81     void setCurrent(Node*);
     82     void derefCurrent();
     83     void setSkipMode(const QualifiedName& qName) { m_skipModeTag = qName.localName(); }
     84 
     85     PassRefPtr<Node> getNode(Token*);
     86     bool bodyCreateErrorCheck(Token*, RefPtr<Node>&);
     87     bool canvasCreateErrorCheck(Token*, RefPtr<Node>&);
     88     bool commentCreateErrorCheck(Token*, RefPtr<Node>&);
     89     bool ddCreateErrorCheck(Token*, RefPtr<Node>&);
     90     bool dtCreateErrorCheck(Token*, RefPtr<Node>&);
     91     bool formCreateErrorCheck(Token*, RefPtr<Node>&);
     92     bool framesetCreateErrorCheck(Token*, RefPtr<Node>&);
     93     bool headCreateErrorCheck(Token*, RefPtr<Node>&);
     94     bool iframeCreateErrorCheck(Token*, RefPtr<Node>&);
     95     bool isindexCreateErrorCheck(Token*, RefPtr<Node>&);
     96     bool mapCreateErrorCheck(Token*, RefPtr<Node>&);
     97     bool nestedCreateErrorCheck(Token*, RefPtr<Node>&);
     98     bool nestedPCloserCreateErrorCheck(Token*, RefPtr<Node>&);
     99     bool nestedStyleCreateErrorCheck(Token*, RefPtr<Node>&);
    100     bool noembedCreateErrorCheck(Token*, RefPtr<Node>&);
    101     bool noframesCreateErrorCheck(Token*, RefPtr<Node>&);
    102     bool nolayerCreateErrorCheck(Token*, RefPtr<Node>&);
    103     bool noscriptCreateErrorCheck(Token*, RefPtr<Node>&);
    104     bool pCloserCreateErrorCheck(Token*, RefPtr<Node>&);
    105     bool pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&);
    106     bool rpCreateErrorCheck(Token*, RefPtr<Node>&);
    107     bool rtCreateErrorCheck(Token*, RefPtr<Node>&);
    108     bool selectCreateErrorCheck(Token*, RefPtr<Node>&);
    109     bool tableCellCreateErrorCheck(Token*, RefPtr<Node>&);
    110     bool tableSectionCreateErrorCheck(Token*, RefPtr<Node>&);
    111     bool textCreateErrorCheck(Token*, RefPtr<Node>&);
    112 
    113     void processCloseTag(Token*);
    114 
    115     void limitBlockDepth(int tagPriority);
    116 
    117     bool insertNodeAfterLimitBlockDepth(Node*, bool flat = false);
    118     bool insertNode(Node*, bool flat = false);
    119     bool handleError(Node*, bool flat, const AtomicString& localName, int tagPriority);
    120 
    121     void pushBlock(const AtomicString& tagName, int level);
    122     void popBlock(const AtomicString& tagName, bool reportErrors = false);
    123     void popBlock(const QualifiedName& qName, bool reportErrors = false) { return popBlock(qName.localName(), reportErrors); } // Convenience function for readability.
    124     void popOneBlock();
    125     void moveOneBlockToStack(HTMLStackElem*& head);
    126     inline HTMLStackElem* popOneBlockCommon();
    127     void popInlineBlocks();
    128 
    129     void freeBlock();
    130 
    131     void createHead();
    132 
    133     static bool isResidualStyleTag(const AtomicString& tagName);
    134     static bool isAffectedByResidualStyle(const AtomicString& tagName);
    135     void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem*);
    136     void reopenResidualStyleTags(HTMLStackElem*, Node* malformedTableParent);
    137 
    138     bool allowNestedRedundantTag(const AtomicString& tagName);
    139 
    140     static bool isHeadingTag(const AtomicString& tagName);
    141 
    142     bool isInline(Node*) const;
    143 
    144     void startBody(); // inserts the isindex element
    145     PassRefPtr<Node> handleIsindex(Token*);
    146 
    147     void checkIfHasPElementInScope();
    148     bool hasPElementInScope()
    149     {
    150         if (m_hasPElementInScope == Unknown)
    151             checkIfHasPElementInScope();
    152         return m_hasPElementInScope == InScope;
    153     }
    154 
    155     void reportError(HTMLParserErrorCode errorCode, const AtomicString* tagName1 = 0, const AtomicString* tagName2 = 0, bool closeTags = false)
    156     { if (!m_reportErrors) return; reportErrorToConsole(errorCode, tagName1, tagName2, closeTags); }
    157 
    158     void reportErrorToConsole(HTMLParserErrorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags);
    159 
    160     Document* m_document;
    161 
    162     // The currently active element (the one new elements will be added to). Can be a document fragment, a document or an element.
    163     Node* m_current;
    164     // We can't ref a document, but we don't want to constantly check if a node is a document just to decide whether to deref.
    165     bool m_didRefCurrent;
    166 
    167     HTMLStackElem* m_blockStack;
    168 
    169     // The number of tags with priority minBlockLevelTagPriority or higher
    170     // currently in m_blockStack. The parser enforces a cap on this value by
    171     // adding such new elements as siblings instead of children once it is reached.
    172     size_t m_blocksInStack;
    173 
    174     enum ElementInScopeState { NotInScope, InScope, Unknown };
    175     ElementInScopeState m_hasPElementInScope;
    176 
    177     RefPtr<HTMLFormElement> m_currentFormElement; // currently active form
    178     RefPtr<HTMLMapElement> m_currentMapElement; // current map
    179     RefPtr<HTMLHeadElement> m_head; // head element; needed for HTML which defines <base> after </head>
    180     RefPtr<Node> m_isindexElement; // a possible <isindex> element in the head
    181 
    182     bool m_inBody;
    183     bool m_haveContent;
    184     bool m_haveFrameSet;
    185 
    186     AtomicString m_skipModeTag; // tells the parser to discard all tags until it reaches the one specified
    187 
    188     bool m_isParsingFragment;
    189     bool m_reportErrors;
    190     bool m_handlingResidualStyleAcrossBlocks;
    191     int m_inStrayTableContent;
    192     FragmentScriptingPermission m_scriptingPermission;
    193 
    194     OwnPtr<HTMLParserQuirks> m_parserQuirks;
    195 };
    196 
    197 #if defined(BUILDING_ON_LEOPARD) || defined(BUILDING_ON_TIGER)
    198 bool shouldCreateImplicitHead(Document*);
    199 #else
    200 inline bool shouldCreateImplicitHead(Document*) { return true; }
    201 #endif
    202 
    203 }
    204 
    205 #endif // HTMLParser_h
    206