Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #ifndef HTMLConstructionSite_h
     28 #define HTMLConstructionSite_h
     29 
     30 #include "core/dom/ParserContentPolicy.h"
     31 #include "core/html/parser/HTMLElementStack.h"
     32 #include "core/html/parser/HTMLFormattingElementList.h"
     33 #include "wtf/Noncopyable.h"
     34 #include "wtf/PassRefPtr.h"
     35 #include "wtf/RefPtr.h"
     36 #include "wtf/Vector.h"
     37 #include "wtf/text/StringBuilder.h"
     38 
     39 namespace WebCore {
     40 
     41 struct HTMLConstructionSiteTask {
     42     enum Operation {
     43         Insert,
     44         InsertText, // Handles possible merging of text nodes.
     45         InsertAlreadyParsedChild, // Insert w/o calling begin/end parsing.
     46         Reparent,
     47         TakeAllChildren,
     48     };
     49 
     50     explicit HTMLConstructionSiteTask(Operation op)
     51         : operation(op)
     52         , selfClosing(false)
     53     {
     54     }
     55 
     56     ContainerNode* oldParent()
     57     {
     58         // It's sort of ugly, but we store the |oldParent| in the |child| field
     59         // of the task so that we don't bloat the HTMLConstructionSiteTask
     60         // object in the common case of the Insert operation.
     61         return toContainerNode(child.get());
     62     }
     63 
     64     Operation operation;
     65     RefPtr<ContainerNode> parent;
     66     RefPtr<Node> nextChild;
     67     RefPtr<Node> child;
     68     bool selfClosing;
     69 };
     70 
     71 } // namespace WebCore
     72 
     73 namespace WTF {
     74 template<> struct VectorTraits<WebCore::HTMLConstructionSiteTask> : SimpleClassVectorTraits { };
     75 } // namespace WTF
     76 
     77 namespace WebCore {
     78 
     79 // Note: These are intentionally ordered so that when we concatonate
     80 // strings and whitespaces the resulting whitespace is ws = min(ws1, ws2).
     81 enum WhitespaceMode {
     82     WhitespaceUnknown,
     83     NotAllWhitespace,
     84     AllWhitespace,
     85 };
     86 
     87 class AtomicHTMLToken;
     88 class Document;
     89 class Element;
     90 class HTMLFormElement;
     91 
     92 class HTMLConstructionSite {
     93     WTF_MAKE_NONCOPYABLE(HTMLConstructionSite);
     94 public:
     95     HTMLConstructionSite(Document*, ParserContentPolicy);
     96     HTMLConstructionSite(DocumentFragment*, ParserContentPolicy);
     97     ~HTMLConstructionSite();
     98 
     99     void detach();
    100 
    101     // executeQueuedTasks empties the queue but does not flush pending text.
    102     // NOTE: Possible reentrancy via JavaScript execution.
    103     void executeQueuedTasks();
    104 
    105     // flushPendingText turns pending text into queued Text insertions, but does not execute them.
    106     void flushPendingText();
    107 
    108     // Called before every token in HTMLTreeBuilder::processToken, thus inlined:
    109     void flush()
    110     {
    111         if (!hasPendingTasks())
    112             return;
    113         flushPendingText();
    114         executeQueuedTasks(); // NOTE: Possible reentrancy via JavaScript execution.
    115         ASSERT(!hasPendingTasks());
    116     }
    117 
    118     bool hasPendingTasks()
    119     {
    120         return !m_pendingText.isEmpty() || !m_taskQueue.isEmpty();
    121     }
    122 
    123     void setDefaultCompatibilityMode();
    124     void processEndOfFile();
    125     void finishedParsing();
    126 
    127     void insertDoctype(AtomicHTMLToken*);
    128     void insertComment(AtomicHTMLToken*);
    129     void insertCommentOnDocument(AtomicHTMLToken*);
    130     void insertCommentOnHTMLHtmlElement(AtomicHTMLToken*);
    131     void insertHTMLElement(AtomicHTMLToken*);
    132     void insertSelfClosingHTMLElement(AtomicHTMLToken*);
    133     void insertFormattingElement(AtomicHTMLToken*);
    134     void insertHTMLHeadElement(AtomicHTMLToken*);
    135     void insertHTMLBodyElement(AtomicHTMLToken*);
    136     void insertHTMLFormElement(AtomicHTMLToken*, bool isDemoted = false);
    137     void insertScriptElement(AtomicHTMLToken*);
    138     void insertTextNode(const String&, WhitespaceMode = WhitespaceUnknown);
    139     void insertForeignElement(AtomicHTMLToken*, const AtomicString& namespaceURI);
    140 
    141     void insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken*);
    142     void insertHTMLHtmlStartTagInBody(AtomicHTMLToken*);
    143     void insertHTMLBodyStartTagInBody(AtomicHTMLToken*);
    144 
    145     void reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child);
    146     void reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child);
    147     // insertAlreadyParsedChild assumes that |child| has already been parsed (i.e., we're just
    148     // moving it around in the tree rather than parsing it for the first time). That means
    149     // this function doesn't call beginParsingChildren / finishParsingChildren.
    150     void insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child);
    151     void takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent);
    152 
    153     PassRefPtr<HTMLStackItem> createElementFromSavedToken(HTMLStackItem*);
    154 
    155     bool shouldFosterParent() const;
    156     void fosterParent(PassRefPtr<Node>);
    157 
    158     bool indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const;
    159     void reconstructTheActiveFormattingElements();
    160 
    161     void generateImpliedEndTags();
    162     void generateImpliedEndTagsWithExclusion(const AtomicString& tagName);
    163 
    164     bool inQuirksMode();
    165 
    166     bool isEmpty() const { return !m_openElements.stackDepth(); }
    167     HTMLElementStack::ElementRecord* currentElementRecord() const { return m_openElements.topRecord(); }
    168     Element* currentElement() const { return m_openElements.top(); }
    169     ContainerNode* currentNode() const { return m_openElements.topNode(); }
    170     HTMLStackItem* currentStackItem() const { return m_openElements.topStackItem(); }
    171     HTMLStackItem* oneBelowTop() const { return m_openElements.oneBelowTop(); }
    172     Document& ownerDocumentForCurrentNode();
    173     HTMLElementStack* openElements() const { return &m_openElements; }
    174     HTMLFormattingElementList* activeFormattingElements() const { return &m_activeFormattingElements; }
    175     bool currentIsRootNode() { return m_openElements.topNode() == m_openElements.rootNode(); }
    176 
    177     Element* head() const { return m_head->element(); }
    178     HTMLStackItem* headStackItem() const { return m_head.get(); }
    179 
    180     void setForm(HTMLFormElement*);
    181     HTMLFormElement* form() const { return m_form.get(); }
    182     PassRefPtr<HTMLFormElement> takeForm();
    183 
    184     ParserContentPolicy parserContentPolicy() { return m_parserContentPolicy; }
    185 
    186     class RedirectToFosterParentGuard {
    187         WTF_MAKE_NONCOPYABLE(RedirectToFosterParentGuard);
    188     public:
    189         RedirectToFosterParentGuard(HTMLConstructionSite& tree)
    190             : m_tree(tree)
    191             , m_wasRedirectingBefore(tree.m_redirectAttachToFosterParent)
    192         {
    193             m_tree.m_redirectAttachToFosterParent = true;
    194         }
    195 
    196         ~RedirectToFosterParentGuard()
    197         {
    198             m_tree.m_redirectAttachToFosterParent = m_wasRedirectingBefore;
    199         }
    200 
    201     private:
    202         HTMLConstructionSite& m_tree;
    203         bool m_wasRedirectingBefore;
    204     };
    205 
    206 private:
    207     // In the common case, this queue will have only one task because most
    208     // tokens produce only one DOM mutation.
    209     typedef Vector<HTMLConstructionSiteTask, 1> TaskQueue;
    210 
    211     void setCompatibilityMode(Document::CompatibilityMode);
    212     void setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId);
    213 
    214     void attachLater(ContainerNode* parent, PassRefPtr<Node> child, bool selfClosing = false);
    215 
    216     void findFosterSite(HTMLConstructionSiteTask&);
    217 
    218     PassRefPtr<Element> createHTMLElement(AtomicHTMLToken*);
    219     PassRefPtr<Element> createElement(AtomicHTMLToken*, const AtomicString& namespaceURI);
    220 
    221     void mergeAttributesFromTokenIntoElement(AtomicHTMLToken*, Element*);
    222     void dispatchDocumentElementAvailableIfNeeded();
    223 
    224     void executeTask(HTMLConstructionSiteTask&);
    225     void queueTask(const HTMLConstructionSiteTask&);
    226 
    227     Document* m_document;
    228 
    229     // This is the root ContainerNode to which the parser attaches all newly
    230     // constructed nodes. It points to a DocumentFragment when parsing fragments
    231     // and a Document in all other cases.
    232     ContainerNode* m_attachmentRoot;
    233 
    234     RefPtr<HTMLStackItem> m_head;
    235     RefPtr<HTMLFormElement> m_form;
    236     mutable HTMLElementStack m_openElements;
    237     mutable HTMLFormattingElementList m_activeFormattingElements;
    238 
    239     TaskQueue m_taskQueue;
    240 
    241     struct PendingText {
    242         PendingText()
    243             : whitespaceMode(WhitespaceUnknown)
    244         {
    245         }
    246 
    247         void append(PassRefPtr<ContainerNode> newParent, PassRefPtr<Node> newNextChild, const String& newString, WhitespaceMode newWhitespaceMode)
    248         {
    249             ASSERT(!parent || parent == newParent);
    250             parent = newParent;
    251             ASSERT(!nextChild || nextChild == newNextChild);
    252             nextChild = newNextChild;
    253             stringBuilder.append(newString);
    254             whitespaceMode = std::min(whitespaceMode, newWhitespaceMode);
    255         }
    256 
    257         void swap(PendingText& other)
    258         {
    259             std::swap(whitespaceMode, other.whitespaceMode);
    260             parent.swap(other.parent);
    261             nextChild.swap(other.nextChild);
    262             stringBuilder.swap(other.stringBuilder);
    263         }
    264 
    265         void discard()
    266         {
    267             PendingText discardedText;
    268             swap(discardedText);
    269         }
    270 
    271         bool isEmpty()
    272         {
    273             // When the stringbuilder is empty, the parent and whitespace should also be "empty".
    274             ASSERT(stringBuilder.isEmpty() == !parent);
    275             ASSERT(!stringBuilder.isEmpty() || !nextChild);
    276             ASSERT(!stringBuilder.isEmpty() || (whitespaceMode == WhitespaceUnknown));
    277             return stringBuilder.isEmpty();
    278         }
    279 
    280         RefPtr<ContainerNode> parent;
    281         RefPtr<Node> nextChild;
    282         StringBuilder stringBuilder;
    283         WhitespaceMode whitespaceMode;
    284     };
    285 
    286     PendingText m_pendingText;
    287 
    288     ParserContentPolicy m_parserContentPolicy;
    289     bool m_isParsingFragment;
    290 
    291     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-intable
    292     // In the "in table" insertion mode, we sometimes get into a state where
    293     // "whenever a node would be inserted into the current node, it must instead
    294     // be foster parented."  This flag tracks whether we're in that state.
    295     bool m_redirectAttachToFosterParent;
    296 
    297     bool m_inQuirksMode;
    298 };
    299 
    300 } // namespace WebCore
    301 
    302 #endif
    303