Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #ifndef HTMLConstructionSite_h
     28 #define HTMLConstructionSite_h
     29 
     30 #include "core/dom/Document.h"
     31 #include "core/dom/ParserContentPolicy.h"
     32 #include "core/html/parser/HTMLElementStack.h"
     33 #include "core/html/parser/HTMLFormattingElementList.h"
     34 #include "wtf/Noncopyable.h"
     35 #include "wtf/PassRefPtr.h"
     36 #include "wtf/RefPtr.h"
     37 #include "wtf/Vector.h"
     38 #include "wtf/text/StringBuilder.h"
     39 
     40 namespace WebCore {
     41 
     42 struct HTMLConstructionSiteTask {
     43     ALLOW_ONLY_INLINE_ALLOCATION();
     44 public:
     45     enum Operation {
     46         Insert,
     47         InsertText, // Handles possible merging of text nodes.
     48         InsertAlreadyParsedChild, // Insert w/o calling begin/end parsing.
     49         Reparent,
     50         TakeAllChildren,
     51     };
     52 
     53     explicit HTMLConstructionSiteTask(Operation op)
     54         : operation(op)
     55         , selfClosing(false)
     56     {
     57     }
     58 
     59     void trace(Visitor* visitor)
     60     {
     61         visitor->trace(parent);
     62         visitor->trace(nextChild);
     63         visitor->trace(child);
     64     }
     65 
     66     ContainerNode* oldParent()
     67     {
     68         // It's sort of ugly, but we store the |oldParent| in the |child| field
     69         // of the task so that we don't bloat the HTMLConstructionSiteTask
     70         // object in the common case of the Insert operation.
     71         return toContainerNode(child.get());
     72     }
     73 
     74     Operation operation;
     75     RefPtrWillBeMember<ContainerNode> parent;
     76     RefPtrWillBeMember<Node> nextChild;
     77     RefPtrWillBeMember<Node> child;
     78     bool selfClosing;
     79 };
     80 
     81 } // namespace WebCore
     82 
// Opts HTMLConstructionSiteTask in to WTF's memory-function based move,
// initialization and comparison, as the macro name states.
// NOTE(review): the macro is invoked at global scope, between the two
// namespace WebCore sections; presumably it has to live outside the
// namespace - confirm against the macro's definition in wtf/.
WTF_ALLOW_MOVE_INIT_AND_COMPARE_WITH_MEM_FUNCTIONS(WebCore::HTMLConstructionSiteTask);
     84 
     85 namespace WebCore {
     86 
     87 // Note: These are intentionally ordered so that when we concatonate
     88 // strings and whitespaces the resulting whitespace is ws = min(ws1, ws2).
     89 enum WhitespaceMode {
     90     WhitespaceUnknown,
     91     NotAllWhitespace,
     92     AllWhitespace,
     93 };
     94 
     95 class AtomicHTMLToken;
     96 class Document;
     97 class Element;
     98 class HTMLFormElement;
     99 
    100 class HTMLConstructionSite FINAL {
    101     WTF_MAKE_NONCOPYABLE(HTMLConstructionSite);
    102     DISALLOW_ALLOCATION();
    103 public:
    104     HTMLConstructionSite(Document*, ParserContentPolicy);
    105     HTMLConstructionSite(DocumentFragment*, ParserContentPolicy);
    106     ~HTMLConstructionSite();
    107     void trace(Visitor*);
    108 
    109     void detach();
    110 
    111     // executeQueuedTasks empties the queue but does not flush pending text.
    112     // NOTE: Possible reentrancy via JavaScript execution.
    113     void executeQueuedTasks();
    114 
    115     // flushPendingText turns pending text into queued Text insertions, but does not execute them.
    116     void flushPendingText();
    117 
    118     // Called before every token in HTMLTreeBuilder::processToken, thus inlined:
    119     void flush()
    120     {
    121         if (!hasPendingTasks())
    122             return;
    123         flushPendingText();
    124         executeQueuedTasks(); // NOTE: Possible reentrancy via JavaScript execution.
    125         ASSERT(!hasPendingTasks());
    126     }
    127 
    128     bool hasPendingTasks()
    129     {
    130         return !m_pendingText.isEmpty() || !m_taskQueue.isEmpty();
    131     }
    132 
    133     void setDefaultCompatibilityMode();
    134     void processEndOfFile();
    135     void finishedParsing();
    136 
    137     void insertDoctype(AtomicHTMLToken*);
    138     void insertComment(AtomicHTMLToken*);
    139     void insertCommentOnDocument(AtomicHTMLToken*);
    140     void insertCommentOnHTMLHtmlElement(AtomicHTMLToken*);
    141     void insertHTMLElement(AtomicHTMLToken*);
    142     void insertSelfClosingHTMLElement(AtomicHTMLToken*);
    143     void insertFormattingElement(AtomicHTMLToken*);
    144     void insertHTMLHeadElement(AtomicHTMLToken*);
    145     void insertHTMLBodyElement(AtomicHTMLToken*);
    146     void insertHTMLFormElement(AtomicHTMLToken*, bool isDemoted = false);
    147     void insertScriptElement(AtomicHTMLToken*);
    148     void insertTextNode(const String&, WhitespaceMode = WhitespaceUnknown);
    149     void insertForeignElement(AtomicHTMLToken*, const AtomicString& namespaceURI);
    150 
    151     void insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken*);
    152     void insertHTMLHtmlStartTagInBody(AtomicHTMLToken*);
    153     void insertHTMLBodyStartTagInBody(AtomicHTMLToken*);
    154 
    155     void reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child);
    156     void reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child);
    157     // insertAlreadyParsedChild assumes that |child| has already been parsed (i.e., we're just
    158     // moving it around in the tree rather than parsing it for the first time). That means
    159     // this function doesn't call beginParsingChildren / finishParsingChildren.
    160     void insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child);
    161     void takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent);
    162 
    163     PassRefPtrWillBeRawPtr<HTMLStackItem> createElementFromSavedToken(HTMLStackItem*);
    164 
    165     bool shouldFosterParent() const;
    166     void fosterParent(PassRefPtrWillBeRawPtr<Node>);
    167 
    168     bool indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const;
    169     void reconstructTheActiveFormattingElements();
    170 
    171     void generateImpliedEndTags();
    172     void generateImpliedEndTagsWithExclusion(const AtomicString& tagName);
    173 
    174     bool inQuirksMode();
    175 
    176     bool isEmpty() const { return !m_openElements.stackDepth(); }
    177     HTMLElementStack::ElementRecord* currentElementRecord() const { return m_openElements.topRecord(); }
    178     Element* currentElement() const { return m_openElements.top(); }
    179     ContainerNode* currentNode() const { return m_openElements.topNode(); }
    180     HTMLStackItem* currentStackItem() const { return m_openElements.topStackItem(); }
    181     HTMLStackItem* oneBelowTop() const { return m_openElements.oneBelowTop(); }
    182     Document& ownerDocumentForCurrentNode();
    183     HTMLElementStack* openElements() const { return &m_openElements; }
    184     HTMLFormattingElementList* activeFormattingElements() const { return &m_activeFormattingElements; }
    185     bool currentIsRootNode() { return m_openElements.topNode() == m_openElements.rootNode(); }
    186 
    187     Element* head() const { return m_head->element(); }
    188     HTMLStackItem* headStackItem() const { return m_head.get(); }
    189 
    190     void setForm(HTMLFormElement*);
    191     HTMLFormElement* form() const { return m_form.get(); }
    192     PassRefPtrWillBeRawPtr<HTMLFormElement> takeForm();
    193 
    194     ParserContentPolicy parserContentPolicy() { return m_parserContentPolicy; }
    195 
    196     class RedirectToFosterParentGuard {
    197         WTF_MAKE_NONCOPYABLE(RedirectToFosterParentGuard);
    198     public:
    199         RedirectToFosterParentGuard(HTMLConstructionSite& tree)
    200             : m_tree(tree)
    201             , m_wasRedirectingBefore(tree.m_redirectAttachToFosterParent)
    202         {
    203             m_tree.m_redirectAttachToFosterParent = true;
    204         }
    205 
    206         ~RedirectToFosterParentGuard()
    207         {
    208             m_tree.m_redirectAttachToFosterParent = m_wasRedirectingBefore;
    209         }
    210 
    211     private:
    212         HTMLConstructionSite& m_tree;
    213         bool m_wasRedirectingBefore;
    214     };
    215 
    216 private:
    217     // In the common case, this queue will have only one task because most
    218     // tokens produce only one DOM mutation.
    219     typedef WillBeHeapVector<HTMLConstructionSiteTask, 1> TaskQueue;
    220 
    221     void setCompatibilityMode(Document::CompatibilityMode);
    222     void setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId);
    223 
    224     void attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> child, bool selfClosing = false);
    225 
    226     void findFosterSite(HTMLConstructionSiteTask&);
    227 
    228     PassRefPtrWillBeRawPtr<Element> createHTMLElement(AtomicHTMLToken*);
    229     PassRefPtrWillBeRawPtr<Element> createElement(AtomicHTMLToken*, const AtomicString& namespaceURI);
    230 
    231     void mergeAttributesFromTokenIntoElement(AtomicHTMLToken*, Element*);
    232     void dispatchDocumentElementAvailableIfNeeded();
    233 
    234     void executeTask(HTMLConstructionSiteTask&);
    235     void queueTask(const HTMLConstructionSiteTask&);
    236 
    237     RawPtrWillBeMember<Document> m_document;
    238 
    239     // This is the root ContainerNode to which the parser attaches all newly
    240     // constructed nodes. It points to a DocumentFragment when parsing fragments
    241     // and a Document in all other cases.
    242     RawPtrWillBeMember<ContainerNode> m_attachmentRoot;
    243 
    244     RefPtrWillBeMember<HTMLStackItem> m_head;
    245     RefPtrWillBeMember<HTMLFormElement> m_form;
    246     mutable HTMLElementStack m_openElements;
    247     mutable HTMLFormattingElementList m_activeFormattingElements;
    248 
    249     TaskQueue m_taskQueue;
    250 
    251     class PendingText FINAL {
    252         DISALLOW_ALLOCATION();
    253     public:
    254         PendingText()
    255             : whitespaceMode(WhitespaceUnknown)
    256         {
    257         }
    258 
    259         void append(PassRefPtrWillBeRawPtr<ContainerNode> newParent, PassRefPtrWillBeRawPtr<Node> newNextChild, const String& newString, WhitespaceMode newWhitespaceMode)
    260         {
    261             ASSERT(!parent || parent == newParent);
    262             parent = newParent;
    263             ASSERT(!nextChild || nextChild == newNextChild);
    264             nextChild = newNextChild;
    265             stringBuilder.append(newString);
    266             whitespaceMode = std::min(whitespaceMode, newWhitespaceMode);
    267         }
    268 
    269         void swap(PendingText& other)
    270         {
    271             std::swap(whitespaceMode, other.whitespaceMode);
    272             parent.swap(other.parent);
    273             nextChild.swap(other.nextChild);
    274             stringBuilder.swap(other.stringBuilder);
    275         }
    276 
    277         void discard()
    278         {
    279             PendingText discardedText;
    280             swap(discardedText);
    281         }
    282 
    283         bool isEmpty()
    284         {
    285             // When the stringbuilder is empty, the parent and whitespace should also be "empty".
    286             ASSERT(stringBuilder.isEmpty() == !parent);
    287             ASSERT(!stringBuilder.isEmpty() || !nextChild);
    288             ASSERT(!stringBuilder.isEmpty() || (whitespaceMode == WhitespaceUnknown));
    289             return stringBuilder.isEmpty();
    290         }
    291 
    292         void trace(Visitor*);
    293 
    294         RefPtrWillBeMember<ContainerNode> parent;
    295         RefPtrWillBeMember<Node> nextChild;
    296         StringBuilder stringBuilder;
    297         WhitespaceMode whitespaceMode;
    298     };
    299 
    300     PendingText m_pendingText;
    301 
    302     ParserContentPolicy m_parserContentPolicy;
    303     bool m_isParsingFragment;
    304 
    305     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-intable
    306     // In the "in table" insertion mode, we sometimes get into a state where
    307     // "whenever a node would be inserted into the current node, it must instead
    308     // be foster parented."  This flag tracks whether we're in that state.
    309     bool m_redirectAttachToFosterParent;
    310 
    311     bool m_inQuirksMode;
    312 };
    313 
    314 } // namespace WebCore
    315 
    316 #endif
    317