Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  * Copyright (C) 2011 Apple Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #ifndef HTMLConstructionSite_h
     28 #define HTMLConstructionSite_h
     29 
     30 #include "core/dom/Document.h"
     31 #include "core/dom/ParserContentPolicy.h"
     32 #include "core/html/parser/HTMLElementStack.h"
     33 #include "core/html/parser/HTMLFormattingElementList.h"
     34 #include "wtf/Noncopyable.h"
     35 #include "wtf/PassRefPtr.h"
     36 #include "wtf/RefPtr.h"
     37 #include "wtf/Vector.h"
     38 #include "wtf/text/StringBuilder.h"
     39 
     40 namespace blink {
     41 
     42 struct HTMLConstructionSiteTask {
     43     ALLOW_ONLY_INLINE_ALLOCATION();
     44 public:
     45     enum Operation {
     46         Insert,
     47         InsertText, // Handles possible merging of text nodes.
     48         InsertAlreadyParsedChild, // Insert w/o calling begin/end parsing.
     49         Reparent,
     50         TakeAllChildren,
     51     };
     52 
     53     explicit HTMLConstructionSiteTask(Operation op)
     54         : operation(op)
     55         , selfClosing(false)
     56     {
     57     }
     58 
     59     void trace(Visitor* visitor)
     60     {
     61         visitor->trace(parent);
     62         visitor->trace(nextChild);
     63         visitor->trace(child);
     64     }
     65 
     66     ContainerNode* oldParent()
     67     {
     68         // It's sort of ugly, but we store the |oldParent| in the |child| field
     69         // of the task so that we don't bloat the HTMLConstructionSiteTask
     70         // object in the common case of the Insert operation.
     71         return toContainerNode(child.get());
     72     }
     73 
     74     Operation operation;
     75     RefPtrWillBeMember<ContainerNode> parent;
     76     RefPtrWillBeMember<Node> nextChild;
     77     RefPtrWillBeMember<Node> child;
     78     bool selfClosing;
     79 };
     80 
     81 } // namespace blink
     82 
// Opts HTMLConstructionSiteTask into the WTF trait named by this macro.
// NOTE(review): judging by the macro name, this presumably allows WTF
// containers to move, initialize and compare tasks with raw memory functions;
// confirm against the macro's definition. The macro is invoked at global scope
// with the fully qualified type name.
WTF_ALLOW_MOVE_INIT_AND_COMPARE_WITH_MEM_FUNCTIONS(blink::HTMLConstructionSiteTask);
     84 
     85 namespace blink {
     86 
     87 // Note: These are intentionally ordered so that when we concatonate
     88 // strings and whitespaces the resulting whitespace is ws = min(ws1, ws2).
     89 enum WhitespaceMode {
     90     WhitespaceUnknown,
     91     NotAllWhitespace,
     92     AllWhitespace,
     93 };
     94 
     95 enum FlushMode {
     96     // Flush pending text. Flush queued tasks.
     97     FlushAlways,
     98 
     99     // Flush pending text if node has length limit. Flush queued tasks.
    100     FlushIfAtTextLimit,
    101 };
    102 
    103 class AtomicHTMLToken;
    104 class Document;
    105 class Element;
    106 class HTMLFormElement;
    107 
    108 class HTMLConstructionSite FINAL {
    109     WTF_MAKE_NONCOPYABLE(HTMLConstructionSite);
    110     DISALLOW_ALLOCATION();
    111 public:
    112     HTMLConstructionSite(Document*, ParserContentPolicy);
    113     HTMLConstructionSite(DocumentFragment*, ParserContentPolicy);
    114     ~HTMLConstructionSite();
    115     void trace(Visitor*);
    116 
    117     void detach();
    118 
    119     // executeQueuedTasks empties the queue but does not flush pending text.
    120     // NOTE: Possible reentrancy via JavaScript execution.
    121     void executeQueuedTasks();
    122 
    123     // flushPendingText turns pending text into queued Text insertions, but does not execute them.
    124     void flushPendingText(FlushMode);
    125 
    126     // Called before every token in HTMLTreeBuilder::processToken, thus inlined:
    127     void flush(FlushMode mode)
    128     {
    129         if (!hasPendingTasks())
    130             return;
    131         flushPendingText(mode);
    132         executeQueuedTasks(); // NOTE: Possible reentrancy via JavaScript execution.
    133         ASSERT(mode == FlushIfAtTextLimit || !hasPendingTasks());
    134     }
    135 
    136     bool hasPendingTasks()
    137     {
    138         return !m_pendingText.isEmpty() || !m_taskQueue.isEmpty();
    139     }
    140 
    141     void setDefaultCompatibilityMode();
    142     void processEndOfFile();
    143     void finishedParsing();
    144 
    145     void insertDoctype(AtomicHTMLToken*);
    146     void insertComment(AtomicHTMLToken*);
    147     void insertCommentOnDocument(AtomicHTMLToken*);
    148     void insertCommentOnHTMLHtmlElement(AtomicHTMLToken*);
    149     void insertHTMLElement(AtomicHTMLToken*);
    150     void insertSelfClosingHTMLElement(AtomicHTMLToken*);
    151     void insertFormattingElement(AtomicHTMLToken*);
    152     void insertHTMLHeadElement(AtomicHTMLToken*);
    153     void insertHTMLBodyElement(AtomicHTMLToken*);
    154     void insertHTMLFormElement(AtomicHTMLToken*, bool isDemoted = false);
    155     void insertScriptElement(AtomicHTMLToken*);
    156     void insertTextNode(const String&, WhitespaceMode = WhitespaceUnknown);
    157     void insertForeignElement(AtomicHTMLToken*, const AtomicString& namespaceURI);
    158 
    159     void insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken*);
    160     void insertHTMLHtmlStartTagInBody(AtomicHTMLToken*);
    161     void insertHTMLBodyStartTagInBody(AtomicHTMLToken*);
    162 
    163     void reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child);
    164     void reparent(HTMLElementStack::ElementRecord* newParent, HTMLStackItem* child);
    165     // insertAlreadyParsedChild assumes that |child| has already been parsed (i.e., we're just
    166     // moving it around in the tree rather than parsing it for the first time). That means
    167     // this function doesn't call beginParsingChildren / finishParsingChildren.
    168     void insertAlreadyParsedChild(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* child);
    169     void takeAllChildren(HTMLStackItem* newParent, HTMLElementStack::ElementRecord* oldParent);
    170 
    171     PassRefPtrWillBeRawPtr<HTMLStackItem> createElementFromSavedToken(HTMLStackItem*);
    172 
    173     bool shouldFosterParent() const;
    174     void fosterParent(PassRefPtrWillBeRawPtr<Node>);
    175 
    176     bool indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const;
    177     void reconstructTheActiveFormattingElements();
    178 
    179     void generateImpliedEndTags();
    180     void generateImpliedEndTagsWithExclusion(const AtomicString& tagName);
    181 
    182     bool inQuirksMode();
    183 
    184     bool isEmpty() const { return !m_openElements.stackDepth(); }
    185     HTMLElementStack::ElementRecord* currentElementRecord() const { return m_openElements.topRecord(); }
    186     Element* currentElement() const { return m_openElements.top(); }
    187     ContainerNode* currentNode() const { return m_openElements.topNode(); }
    188     HTMLStackItem* currentStackItem() const { return m_openElements.topStackItem(); }
    189     HTMLStackItem* oneBelowTop() const { return m_openElements.oneBelowTop(); }
    190     Document& ownerDocumentForCurrentNode();
    191     HTMLElementStack* openElements() const { return &m_openElements; }
    192     HTMLFormattingElementList* activeFormattingElements() const { return &m_activeFormattingElements; }
    193     bool currentIsRootNode() { return m_openElements.topNode() == m_openElements.rootNode(); }
    194 
    195     Element* head() const { return m_head->element(); }
    196     HTMLStackItem* headStackItem() const { return m_head.get(); }
    197 
    198     void setForm(HTMLFormElement*);
    199     HTMLFormElement* form() const { return m_form.get(); }
    200     PassRefPtrWillBeRawPtr<HTMLFormElement> takeForm();
    201 
    202     ParserContentPolicy parserContentPolicy() { return m_parserContentPolicy; }
    203 
    204     class RedirectToFosterParentGuard {
    205         WTF_MAKE_NONCOPYABLE(RedirectToFosterParentGuard);
    206     public:
    207         RedirectToFosterParentGuard(HTMLConstructionSite& tree)
    208             : m_tree(tree)
    209             , m_wasRedirectingBefore(tree.m_redirectAttachToFosterParent)
    210         {
    211             m_tree.m_redirectAttachToFosterParent = true;
    212         }
    213 
    214         ~RedirectToFosterParentGuard()
    215         {
    216             m_tree.m_redirectAttachToFosterParent = m_wasRedirectingBefore;
    217         }
    218 
    219     private:
    220         HTMLConstructionSite& m_tree;
    221         bool m_wasRedirectingBefore;
    222     };
    223 
    224 private:
    225     // In the common case, this queue will have only one task because most
    226     // tokens produce only one DOM mutation.
    227     typedef WillBeHeapVector<HTMLConstructionSiteTask, 1> TaskQueue;
    228 
    229     void setCompatibilityMode(Document::CompatibilityMode);
    230     void setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId);
    231 
    232     void attachLater(ContainerNode* parent, PassRefPtrWillBeRawPtr<Node> child, bool selfClosing = false);
    233 
    234     void findFosterSite(HTMLConstructionSiteTask&);
    235 
    236     PassRefPtrWillBeRawPtr<HTMLElement> createHTMLElement(AtomicHTMLToken*);
    237     PassRefPtrWillBeRawPtr<Element> createElement(AtomicHTMLToken*, const AtomicString& namespaceURI);
    238 
    239     void mergeAttributesFromTokenIntoElement(AtomicHTMLToken*, Element*);
    240     void dispatchDocumentElementAvailableIfNeeded();
    241 
    242     void executeTask(HTMLConstructionSiteTask&);
    243     void queueTask(const HTMLConstructionSiteTask&);
    244 
    245     RawPtrWillBeMember<Document> m_document;
    246 
    247     // This is the root ContainerNode to which the parser attaches all newly
    248     // constructed nodes. It points to a DocumentFragment when parsing fragments
    249     // and a Document in all other cases.
    250     RawPtrWillBeMember<ContainerNode> m_attachmentRoot;
    251 
    252     RefPtrWillBeMember<HTMLStackItem> m_head;
    253     RefPtrWillBeMember<HTMLFormElement> m_form;
    254     mutable HTMLElementStack m_openElements;
    255     mutable HTMLFormattingElementList m_activeFormattingElements;
    256 
    257     TaskQueue m_taskQueue;
    258 
    259     class PendingText FINAL {
    260         DISALLOW_ALLOCATION();
    261     public:
    262         PendingText()
    263             : whitespaceMode(WhitespaceUnknown)
    264         {
    265         }
    266 
    267         void append(PassRefPtrWillBeRawPtr<ContainerNode> newParent, PassRefPtrWillBeRawPtr<Node> newNextChild, const String& newString, WhitespaceMode newWhitespaceMode)
    268         {
    269             ASSERT(!parent || parent == newParent);
    270             parent = newParent;
    271             ASSERT(!nextChild || nextChild == newNextChild);
    272             nextChild = newNextChild;
    273             stringBuilder.append(newString);
    274             whitespaceMode = std::min(whitespaceMode, newWhitespaceMode);
    275         }
    276 
    277         void swap(PendingText& other)
    278         {
    279             std::swap(whitespaceMode, other.whitespaceMode);
    280             parent.swap(other.parent);
    281             nextChild.swap(other.nextChild);
    282             stringBuilder.swap(other.stringBuilder);
    283         }
    284 
    285         void discard()
    286         {
    287             PendingText discardedText;
    288             swap(discardedText);
    289         }
    290 
    291         bool isEmpty()
    292         {
    293             // When the stringbuilder is empty, the parent and whitespace should also be "empty".
    294             ASSERT(stringBuilder.isEmpty() == !parent);
    295             ASSERT(!stringBuilder.isEmpty() || !nextChild);
    296             ASSERT(!stringBuilder.isEmpty() || (whitespaceMode == WhitespaceUnknown));
    297             return stringBuilder.isEmpty();
    298         }
    299 
    300         void trace(Visitor*);
    301 
    302         RefPtrWillBeMember<ContainerNode> parent;
    303         RefPtrWillBeMember<Node> nextChild;
    304         StringBuilder stringBuilder;
    305         WhitespaceMode whitespaceMode;
    306     };
    307 
    308     PendingText m_pendingText;
    309 
    310     ParserContentPolicy m_parserContentPolicy;
    311     bool m_isParsingFragment;
    312 
    313     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-intable
    314     // In the "in table" insertion mode, we sometimes get into a state where
    315     // "whenever a node would be inserted into the current node, it must instead
    316     // be foster parented."  This flag tracks whether we're in that state.
    317     bool m_redirectAttachToFosterParent;
    318 
    319     bool m_inQuirksMode;
    320 };
    321 
    322 } // namespace blink
    323 
    324 #endif
    325