Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef HTMLDocumentParser_h
     27 #define HTMLDocumentParser_h
     28 
     29 #include "core/dom/ParserContentPolicy.h"
     30 #include "core/dom/ScriptableDocumentParser.h"
     31 #include "core/fetch/ResourceClient.h"
     32 #include "core/frame/UseCounter.h"
     33 #include "core/html/parser/BackgroundHTMLInputStream.h"
     34 #include "core/html/parser/CompactHTMLToken.h"
     35 #include "core/html/parser/HTMLInputStream.h"
     36 #include "core/html/parser/HTMLParserOptions.h"
     37 #include "core/html/parser/HTMLPreloadScanner.h"
     38 #include "core/html/parser/HTMLScriptRunnerHost.h"
     39 #include "core/html/parser/HTMLSourceTracker.h"
     40 #include "core/html/parser/HTMLToken.h"
     41 #include "core/html/parser/HTMLTokenizer.h"
     42 #include "core/html/parser/HTMLTreeBuilderSimulator.h"
     43 #include "core/html/parser/XSSAuditor.h"
     44 #include "core/html/parser/XSSAuditorDelegate.h"
     45 #include "platform/text/SegmentedString.h"
     46 #include "wtf/Deque.h"
     47 #include "wtf/OwnPtr.h"
     48 #include "wtf/WeakPtr.h"
     49 #include "wtf/text/TextPosition.h"
     50 
     51 namespace WebCore {
     52 
     53 class BackgroundHTMLParser;
     54 class CompactHTMLToken;
     55 class Document;
     56 class DocumentFragment;
     57 class HTMLDocument;
     58 class HTMLParserScheduler;
     59 class HTMLScriptRunner;
     60 class HTMLTreeBuilder;
     61 class HTMLResourcePreloader;
     62 class ScriptController;
     63 class ScriptSourceCode;
     64 
     65 class PumpSession;
     66 
     67 class HTMLDocumentParser :  public ScriptableDocumentParser, HTMLScriptRunnerHost, ResourceClient {
            // Parser for HTML input, constructible either for a whole HTMLDocument or for a
            // DocumentFragment with a context element (see the two constructors below).
            //
            // The object owns its tokenizer, current token, tree builder, script runner,
            // preload scanners and scheduler through OwnPtr members, and keeps a queue of
            // ParsedChunk "speculations" together with a weak reference to a
            // BackgroundHTMLParser.
            //
            // Only ScriptableDocumentParser is a public base; HTMLScriptRunnerHost and
            // ResourceClient are inherited privately (the class default), so their
            // overrides live in the private section.
     68     WTF_MAKE_FAST_ALLOCATED;
     69 public:
            // Creates a ref-counted parser for |document|. |reportErrors| is forwarded
            // unchanged to the constructor.
     70     static PassRefPtr<HTMLDocumentParser> create(HTMLDocument* document, bool reportErrors)
     71     {
     72         return adoptRef(new HTMLDocumentParser(document, reportErrors));
     73     }
     74     virtual ~HTMLDocumentParser();
     75 
     76     // Exposed for HTMLParserScheduler
     77     void resumeParsingAfterYield();
     78 
            // Static entry point for fragment parsing. The content policy defaults to
            // AllowScriptingContent when the caller does not pass one.
            // NOTE(review): presumably goes through the private fragment create() below -
            // confirm in the .cpp.
     79     static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, ParserContentPolicy = AllowScriptingContent);
     80 
            // Non-owning pointer; ownership stays with m_tokenizer.
     81     HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
     82 
            // NOTE(review): these four virtuals carry no OVERRIDE annotation, unlike the
            // overrides further down. If they override ScriptableDocumentParser /
            // DocumentParser virtuals, annotate them as well - confirm against the base headers.
     83     virtual TextPosition textPosition() const;
     84     virtual OrdinalNumber lineNumber() const;
     85 
     86     virtual void suspendScheduledTasks();
     87     virtual void resumeScheduledTasks();
     88 
            // One unit of work handed to didReceiveParsedChunkFromBackgroundParser():
            // a token stream plus the preload requests and XSS infos that accompany it,
            // and the tokenizer / tree-builder-simulator states and input / preload-scanner
            // checkpoints recorded with it.
     89     struct ParsedChunk {
     90         OwnPtr<CompactHTMLTokenStream> tokens;
     91         PreloadRequestStream preloads;
     92         XSSInfoStream xssInfos;
     93         HTMLTokenizer::State tokenizerState;
     94         HTMLTreeBuilderSimulator::State treeBuilderState;
     95         HTMLInputCheckpoint inputCheckpoint;
     96         TokenPreloadScannerCheckpoint preloadScannerCheckpoint;
     97     };
            // Takes ownership of the chunk (PassOwnPtr).
     98     void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
     99 
            // UseCounter looked up from the document returned by contextForParsingSession().
    100     UseCounter* useCounter() { return UseCounter::getFrom(contextForParsingSession()); }
    101 
    102 protected:
    103     virtual void insert(const SegmentedString&) OVERRIDE;
    104     virtual void append(PassRefPtr<StringImpl>) OVERRIDE;
    105     virtual void finish() OVERRIDE;
    106 
            // Constructors are not public: instances are obtained through the create()
            // factories (or by subclasses).
    107     HTMLDocumentParser(HTMLDocument*, bool reportErrors);
    108     HTMLDocumentParser(DocumentFragment*, Element* contextElement, ParserContentPolicy);
    109 
            // Non-owning pointer; ownership stays with m_treeBuilder.
    110     HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
    111 
    112     void forcePlaintextForTextDocument();
    113 
    114 private:
            // Fragment-parsing counterpart of the public create(); being private, it is
            // only reachable from inside this class.
    115     static PassRefPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
    116     {
    117         return adoptRef(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy));
    118     }
    119 
    120     // DocumentParser
    121     virtual void pinToMainThread() OVERRIDE;
    122     virtual void detach() OVERRIDE;
    123     virtual bool hasInsertionPoint() OVERRIDE;
    124     virtual bool processingData() const OVERRIDE;
    125     virtual void prepareToStopParsing() OVERRIDE;
    126     virtual void stopParsing() OVERRIDE;
    127     virtual bool isWaitingForScripts() const OVERRIDE;
    128     virtual bool isExecutingScript() const OVERRIDE;
    129     virtual void executeScriptsWaitingForResources() OVERRIDE;
    130 
    131     // HTMLScriptRunnerHost
    132     virtual void watchForLoad(Resource*) OVERRIDE;
    133     virtual void stopWatchingForLoad(Resource*) OVERRIDE;
            // Both inline overrides below are annotated OVERRIDE like their neighbours so
            // that a signature change in HTMLScriptRunnerHost fails to compile instead of
            // silently introducing a new, unrelated virtual.
    134     virtual HTMLInputStream& inputStream() OVERRIDE { return m_input; }
            // True only when a preload scanner exists and threading is not in use.
    135     virtual bool hasPreloadScanner() const OVERRIDE { return m_preloadScanner.get() && !shouldUseThreading(); }
    136     virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE;
    137 
    138     // ResourceClient
    139     virtual void notifyFinished(Resource*) OVERRIDE;
    140 
            // Background-parser lifecycle and handling of the chunks it delivers.
            // The PassOwnPtr parameters transfer ownership to the callee.
    141     void startBackgroundParser();
    142     void stopBackgroundParser();
    143     void validateSpeculations(PassOwnPtr<ParsedChunk> lastChunk);
    144     void discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunk, PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>);
    145     void processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
    146     void pumpPendingSpeculations();
    147 
    148     Document* contextForParsingSession();
    149 
            // Passed to the pump functions below to select between the two modes.
    150     enum SynchronousMode {
    151         AllowYield,
    152         ForceSynchronous,
    153     };
    154     bool canTakeNextToken(SynchronousMode, PumpSession&);
    155     void pumpTokenizer(SynchronousMode);
    156     void pumpTokenizerIfPossible(SynchronousMode);
            // Tree construction from the two token representations (full and compact).
    157     void constructTreeFromHTMLToken(HTMLToken&);
    158     void constructTreeFromCompactHTMLToken(const CompactHTMLToken&);
    159 
    160     void runScriptsForPausedTreeBuilder();
    161     void resumeParsingAfterScriptExecution();
    162 
    163     void attemptToEnd();
    164     void endIfDelayed();
    165     void attemptToRunDeferredScriptsAndEnd();
    166     void end();
    167 
            // Threading is used only if the options request it and the parser has not
            // been pinned to the main thread.
    168     bool shouldUseThreading() const { return m_options.useThreading && !m_isPinnedToMainThread; }
    169 
    170     bool isParsingFragment() const;
    171     bool isScheduledForResume() const;
            // Pump sessions may nest; any non-zero nesting level counts as "in a session".
    172     bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; }
            // Ending is delayed while any of these four conditions holds.
    173     bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); }
    174 
            // Dereferences m_token without a null check; callers must only use it while
            // m_token is set.
    175     HTMLToken& token() { return *m_token; }
    176 
            // NOTE: data members are constructed in declaration order and destroyed in
            // reverse; do not reorder them without checking the constructors.
    177     HTMLParserOptions m_options;
    178     HTMLInputStream m_input;
    179 
    180     OwnPtr<HTMLToken> m_token;
    181     OwnPtr<HTMLTokenizer> m_tokenizer;
    182     OwnPtr<HTMLScriptRunner> m_scriptRunner;
    183     OwnPtr<HTMLTreeBuilder> m_treeBuilder;
    184     OwnPtr<HTMLPreloadScanner> m_preloadScanner;
    185     OwnPtr<HTMLPreloadScanner> m_insertionPreloadScanner;
    186     OwnPtr<HTMLParserScheduler> m_parserScheduler;
    187     HTMLSourceTracker m_sourceTracker;
    188     TextPosition m_textPosition;
    189     XSSAuditor m_xssAuditor;
    190     XSSAuditorDelegate m_xssAuditorDelegate;
    191 
    192     // FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be combined into a single state object
    193     // so they can be set and cleared together and passed between threads together.
    194     OwnPtr<ParsedChunk> m_lastChunkBeforeScript;
    195     Deque<OwnPtr<ParsedChunk> > m_speculations;
    196     WeakPtrFactory<HTMLDocumentParser> m_weakFactory;
    197     WeakPtr<BackgroundHTMLParser> m_backgroundParser;
    198     OwnPtr<HTMLResourcePreloader> m_preloader;
    199 
            // Read by shouldUseThreading(); when true, threading is not used.
    200     bool m_isPinnedToMainThread;
    201     bool m_endWasDelayed;
    202     bool m_haveBackgroundParser;
            // Read by inPumpSession(); greater than zero while inside a pump session.
    203     unsigned m_pumpSessionNestingLevel;
    204 };
    205 
    206 }
    207 
    208 #endif
    209