Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef HTMLDocumentParser_h
     27 #define HTMLDocumentParser_h
     28 
     29 #include "core/dom/ParserContentPolicy.h"
     30 #include "core/dom/ScriptableDocumentParser.h"
     31 #include "core/html/parser/BackgroundHTMLInputStream.h"
     32 #include "core/html/parser/CompactHTMLToken.h"
     33 #include "core/html/parser/HTMLInputStream.h"
     34 #include "core/html/parser/HTMLParserOptions.h"
     35 #include "core/html/parser/HTMLPreloadScanner.h"
     36 #include "core/html/parser/HTMLScriptRunnerHost.h"
     37 #include "core/html/parser/HTMLSourceTracker.h"
     38 #include "core/html/parser/HTMLToken.h"
     39 #include "core/html/parser/HTMLTokenizer.h"
     40 #include "core/html/parser/HTMLTreeBuilderSimulator.h"
     41 #include "core/html/parser/XSSAuditor.h"
     42 #include "core/html/parser/XSSAuditorDelegate.h"
     43 #include "core/loader/cache/ResourceClient.h"
     44 #include "core/platform/text/SegmentedString.h"
     45 #include "wtf/Deque.h"
     46 #include "wtf/OwnPtr.h"
     47 #include "wtf/WeakPtr.h"
     48 #include "wtf/text/TextPosition.h"
     49 
     50 namespace WebCore {
     51 
     52 class BackgroundHTMLParser;
     53 class CompactHTMLToken;
     54 class Document;
     55 class DocumentFragment;
     56 class HTMLDocument;
     57 class HTMLParserScheduler;
     58 class HTMLScriptRunner;
     59 class HTMLTreeBuilder;
     60 class HTMLResourcePreloader;
     61 class ScriptController;
     62 class ScriptSourceCode;
     63 
     64 class PumpSession;
     65 
     66 class HTMLDocumentParser :  public ScriptableDocumentParser, HTMLScriptRunnerHost, ResourceClient {
     67     WTF_MAKE_FAST_ALLOCATED;
     68 public:
     69     static PassRefPtr<HTMLDocumentParser> create(Document* document, bool reportErrors)
     70     {
     71         return adoptRef(new HTMLDocumentParser(document, reportErrors));
     72     }
     73     virtual ~HTMLDocumentParser();
     74 
     75     // Exposed for HTMLParserScheduler
     76     void resumeParsingAfterYield();
     77 
     78     static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, ParserContentPolicy = AllowScriptingContent);
     79 
     80     HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
     81 
     82     virtual TextPosition textPosition() const;
     83     virtual OrdinalNumber lineNumber() const;
     84 
     85     virtual void suspendScheduledTasks();
     86     virtual void resumeScheduledTasks();
     87 
     88     struct ParsedChunk {
     89         OwnPtr<CompactHTMLTokenStream> tokens;
     90         PreloadRequestStream preloads;
     91         XSSInfoStream xssInfos;
     92         HTMLTokenizer::State tokenizerState;
     93         HTMLTreeBuilderSimulator::State treeBuilderState;
     94         HTMLInputCheckpoint inputCheckpoint;
     95         TokenPreloadScannerCheckpoint preloadScannerCheckpoint;
     96     };
     97     void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
     98 
     99 protected:
    100     virtual void insert(const SegmentedString&) OVERRIDE;
    101     virtual void append(PassRefPtr<StringImpl>) OVERRIDE;
    102     virtual void finish() OVERRIDE;
    103 
    104     HTMLDocumentParser(Document*, bool reportErrors);
    105     HTMLDocumentParser(DocumentFragment*, Element* contextElement, ParserContentPolicy);
    106 
    107     HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
    108 
    109     void forcePlaintextForTextDocument();
    110 
    111 private:
    112     static PassRefPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
    113     {
    114         return adoptRef(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy));
    115     }
    116 
    117     // DocumentParser
    118     virtual void pinToMainThread() OVERRIDE;
    119     virtual void detach() OVERRIDE;
    120     virtual bool hasInsertionPoint() OVERRIDE;
    121     virtual bool processingData() const OVERRIDE;
    122     virtual void prepareToStopParsing() OVERRIDE;
    123     virtual void stopParsing() OVERRIDE;
    124     virtual bool isWaitingForScripts() const OVERRIDE;
    125     virtual bool isExecutingScript() const OVERRIDE;
    126     virtual void executeScriptsWaitingForResources() OVERRIDE;
    127 
    128     // HTMLScriptRunnerHost
    129     virtual void watchForLoad(Resource*) OVERRIDE;
    130     virtual void stopWatchingForLoad(Resource*) OVERRIDE;
    131     virtual HTMLInputStream& inputStream() { return m_input; }
    132     virtual bool hasPreloadScanner() const { return m_preloadScanner.get() && !shouldUseThreading(); }
    133     virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE;
    134 
    135     // ResourceClient
    136     virtual void notifyFinished(Resource*);
    137 
    138     void startBackgroundParser();
    139     void stopBackgroundParser();
    140     void validateSpeculations(PassOwnPtr<ParsedChunk> lastChunk);
    141     void discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunk, PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>);
    142     void processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
    143     void pumpPendingSpeculations();
    144 
    145     Document* contextForParsingSession();
    146 
    147     enum SynchronousMode {
    148         AllowYield,
    149         ForceSynchronous,
    150     };
    151     bool canTakeNextToken(SynchronousMode, PumpSession&);
    152     void pumpTokenizer(SynchronousMode);
    153     void pumpTokenizerIfPossible(SynchronousMode);
    154     void constructTreeFromHTMLToken(HTMLToken&);
    155     void constructTreeFromCompactHTMLToken(const CompactHTMLToken&);
    156 
    157     void runScriptsForPausedTreeBuilder();
    158     void resumeParsingAfterScriptExecution();
    159 
    160     void attemptToEnd();
    161     void endIfDelayed();
    162     void attemptToRunDeferredScriptsAndEnd();
    163     void end();
    164 
    165     bool shouldUseThreading() const { return m_options.useThreading && !m_isPinnedToMainThread; }
    166 
    167     bool isParsingFragment() const;
    168     bool isScheduledForResume() const;
    169     bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; }
    170     bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); }
    171 
    172     HTMLToken& token() { return *m_token; }
    173 
    174     HTMLParserOptions m_options;
    175     HTMLInputStream m_input;
    176 
    177     OwnPtr<HTMLToken> m_token;
    178     OwnPtr<HTMLTokenizer> m_tokenizer;
    179     OwnPtr<HTMLScriptRunner> m_scriptRunner;
    180     OwnPtr<HTMLTreeBuilder> m_treeBuilder;
    181     OwnPtr<HTMLPreloadScanner> m_preloadScanner;
    182     OwnPtr<HTMLPreloadScanner> m_insertionPreloadScanner;
    183     OwnPtr<HTMLParserScheduler> m_parserScheduler;
    184     HTMLSourceTracker m_sourceTracker;
    185     TextPosition m_textPosition;
    186     XSSAuditor m_xssAuditor;
    187     XSSAuditorDelegate m_xssAuditorDelegate;
    188 
    189     // FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be combined into a single state object
    190     // so they can be set and cleared together and passed between threads together.
    191     OwnPtr<ParsedChunk> m_lastChunkBeforeScript;
    192     Deque<OwnPtr<ParsedChunk> > m_speculations;
    193     WeakPtrFactory<HTMLDocumentParser> m_weakFactory;
    194     WeakPtr<BackgroundHTMLParser> m_backgroundParser;
    195     OwnPtr<HTMLResourcePreloader> m_preloader;
    196 
    197     bool m_isPinnedToMainThread;
    198     bool m_endWasDelayed;
    199     bool m_haveBackgroundParser;
    200     unsigned m_pumpSessionNestingLevel;
    201 };
    202 
    203 }
    204 
    205 #endif
    206