Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef HTMLDocumentParser_h
     27 #define HTMLDocumentParser_h
     28 
     29 #include "core/dom/ParserContentPolicy.h"
     30 #include "core/dom/ScriptableDocumentParser.h"
     31 #include "core/fetch/ResourceClient.h"
     32 #include "core/frame/UseCounter.h"
     33 #include "core/html/parser/BackgroundHTMLInputStream.h"
     34 #include "core/html/parser/CompactHTMLToken.h"
     35 #include "core/html/parser/HTMLInputStream.h"
     36 #include "core/html/parser/HTMLParserOptions.h"
     37 #include "core/html/parser/HTMLPreloadScanner.h"
     38 #include "core/html/parser/HTMLScriptRunnerHost.h"
     39 #include "core/html/parser/HTMLSourceTracker.h"
     40 #include "core/html/parser/HTMLToken.h"
     41 #include "core/html/parser/HTMLTokenizer.h"
     42 #include "core/html/parser/HTMLTreeBuilderSimulator.h"
     43 #include "core/html/parser/TextResourceDecoder.h"
     44 #include "core/html/parser/XSSAuditor.h"
     45 #include "core/html/parser/XSSAuditorDelegate.h"
     46 #include "platform/text/SegmentedString.h"
     47 #include "wtf/Deque.h"
     48 #include "wtf/OwnPtr.h"
     49 #include "wtf/WeakPtr.h"
     50 #include "wtf/text/TextPosition.h"
     51 
     52 namespace blink {
     53 
     54 class BackgroundHTMLParser;
     55 class CompactHTMLToken; // NOTE(review): CompactHTMLToken.h is already included above, so this is presumably redundant - confirm.
     56 class Document;
     57 class DocumentFragment;
     58 class HTMLDocument;
     59 class HTMLParserScheduler;
     60 class HTMLScriptRunner;
     61 class HTMLTreeBuilder;
     62 class HTMLResourcePreloader;
     63 class ScriptController; // NOTE(review): not referenced anywhere else in this header - confirm it is still needed.
     64 class ScriptSourceCode; // NOTE(review): not referenced anywhere else in this header - confirm it is still needed.
     65 
     66 class PumpSession; // Taken by reference in HTMLDocumentParser::canTakeNextToken().
     67 
     68 class HTMLDocumentParser :  public ScriptableDocumentParser, private HTMLScriptRunnerHost {
        // Document parser for HTML input. It is publicly a ScriptableDocumentParser; the
        // HTMLScriptRunnerHost interface is inherited privately and its overrides live in the
        // private section below. The object holds its own tokenizer state (m_tokenizer, m_token,
        // m_input) and also declares entry points that receive ParsedChunks and encoding data
        // from a background parser.
     69     WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED;
     70     WILL_BE_USING_GARBAGE_COLLECTED_MIXIN(HTMLDocumentParser);
     71 public:
            // Factory for parsing a whole HTMLDocument: forwards both arguments to the protected
            // constructor and wraps the new object with adoptRefWillBeNoop().
     72     static PassRefPtrWillBeRawPtr<HTMLDocumentParser> create(HTMLDocument& document, bool reportErrors)
     73     {
     74         return adoptRefWillBeNoop(new HTMLDocumentParser(document, reportErrors));
     75     }
     76     virtual ~HTMLDocumentParser();
     77     virtual void trace(Visitor*) OVERRIDE;
     78 
     79     // Exposed for HTMLParserScheduler
     80     void resumeParsingAfterYield();
     81 
            // Static entry point for fragment parsing; the content policy defaults to
            // AllowScriptingContent when the caller does not pass one.
     82     static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, ParserContentPolicy = AllowScriptingContent);
     83 
            // Returns the raw pointer held by m_tokenizer (may be null if m_tokenizer is unset).
     84     HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
     85 
     86     virtual TextPosition textPosition() const OVERRIDE FINAL;
     87     virtual OrdinalNumber lineNumber() const OVERRIDE FINAL;
     88 
     89     virtual void suspendScheduledTasks() OVERRIDE FINAL;
     90     virtual void resumeScheduledTasks() OVERRIDE FINAL;
     91 
            // Unit of work handed to didReceiveParsedChunkFromBackgroundParser(): a compact token
            // stream together with preload requests, XSS infos, tokenizer and tree-builder-simulator
            // states, and input / preload-scanner checkpoints.
     92     struct ParsedChunk {
     93         OwnPtr<CompactHTMLTokenStream> tokens;
     94         PreloadRequestStream preloads;
     95         XSSInfoStream xssInfos;
     96         HTMLTokenizer::State tokenizerState;
     97         HTMLTreeBuilderSimulator::State treeBuilderState;
     98         HTMLInputCheckpoint inputCheckpoint;
     99         TokenPreloadScannerCheckpoint preloadScannerCheckpoint;
    100     };
            // The chunk is passed as PassOwnPtr, i.e. ownership moves to this parser.
    101     void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
    102     void didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData&);
    103 
    104     virtual void appendBytes(const char* bytes, size_t length) OVERRIDE;
    105     virtual void flush() OVERRIDE FINAL;
    106     virtual void setDecoder(PassOwnPtr<TextResourceDecoder>) OVERRIDE FINAL;
    107 
            // Looks up the UseCounter for the Document returned by contextForParsingSession().
    108     UseCounter* useCounter() { return UseCounter::getFrom(contextForParsingSession()); }
    109 
    110 protected:
    111     virtual void insert(const SegmentedString&) OVERRIDE FINAL;
    112     virtual void append(PassRefPtr<StringImpl>) OVERRIDE;
    113     virtual void finish() OVERRIDE FINAL;
    114 
            // Constructors are protected: outside code obtains instances through the create()
            // factories, while subclasses may call these directly.
    115     HTMLDocumentParser(HTMLDocument&, bool reportErrors);
    116     HTMLDocumentParser(DocumentFragment*, Element* contextElement, ParserContentPolicy);
    117 
    118     HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
    119 
    120     void forcePlaintextForTextDocument();
    121 
    122 private:
            // Fragment-parsing counterpart of the public create(). It is private, so outside code
            // cannot call it; presumably used by parseDocumentFragment() - confirm in the .cpp.
    123     static PassRefPtrWillBeRawPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
    124     {
    125         return adoptRefWillBeNoop(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy));
    126     }
    127 
    128     // DocumentParser
    129     virtual void pinToMainThread() OVERRIDE FINAL;
    130     virtual void detach() OVERRIDE FINAL;
    131     virtual bool hasInsertionPoint() OVERRIDE FINAL;
    132     virtual bool processingData() const OVERRIDE FINAL;
    133     virtual void prepareToStopParsing() OVERRIDE FINAL;
    134     virtual void stopParsing() OVERRIDE FINAL;
    135     virtual bool isWaitingForScripts() const OVERRIDE FINAL;
    136     virtual bool isExecutingScript() const OVERRIDE FINAL;
    137     virtual void executeScriptsWaitingForResources() OVERRIDE FINAL;
    138 
    139     // HTMLScriptRunnerHost
    140     virtual void notifyScriptLoaded(Resource*) OVERRIDE FINAL;
    141     virtual HTMLInputStream& inputStream() OVERRIDE FINAL { return m_input; }
            // True only when m_preloadScanner is set and shouldUseThreading() is false.
    142     virtual bool hasPreloadScanner() const OVERRIDE FINAL { return m_preloadScanner.get() && !shouldUseThreading(); }
    143     virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE FINAL;
    144 
    145     void startBackgroundParser();
    146     void stopBackgroundParser();
    147     void validateSpeculations(PassOwnPtr<ParsedChunk> lastChunk);
    148     void discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunk, PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>);
    149     void processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>);
    150     void pumpPendingSpeculations();
    151 
    152     Document* contextForParsingSession();
    153 
            // Mode argument for the pump/canTakeNextToken functions below. The names suggest
            // AllowYield permits yielding while ForceSynchronous does not - confirm in the .cpp.
    154     enum SynchronousMode {
    155         AllowYield,
    156         ForceSynchronous,
    157     };
    158     bool canTakeNextToken(SynchronousMode, PumpSession&);
    159     void pumpTokenizer(SynchronousMode);
    160     void pumpTokenizerIfPossible(SynchronousMode);
    161     void constructTreeFromHTMLToken(HTMLToken&);
    162     void constructTreeFromCompactHTMLToken(const CompactHTMLToken&);
    163 
    164     void runScriptsForPausedTreeBuilder();
    165     void resumeParsingAfterScriptExecution();
    166 
    167     void attemptToEnd();
    168     void endIfDelayed();
    169     void attemptToRunDeferredScriptsAndEnd();
    170     void end();
    171 
            // Threading is used only when the parser options enable it and this parser has not
            // been pinned to the main thread.
    172     bool shouldUseThreading() const { return m_options.useThreading && !m_isPinnedToMainThread; }
    173 
    174     bool isParsingFragment() const;
    175     bool isScheduledForResume() const;
            // A pump session is active whenever the nesting counter is non-zero.
    176     bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; }
            // True while any of the following holds: inside a pump session, waiting for scripts,
            // scheduled for resume, or executing a script.
    177     bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); }
    178 
            // Dereferences m_token without a null check; callers must ensure m_token is set.
    179     HTMLToken& token() { return *m_token; }
    180 
    181     HTMLParserOptions m_options;
    182     HTMLInputStream m_input;
    183 
    184     OwnPtr<HTMLToken> m_token;
    185     OwnPtr<HTMLTokenizer> m_tokenizer;
    186     OwnPtrWillBeMember<HTMLScriptRunner> m_scriptRunner;
    187     OwnPtrWillBeMember<HTMLTreeBuilder> m_treeBuilder;
    188     OwnPtr<HTMLPreloadScanner> m_preloadScanner;
    189     OwnPtr<HTMLPreloadScanner> m_insertionPreloadScanner;
    190     OwnPtr<HTMLParserScheduler> m_parserScheduler;
    191     HTMLSourceTracker m_sourceTracker;
    192     TextPosition m_textPosition;
    193     XSSAuditor m_xssAuditor;
    194     XSSAuditorDelegate m_xssAuditorDelegate;
    195 
    196     // FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be combined into a single state object
    197     // so they can be set and cleared together and passed between threads together.
    198     OwnPtr<ParsedChunk> m_lastChunkBeforeScript;
            // Queue of owned ParsedChunks; presumably chunks received from the background parser
            // that have not been processed yet - confirm in the .cpp.
    199     Deque<OwnPtr<ParsedChunk> > m_speculations;
    200     WeakPtrFactory<HTMLDocumentParser> m_weakFactory;
    201     WeakPtr<BackgroundHTMLParser> m_backgroundParser;
    202     OwnPtrWillBeMember<HTMLResourcePreloader> m_preloader;
    203 
            // Read by shouldUseThreading(); when true, threading is disabled for this parser.
    204     bool m_isPinnedToMainThread;
    205     bool m_endWasDelayed;
    206     bool m_haveBackgroundParser;
            // Read by inPumpSession(); non-zero means a pump session is in progress.
    207     unsigned m_pumpSessionNestingLevel;
    208 };
    209 
    210 }
    211 
    212 #endif
    213