Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2013 Google, Inc. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef BackgroundHTMLParser_h
     27 #define BackgroundHTMLParser_h
     28 
     29 #include "core/html/parser/BackgroundHTMLInputStream.h"
     30 #include "core/html/parser/CompactHTMLToken.h"
     31 #include "core/html/parser/HTMLParserOptions.h"
     32 #include "core/html/parser/HTMLPreloadScanner.h"
     33 #include "core/html/parser/HTMLSourceTracker.h"
     34 #include "core/html/parser/HTMLToken.h"
     35 #include "core/html/parser/HTMLTokenizer.h"
     36 #include "core/html/parser/HTMLTreeBuilderSimulator.h"
     37 #include "core/html/parser/XSSAuditorDelegate.h"
     38 #include "wtf/PassOwnPtr.h"
     39 #include "wtf/WeakPtr.h"
     40 
     41 namespace WebCore {
     42 
     43 class HTMLDocumentParser;
     44 class XSSAuditor;
     45 
     46 class BackgroundHTMLParser {
            // HTML parsing helper that keeps its own input stream, token, tokenizer
            // and tree-builder simulator, and holds a weak pointer back to an
            // HTMLDocumentParser (m_parser). Only declarations are visible in this
            // header; anything said below about runtime behaviour that is not
            // spelled out by a declaration is marked NOTE(review).
            //
            // Lifetime: instances are created only through create(), which
            // heap-allocates the object without handing the pointer to any owner.
            // Per the note inside create(), stop() is what frees the object.
     47     WTF_MAKE_FAST_ALLOCATED;
     48 public:
            // Construction parameters handed to create(). The field types match
            // the m_options, m_parser, m_xssAuditor and m_preloadScanner members.
            // NOTE(review): presumably the constructor moves these fields into
            // those members -- confirm in the .cpp.
     49     struct Configuration {
     50         HTMLParserOptions options;
     51         WeakPtr<HTMLDocumentParser> parser;
     52         OwnPtr<XSSAuditor> xssAuditor;
     53         OwnPtr<TokenPreloadScanner> preloadScanner;
     54     };
     55 
            // Allocates a new BackgroundHTMLParser from |reference| and |config|.
            // Nothing is returned: the raw pointer is discarded on purpose.
            // NOTE(review): presumably |reference| is how callers later reach the
            // instance (see m_weakFactory) -- confirm in the constructor.
     56     static void create(PassRefPtr<WeakReference<BackgroundHTMLParser> > reference, PassOwnPtr<Configuration> config)
     57     {
     58         new BackgroundHTMLParser(reference, config);
     59         // Not a leak: the caller must free the new object by calling stop().
     60     }
     61 
            // Bundle of parser state accepted by resumeFrom(). It carries a
            // token, a tokenizer, a tree-builder simulator state, checkpoints for
            // the input stream and the preload scanner, and a string of unparsed
            // input, plus a weak pointer to the HTMLDocumentParser.
     62     struct Checkpoint {
     63         WeakPtr<HTMLDocumentParser> parser;
     64         OwnPtr<HTMLToken> token;
     65         OwnPtr<HTMLTokenizer> tokenizer;
     66         HTMLTreeBuilderSimulator::State treeBuilderState;
     67         HTMLInputCheckpoint inputCheckpoint;
     68         TokenPreloadScannerCheckpoint preloadScannerCheckpoint;
     69         String unparsedInput;
     70     };
     71 
            // Public operations. Their definitions are not in this header.
            // NOTE(review): the one-line descriptions below are read off the
            // names and signatures -- verify against the .cpp before relying
            // on them.
            //
            // append(): takes a string of additional input.
     72     void append(const String&);
            // resumeFrom(): takes ownership of a Checkpoint (PassOwnPtr).
     73     void resumeFrom(PassOwnPtr<Checkpoint>);
            // startedChunkWithCheckpoint(): takes an input-stream checkpoint by value.
     74     void startedChunkWithCheckpoint(HTMLInputCheckpoint);
     75     void finish();
            // stop(): per the note in create(), this is the call that frees the
            // object. NOTE(review): presumably the instance must not be used
            // afterwards -- confirm.
     76     void stop();
     77 
     78     void forcePlaintextForTextDocument();
     79 
     80 private:
            // Private so that create() is the only way to construct an instance.
     81     BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> >, PassOwnPtr<Configuration>);
     82 
            // Internal helpers; defined outside this header.
     83     void markEndOfFile();
     84     void pumpTokenizer();
     85     void sendTokensToMainThread();
     86 
            // Declaration order below is the member initialisation order used by
            // the out-of-line constructor; keep it stable when editing.
            //
            // Factory for weak pointers to this object.
     87     WeakPtrFactory<BackgroundHTMLParser> m_weakFactory;
            // Tokenizing state: input stream, source tracker, current token,
            // tokenizer and tree-builder simulator.
     88     BackgroundHTMLInputStream m_input;
     89     HTMLSourceTracker m_sourceTracker;
     90     OwnPtr<HTMLToken> m_token;
     91     OwnPtr<HTMLTokenizer> m_tokenizer;
     92     HTMLTreeBuilderSimulator m_treeBuilderSimulator;
     93     HTMLParserOptions m_options;
            // Weak (non-owning) pointer to the HTMLDocumentParser.
     94     WeakPtr<HTMLDocumentParser> m_parser;
     95 
            // Accumulated output: compact tokens, preload requests and XSS infos.
            // NOTE(review): presumably flushed by sendTokensToMainThread() -- confirm.
     96     OwnPtr<CompactHTMLTokenStream> m_pendingTokens;
     97     PreloadRequestStream m_pendingPreloads;
     98     XSSInfoStream m_pendingXSSInfos;
     99 
            // Owned helpers; types match the corresponding Configuration fields.
    100     OwnPtr<XSSAuditor> m_xssAuditor;
    101     OwnPtr<TokenPreloadScanner> m_preloadScanner;
    102 };
    103 
    104 }
    105 
    106 #endif
    107