Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2013 Google, Inc. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef BackgroundHTMLParser_h
     27 #define BackgroundHTMLParser_h
     28 
     29 #include "core/dom/DocumentEncodingData.h"
     30 #include "core/html/parser/BackgroundHTMLInputStream.h"
     31 #include "core/html/parser/CompactHTMLToken.h"
     32 #include "core/html/parser/HTMLParserOptions.h"
     33 #include "core/html/parser/HTMLPreloadScanner.h"
     34 #include "core/html/parser/HTMLSourceTracker.h"
     35 #include "core/html/parser/HTMLTreeBuilderSimulator.h"
     36 #include "core/html/parser/TextResourceDecoder.h"
     37 #include "core/html/parser/XSSAuditorDelegate.h"
     38 #include "wtf/PassOwnPtr.h"
     39 #include "wtf/WeakPtr.h"
     40 
     41 namespace WebCore {
     42 
     43 class HTMLDocumentParser; // Used in this header only as WeakPtr<HTMLDocumentParser>.
     44 class SharedBuffer;       // Not referenced anywhere else in this header as shown.
     45 class XSSAuditor;         // Used in this header only as OwnPtr<XSSAuditor>.
     46 
     47 class BackgroundHTMLParser {
            // Bundles the tokenizer-side state declared below (input stream,
            // source tracker, token and tokenizer, tree-builder simulator, XSS
            // auditor, preload scanner, text decoder) together with a WeakPtr to
            // an HTMLDocumentParser and three "pending" output streams.
            //
            // NOTE(review): the class and method names suggest this object
            // tokenizes HTML away from the main thread and hands results back to
            // the HTMLDocumentParser. Only declarations are visible here, so the
            // actual threading rules must be confirmed against the implementation.
     48     WTF_MAKE_FAST_ALLOCATED;
     49 public:
            // Inputs supplied when a parser is started. Passed as
            // PassOwnPtr<Configuration> to start() and to the private constructor.
            // The xssAuditor / preloadScanner / decoder fields have the same types
            // as the m_xssAuditor / m_preloadScanner / m_decoder members below;
            // presumably they are moved into those members - TODO confirm.
     50     struct Configuration {
     51         HTMLParserOptions options;
     52         WeakPtr<HTMLDocumentParser> parser;
     53         OwnPtr<XSSAuditor> xssAuditor;
     54         OwnPtr<TokenPreloadScanner> preloadScanner;
     55         OwnPtr<TextResourceDecoder> decoder;
     56     };
     57 
            // Static entry point. Takes a WeakReference to a BackgroundHTMLParser
            // and a Configuration. The constructor and destructor are private, so
            // code outside this class cannot create or delete instances directly;
            // presumably start() performs the construction - TODO confirm.
     58     static void start(PassRefPtr<WeakReference<BackgroundHTMLParser> >, PassOwnPtr<Configuration>);
     59 
            // State bundle accepted by resumeFrom(). Its token, tokenizer,
            // tree-builder state, input checkpoint and preload-scanner checkpoint
            // fields line up by type with the parser's own members; unparsedInput
            // carries a String of input text.
            // NOTE(review): presumably used to rewind the background parser to an
            // earlier point and re-feed unparsedInput - confirm in the .cpp.
     60     struct Checkpoint {
     61         WeakPtr<HTMLDocumentParser> parser;
     62         OwnPtr<HTMLToken> token;
     63         OwnPtr<HTMLTokenizer> tokenizer;
     64         HTMLTreeBuilderSimulator::State treeBuilderState;
     65         HTMLInputCheckpoint inputCheckpoint;
     66         TokenPreloadScannerCheckpoint preloadScannerCheckpoint;
     67         String unparsedInput;
     68     };
     69 
            // Accepts a raw byte buffer and its length.
            // NOTE(review): the name indicates this variant is meant to be called
            // on the parser thread; nothing in this declaration enforces that.
     70     void appendRawBytesFromParserThread(const char* data, int dataLength);
     71 
            // Accepts a byte vector handed over as PassOwnPtr<Vector<char> >.
            // NOTE(review): the name indicates the bytes originate on the main
            // thread - confirm how the call is dispatched.
     72     void appendRawBytesFromMainThread(PassOwnPtr<Vector<char> >);
            // Supplies a TextResourceDecoder; presumably replaces m_decoder - TODO confirm.
     73     void setDecoder(PassOwnPtr<TextResourceDecoder>);
     74     void flush();
            // Accepts a Checkpoint (see the struct above).
     75     void resumeFrom(PassOwnPtr<Checkpoint>);
     76     void startedChunkWithCheckpoint(HTMLInputCheckpoint);
     77     void finish();
     78     void stop();
     79 
     80     void forcePlaintextForTextDocument();
     81 
     82 private:
            // Private lifetime: see the note on start().
     83     BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> >, PassOwnPtr<Configuration>);
     84     ~BackgroundHTMLParser();
     85 
            // Internal helpers; their bodies are not part of this header, so their
            // exact behavior is not documented here.
     86     void appendDecodedBytes(const String&);
     87     void markEndOfFile();
     88     void pumpTokenizer();
     89     void sendTokensToMainThread();
     90     void updateDocument(const String& decodedData);
     91 
            // NOTE(review): C++ constructs data members in declaration order and
            // destroys them in reverse; check the constructor's initializer list
            // before reordering anything below.
     92     WeakPtrFactory<BackgroundHTMLParser> m_weakFactory;
     93     BackgroundHTMLInputStream m_input;
     94     HTMLSourceTracker m_sourceTracker;
     95     OwnPtr<HTMLToken> m_token;
     96     OwnPtr<HTMLTokenizer> m_tokenizer;
     97     HTMLTreeBuilderSimulator m_treeBuilderSimulator;
     98     HTMLParserOptions m_options;
     99     WeakPtr<HTMLDocumentParser> m_parser;
    100 
            // Accumulated output streams (tokens, preload requests, XSS infos).
            // Presumably drained by sendTokensToMainThread() - TODO confirm.
    101     OwnPtr<CompactHTMLTokenStream> m_pendingTokens;
    102     PreloadRequestStream m_pendingPreloads;
    103     XSSInfoStream m_pendingXSSInfos;
    104 
            // Same types as the correspondingly named Configuration fields.
    105     OwnPtr<XSSAuditor> m_xssAuditor;
    106     OwnPtr<TokenPreloadScanner> m_preloadScanner;
    107     OwnPtr<TextResourceDecoder> m_decoder;
    108     DocumentEncodingData m_lastSeenEncodingData;
    109 };
    110 
    111 }
    112 
    113 #endif
    114