Home | History | Annotate | Download | only in parser
      1 /*
      2     Copyright (C) 1999 Lars Knoll (knoll (at) mpi-hd.mpg.de)
      3     Copyright (C) 2006 Alexey Proskuryakov (ap (at) nypop.com)
      4     Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
      5 
      6     This library is free software; you can redistribute it and/or
      7     modify it under the terms of the GNU Library General Public
      8     License as published by the Free Software Foundation; either
      9     version 2 of the License, or (at your option) any later version.
     10 
     11     This library is distributed in the hope that it will be useful,
     12     but WITHOUT ANY WARRANTY; without even the implied warranty of
     13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14     Library General Public License for more details.
     15 
     16     You should have received a copy of the GNU Library General Public License
     17     along with this library; see the file COPYING.LIB.  If not, write to
     18     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19     Boston, MA 02110-1301, USA.
     20 
     21 */
     22 
     23 #ifndef TextResourceDecoder_h
     24 #define TextResourceDecoder_h
     25 
     26 #include "wtf/RefCounted.h"
     27 #include "wtf/text/TextEncoding.h"
     28 
     29 namespace blink {
     30 
     31 class DocumentEncodingData;
     32 class HTMLMetaCharsetParser;
     33 
      34 class TextResourceDecoder {
             // Judging by its interface, this class converts raw resource bytes
             // into a String (decode()/flush()) while keeping track of the text
             // encoding in use (m_encoding) and of where that encoding came from
             // (m_source). Only the small inline members are defined in this
             // header; everything else is implemented out of line.
      35 public:
             // Origin of the encoding currently in use. A value of this type is
             // passed to setEncoding() and kept in m_source. Nothing in this
             // header interprets the numeric order of the enumerators.
      36     enum EncodingSource {
      37         DefaultEncoding,
      38         AutoDetectedEncoding,
      39         EncodingFromContentSniffing,
      40         EncodingFromXMLHeader,
      41         EncodingFromMetaTag,
      42         EncodingFromCSSCharset,
      43         EncodingFromHTTPHeader,
      44         UserChosenEncoding,
      45         EncodingFromParentFrame
      46     };
      47 
             // Factory: allocates a decoder through the private constructor and
             // hands ownership to the caller via adoptPtr(). When the optional
             // arguments are omitted, defaultEncoding is a default-constructed
             // WTF::TextEncoding and usesEncodingDetector is false.
      48     static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false)
      49     {
      50         return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
      51     }
      52     ~TextResourceDecoder();
      53 
             // Supplies an encoding together with the source it was obtained from.
             // Defined out of line.
             // NOTE(review): presumably updates m_encoding and m_source - confirm
             // in the implementation file.
      54     void setEncoding(const WTF::TextEncoding&, EncodingSource);
             // Returns the current encoding (m_encoding) by const reference.
      55     const WTF::TextEncoding& encoding() const { return m_encoding; }
             // True only when m_source is AutoDetectedEncoding or
             // EncodingFromContentSniffing; every other source yields false.
      56     bool encodingWasDetectedHeuristically() const
      57     {
      58         return m_source == AutoDetectedEncoding
      59             || m_source == EncodingFromContentSniffing;
      60     }
      61 
             // decode() takes `length` bytes starting at `data` and returns a
             // String. Both functions are defined out of line.
             // NOTE(review): flush() presumably returns text for input that is
             // still held back (see m_buffer) - confirm in the implementation.
      62     String decode(const char* data, size_t length);
      63     String flush();
      64 
             // Remembers only the name of the given encoding: m_hintEncoding is a
             // const char* taken from encoding.name(), not a copy of the
             // TextEncoding object.
             // NOTE(review): assumes the pointer returned by name() stays valid for
             // the lifetime of this decoder - confirm against WTF::TextEncoding.
      65     void setHintEncoding(const WTF::TextEncoding& encoding)
      66     {
      67         m_hintEncoding = encoding.name();
      68     }
      69 
             // Switches lenient XML decoding on. This header offers no way to
             // switch it back off.
      70     void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
             // Returns the m_sawError flag; where the flag gets set is not visible
             // in this header.
      71     bool sawError() const { return m_sawError; }
      72 
      73 private:
             // Private so that instances can only be obtained through create().
      74     TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector);
      75 
             // Content categories; determineContentType() returns one of them for
             // a given MIME type string.
      76     enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM.
      77     static ContentType determineContentType(const String& mimeType);
             // Returns an encoding for a content type and a caller-supplied
             // default. NOTE(review): the selection rule lives in the
             // implementation file and is not visible here.
      78     static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding);
      79 
             // Helpers that each receive a (pointer, length) byte range; all are
             // defined out of line.
             // NOTE(review): movedDataToBuffer presumably tells the caller that the
             // input was appended to m_buffer - confirm in the implementation.
      80     size_t checkForBOM(const char*, size_t);
      81     bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
      82     bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
      83     void checkForMetaCharset(const char*, size_t);
      84     void detectJapaneseEncoding(const char*, size_t);
      85     bool shouldAutoDetect() const;
      86 
             // Data members. Their declaration order is also their initialisation
             // order, so keep it in sync with the constructor's initialiser list.
             // m_encoding / m_source back encoding() and
             // encodingWasDetectedHeuristically(); m_hintEncoding is written by
             // setHintEncoding().
             // NOTE(review): the m_checkedFor* flags presumably make each
             // corresponding check a one-time step - confirm in the implementation.
      87     ContentType m_contentType;
      88     WTF::TextEncoding m_encoding;
      89     OwnPtr<TextCodec> m_codec;
      90     EncodingSource m_source;
      91     const char* m_hintEncoding;
      92     Vector<char> m_buffer;
      93     bool m_checkedForBOM;
      94     bool m_checkedForCSSCharset;
      95     bool m_checkedForXMLCharset;
      96     bool m_checkedForMetaCharset;
      97     bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
      98     bool m_sawError;
      99     bool m_usesEncodingDetector;
     100 
             // Owned HTMLMetaCharsetParser instance.
             // NOTE(review): presumably used by checkForMetaCharset() - confirm.
     101     OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
     102 };
    103 
    104 }
    105 
    106 #endif
    107