Home | History | Annotate | Download | only in fetch
      1 /*
      2     Copyright (C) 1999 Lars Knoll (knoll (at) mpi-hd.mpg.de)
      3     Copyright (C) 2006 Alexey Proskuryakov (ap (at) nypop.com)
      4     Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
      5 
      6     This library is free software; you can redistribute it and/or
      7     modify it under the terms of the GNU Library General Public
      8     License as published by the Free Software Foundation; either
      9     version 2 of the License, or (at your option) any later version.
     10 
     11     This library is distributed in the hope that it will be useful,
     12     but WITHOUT ANY WARRANTY; without even the implied warranty of
     13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14     Library General Public License for more details.
     15 
     16     You should have received a copy of the GNU Library General Public License
     17     along with this library; see the file COPYING.LIB.  If not, write to
     18     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19     Boston, MA 02110-1301, USA.
     20 
     21 */
     22 
     23 #ifndef TextResourceDecoder_h
     24 #define TextResourceDecoder_h
     25 
     26 #include "wtf/RefCounted.h"
     27 #include "wtf/text/TextEncoding.h"
     28 
     29 namespace WebCore {
     30 
     31 class HTMLMetaCharsetParser;
     32 
     33 class TextResourceDecoder {
        // Stateful decoder that turns raw resource bytes (decode()/flush()) into
        // String values. It tracks the encoding currently in use (m_encoding)
        // together with a record of where that encoding came from (m_source).
        // Instances are created through create(); the constructor is private.
     34 public:
        // Origin of the encoding currently held by the decoder. Passed to
        // setEncoding() and stored in m_source.
        // NOTE(review): the enumerators look like they are ordered by increasing
        // precedence - confirm against setEncoding() in the implementation file.
     35     enum EncodingSource {
     36         DefaultEncoding,
     37         AutoDetectedEncoding,
     38         EncodingFromContentSniffing,
     39         EncodingFromXMLHeader,
     40         EncodingFromMetaTag,
     41         EncodingFromCSSCharset,
     42         EncodingFromHTTPHeader,
     43         UserChosenEncoding,
     44         EncodingFromParentFrame
     45     };
     46 
        // Factory: allocates a decoder and hands ownership to the caller via
        // PassOwnPtr. When omitted, defaultEncoding is a default-constructed
        // WTF::TextEncoding and usesEncodingDetector is false.
     47     static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false)
     48     {
     49         return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
     50     }
     51     ~TextResourceDecoder();
     52 
        // Sets the encoding along with the source it was obtained from.
        // Defined out of line; any precedence rules live in the implementation.
     53     void setEncoding(const WTF::TextEncoding&, EncodingSource);
        // Returns a reference to the encoding currently stored in the decoder.
     54     const WTF::TextEncoding& encoding() const { return m_encoding; }
        // True when the current encoding source is AutoDetectedEncoding or
        // EncodingFromContentSniffing; false for every other source.
     55     bool encodingWasDetectedHeuristically() const
     56     {
     57         return m_source == AutoDetectedEncoding
     58             || m_source == EncodingFromContentSniffing;
     59     }
     60 
        // Feeds `length` bytes starting at `data` to the decoder and returns a
        // String. NOTE(review): presumably the text decoded from this chunk,
        // with incomplete input retained for later calls - confirm in the .cpp.
     61     String decode(const char* data, size_t length);
        // NOTE(review): presumably returns whatever text is still pending after
        // the last decode() call - confirm in the .cpp.
     62     String flush();
     63 
        // Records the name of `encoding` as a hint. Only the const char* returned
        // by encoding.name() is kept, not a copy of the TextEncoding.
        // NOTE(review): assumes that pointer outlives this decoder - confirm
        // against WTF::TextEncoding::name().
     64     void setHintEncoding(const WTF::TextEncoding& encoding)
     65     {
     66         m_hintEncoding = encoding.name();
     67     }
     68 
        // Turns lenient XML decoding on. This header offers no way to turn it
        // back off.
     69     void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
        // Returns the m_sawError flag.
     70     bool sawError() const { return m_sawError; }
     71 
     72 private:
        // Private: within this header, create() is the only caller.
     73     TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector);
     74 
     75     enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM.
        // Maps a MIME type string to one of the ContentType values above.
     76     static ContentType determineContentType(const String& mimeType);
        // Chooses the default encoding for a content type, given the
        // caller-specified default. Selection rules are in the implementation.
     77     static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding);
     78 
        // Encoding-detection helpers, all defined out of line. Each receives a
        // byte pointer and a length.
        // NOTE(review): judging by the parameter name, movedDataToBuffer reports
        // whether the input was moved into m_buffer - confirm in the .cpp.
     79     size_t checkForBOM(const char*, size_t);
     80     bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
     81     bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
     82     void checkForMetaCharset(const char*, size_t);
     83     void detectJapaneseEncoding(const char*, size_t);
     84     bool shouldAutoDetect() const;
     85 
        // Content type of the resource being decoded.
     86     ContentType m_contentType;
        // Encoding currently in use; exposed through encoding().
     87     WTF::TextEncoding m_encoding;
        // Owned codec object. NOTE(review): presumably created for m_encoding.
     88     OwnPtr<TextCodec> m_codec;
        // Where m_encoding came from; read by encodingWasDetectedHeuristically().
     89     EncodingSource m_source;
        // Encoding name stored by setHintEncoding(); a non-owning pointer.
     90     const char* m_hintEncoding;
        // Byte buffer. NOTE(review): presumably holds input that has not been
        // decoded yet while charset detection is in progress.
     91     Vector<char> m_buffer;
        // One flag per detection step; from the names, each records that the
        // matching checkFor*() step has already run - confirm in the .cpp.
     92     bool m_checkedForBOM;
     93     bool m_checkedForCSSCharset;
     94     bool m_checkedForXMLCharset;
     95     bool m_checkedForMetaCharset;
     96     bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
        // Value returned by sawError().
     97     bool m_sawError;
        // Third constructor argument by name (usesEncodingDetector).
        // NOTE(review): presumably consulted by shouldAutoDetect() - confirm.
     98     bool m_usesEncodingDetector;
     99 
        // Owned parser for <meta> charset declarations; only forward-declared in
        // this header.
    100     OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
    101 };
    102 
    103 }
    104 
    105 #endif
    106