Home | History | Annotate | Download | only in loader
      1 /*
      2     Copyright (C) 1999 Lars Knoll (knoll (at) mpi-hd.mpg.de)
      3     Copyright (C) 2006 Alexey Proskuryakov (ap (at) nypop.com)
      4     Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
      5 
      6     This library is free software; you can redistribute it and/or
      7     modify it under the terms of the GNU Library General Public
      8     License as published by the Free Software Foundation; either
      9     version 2 of the License, or (at your option) any later version.
     10 
     11     This library is distributed in the hope that it will be useful,
     12     but WITHOUT ANY WARRANTY; without even the implied warranty of
     13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14     Library General Public License for more details.
     15 
     16     You should have received a copy of the GNU Library General Public License
     17     along with this library; see the file COPYING.LIB.  If not, write to
     18     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19     Boston, MA 02110-1301, USA.
     20 
     21 */
     22 
     23 #ifndef TextResourceDecoder_h
     24 #define TextResourceDecoder_h
     25 
     26 #include "wtf/RefCounted.h"
     27 #include "wtf/text/TextEncoding.h"
     28 
     29 namespace WebCore {
     30 
     31 class HTMLMetaCharsetParser;
     32 
     33 class TextResourceDecoder : public RefCounted<TextResourceDecoder> {
     34 public:
     35     enum EncodingSource {
     36         DefaultEncoding,
     37         AutoDetectedEncoding,
     38         EncodingFromContentSniffing,
     39         EncodingFromXMLHeader,
     40         EncodingFromMetaTag,
     41         EncodingFromCSSCharset,
     42         EncodingFromHTTPHeader,
     43         UserChosenEncoding,
     44         EncodingFromParentFrame
     45     };
     46 
     47     static PassRefPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false)
     48     {
     49         return adoptRef(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
     50     }
     51     ~TextResourceDecoder();
     52 
     53     void setEncoding(const WTF::TextEncoding&, EncodingSource);
     54     const WTF::TextEncoding& encoding() const { return m_encoding; }
     55 
     56     String decode(const char* data, size_t length);
     57     String flush();
     58 
     59     void setHintEncoding(const TextResourceDecoder* hintDecoder)
     60     {
     61         // hintEncoding is for use with autodetection, which should be
     62         // only invoked when hintEncoding comes from auto-detection.
     63         if (hintDecoder && hintDecoder->wasDetectedHueristically())
     64             m_hintEncoding = hintDecoder->encoding().name();
     65     }
     66 
     67     void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
     68     bool sawError() const { return m_sawError; }
     69 
     70 private:
     71     TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector);
     72 
     73     enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM.
     74     static ContentType determineContentType(const String& mimeType);
     75     static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding);
     76 
     77     bool wasDetectedHueristically() const { return m_source == AutoDetectedEncoding || m_source == EncodingFromContentSniffing; }
     78 
     79     size_t checkForBOM(const char*, size_t);
     80     bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
     81     bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
     82     void checkForMetaCharset(const char*, size_t);
     83     void detectJapaneseEncoding(const char*, size_t);
     84     bool shouldAutoDetect() const;
     85 
     86     ContentType m_contentType;
     87     WTF::TextEncoding m_encoding;
     88     OwnPtr<TextCodec> m_codec;
     89     EncodingSource m_source;
     90     const char* m_hintEncoding;
     91     Vector<char> m_buffer;
     92     bool m_checkedForBOM;
     93     bool m_checkedForCSSCharset;
     94     bool m_checkedForXMLCharset;
     95     bool m_checkedForMetaCharset;
     96     bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
     97     bool m_sawError;
     98     bool m_usesEncodingDetector;
     99 
    100     OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
    101 };
    102 
    103 }
    104 
    105 #endif
    106