Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Provides a global database of differential decompression dictionaries for
      6 // the SDCH filter (which processes SDCH-encoded content).
      7 
      8 // Exactly one instance of SdchManager is built, and all references are made
      9 // into that collection.
     10 //
     11 // The SdchManager maintains a collection of memory resident dictionaries. It
     12 // can find a dictionary (based on a server specification of a hash), store a
     13 // dictionary, and make judgements about what URLs can use, set, etc. a
     14 // dictionary.
     15 
     16 // These dictionaries are acquired over the net, and include a header
     17 // (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF
     18 // module) to decompress data.
     19 
     20 #ifndef NET_BASE_SDCH_MANAGER_H_
     21 #define NET_BASE_SDCH_MANAGER_H_
     22 
     23 #include <map>
     24 #include <set>
     25 #include <string>
     26 
     27 #include "base/gtest_prod_util.h"
     28 #include "base/memory/ref_counted.h"
     29 #include "base/memory/scoped_ptr.h"
     30 #include "base/threading/non_thread_safe.h"
     31 #include "base/time/time.h"
     32 #include "net/base/net_export.h"
     33 #include "url/gurl.h"
     34 
     35 namespace net {
     36 
     37 //------------------------------------------------------------------------------
     38 // Create a public interface to help us load SDCH dictionaries.
     39 // The SdchManager class allows registration to support this interface.
     40 // A browser may register a fetcher that is used by the dictionary managers to
     41 // get data from a specified URL. This allows us to use very high level browser
     42 // functionality in this base (when the functionality can be provided).
     43 class NET_EXPORT SdchFetcher {
     44  public:
          // Callback interface through which a fetcher hands a retrieved
          // dictionary back to its consumer. SdchManager (declared later in this
          // file) implements it.
     45   class NET_EXPORT Delegate {
     46    public:
     47     virtual ~Delegate() {}
     48 
     49     // Called whenever the SdchFetcher has successfully retrieved a
     50     // dictionary.  |dictionary_text| contains the body of the dictionary
     51     // retrieved from |dictionary_url|.
     52     virtual void AddSdchDictionary(const std::string& dictionary_text,
     53                                    const GURL& dictionary_url) = 0;
     54   };
     55 
     56   SdchFetcher() {}
     57   virtual ~SdchFetcher() {}
     58 
     59   // The Schedule() method is called when there is a need to get a dictionary
     60   // from a server. The callee is responsible for getting that dictionary_text,
     61   // and then calling back to AddSdchDictionary() in the Delegate instance.
     62   virtual void Schedule(const GURL& dictionary_url) = 0;
     63 
     64   // The Cancel() method is called to cancel all pending dictionary fetches.
     65   // This is used for implementation of ClearData() below.
     66   virtual void Cancel() = 0;
     67 
     68  private:
          // Fetchers are neither copyable nor assignable.
     69   DISALLOW_COPY_AND_ASSIGN(SdchFetcher);
     70 };
     71 
     72 //------------------------------------------------------------------------------
     73 
        // SdchManager owns the collection of memory-resident SDCH dictionaries. It
        // implements SdchFetcher::Delegate so that a registered fetcher can hand
        // downloaded dictionaries back via AddSdchDictionary(), and it tracks the
        // per-domain blacklist used to suppress SDCH advertisement.
        // NOTE(review): the class derives from base::NonThreadSafe, so it is
        // presumably meant to be used from a single thread only -- confirm.
     74 class NET_EXPORT SdchManager
     75     : public SdchFetcher::Delegate,
     76       public NON_EXPORTED_BASE(base::NonThreadSafe) {
     77  public:
     78   // A list of errors that appeared and were either resolved, or used to turn
     79   // off sdch encoding.
          // NOTE(review): MAX_PROBLEM_CODE is documented as bounding a histogram, so
          // the explicit numeric values are presumably persisted in stats and should
          // not be renumbered or reused -- confirm before editing.
     80   enum ProblemCodes {
     81     MIN_PROBLEM_CODE,  // First enumerator; implicitly 0.
     82 
     83     // Content-encoding correction problems.
     84     ADDED_CONTENT_ENCODING = 1,
     85     FIXED_CONTENT_ENCODING = 2,
     86     FIXED_CONTENT_ENCODINGS = 3,
     87 
     88     // Content decoding errors.
     89     DECODE_HEADER_ERROR = 4,
     90     DECODE_BODY_ERROR = 5,
     91 
     92     // More content-encoding correction problems.
     93     OPTIONAL_GUNZIP_ENCODING_ADDED = 6,
     94 
     95     // Content encoding correction when we're not even tagged as HTML!?!
     96     BINARY_ADDED_CONTENT_ENCODING = 7,
     97     BINARY_FIXED_CONTENT_ENCODING = 8,
     98     BINARY_FIXED_CONTENT_ENCODINGS = 9,
     99 
    100     // Dictionary selection for use problems.
    101     DICTIONARY_FOUND_HAS_WRONG_DOMAIN = 10,
    102     DICTIONARY_FOUND_HAS_WRONG_PORT_LIST = 11,
    103     DICTIONARY_FOUND_HAS_WRONG_PATH = 12,
    104     DICTIONARY_FOUND_HAS_WRONG_SCHEME = 13,
    105     DICTIONARY_HASH_NOT_FOUND = 14,
    106     DICTIONARY_HASH_MALFORMED = 15,
    107 
    108     // Dictionary saving problems.
    109     DICTIONARY_HAS_NO_HEADER = 20,
    110     DICTIONARY_HEADER_LINE_MISSING_COLON = 21,
    111     DICTIONARY_MISSING_DOMAIN_SPECIFIER = 22,
    112     DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN = 23,
    113     DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL = 24,
    114     DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL = 25,
    115     DICTIONARY_HAS_NO_TEXT = 26,
    116     DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX = 27,
    117 
    118     // Dictionary loading problems.
    119     DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30,
    120     DICTIONARY_SELECTED_FOR_SSL = 31,
    121     DICTIONARY_ALREADY_LOADED = 32,
    122     DICTIONARY_SELECTED_FROM_NON_HTTP = 33,
    123     DICTIONARY_IS_TOO_LARGE= 34,
    124     DICTIONARY_COUNT_EXCEEDED = 35,
    125     DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD = 36,
    126     DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD = 37,
    127     DICTIONARY_FETCH_READ_FAILED = 38,
    128 
    129     // Failsafe hack.
    130     ATTEMPT_TO_DECODE_NON_HTTP_DATA = 40,
    131 
    132 
    133     // Content-Encoding problems detected, with no action taken.
    134     MULTIENCODING_FOR_NON_SDCH_REQUEST = 50,
    135     SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST = 51,
    136 
    137     // Dictionary manager issues.
    138     DOMAIN_BLACKLIST_INCLUDES_TARGET = 61,
    139 
    140     // Problematic decode recovery methods.
    141     META_REFRESH_RECOVERY = 70,            // Dictionary not found.
    142     // defunct = 71,  // Almost the same as META_REFRESH_UNSUPPORTED.
    143     // defunct = 72,  // Almost the same as CACHED_META_REFRESH_UNSUPPORTED.
    144     // defunct = 73,  // PASSING_THROUGH_NON_SDCH plus
    145                       // RESPONSE_TENTATIVE_SDCH in ../filter/sdch_filter.cc.
    146     META_REFRESH_UNSUPPORTED = 74,         // Unrecoverable error.
    147     CACHED_META_REFRESH_UNSUPPORTED = 75,  // As above, but pulled from cache.
    148     PASSING_THROUGH_NON_SDCH = 76,  // Tagged sdch but missing dictionary-hash.
    149     INCOMPLETE_SDCH_CONTENT = 77,   // Last window was not completely decoded.
    150     PASS_THROUGH_404_CODE = 78,     // URL not found message passing through.
    151 
    152     // This next report is very common, and not really an error scenario, but
    153     // it exercises the error recovery logic.
    154     PASS_THROUGH_OLD_CACHED = 79,   // Back button got pre-SDCH cached content.
    155 
    156     // Common decoded recovery methods.
    157     META_REFRESH_CACHED_RECOVERY = 80,  // Probably startup tab loading.
    158     // defunct = 81, // Now tracked by ResponseCorruptionDetectionCause histo.
    159 
    160     // Non-SDCH problems, only accounted for to make stat counting complete
    161     // (i.e., be able to be sure all dictionary advertisements are accounted
    162     // for).
    163 
    164     UNFLUSHED_CONTENT = 90,    // Possible error in filter chaining.
    165     // defunct = 91,           // MISSING_TIME_STATS (Should never happen.)
    166     CACHE_DECODED = 92,        // No timing stats recorded.
    167     // defunct = 93,           // OVER_10_MINUTES (No timing stats recorded.)
    168     UNINITIALIZED = 94,        // Filter never even got initialized.
    169     PRIOR_TO_DICTIONARY = 95,  // We hadn't even parsed a dictionary selector.
    170     DECODE_ERROR = 96,         // Something went wrong during decode.
    171 
    172     // Problem during the latency test.
    173     LATENCY_TEST_DISALLOWED = 100,  // SDCH now failing, but it worked before!
    174 
    175     MAX_PROBLEM_CODE  // Used to bound histogram; implicitly 101.
    176   };
    177 
    178   // Use the following static limits to block DOS attacks until we implement
    179   // a cached dictionary eviction strategy.
    180   static const size_t kMaxDictionarySize;
    181   static const size_t kMaxDictionaryCount;
    182 
    183   // There is one instance of |Dictionary| for each memory-cached SDCH
    184   // dictionary.
    185   class NET_EXPORT_PRIVATE Dictionary : public base::RefCounted<Dictionary> {
    186    public:
    187     // Sdch filters can get our text to use in decoding compressed data.
    188     const std::string& text() const { return text_; }
    189 
    190    private:
            // RefCounted needs access to the private destructor.
    191     friend class base::RefCounted<Dictionary>;
    192     friend class SdchManager;  // Only manager can construct an instance.
    193     FRIEND_TEST_ALL_PREFIXES(SdchManagerTest, PathMatch);
    194 
    195     // Construct a vc-diff usable dictionary from the dictionary_text starting
    196     // at the given offset. The supplied client_hash should be used to
    197     // advertise the dictionary's availability relative to the supplied URL.
    198     Dictionary(const std::string& dictionary_text,
    199                size_t offset,
    200                const std::string& client_hash,
    201                const GURL& url,
    202                const std::string& domain,
    203                const std::string& path,
    204                const base::Time& expiration,
    205                const std::set<int>& ports);
    206     virtual ~Dictionary();
    207 
            // Accessors for the source URL and the client-side hash of this
            // dictionary.
    208     const GURL& url() const { return url_; }
    209     const std::string& client_hash() const { return client_hash_; }
    210 
    211     // Security method to check if we can advertise this dictionary for use
    212     // if the |target_url| returns SDCH compressed data.
    213     bool CanAdvertise(const GURL& target_url);
    214 
    215     // Security methods to check if we can establish a new dictionary with the
    216     // given data, that arrived in response to get of dictionary_url.
    217     static bool CanSet(const std::string& domain, const std::string& path,
    218                        const std::set<int>& ports, const GURL& dictionary_url);
    219 
    220     // Security method to check if we can use a dictionary to decompress a
    221     // target that arrived with a reference to this dictionary.
    222     bool CanUse(const GURL& referring_url);
    223 
    224     // Compare paths to see if they "match" for dictionary use.
    225     static bool PathMatch(const std::string& path,
    226                           const std::string& restriction);
    227 
    228     // Compare domains to see if they "match" for dictionary use.
    229     static bool DomainMatch(const GURL& url, const std::string& restriction);
    230 
    231 
    232     // The actual text of the dictionary.
    233     std::string text_;
    234 
    235     // Part of the hash of text_ that the client uses to advertise the fact that
    236     // it has a specific dictionary pre-cached.
    237     std::string client_hash_;
    238 
    239     // The GURL that arrived with the text_ in a URL request to specify where
    240     // this dictionary may be used.
    241     const GURL url_;
    242 
    243     // Metadata "headers" that preceded the dictionary text.
    244     // Each dictionary payload consists of several headers, followed by the text
    245     // of the dictionary. The following are the known headers.
    246     const std::string domain_;
    247     const std::string path_;
    248     const base::Time expiration_;  // Implied by max-age.
    249     const std::set<int> ports_;
    250 
    251     DISALLOW_COPY_AND_ASSIGN(Dictionary);
    252   };
    253 
    254   SdchManager();
    255   virtual ~SdchManager();
    256 
    257   // Clear data (for browser data removal).
    258   void ClearData();
    259 
    260   // Record stats on various errors.
    261   static void SdchErrorRecovery(ProblemCodes problem);
    262 
    263   // Register a fetcher that this class can use to obtain dictionaries.
          // The scoped_ptr is passed by value, so ownership of |fetcher| transfers to
          // this manager.
    264   void set_sdch_fetcher(scoped_ptr<SdchFetcher> fetcher);
    265 
    266   // Enables or disables SDCH compression.
    267   static void EnableSdchSupport(bool enabled);
    268 
          // Returns the process-wide flag g_sdch_enabled_.
    269   static bool sdch_enabled() { return g_sdch_enabled_; }
    270 
    271   // Enables or disables SDCH compression over secure connection.
    272   static void EnableSecureSchemeSupport(bool enabled);
    273 
          // Returns the process-wide flag g_secure_scheme_supported_.
    274   static bool secure_scheme_supported() { return g_secure_scheme_supported_; }
    275 
    276   // Briefly prevent further advertising of SDCH on this domain (if SDCH is
    277   // enabled). After enough calls to IsInSupportedDomain() the blacklisting
    278   // will be removed. Additional blacklists take exponentially more calls
    279   // to IsInSupportedDomain() before the blacklisting is undone.
    280   // Used when filter errors are found from a given domain, but it is plausible
    281   // that the cause is temporary (such as application startup, where cached
    282   // entries are used, but a dictionary is not yet loaded).
    283   void BlacklistDomain(const GURL& url, ProblemCodes blacklist_reason);
    284 
    285   // Used when SEVERE filter errors are found from a given domain, to prevent
    286   // further use of SDCH on that domain.
    287   void BlacklistDomainForever(const GURL& url, ProblemCodes blacklist_reason);
    288 
    289   // Unit test only, this function resets enabling of sdch, and clears the
    290   // blacklist.
    291   void ClearBlacklistings();
    292 
    293   // Unit test only, this function resets the blacklisting count for a domain.
    294   void ClearDomainBlacklisting(const std::string& domain);
    295 
    296   // Unit test only: indicate how many more times a domain will be blacklisted.
    297   int BlackListDomainCount(const std::string& domain);
    298 
    299   // Unit test only: Indicate what current blacklist increment is for a domain.
    300   int BlacklistDomainExponential(const std::string& domain);
    301 
    302   // Check to see if SDCH is enabled (globally), and the given URL is in a
    303   // supported domain (i.e., not blacklisted, and either the specific supported
    304   // domain, or all domains were assumed supported). If it is blacklisted,
    305   // reduce by 1 the number of times it will be reported as blacklisted.
    306   bool IsInSupportedDomain(const GURL& url);
    307 
    308   // Schedule the URL fetching to load a dictionary. This will always return
    309   // before the dictionary is actually loaded and added.
    310   // After the implied task completes, the dictionary will have been
    311   // cached in memory.
    312   void FetchDictionary(const GURL& request_url, const GURL& dictionary_url);
    313 
    314   // Security test function used before initiating a FetchDictionary.
    315   // Return true if fetch is legal.
    316   bool CanFetchDictionary(const GURL& referring_url,
    317                           const GURL& dictionary_url) const;
    318 
    319   // Find the vcdiff dictionary (the body of the sdch dictionary that appears
    320   // after the meta-data headers like Domain:...) with the given |server_hash|
    321   // to use to decompress data that arrived as SDCH encoded content. Check to
    322   // be sure the returned |dictionary| can be used for decoding content supplied
    323   // in response to a request for |referring_url|.
    324   // Return null in |dictionary| if there is no matching legal dictionary.
    325   void GetVcdiffDictionary(const std::string& server_hash,
    326                            const GURL& referring_url,
    327                            scoped_refptr<Dictionary>* dictionary);
    328 
    329   // Get list of available (pre-cached) dictionaries that we have already loaded
    330   // into memory. The list is a comma separated list of (client) hashes per
    331   // the SDCH spec.
    332   void GetAvailDictionaryList(const GURL& target_url, std::string* list);
    333 
    334   // Construct the pair of hashes for client and server to identify an SDCH
    335   // dictionary. This is only made public to facilitate unit testing, but is
    336   // otherwise private.
    337   static void GenerateHash(const std::string& dictionary_text,
    338                            std::string* client_hash, std::string* server_hash);
    339 
    340   // For Latency testing only, we need to know if we've succeeded in doing a
    341   // round trip before starting our comparative tests. If ever we encounter
    342   // problems with SDCH, we opt-out of the test unless/until we perform a
    343   // complete SDCH decoding.
    344   bool AllowLatencyExperiment(const GURL& url) const;
    345 
          // Setter counterpart of AllowLatencyExperiment().
          // NOTE(review): presumably adds or removes |url|'s host from
          // allow_latency_experiment_ depending on |enable| -- confirm in the .cc.
    346   void SetAllowLatencyExperiment(const GURL& url, bool enable);
    347 
          // Test-only accessor for fetches_count_for_testing_.
    348   int GetFetchesCountForTesting() const {
    349     return fetches_count_for_testing_;
    350   }
    351 
    352   // Implementation of SdchFetcher::Delegate.
    353 
    354   // Add an SDCH dictionary to our list of available
    355   // dictionaries. This addition will fail if addition is illegal
    356   // (data in the dictionary is not acceptable from the
    357   // dictionary_url; dictionary already added, etc.).
    358   virtual void AddSdchDictionary(const std::string& dictionary_text,
    359                                  const GURL& dictionary_url) OVERRIDE;
    360 
    361  private:
          // Per-domain blacklist bookkeeping; default-constructed with zero counts
          // and MIN_PROBLEM_CODE as the reason.
    362   struct BlacklistInfo {
    363     BlacklistInfo()
    364         : count(0),
    365           exponential_count(0),
    366           reason(MIN_PROBLEM_CODE) {}
    367 
    368     int count;                   // # of times to refuse SDCH advertisement.
    369     int exponential_count;       // Current exponential backoff ratchet.
    370     ProblemCodes reason;         // Why domain was blacklisted.
    371 
    372   };
          // Maps a string key to its BlacklistInfo.
          // NOTE(review): the key is presumably the domain name -- confirm.
    373   typedef std::map<std::string, BlacklistInfo> DomainBlacklistInfo;
          // Set of strings; used below for the hostnames allowed in the latency
          // experiment.
    374   typedef std::set<std::string> ExperimentSet;
    375 
    376   // A map of dictionaries info indexed by the hash that the server provides.
    377   typedef std::map<std::string, scoped_refptr<Dictionary> > DictionaryMap;
    378 
    379   // Support SDCH compression, by advertising in headers.
    380   static bool g_sdch_enabled_;
    381 
    382   // Support SDCH compression for HTTPS requests and responses. When supported,
    383   // HTTPS applicable dictionaries MUST have been acquired securely via HTTPS.
    384   static bool g_secure_scheme_supported_;
    385 
    386   // A simple implementation of an RFC 3548 "URL safe" base64 encoder.
    387   static void UrlSafeBase64Encode(const std::string& input,
    388                                   std::string* output);
          // The memory-resident dictionaries, indexed by server hash (see
          // DictionaryMap).
    389   DictionaryMap dictionaries_;
    390 
    391   // An instance that can fetch a dictionary given a URL.
    392   scoped_ptr<SdchFetcher> fetcher_;
    393 
    394   // List domains where decode failures have required disabling sdch.
    395   DomainBlacklistInfo blacklisted_domains_;
    396 
    397   // List of hostnames for which a latency experiment is allowed (because a
    398   // round trip test has recently passed).
    399   ExperimentSet allow_latency_experiment_;
    400 
          // Counter exposed through GetFetchesCountForTesting().
          // NOTE(review): presumably counts dictionary fetches that were scheduled --
          // confirm in the .cc.
    401   int fetches_count_for_testing_;
    402 
    403   DISALLOW_COPY_AND_ASSIGN(SdchManager);
    404 };
    405 
    406 }  // namespace net
    407 
    408 #endif  // NET_BASE_SDCH_MANAGER_H_
    409