Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
// Provides a global database of differential decompression dictionaries for
// the SDCH filter (which processes sdch encoded content).
      7 
      8 // Exactly one instance of SdchManager is built, and all references are made
      9 // into that collection.
     10 //
     11 // The SdchManager maintains a collection of memory resident dictionaries.  It
     12 // can find a dictionary (based on a server specification of a hash), store a
     13 // dictionary, and make judgements about what URLs can use, set, etc. a
     14 // dictionary.
     15 
     16 // These dictionaries are acquired over the net, and include a header
     17 // (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF
     18 // module) to decompress data.
     19 
     20 #ifndef NET_BASE_SDCH_MANAGER_H_
     21 #define NET_BASE_SDCH_MANAGER_H_
     22 #pragma once
     23 
     24 #include <map>
     25 #include <set>
     26 #include <string>
     27 
     28 #include "base/gtest_prod_util.h"
     29 #include "base/memory/ref_counted.h"
     30 #include "base/memory/scoped_ptr.h"
     31 #include "base/time.h"
     32 #include "googleurl/src/gurl.h"
     33 
     34 namespace net {
     35 
     36 //------------------------------------------------------------------------------
     37 // Create a public interface to help us load SDCH dictionaries.
     38 // The SdchManager class allows registration to support this interface.
// A browser may register a fetcher that is used by the dictionary managers to
// get data from a specified URL.  This allows us to use very high level browser
// functionality in this base (when the functionality can be provided).
     42 class SdchFetcher {
     43  public:
     44   SdchFetcher() {}
     45   virtual ~SdchFetcher() {}
     46 
     47   // The Schedule() method is called when there is a need to get a dictionary
     48   // from a server.  The callee is responsible for getting that dictionary_text,
     49   // and then calling back to AddSdchDictionary() to the SdchManager instance.
     50   virtual void Schedule(const GURL& dictionary_url) = 0;
     51  private:
     52   DISALLOW_COPY_AND_ASSIGN(SdchFetcher);
     53 };
     54 
     55 //------------------------------------------------------------------------------
     56 
     57 class SdchManager {
     58  public:
     59   // A list of errors that appeared and were either resolved, or used to turn
     60   // off sdch encoding.
     61   enum ProblemCodes {
     62     MIN_PROBLEM_CODE,
     63 
     64     // Content-encoding correction problems.
     65     ADDED_CONTENT_ENCODING = 1,
     66     FIXED_CONTENT_ENCODING = 2,
     67     FIXED_CONTENT_ENCODINGS = 3,
     68 
     69     // Content decoding errors.
     70     DECODE_HEADER_ERROR = 4,
     71     DECODE_BODY_ERROR = 5,
     72 
     73     // More content-encoding correction problems.
     74     OPTIONAL_GUNZIP_ENCODING_ADDED = 6,
     75 
     76     // Content encoding correction when we're not even tagged as HTML!?!
     77     BINARY_ADDED_CONTENT_ENCODING = 7,
     78     BINARY_FIXED_CONTENT_ENCODING = 8,
     79     BINARY_FIXED_CONTENT_ENCODINGS = 9,
     80 
     81     // Dictionary selection for use problems.
     82     DICTIONARY_FOUND_HAS_WRONG_DOMAIN = 10,
     83     DICTIONARY_FOUND_HAS_WRONG_PORT_LIST = 11,
     84     DICTIONARY_FOUND_HAS_WRONG_PATH = 12,
     85     DICTIONARY_FOUND_HAS_WRONG_SCHEME = 13,
     86     DICTIONARY_HASH_NOT_FOUND = 14,
     87     DICTIONARY_HASH_MALFORMED = 15,
     88 
     89     // Dictionary saving problems.
     90     DICTIONARY_HAS_NO_HEADER = 20,
     91     DICTIONARY_HEADER_LINE_MISSING_COLON = 21,
     92     DICTIONARY_MISSING_DOMAIN_SPECIFIER = 22,
     93     DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN = 23,
     94     DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL = 24,
     95     DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL = 25,
     96     DICTIONARY_HAS_NO_TEXT = 26,
     97     DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX = 27,
     98 
     99     // Dictionary loading problems.
    100     DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30,
    101     DICTIONARY_SELECTED_FOR_SSL = 31,
    102     DICTIONARY_ALREADY_LOADED = 32,
    103     DICTIONARY_SELECTED_FROM_NON_HTTP = 33,
    104     DICTIONARY_IS_TOO_LARGE= 34,
    105     DICTIONARY_COUNT_EXCEEDED = 35,
    106     DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD = 36,
    107     DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD = 37,
    108 
    109     // Failsafe hack.
    110     ATTEMPT_TO_DECODE_NON_HTTP_DATA = 40,
    111 
    112 
    113     // Content-Encoding problems detected, with no action taken.
    114     MULTIENCODING_FOR_NON_SDCH_REQUEST = 50,
    115     SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST = 51,
    116 
    117     // Dictionary manager issues.
    118     DOMAIN_BLACKLIST_INCLUDES_TARGET = 61,
    119 
    120     // Problematic decode recovery methods.
    121     META_REFRESH_RECOVERY = 70,            // Dictionary not found.
    122     // defunct =  71, // Almost the same as META_REFRESH_UNSUPPORTED.
    123     // defunct = 72,  // Almost the same as CACHED_META_REFRESH_UNSUPPORTED.
    124     // defunct = 73,  // PASSING_THROUGH_NON_SDCH plus DISCARD_TENTATIVE_SDCH.
    125     META_REFRESH_UNSUPPORTED = 74,         // Unrecoverable error.
    126     CACHED_META_REFRESH_UNSUPPORTED = 75,  // As above, but pulled from cache.
    127     PASSING_THROUGH_NON_SDCH = 76,  // Tagged sdch but missing dictionary-hash.
    128     INCOMPLETE_SDCH_CONTENT = 77,   // Last window was not completely decoded.
    129     PASS_THROUGH_404_CODE = 78,     // URL not found message passing through.
    130 
    131     // This next report is very common, and not really an error scenario, but
    132     // it exercises the error recovery logic.
    133     PASS_THROUGH_OLD_CACHED = 79,   // Back button got pre-SDCH cached content.
    134 
    135     // Common decoded recovery methods.
    136     META_REFRESH_CACHED_RECOVERY = 80,  // Probably startup tab loading.
    137     DISCARD_TENTATIVE_SDCH = 81,        // Server decided not to use sdch.
    138 
    139     // Non SDCH problems, only accounted for to make stat counting complete
    140     // (i.e., be able to be sure all dictionary advertisements are accounted
    141     // for).
    142 
    143     UNFLUSHED_CONTENT = 90,    // Possible error in filter chaining.
    144     // defunct = 91,           // MISSING_TIME_STATS (Should never happen.)
    145     CACHE_DECODED = 92,        // No timing stats recorded.
    146     // defunct = 93,           // OVER_10_MINUTES (No timing stats recorded.)
    147     UNINITIALIZED = 94,        // Filter never even got initialized.
    148     PRIOR_TO_DICTIONARY = 95,  // We hadn't even parsed a dictionary selector.
    149     DECODE_ERROR = 96,         // Something went wrong during decode.
    150 
    151     // Problem during the latency test.
    152     LATENCY_TEST_DISALLOWED = 100,  // SDCH now failing, but it worked before!
    153 
    154     MAX_PROBLEM_CODE  // Used to bound histogram.
    155   };
    156 
    157   // Use the following static limits to block DOS attacks until we implement
    158   // a cached dictionary evicition strategy.
    159   static const size_t kMaxDictionarySize;
    160   static const size_t kMaxDictionaryCount;
    161 
    162   // There is one instance of |Dictionary| for each memory-cached SDCH
    163   // dictionary.
    164   class Dictionary : public base::RefCounted<Dictionary> {
    165    public:
    166     // Sdch filters can get our text to use in decoding compressed data.
    167     const std::string& text() const { return text_; }
    168 
    169    private:
    170     friend class base::RefCounted<Dictionary>;
    171     friend class SdchManager;  // Only manager can construct an instance.
    172     FRIEND_TEST_ALL_PREFIXES(SdchFilterTest, PathMatch);
    173 
    174     // Construct a vc-diff usable dictionary from the dictionary_text starting
    175     // at the given offset.  The supplied client_hash should be used to
    176     // advertise the dictionary's availability relative to the suppplied URL.
    177     Dictionary(const std::string& dictionary_text,
    178                size_t offset,
    179                const std::string& client_hash,
    180                const GURL& url,
    181                const std::string& domain,
    182                const std::string& path,
    183                const base::Time& expiration,
    184                const std::set<int>& ports);
    185     ~Dictionary();
    186 
    187     const GURL& url() const { return url_; }
    188     const std::string& client_hash() const { return client_hash_; }
    189 
    190     // Security method to check if we can advertise this dictionary for use
    191     // if the |target_url| returns SDCH compressed data.
    192     bool CanAdvertise(const GURL& target_url);
    193 
    194     // Security methods to check if we can establish a new dictionary with the
    195     // given data, that arrived in response to get of dictionary_url.
    196     static bool CanSet(const std::string& domain, const std::string& path,
    197                        const std::set<int>& ports, const GURL& dictionary_url);
    198 
    199     // Security method to check if we can use a dictionary to decompress a
    200     // target that arrived with a reference to this dictionary.
    201     bool CanUse(const GURL& referring_url);
    202 
    203     // Compare paths to see if they "match" for dictionary use.
    204     static bool PathMatch(const std::string& path,
    205                           const std::string& restriction);
    206 
    207     // Compare domains to see if the "match" for dictionary use.
    208     static bool DomainMatch(const GURL& url, const std::string& restriction);
    209 
    210 
    211     // The actual text of the dictionary.
    212     std::string text_;
    213 
    214     // Part of the hash of text_ that the client uses to advertise the fact that
    215     // it has a specific dictionary pre-cached.
    216     std::string client_hash_;
    217 
    218     // The GURL that arrived with the text_ in a URL request to specify where
    219     // this dictionary may be used.
    220     const GURL url_;
    221 
    222     // Metadate "headers" in before dictionary text contained the following:
    223     // Each dictionary payload consists of several headers, followed by the text
    224     // of the dictionary.  The following are the known headers.
    225     const std::string domain_;
    226     const std::string path_;
    227     const base::Time expiration_;  // Implied by max-age.
    228     const std::set<int> ports_;
    229 
    230     DISALLOW_COPY_AND_ASSIGN(Dictionary);
    231   };
    232 
    233   SdchManager();
    234   ~SdchManager();
    235 
    236   // Discontinue fetching of dictionaries, as we're now shutting down.
    237   static void Shutdown();
    238 
    239   // Provide access to the single instance of this class.
    240   static SdchManager* Global();
    241 
    242   // Record stats on various errors.
    243   static void SdchErrorRecovery(ProblemCodes problem);
    244 
    245   // Register a fetcher that this class can use to obtain dictionaries.
    246   void set_sdch_fetcher(SdchFetcher* fetcher) { fetcher_.reset(fetcher); }
    247 
    248   // If called with an empty string, advertise and support sdch on all domains.
    249   // If called with a specific string, advertise and support only the specified
    250   // domain.  Function assumes the existence of a global SdchManager instance.
    251   void EnableSdchSupport(const std::string& domain);
    252 
    253   static bool sdch_enabled() { return global_ && global_->sdch_enabled_; }
    254 
    255   // Briefly prevent further advertising of SDCH on this domain (if SDCH is
    256   // enabled). After enough calls to IsInSupportedDomain() the blacklisting
    257   // will be removed.  Additional blacklists take exponentially more calls
    258   // to IsInSupportedDomain() before the blacklisting is undone.
    259   // Used when filter errors are found from a given domain, but it is plausible
    260   // that the cause is temporary (such as application startup, where cached
    261   // entries are used, but a dictionary is not yet loaded).
    262   static void BlacklistDomain(const GURL& url);
    263 
    264   // Used when SEVERE filter errors are found from a given domain, to prevent
    265   // further use of SDCH on that domain.
    266   static void BlacklistDomainForever(const GURL& url);
    267 
    268   // Unit test only, this function resets enabling of sdch, and clears the
    269   // blacklist.
    270   static void ClearBlacklistings();
    271 
    272   // Unit test only, this function resets the blacklisting count for a domain.
    273   static void ClearDomainBlacklisting(const std::string& domain);
    274 
    275   // Unit test only: indicate how many more times a domain will be blacklisted.
    276   static int BlackListDomainCount(const std::string& domain);
    277 
    278   // Unit test only: Indicate what current blacklist increment is for a domain.
    279   static int BlacklistDomainExponential(const std::string& domain);
    280 
    281   // Check to see if SDCH is enabled (globally), and the given URL is in a
    282   // supported domain (i.e., not blacklisted, and either the specific supported
    283   // domain, or all domains were assumed supported).  If it is blacklist, reduce
    284   // by 1 the number of times it will be reported as blacklisted.
    285   bool IsInSupportedDomain(const GURL& url);
    286 
    287   // Schedule the URL fetching to load a dictionary. This will always return
    288   // before the dictionary is actually loaded and added.
    289   // After the implied task does completes, the dictionary will have been
    290   // cached in memory.
    291   void FetchDictionary(const GURL& request_url, const GURL& dictionary_url);
    292 
    293   // Security test function used before initiating a FetchDictionary.
    294   // Return true if fetch is legal.
    295   bool CanFetchDictionary(const GURL& referring_url,
    296                           const GURL& dictionary_url) const;
    297 
    298   // Add an SDCH dictionary to our list of availible dictionaries. This addition
    299   // will fail (return false) if addition is illegal (data in the dictionary is
    300   // not acceptable from the dictionary_url; dictionary already added, etc.).
    301   bool AddSdchDictionary(const std::string& dictionary_text,
    302                          const GURL& dictionary_url);
    303 
    304   // Find the vcdiff dictionary (the body of the sdch dictionary that appears
    305   // after the meta-data headers like Domain:...) with the given |server_hash|
    306   // to use to decompreses data that arrived as SDCH encoded content.  Check to
    307   // be sure the returned |dictionary| can be used for decoding content supplied
    308   // in response to a request for |referring_url|.
    309   // Caller is responsible for AddRef()ing the dictionary, and Release()ing it
    310   // when done.
    311   // Return null in |dictionary| if there is no matching legal dictionary.
    312   void GetVcdiffDictionary(const std::string& server_hash,
    313                            const GURL& referring_url,
    314                            Dictionary** dictionary);
    315 
    316   // Get list of available (pre-cached) dictionaries that we have already loaded
    317   // into memory.  The list is a comma separated list of (client) hashes per
    318   // the SDCH spec.
    319   void GetAvailDictionaryList(const GURL& target_url, std::string* list);
    320 
    321   // Construct the pair of hashes for client and server to identify an SDCH
    322   // dictionary.  This is only made public to facilitate unit testing, but is
    323   // otherwise private
    324   static void GenerateHash(const std::string& dictionary_text,
    325                            std::string* client_hash, std::string* server_hash);
    326 
    327   // For Latency testing only, we need to know if we've succeeded in doing a
    328   // round trip before starting our comparative tests.  If ever we encounter
    329   // problems with SDCH, we opt-out of the test unless/until we perform a
    330   // complete SDCH decoding.
    331   bool AllowLatencyExperiment(const GURL& url) const;
    332 
    333   void SetAllowLatencyExperiment(const GURL& url, bool enable);
    334 
    335  private:
    336   typedef std::map<std::string, int> DomainCounter;
    337   typedef std::set<std::string> ExperimentSet;
    338 
    339   // A map of dictionaries info indexed by the hash that the server provides.
    340   typedef std::map<std::string, Dictionary*> DictionaryMap;
    341 
    342   // The one global instance of that holds all the data.
    343   static SdchManager* global_;
    344 
    345   // A simple implementation of a RFC 3548 "URL safe" base64 encoder.
    346   static void UrlSafeBase64Encode(const std::string& input,
    347                                   std::string* output);
    348   DictionaryMap dictionaries_;
    349 
    350   // An instance that can fetch a dictionary given a URL.
    351   scoped_ptr<SdchFetcher> fetcher_;
    352 
    353   // Support SDCH compression, by advertising in headers.
    354   bool sdch_enabled_;
    355 
    356   // Empty string means all domains.  Non-empty means support only the given
    357   // domain is supported.
    358   std::string supported_domain_;
    359 
    360   // List domains where decode failures have required disabling sdch, along with
    361   // count of how many additonal uses should be blacklisted.
    362   DomainCounter blacklisted_domains_;
    363 
    364   // Support exponential backoff in number of domain accesses before
    365   // blacklisting expires.
    366   DomainCounter exponential_blacklist_count;
    367 
    368   // List of hostnames for which a latency experiment is allowed (because a
    369   // round trip test has recently passed).
    370   ExperimentSet allow_latency_experiment_;
    371 
    372   DISALLOW_COPY_AND_ASSIGN(SdchManager);
    373 };
    374 
    375 }  // namespace net
    376 
    377 #endif  // NET_BASE_SDCH_MANAGER_H_
    378