1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Provides global database of differential decompression dictionaries for the 6 // SDCH filter (processes sdch enconded content). 7 8 // Exactly one instance of SdchManager is built, and all references are made 9 // into that collection. 10 // 11 // The SdchManager maintains a collection of memory resident dictionaries. It 12 // can find a dictionary (based on a server specification of a hash), store a 13 // dictionary, and make judgements about what URLs can use, set, etc. a 14 // dictionary. 15 16 // These dictionaries are acquired over the net, and include a header 17 // (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF 18 // module) to decompress data. 19 20 #ifndef NET_BASE_SDCH_MANAGER_H_ 21 #define NET_BASE_SDCH_MANAGER_H_ 22 #pragma once 23 24 #include <map> 25 #include <set> 26 #include <string> 27 28 #include "base/gtest_prod_util.h" 29 #include "base/memory/ref_counted.h" 30 #include "base/memory/scoped_ptr.h" 31 #include "base/time.h" 32 #include "googleurl/src/gurl.h" 33 34 namespace net { 35 36 //------------------------------------------------------------------------------ 37 // Create a public interface to help us load SDCH dictionaries. 38 // The SdchManager class allows registration to support this interface. 39 // A browser may register a fetcher that is used by the dictionary managers to 40 // get data from a specified URL. This allows us to use very high level browser 41 // functionality in this base (when the functionaity can be provided). 42 class SdchFetcher { 43 public: 44 SdchFetcher() {} 45 virtual ~SdchFetcher() {} 46 47 // The Schedule() method is called when there is a need to get a dictionary 48 // from a server. The callee is responsible for getting that dictionary_text, 49 // and then calling back to AddSdchDictionary() to the SdchManager instance. 50 virtual void Schedule(const GURL& dictionary_url) = 0; 51 private: 52 DISALLOW_COPY_AND_ASSIGN(SdchFetcher); 53 }; 54 55 //------------------------------------------------------------------------------ 56 57 class SdchManager { 58 public: 59 // A list of errors that appeared and were either resolved, or used to turn 60 // off sdch encoding. 61 enum ProblemCodes { 62 MIN_PROBLEM_CODE, 63 64 // Content-encoding correction problems. 65 ADDED_CONTENT_ENCODING = 1, 66 FIXED_CONTENT_ENCODING = 2, 67 FIXED_CONTENT_ENCODINGS = 3, 68 69 // Content decoding errors. 70 DECODE_HEADER_ERROR = 4, 71 DECODE_BODY_ERROR = 5, 72 73 // More content-encoding correction problems. 74 OPTIONAL_GUNZIP_ENCODING_ADDED = 6, 75 76 // Content encoding correction when we're not even tagged as HTML!?! 77 BINARY_ADDED_CONTENT_ENCODING = 7, 78 BINARY_FIXED_CONTENT_ENCODING = 8, 79 BINARY_FIXED_CONTENT_ENCODINGS = 9, 80 81 // Dictionary selection for use problems. 82 DICTIONARY_FOUND_HAS_WRONG_DOMAIN = 10, 83 DICTIONARY_FOUND_HAS_WRONG_PORT_LIST = 11, 84 DICTIONARY_FOUND_HAS_WRONG_PATH = 12, 85 DICTIONARY_FOUND_HAS_WRONG_SCHEME = 13, 86 DICTIONARY_HASH_NOT_FOUND = 14, 87 DICTIONARY_HASH_MALFORMED = 15, 88 89 // Dictionary saving problems. 90 DICTIONARY_HAS_NO_HEADER = 20, 91 DICTIONARY_HEADER_LINE_MISSING_COLON = 21, 92 DICTIONARY_MISSING_DOMAIN_SPECIFIER = 22, 93 DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN = 23, 94 DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL = 24, 95 DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL = 25, 96 DICTIONARY_HAS_NO_TEXT = 26, 97 DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX = 27, 98 99 // Dictionary loading problems. 100 DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30, 101 DICTIONARY_SELECTED_FOR_SSL = 31, 102 DICTIONARY_ALREADY_LOADED = 32, 103 DICTIONARY_SELECTED_FROM_NON_HTTP = 33, 104 DICTIONARY_IS_TOO_LARGE= 34, 105 DICTIONARY_COUNT_EXCEEDED = 35, 106 DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD = 36, 107 DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD = 37, 108 109 // Failsafe hack. 110 ATTEMPT_TO_DECODE_NON_HTTP_DATA = 40, 111 112 113 // Content-Encoding problems detected, with no action taken. 114 MULTIENCODING_FOR_NON_SDCH_REQUEST = 50, 115 SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST = 51, 116 117 // Dictionary manager issues. 118 DOMAIN_BLACKLIST_INCLUDES_TARGET = 61, 119 120 // Problematic decode recovery methods. 121 META_REFRESH_RECOVERY = 70, // Dictionary not found. 122 // defunct = 71, // Almost the same as META_REFRESH_UNSUPPORTED. 123 // defunct = 72, // Almost the same as CACHED_META_REFRESH_UNSUPPORTED. 124 // defunct = 73, // PASSING_THROUGH_NON_SDCH plus DISCARD_TENTATIVE_SDCH. 125 META_REFRESH_UNSUPPORTED = 74, // Unrecoverable error. 126 CACHED_META_REFRESH_UNSUPPORTED = 75, // As above, but pulled from cache. 127 PASSING_THROUGH_NON_SDCH = 76, // Tagged sdch but missing dictionary-hash. 128 INCOMPLETE_SDCH_CONTENT = 77, // Last window was not completely decoded. 129 PASS_THROUGH_404_CODE = 78, // URL not found message passing through. 130 131 // This next report is very common, and not really an error scenario, but 132 // it exercises the error recovery logic. 133 PASS_THROUGH_OLD_CACHED = 79, // Back button got pre-SDCH cached content. 134 135 // Common decoded recovery methods. 136 META_REFRESH_CACHED_RECOVERY = 80, // Probably startup tab loading. 137 DISCARD_TENTATIVE_SDCH = 81, // Server decided not to use sdch. 138 139 // Non SDCH problems, only accounted for to make stat counting complete 140 // (i.e., be able to be sure all dictionary advertisements are accounted 141 // for). 142 143 UNFLUSHED_CONTENT = 90, // Possible error in filter chaining. 144 // defunct = 91, // MISSING_TIME_STATS (Should never happen.) 145 CACHE_DECODED = 92, // No timing stats recorded. 146 // defunct = 93, // OVER_10_MINUTES (No timing stats recorded.) 147 UNINITIALIZED = 94, // Filter never even got initialized. 148 PRIOR_TO_DICTIONARY = 95, // We hadn't even parsed a dictionary selector. 149 DECODE_ERROR = 96, // Something went wrong during decode. 150 151 // Problem during the latency test. 152 LATENCY_TEST_DISALLOWED = 100, // SDCH now failing, but it worked before! 153 154 MAX_PROBLEM_CODE // Used to bound histogram. 155 }; 156 157 // Use the following static limits to block DOS attacks until we implement 158 // a cached dictionary evicition strategy. 159 static const size_t kMaxDictionarySize; 160 static const size_t kMaxDictionaryCount; 161 162 // There is one instance of |Dictionary| for each memory-cached SDCH 163 // dictionary. 164 class Dictionary : public base::RefCounted<Dictionary> { 165 public: 166 // Sdch filters can get our text to use in decoding compressed data. 167 const std::string& text() const { return text_; } 168 169 private: 170 friend class base::RefCounted<Dictionary>; 171 friend class SdchManager; // Only manager can construct an instance. 172 FRIEND_TEST_ALL_PREFIXES(SdchFilterTest, PathMatch); 173 174 // Construct a vc-diff usable dictionary from the dictionary_text starting 175 // at the given offset. The supplied client_hash should be used to 176 // advertise the dictionary's availability relative to the suppplied URL. 177 Dictionary(const std::string& dictionary_text, 178 size_t offset, 179 const std::string& client_hash, 180 const GURL& url, 181 const std::string& domain, 182 const std::string& path, 183 const base::Time& expiration, 184 const std::set<int>& ports); 185 ~Dictionary(); 186 187 const GURL& url() const { return url_; } 188 const std::string& client_hash() const { return client_hash_; } 189 190 // Security method to check if we can advertise this dictionary for use 191 // if the |target_url| returns SDCH compressed data. 192 bool CanAdvertise(const GURL& target_url); 193 194 // Security methods to check if we can establish a new dictionary with the 195 // given data, that arrived in response to get of dictionary_url. 196 static bool CanSet(const std::string& domain, const std::string& path, 197 const std::set<int>& ports, const GURL& dictionary_url); 198 199 // Security method to check if we can use a dictionary to decompress a 200 // target that arrived with a reference to this dictionary. 201 bool CanUse(const GURL& referring_url); 202 203 // Compare paths to see if they "match" for dictionary use. 204 static bool PathMatch(const std::string& path, 205 const std::string& restriction); 206 207 // Compare domains to see if the "match" for dictionary use. 208 static bool DomainMatch(const GURL& url, const std::string& restriction); 209 210 211 // The actual text of the dictionary. 212 std::string text_; 213 214 // Part of the hash of text_ that the client uses to advertise the fact that 215 // it has a specific dictionary pre-cached. 216 std::string client_hash_; 217 218 // The GURL that arrived with the text_ in a URL request to specify where 219 // this dictionary may be used. 220 const GURL url_; 221 222 // Metadate "headers" in before dictionary text contained the following: 223 // Each dictionary payload consists of several headers, followed by the text 224 // of the dictionary. The following are the known headers. 225 const std::string domain_; 226 const std::string path_; 227 const base::Time expiration_; // Implied by max-age. 228 const std::set<int> ports_; 229 230 DISALLOW_COPY_AND_ASSIGN(Dictionary); 231 }; 232 233 SdchManager(); 234 ~SdchManager(); 235 236 // Discontinue fetching of dictionaries, as we're now shutting down. 237 static void Shutdown(); 238 239 // Provide access to the single instance of this class. 240 static SdchManager* Global(); 241 242 // Record stats on various errors. 243 static void SdchErrorRecovery(ProblemCodes problem); 244 245 // Register a fetcher that this class can use to obtain dictionaries. 246 void set_sdch_fetcher(SdchFetcher* fetcher) { fetcher_.reset(fetcher); } 247 248 // If called with an empty string, advertise and support sdch on all domains. 249 // If called with a specific string, advertise and support only the specified 250 // domain. Function assumes the existence of a global SdchManager instance. 251 void EnableSdchSupport(const std::string& domain); 252 253 static bool sdch_enabled() { return global_ && global_->sdch_enabled_; } 254 255 // Briefly prevent further advertising of SDCH on this domain (if SDCH is 256 // enabled). After enough calls to IsInSupportedDomain() the blacklisting 257 // will be removed. Additional blacklists take exponentially more calls 258 // to IsInSupportedDomain() before the blacklisting is undone. 259 // Used when filter errors are found from a given domain, but it is plausible 260 // that the cause is temporary (such as application startup, where cached 261 // entries are used, but a dictionary is not yet loaded). 262 static void BlacklistDomain(const GURL& url); 263 264 // Used when SEVERE filter errors are found from a given domain, to prevent 265 // further use of SDCH on that domain. 266 static void BlacklistDomainForever(const GURL& url); 267 268 // Unit test only, this function resets enabling of sdch, and clears the 269 // blacklist. 270 static void ClearBlacklistings(); 271 272 // Unit test only, this function resets the blacklisting count for a domain. 273 static void ClearDomainBlacklisting(const std::string& domain); 274 275 // Unit test only: indicate how many more times a domain will be blacklisted. 276 static int BlackListDomainCount(const std::string& domain); 277 278 // Unit test only: Indicate what current blacklist increment is for a domain. 279 static int BlacklistDomainExponential(const std::string& domain); 280 281 // Check to see if SDCH is enabled (globally), and the given URL is in a 282 // supported domain (i.e., not blacklisted, and either the specific supported 283 // domain, or all domains were assumed supported). If it is blacklist, reduce 284 // by 1 the number of times it will be reported as blacklisted. 285 bool IsInSupportedDomain(const GURL& url); 286 287 // Schedule the URL fetching to load a dictionary. This will always return 288 // before the dictionary is actually loaded and added. 289 // After the implied task does completes, the dictionary will have been 290 // cached in memory. 291 void FetchDictionary(const GURL& request_url, const GURL& dictionary_url); 292 293 // Security test function used before initiating a FetchDictionary. 294 // Return true if fetch is legal. 295 bool CanFetchDictionary(const GURL& referring_url, 296 const GURL& dictionary_url) const; 297 298 // Add an SDCH dictionary to our list of availible dictionaries. This addition 299 // will fail (return false) if addition is illegal (data in the dictionary is 300 // not acceptable from the dictionary_url; dictionary already added, etc.). 301 bool AddSdchDictionary(const std::string& dictionary_text, 302 const GURL& dictionary_url); 303 304 // Find the vcdiff dictionary (the body of the sdch dictionary that appears 305 // after the meta-data headers like Domain:...) with the given |server_hash| 306 // to use to decompreses data that arrived as SDCH encoded content. Check to 307 // be sure the returned |dictionary| can be used for decoding content supplied 308 // in response to a request for |referring_url|. 309 // Caller is responsible for AddRef()ing the dictionary, and Release()ing it 310 // when done. 311 // Return null in |dictionary| if there is no matching legal dictionary. 312 void GetVcdiffDictionary(const std::string& server_hash, 313 const GURL& referring_url, 314 Dictionary** dictionary); 315 316 // Get list of available (pre-cached) dictionaries that we have already loaded 317 // into memory. The list is a comma separated list of (client) hashes per 318 // the SDCH spec. 319 void GetAvailDictionaryList(const GURL& target_url, std::string* list); 320 321 // Construct the pair of hashes for client and server to identify an SDCH 322 // dictionary. This is only made public to facilitate unit testing, but is 323 // otherwise private 324 static void GenerateHash(const std::string& dictionary_text, 325 std::string* client_hash, std::string* server_hash); 326 327 // For Latency testing only, we need to know if we've succeeded in doing a 328 // round trip before starting our comparative tests. If ever we encounter 329 // problems with SDCH, we opt-out of the test unless/until we perform a 330 // complete SDCH decoding. 331 bool AllowLatencyExperiment(const GURL& url) const; 332 333 void SetAllowLatencyExperiment(const GURL& url, bool enable); 334 335 private: 336 typedef std::map<std::string, int> DomainCounter; 337 typedef std::set<std::string> ExperimentSet; 338 339 // A map of dictionaries info indexed by the hash that the server provides. 340 typedef std::map<std::string, Dictionary*> DictionaryMap; 341 342 // The one global instance of that holds all the data. 343 static SdchManager* global_; 344 345 // A simple implementation of a RFC 3548 "URL safe" base64 encoder. 346 static void UrlSafeBase64Encode(const std::string& input, 347 std::string* output); 348 DictionaryMap dictionaries_; 349 350 // An instance that can fetch a dictionary given a URL. 351 scoped_ptr<SdchFetcher> fetcher_; 352 353 // Support SDCH compression, by advertising in headers. 354 bool sdch_enabled_; 355 356 // Empty string means all domains. Non-empty means support only the given 357 // domain is supported. 358 std::string supported_domain_; 359 360 // List domains where decode failures have required disabling sdch, along with 361 // count of how many additonal uses should be blacklisted. 362 DomainCounter blacklisted_domains_; 363 364 // Support exponential backoff in number of domain accesses before 365 // blacklisting expires. 366 DomainCounter exponential_blacklist_count; 367 368 // List of hostnames for which a latency experiment is allowed (because a 369 // round trip test has recently passed). 370 ExperimentSet allow_latency_experiment_; 371 372 DISALLOW_COPY_AND_ASSIGN(SdchManager); 373 }; 374 375 } // namespace net 376 377 #endif // NET_BASE_SDCH_MANAGER_H_ 378