// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Provides global database of differential decompression dictionaries for the
// SDCH filter (processes sdch encoded content).

// Exactly one instance of SdchManager is built, and all references are made
// into that collection.
//
// The SdchManager maintains a collection of memory resident dictionaries.  It
// can find a dictionary (based on a server specification of a hash), store a
// dictionary, and make judgements about what URLs can use, set, etc. a
// dictionary.

// These dictionaries are acquired over the net, and include a header
// (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF
// module) to decompress data.

#ifndef NET_BASE_SDCH_MANAGER_H_
#define NET_BASE_SDCH_MANAGER_H_

#include <map>
#include <set>
#include <string>

#include "base/gtest_prod_util.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/threading/non_thread_safe.h"
#include "base/time/time.h"
#include "net/base/net_export.h"
#include "url/gurl.h"

namespace net {

//------------------------------------------------------------------------------
// Create a public interface to help us load SDCH dictionaries.
// The SdchManager class allows registration to support this interface.
// A browser may register a fetcher that is used by the dictionary managers to
// get data from a specified URL.  This allows us to use very high level browser
// functionality in this base (when the functionality can be provided).
class NET_EXPORT SdchFetcher {
 public:
  // Receiver of fetched dictionaries.  SdchManager (declared later in this
  // file) derives from this interface.
  class NET_EXPORT Delegate {
   public:
    virtual ~Delegate() {}

    // Called whenever the SdchFetcher has successfully retrieved a
    // dictionary.  |dictionary_text| contains the body of the dictionary
    // retrieved from |dictionary_url|.
    virtual void AddSdchDictionary(const std::string& dictionary_text,
                                   const GURL& dictionary_url) = 0;
  };

  SdchFetcher() {}
  virtual ~SdchFetcher() {}

  // The Schedule() method is called when there is a need to get a dictionary
  // from a server.  The callee is responsible for getting that dictionary_text,
  // and then calling back to AddSdchDictionary() in the Delegate instance.
  virtual void Schedule(const GURL& dictionary_url) = 0;

  // The Cancel() method is called to cancel all pending dictionary fetches.
  // This is used for implementation of SdchManager::ClearData().
  virtual void Cancel() = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(SdchFetcher);
};

//------------------------------------------------------------------------------
// Keeps the memory resident SDCH dictionaries and the per-domain blacklist
// state declared below.  It is also the SdchFetcher::Delegate, so dictionary
// text obtained by a registered SdchFetcher is handed back to it through
// AddSdchDictionary().  Derives from base::NonThreadSafe.
class NET_EXPORT SdchManager
    : public SdchFetcher::Delegate,
      public NON_EXPORTED_BASE(base::NonThreadSafe) {
 public:
  // A list of errors that appeared and were either resolved, or used to turn
  // off sdch encoding.
  // Every code except the two bounds carries an explicit value, and retired
  // codes are left behind as "defunct" comments rather than being reused, so
  // existing values should not be renumbered.
  enum ProblemCodes {
    // Implicitly 0.  Also the |reason| of a default-constructed BlacklistInfo.
    MIN_PROBLEM_CODE,

    // Content-encoding correction problems.
    ADDED_CONTENT_ENCODING = 1,
    FIXED_CONTENT_ENCODING = 2,
    FIXED_CONTENT_ENCODINGS = 3,

    // Content decoding errors.
    DECODE_HEADER_ERROR = 4,
    DECODE_BODY_ERROR = 5,

    // More content-encoding correction problems.
    OPTIONAL_GUNZIP_ENCODING_ADDED = 6,

    // Content encoding correction when we're not even tagged as HTML!?!
    BINARY_ADDED_CONTENT_ENCODING = 7,
    BINARY_FIXED_CONTENT_ENCODING = 8,
    BINARY_FIXED_CONTENT_ENCODINGS = 9,

    // Dictionary selection for use problems.
    DICTIONARY_FOUND_HAS_WRONG_DOMAIN = 10,
    DICTIONARY_FOUND_HAS_WRONG_PORT_LIST = 11,
    DICTIONARY_FOUND_HAS_WRONG_PATH = 12,
    DICTIONARY_FOUND_HAS_WRONG_SCHEME = 13,
    DICTIONARY_HASH_NOT_FOUND = 14,
    DICTIONARY_HASH_MALFORMED = 15,

    // Dictionary saving problems.
    DICTIONARY_HAS_NO_HEADER = 20,
    DICTIONARY_HEADER_LINE_MISSING_COLON = 21,
    DICTIONARY_MISSING_DOMAIN_SPECIFIER = 22,
    DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN = 23,
    DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL = 24,
    DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL = 25,
    DICTIONARY_HAS_NO_TEXT = 26,
    DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX = 27,

    // Dictionary loading problems.
    DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30,
    DICTIONARY_SELECTED_FOR_SSL = 31,
    DICTIONARY_ALREADY_LOADED = 32,
    DICTIONARY_SELECTED_FROM_NON_HTTP = 33,
    DICTIONARY_IS_TOO_LARGE= 34,
    DICTIONARY_COUNT_EXCEEDED = 35,
    DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD = 36,
    DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD = 37,
    DICTIONARY_FETCH_READ_FAILED = 38,

    // Failsafe hack.
    ATTEMPT_TO_DECODE_NON_HTTP_DATA = 40,


    // Content-Encoding problems detected, with no action taken.
    MULTIENCODING_FOR_NON_SDCH_REQUEST = 50,
    SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST = 51,

    // Dictionary manager issues.
    DOMAIN_BLACKLIST_INCLUDES_TARGET = 61,

    // Problematic decode recovery methods.
    META_REFRESH_RECOVERY = 70,  // Dictionary not found.
    // defunct =  71, // Almost the same as META_REFRESH_UNSUPPORTED.
    // defunct =  72, // Almost the same as CACHED_META_REFRESH_UNSUPPORTED.
    // defunct =  73, // PASSING_THROUGH_NON_SDCH plus
                      // RESPONSE_TENTATIVE_SDCH in ../filter/sdch_filter.cc.
    META_REFRESH_UNSUPPORTED = 74,  // Unrecoverable error.
    CACHED_META_REFRESH_UNSUPPORTED = 75,  // As above, but pulled from cache.
    PASSING_THROUGH_NON_SDCH = 76,  // Tagged sdch but missing dictionary-hash.
    INCOMPLETE_SDCH_CONTENT = 77,  // Last window was not completely decoded.
    PASS_THROUGH_404_CODE = 78,  // URL not found message passing through.

    // This next report is very common, and not really an error scenario, but
    // it exercises the error recovery logic.
    PASS_THROUGH_OLD_CACHED = 79,  // Back button got pre-SDCH cached content.

    // Common decoded recovery methods.
    META_REFRESH_CACHED_RECOVERY = 80,  // Probably startup tab loading.
    // defunct = 81, // Now tracked by ResponseCorruptionDetectionCause histo.

    // Non SDCH problems, only accounted for to make stat counting complete
    // (i.e., be able to be sure all dictionary advertisements are accounted
    // for).

    UNFLUSHED_CONTENT = 90,  // Possible error in filter chaining.
    // defunct = 91,  // MISSING_TIME_STATS (Should never happen.)
    CACHE_DECODED = 92,  // No timing stats recorded.
    // defunct = 93,  // OVER_10_MINUTES (No timing stats recorded.)
    UNINITIALIZED = 94,  // Filter never even got initialized.
    PRIOR_TO_DICTIONARY = 95,  // We hadn't even parsed a dictionary selector.
    DECODE_ERROR = 96,  // Something went wrong during decode.

    // Problem during the latency test.
    LATENCY_TEST_DISALLOWED = 100,  // SDCH now failing, but it worked before!

    // Implicitly 101, i.e. one past the largest explicit code above.
    MAX_PROBLEM_CODE  // Used to bound histogram.
  };

  // Use the following static limits to block DOS attacks until we implement
  // a cached dictionary eviction strategy.
  // Only declared here; the values are defined outside this header.
  static const size_t kMaxDictionarySize;
  static const size_t kMaxDictionaryCount;

  // There is one instance of |Dictionary| for each memory-cached SDCH
  // dictionary.
  // Apart from text(), every member is private, so only the friends named
  // below can construct or query an instance.
  class NET_EXPORT_PRIVATE Dictionary : public base::RefCounted<Dictionary> {
   public:
    // Sdch filters can get our text to use in decoding compressed data.
    const std::string& text() const { return text_; }

   private:
    friend class base::RefCounted<Dictionary>;
    friend class SdchManager;  // Only manager can construct an instance.
    // Lets the SdchManagerTest PathMatch unit test reach the private members.
    FRIEND_TEST_ALL_PREFIXES(SdchManagerTest, PathMatch);

    // Construct a vc-diff usable dictionary from the dictionary_text starting
    // at the given offset.  The supplied client_hash should be used to
    // advertise the dictionary's availability relative to the supplied URL.
    // The remaining arguments share their names with the const members
    // declared at the end of this class (url_, domain_, path_, expiration_,
    // ports_).
    Dictionary(const std::string& dictionary_text,
               size_t offset,
               const std::string& client_hash,
               const GURL& url,
               const std::string& domain,
               const std::string& path,
               const base::Time& expiration,
               const std::set<int>& ports);
    // Private: only the friends declared above can destroy an instance.
    virtual ~Dictionary();

    // Accessors for url_ and client_hash_.
    const GURL& url() const { return url_; }
    const std::string& client_hash() const { return client_hash_; }

    // Security method to check if we can advertise this dictionary for use
    // if the |target_url| returns SDCH compressed data.
    bool CanAdvertise(const GURL& target_url);

    // Security methods to check if we can establish a new dictionary with the
    // given data, that arrived in response to get of dictionary_url.
    static bool CanSet(const std::string& domain, const std::string& path,
                       const std::set<int>& ports, const GURL& dictionary_url);

    // Security method to check if we can use a dictionary to decompress a
    // target that arrived with a reference to this dictionary.
    bool CanUse(const GURL& referring_url);

    // Compare paths to see if they "match" for dictionary use.
    static bool PathMatch(const std::string& path,
                          const std::string& restriction);

    // Compare domains to see if they "match" for dictionary use.
    static bool DomainMatch(const GURL& url, const std::string& restriction);


    // The actual text of the dictionary.
    std::string text_;

    // Part of the hash of text_ that the client uses to advertise the fact that
    // it has a specific dictionary pre-cached.
    std::string client_hash_;

    // The GURL that arrived with the text_ in a URL request to specify where
    // this dictionary may be used.
    const GURL url_;

    // Metadata "headers" that preceded the dictionary text:
    // Each dictionary payload consists of several headers, followed by the text
    // of the dictionary.  The following are the known headers.
    const std::string domain_;
    const std::string path_;
    const base::Time expiration_;  // Implied by max-age.
    const std::set<int> ports_;

    DISALLOW_COPY_AND_ASSIGN(Dictionary);
  };

  SdchManager();
  virtual ~SdchManager();

  // Clear data (for browser data removal).
  void ClearData();

  // Record stats on various errors.
  static void SdchErrorRecovery(ProblemCodes problem);

  // Register a fetcher that this class can use to obtain dictionaries.
  // The fetcher is passed in a scoped_ptr, i.e. ownership is handed over.
  void set_sdch_fetcher(scoped_ptr<SdchFetcher> fetcher);

  // Enables or disables SDCH compression.
  static void EnableSdchSupport(bool enabled);

  // Returns the current value of g_sdch_enabled_.
  static bool sdch_enabled() { return g_sdch_enabled_; }

  // Enables or disables SDCH compression over secure connection.
  static void EnableSecureSchemeSupport(bool enabled);

  // Returns the current value of g_secure_scheme_supported_.
  static bool secure_scheme_supported() { return g_secure_scheme_supported_; }

  // Briefly prevent further advertising of SDCH on this domain (if SDCH is
  // enabled). After enough calls to IsInSupportedDomain() the blacklisting
  // will be removed. Additional blacklists take exponentially more calls
  // to IsInSupportedDomain() before the blacklisting is undone.
  // Used when filter errors are found from a given domain, but it is plausible
  // that the cause is temporary (such as application startup, where cached
  // entries are used, but a dictionary is not yet loaded).
  void BlacklistDomain(const GURL& url, ProblemCodes blacklist_reason);

  // Used when SEVERE filter errors are found from a given domain, to prevent
  // further use of SDCH on that domain.
  void BlacklistDomainForever(const GURL& url, ProblemCodes blacklist_reason);

  // Unit test only, this function resets enabling of sdch, and clears the
  // blacklist.
  void ClearBlacklistings();

  // Unit test only, this function resets the blacklisting count for a domain.
  void ClearDomainBlacklisting(const std::string& domain);

  // Unit test only: indicate how many more times a domain will be blacklisted.
  // NOTE(review): spelled "BlackList" here but "Blacklist" in every sibling
  // method; left unchanged because callers are outside this file.
  int BlackListDomainCount(const std::string& domain);

  // Unit test only: Indicate what current blacklist increment is for a domain.
  int BlacklistDomainExponential(const std::string& domain);

  // Check to see if SDCH is enabled (globally), and the given URL is in a
  // supported domain (i.e., not blacklisted, and either the specific supported
  // domain, or all domains were assumed supported). If it is blacklisted,
  // reduce by 1 the number of times it will be reported as blacklisted.
  bool IsInSupportedDomain(const GURL& url);

  // Schedule the URL fetching to load a dictionary. This will always return
  // before the dictionary is actually loaded and added.
  // After the implied task completes, the dictionary will have been
  // cached in memory.
  void FetchDictionary(const GURL& request_url, const GURL& dictionary_url);

  // Security test function used before initiating a FetchDictionary.
  // Return true if fetch is legal.
  bool CanFetchDictionary(const GURL& referring_url,
                          const GURL& dictionary_url) const;

  // Find the vcdiff dictionary (the body of the sdch dictionary that appears
  // after the meta-data headers like Domain:...) with the given |server_hash|
  // to use to decompress data that arrived as SDCH encoded content.  Check to
  // be sure the returned |dictionary| can be used for decoding content supplied
  // in response to a request for |referring_url|.
  // Return null in |dictionary| if there is no matching legal dictionary.
  void GetVcdiffDictionary(const std::string& server_hash,
                           const GURL& referring_url,
                           scoped_refptr<Dictionary>* dictionary);

  // Get list of available (pre-cached) dictionaries that we have already loaded
  // into memory.  The list is a comma separated list of (client) hashes per
  // the SDCH spec.
  // The result is written to the |list| out-parameter.
  void GetAvailDictionaryList(const GURL& target_url, std::string* list);

  // Construct the pair of hashes for client and server to identify an SDCH
  // dictionary.  This is only made public to facilitate unit testing, but is
  // otherwise private.
  // Both hashes are written to out-parameters.
  static void GenerateHash(const std::string& dictionary_text,
                           std::string* client_hash, std::string* server_hash);

  // For Latency testing only, we need to know if we've succeeded in doing a
  // round trip before starting our comparative tests.  If ever we encounter
  // problems with SDCH, we opt-out of the test unless/until we perform a
  // complete SDCH decoding.
  bool AllowLatencyExperiment(const GURL& url) const;

  // Setter counterpart of AllowLatencyExperiment().
  // NOTE(review): presumably adds or removes the host of |url| in
  // allow_latency_experiment_ according to |enable| -- confirm in the .cc.
  void SetAllowLatencyExperiment(const GURL& url, bool enable);

  // Test-only accessor for fetches_count_for_testing_.
  int GetFetchesCountForTesting() const {
    return fetches_count_for_testing_;
  }

  // Implementation of SdchFetcher::Delegate.

  // Add an SDCH dictionary to our list of available
  // dictionaries. This addition will fail if addition is illegal
  // (data in the dictionary is not acceptable from the
  // dictionary_url; dictionary already added, etc.).
  virtual void AddSdchDictionary(const std::string& dictionary_text,
                                 const GURL& dictionary_url) OVERRIDE;

 private:
  // Blacklist bookkeeping stored per entry of DomainBlacklistInfo.  A default
  // constructed entry has zero counts and MIN_PROBLEM_CODE as its reason.
  struct BlacklistInfo {
    BlacklistInfo()
        : count(0),
          exponential_count(0),
          reason(MIN_PROBLEM_CODE) {}

    int count;                   // # of times to refuse SDCH advertisement.
    int exponential_count;       // Current exponential backoff ratchet.
    ProblemCodes reason;         // Why domain was blacklisted.

  };
  // Maps a string key to its BlacklistInfo.
  // NOTE(review): the key is presumably the domain name -- confirm in the .cc.
  typedef std::map<std::string, BlacklistInfo> DomainBlacklistInfo;
  // Set of strings; see allow_latency_experiment_ below.
  typedef std::set<std::string> ExperimentSet;

  // A map of dictionaries info indexed by the hash that the server provides.
  typedef std::map<std::string, scoped_refptr<Dictionary> > DictionaryMap;

  // Support SDCH compression, by advertising in headers.
  static bool g_sdch_enabled_;

  // Support SDCH compression for HTTPS requests and responses. When supported,
  // HTTPS applicable dictionaries MUST have been acquired securely via HTTPS.
  static bool g_secure_scheme_supported_;

  // A simple implementation of a RFC 3548 "URL safe" base64 encoder.
  // The encoded form of |input| is written to the |output| out-parameter.
  static void UrlSafeBase64Encode(const std::string& input,
                                  std::string* output);
  // The memory resident dictionaries, keyed as described for DictionaryMap.
  DictionaryMap dictionaries_;

  // An instance that can fetch a dictionary given a URL.
  scoped_ptr<SdchFetcher> fetcher_;

  // List domains where decode failures have required disabling sdch.
  DomainBlacklistInfo blacklisted_domains_;

  // List of hostnames for which a latency experiment is allowed (because a
  // round trip test has recently passed).
  ExperimentSet allow_latency_experiment_;

  // Value returned by GetFetchesCountForTesting().
  // NOTE(review): presumably counts dictionary fetches handed to the fetcher;
  // confirm where it is incremented in the .cc.
  int fetches_count_for_testing_;

  DISALLOW_COPY_AND_ASSIGN(SdchManager);
};

}  // namespace net

#endif  // NET_BASE_SDCH_MANAGER_H_