1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/base64.h" 6 #include "base/field_trial.h" 7 #include "base/histogram.h" 8 #include "base/logging.h" 9 #include "base/sha2.h" 10 #include "base/string_util.h" 11 #include "net/base/registry_controlled_domain.h" 12 #include "net/base/sdch_manager.h" 13 #include "net/url_request/url_request_http_job.h" 14 15 using base::Time; 16 using base::TimeDelta; 17 18 //------------------------------------------------------------------------------ 19 // static 20 const size_t SdchManager::kMaxDictionarySize = 1000000; 21 22 // static 23 const size_t SdchManager::kMaxDictionaryCount = 20; 24 25 // static 26 SdchManager* SdchManager::global_; 27 28 // static 29 SdchManager* SdchManager::Global() { 30 return global_; 31 } 32 33 // static 34 void SdchManager::SdchErrorRecovery(ProblemCodes problem) { 35 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE); 36 } 37 38 // static 39 void SdchManager::ClearBlacklistings() { 40 Global()->blacklisted_domains_.clear(); 41 Global()->exponential_blacklist_count.clear(); 42 } 43 44 // static 45 void SdchManager::ClearDomainBlacklisting(const std::string& domain) { 46 Global()->blacklisted_domains_.erase(StringToLowerASCII(domain)); 47 } 48 49 // static 50 int SdchManager::BlackListDomainCount(const std::string& domain) { 51 if (Global()->blacklisted_domains_.end() == 52 Global()->blacklisted_domains_.find(domain)) 53 return 0; 54 return Global()->blacklisted_domains_[StringToLowerASCII(domain)]; 55 } 56 57 // static 58 int SdchManager::BlacklistDomainExponential(const std::string& domain) { 59 if (Global()->exponential_blacklist_count.end() == 60 Global()->exponential_blacklist_count.find(domain)) 61 return 0; 62 return Global()->exponential_blacklist_count[StringToLowerASCII(domain)]; 63 } 64 65 //------------------------------------------------------------------------------ 66 SdchManager::SdchManager() : sdch_enabled_(false) { 67 DCHECK(!global_); 68 global_ = this; 69 } 70 71 SdchManager::~SdchManager() { 72 DCHECK(global_ == this); 73 while (!dictionaries_.empty()) { 74 DictionaryMap::iterator it = dictionaries_.begin(); 75 it->second->Release(); 76 dictionaries_.erase(it->first); 77 } 78 global_ = NULL; 79 } 80 81 // static 82 void SdchManager::Shutdown() { 83 if (!global_ ) 84 return; 85 global_->fetcher_.reset(NULL); 86 } 87 88 // static 89 void SdchManager::BlacklistDomain(const GURL& url) { 90 if (!global_ ) 91 return; 92 global_->SetAllowLatencyExperiment(url, false); 93 94 std::string domain(StringToLowerASCII(url.host())); 95 int count = global_->blacklisted_domains_[domain]; 96 if (count > 0) 97 return; // Domain is already blacklisted. 98 99 count = 1 + 2 * global_->exponential_blacklist_count[domain]; 100 if (count > 0) 101 global_->exponential_blacklist_count[domain] = count; 102 else 103 count = INT_MAX; 104 105 global_->blacklisted_domains_[domain] = count; 106 } 107 108 // static 109 void SdchManager::BlacklistDomainForever(const GURL& url) { 110 if (!global_ ) 111 return; 112 global_->SetAllowLatencyExperiment(url, false); 113 114 std::string domain(StringToLowerASCII(url.host())); 115 global_->exponential_blacklist_count[domain] = INT_MAX; 116 global_->blacklisted_domains_[domain] = INT_MAX; 117 } 118 119 void SdchManager::EnableSdchSupport(const std::string& domain) { 120 // We presume that there is a SDCH manager instance. 121 global_->supported_domain_ = domain; 122 global_->sdch_enabled_ = true; 123 } 124 125 const bool SdchManager::IsInSupportedDomain(const GURL& url) { 126 if (!sdch_enabled_ ) 127 return false; 128 if (!supported_domain_.empty() && 129 !url.DomainIs(supported_domain_.data(), supported_domain_.size())) 130 return false; // It is not the singular supported domain. 131 132 if (blacklisted_domains_.empty()) 133 return true; 134 135 std::string domain(StringToLowerASCII(url.host())); 136 DomainCounter::iterator it = blacklisted_domains_.find(domain); 137 if (blacklisted_domains_.end() == it) 138 return true; 139 140 int count = it->second - 1; 141 if (count > 0) 142 blacklisted_domains_[domain] = count; 143 else 144 blacklisted_domains_.erase(domain); 145 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET); 146 return false; 147 } 148 149 bool SdchManager::CanFetchDictionary(const GURL& referring_url, 150 const GURL& dictionary_url) const { 151 /* The user agent may retrieve a dictionary from the dictionary URL if all of 152 the following are true: 153 1 The dictionary URL host name matches the referrer URL host name 154 2 The dictionary URL host name domain matches the parent domain of the 155 referrer URL host name 156 3 The parent domain of the referrer URL host name is not a top level 157 domain 158 4 The dictionary URL is not an HTTPS URL. 159 */ 160 // Item (1) above implies item (2). Spec should be updated. 161 // I take "host name match" to be "is identical to" 162 if (referring_url.host() != dictionary_url.host()) { 163 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST); 164 return false; 165 } 166 if (referring_url.SchemeIs("https")) { 167 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL); 168 return false; 169 } 170 171 // TODO(jar): Remove this failsafe conservative hack which is more restrictive 172 // than current SDCH spec when needed, and justified by security audit. 173 if (!referring_url.SchemeIs("http")) { 174 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP); 175 return false; 176 } 177 178 return true; 179 } 180 181 void SdchManager::FetchDictionary(const GURL& request_url, 182 const GURL& dictionary_url) { 183 if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) && 184 fetcher_.get()) 185 fetcher_->Schedule(dictionary_url); 186 } 187 188 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text, 189 const GURL& dictionary_url) { 190 std::string client_hash; 191 std::string server_hash; 192 GenerateHash(dictionary_text, &client_hash, &server_hash); 193 if (dictionaries_.find(server_hash) != dictionaries_.end()) { 194 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED); 195 return false; // Already loaded. 196 } 197 198 std::string domain, path; 199 std::set<int> ports; 200 Time expiration(Time::Now() + TimeDelta::FromDays(30)); 201 202 if (dictionary_text.empty()) { 203 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT); 204 return false; // Missing header. 205 } 206 207 size_t header_end = dictionary_text.find("\n\n"); 208 if (std::string::npos == header_end) { 209 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER); 210 return false; // Missing header. 211 } 212 size_t line_start = 0; // Start of line being parsed. 213 while (1) { 214 size_t line_end = dictionary_text.find('\n', line_start); 215 DCHECK(std::string::npos != line_end); 216 DCHECK(line_end <= header_end); 217 218 size_t colon_index = dictionary_text.find(':', line_start); 219 if (std::string::npos == colon_index) { 220 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON); 221 return false; // Illegal line missing a colon. 222 } 223 224 if (colon_index > line_end) 225 break; 226 227 size_t value_start = dictionary_text.find_first_not_of(" \t", 228 colon_index + 1); 229 if (std::string::npos != value_start) { 230 if (value_start >= line_end) 231 break; 232 std::string name(dictionary_text, line_start, colon_index - line_start); 233 std::string value(dictionary_text, value_start, line_end - value_start); 234 name = StringToLowerASCII(name); 235 if (name == "domain") { 236 domain = value; 237 } else if (name == "path") { 238 path = value; 239 } else if (name == "format-version") { 240 if (value != "1.0") 241 return false; 242 } else if (name == "max-age") { 243 expiration = Time::Now() + TimeDelta::FromSeconds(StringToInt64(value)); 244 } else if (name == "port") { 245 int port = StringToInt(value); 246 if (port >= 0) 247 ports.insert(port); 248 } 249 } 250 251 if (line_end >= header_end) 252 break; 253 line_start = line_end + 1; 254 } 255 256 if (!Dictionary::CanSet(domain, path, ports, dictionary_url)) 257 return false; 258 259 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of 260 // useless dictionaries. We should probably have a cache eviction plan, 261 // instead of just blocking additions. For now, with the spec in flux, it 262 // is probably not worth doing eviction handling. 263 if (kMaxDictionarySize < dictionary_text.size()) { 264 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE); 265 return false; 266 } 267 if (kMaxDictionaryCount <= dictionaries_.size()) { 268 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED); 269 return false; 270 } 271 272 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size()); 273 DLOG(INFO) << "Loaded dictionary with client hash " << client_hash << 274 " and server hash " << server_hash; 275 Dictionary* dictionary = 276 new Dictionary(dictionary_text, header_end + 2, client_hash, 277 dictionary_url, domain, path, expiration, ports); 278 dictionary->AddRef(); 279 dictionaries_[server_hash] = dictionary; 280 return true; 281 } 282 283 void SdchManager::GetVcdiffDictionary(const std::string& server_hash, 284 const GURL& referring_url, Dictionary** dictionary) { 285 *dictionary = NULL; 286 DictionaryMap::iterator it = dictionaries_.find(server_hash); 287 if (it == dictionaries_.end()) { 288 return; 289 } 290 Dictionary* matching_dictionary = it->second; 291 if (!matching_dictionary->CanUse(referring_url)) 292 return; 293 *dictionary = matching_dictionary; 294 } 295 296 // TODO(jar): If we have evictions from the dictionaries_, then we need to 297 // change this interface to return a list of reference counted Dictionary 298 // instances that can be used if/when a server specifies one. 299 void SdchManager::GetAvailDictionaryList(const GURL& target_url, 300 std::string* list) { 301 int count = 0; 302 for (DictionaryMap::iterator it = dictionaries_.begin(); 303 it != dictionaries_.end(); ++it) { 304 if (!it->second->CanAdvertise(target_url)) 305 continue; 306 ++count; 307 if (!list->empty()) 308 list->append(","); 309 list->append(it->second->client_hash()); 310 } 311 // Watch to see if we have corrupt or numerous dictionaries. 312 if (count > 0) 313 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count); 314 } 315 316 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text, 317 size_t offset, const std::string& client_hash, const GURL& gurl, 318 const std::string& domain, const std::string& path, const Time& expiration, 319 const std::set<int> ports) 320 : text_(dictionary_text, offset), 321 client_hash_(client_hash), 322 url_(gurl), 323 domain_(domain), 324 path_(path), 325 expiration_(expiration), 326 ports_(ports) { 327 } 328 329 // static 330 void SdchManager::GenerateHash(const std::string& dictionary_text, 331 std::string* client_hash, std::string* server_hash) { 332 char binary_hash[32]; 333 base::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash)); 334 335 std::string first_48_bits(&binary_hash[0], 6); 336 std::string second_48_bits(&binary_hash[6], 6); 337 UrlSafeBase64Encode(first_48_bits, client_hash); 338 UrlSafeBase64Encode(second_48_bits, server_hash); 339 340 DCHECK_EQ(server_hash->length(), 8u); 341 DCHECK_EQ(client_hash->length(), 8u); 342 } 343 344 // static 345 void SdchManager::UrlSafeBase64Encode(const std::string& input, 346 std::string* output) { 347 // Since this is only done during a dictionary load, and hashes are only 8 348 // characters, we just do the simple fixup, rather than rewriting the encoder. 349 base::Base64Encode(input, output); 350 for (size_t i = 0; i < output->size(); ++i) { 351 switch (output->data()[i]) { 352 case '+': 353 (*output)[i] = '-'; 354 continue; 355 case '/': 356 (*output)[i] = '_'; 357 continue; 358 default: 359 continue; 360 } 361 } 362 } 363 364 //------------------------------------------------------------------------------ 365 // Security functions restricting loads and use of dictionaries. 366 367 // static 368 bool SdchManager::Dictionary::CanSet(const std::string& domain, 369 const std::string& path, 370 const std::set<int> ports, 371 const GURL& dictionary_url) { 372 if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url)) 373 return false; 374 /* 375 A dictionary is invalid and must not be stored if any of the following are 376 true: 377 1. The dictionary has no Domain attribute. 378 2. The effective host name that derives from the referer URL host name does 379 not domain-match the Domain attribute. 380 3. The Domain attribute is a top level domain. 381 4. The referer URL host is a host domain name (not IP address) and has the 382 form HD, where D is the value of the Domain attribute, and H is a string 383 that contains one or more dots. 384 5. If the dictionary has a Port attribute and the referer URL's port was not 385 in the list. 386 */ 387 388 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic, 389 // and hence the conservative approach is to not allow any redirects (if there 390 // were any... then don't allow the dictionary to be set). 391 392 if (domain.empty()) { 393 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER); 394 return false; // Domain is required. 395 } 396 if (net::RegistryControlledDomainService::GetDomainAndRegistry(domain).size() 397 == 0) { 398 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN); 399 return false; // domain was a TLD. 400 } 401 if (!Dictionary::DomainMatch(dictionary_url, domain)) { 402 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL); 403 return false; 404 } 405 406 std::string referrer_url_host = dictionary_url.host(); 407 size_t postfix_domain_index = referrer_url_host.rfind(domain); 408 // See if it is indeed a postfix, or just an internal string. 409 if (referrer_url_host.size() == postfix_domain_index + domain.size()) { 410 // It is a postfix... so check to see if there's a dot in the prefix. 411 size_t end_of_host_index = referrer_url_host.find_first_of('.'); 412 if (referrer_url_host.npos != end_of_host_index && 413 end_of_host_index < postfix_domain_index) { 414 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX); 415 return false; 416 } 417 } 418 419 if (!ports.empty() 420 && 0 == ports.count(dictionary_url.EffectiveIntPort())) { 421 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL); 422 return false; 423 } 424 return true; 425 } 426 427 // static 428 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) { 429 if (!SdchManager::Global()->IsInSupportedDomain(referring_url)) 430 return false; 431 /* 432 1. The request URL's host name domain-matches the Domain attribute of the 433 dictionary. 434 2. If the dictionary has a Port attribute, the request port is one of the 435 ports listed in the Port attribute. 436 3. The request URL path-matches the path attribute of the dictionary. 437 4. The request is not an HTTPS request. 438 */ 439 if (!DomainMatch(referring_url, domain_)) { 440 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN); 441 return false; 442 } 443 if (!ports_.empty() 444 && 0 == ports_.count(referring_url.EffectiveIntPort())) { 445 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST); 446 return false; 447 } 448 if (path_.size() && !PathMatch(referring_url.path(), path_)) { 449 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH); 450 return false; 451 } 452 if (referring_url.SchemeIsSecure()) { 453 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME); 454 return false; 455 } 456 457 // TODO(jar): Remove overly restrictive failsafe test (added per security 458 // review) when we have a need to be more general. 459 if (!referring_url.SchemeIs("http")) { 460 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA); 461 return false; 462 } 463 464 return true; 465 } 466 467 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) { 468 if (!SdchManager::Global()->IsInSupportedDomain(target_url)) 469 return false; 470 /* The specific rules of when a dictionary should be advertised in an 471 Avail-Dictionary header are modeled after the rules for cookie scoping. The 472 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A 473 dictionary may be advertised in the Avail-Dictionaries header exactly when 474 all of the following are true: 475 1. The server's effective host name domain-matches the Domain attribute of 476 the dictionary. 477 2. If the dictionary has a Port attribute, the request port is one of the 478 ports listed in the Port attribute. 479 3. The request URI path-matches the path header of the dictionary. 480 4. The request is not an HTTPS request. 481 */ 482 if (!DomainMatch(target_url, domain_)) 483 return false; 484 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort())) 485 return false; 486 if (path_.size() && !PathMatch(target_url.path(), path_)) 487 return false; 488 if (target_url.SchemeIsSecure()) 489 return false; 490 if (Time::Now() > expiration_) 491 return false; 492 return true; 493 } 494 495 bool SdchManager::Dictionary::PathMatch(const std::string& path, 496 const std::string& restriction) { 497 /* Must be either: 498 1. P2 is equal to P1 499 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the 500 character following P2 in P1 is "/". 501 */ 502 if (path == restriction) 503 return true; 504 size_t prefix_length = restriction.size(); 505 if (prefix_length > path.size()) 506 return false; // Can't be a prefix. 507 if (0 != path.compare(0, prefix_length, restriction)) 508 return false; 509 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/'; 510 } 511 512 // static 513 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl, 514 const std::string& restriction) { 515 // TODO(jar): This is not precisely a domain match definition. 516 return gurl.DomainIs(restriction.data(), restriction.size()); 517 } 518 519 //------------------------------------------------------------------------------ 520 // Methods for supporting latency experiments. 521 522 bool SdchManager::AllowLatencyExperiment(const GURL& url) const { 523 return allow_latency_experiment_.end() != 524 allow_latency_experiment_.find(url.host()); 525 } 526 527 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) { 528 if (enable) { 529 allow_latency_experiment_.insert(url.host()); 530 return; 531 } 532 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host()); 533 if (allow_latency_experiment_.end() == it) 534 return; // It was already erased, or never allowed. 535 SdchErrorRecovery(LATENCY_TEST_DISALLOWED); 536 allow_latency_experiment_.erase(it); 537 } 538