1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/base/sdch_manager.h" 6 7 #include "base/base64.h" 8 #include "base/logging.h" 9 #include "base/metrics/histogram.h" 10 #include "base/strings/string_number_conversions.h" 11 #include "base/strings/string_util.h" 12 #include "crypto/sha2.h" 13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 14 #include "net/url_request/url_request_http_job.h" 15 16 namespace { 17 18 void StripTrailingDot(GURL* gurl) { 19 std::string host(gurl->host()); 20 21 if (host.empty()) 22 return; 23 24 if (*host.rbegin() != '.') 25 return; 26 27 host.resize(host.size() - 1); 28 29 GURL::Replacements replacements; 30 replacements.SetHostStr(host); 31 *gurl = gurl->ReplaceComponents(replacements); 32 return; 33 } 34 35 } // namespace 36 37 namespace net { 38 39 //------------------------------------------------------------------------------ 40 // static 41 42 // Adjust SDCH limits downwards for mobile. 43 #if defined(OS_ANDROID) || defined(OS_IOS) 44 // static 45 const size_t SdchManager::kMaxDictionaryCount = 1; 46 const size_t SdchManager::kMaxDictionarySize = 500 * 1000; 47 #else 48 // static 49 const size_t SdchManager::kMaxDictionaryCount = 20; 50 const size_t SdchManager::kMaxDictionarySize = 1000 * 1000; 51 #endif 52 53 // static 54 #if defined(OS_IOS) 55 // Workaround for http://crbug.com/418975; remove when fixed. 56 bool SdchManager::g_sdch_enabled_ = false; 57 #else 58 bool SdchManager::g_sdch_enabled_ = true; 59 #endif 60 61 // static 62 bool SdchManager::g_secure_scheme_supported_ = true; 63 64 //------------------------------------------------------------------------------ 65 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text, 66 size_t offset, 67 const std::string& client_hash, 68 const GURL& gurl, 69 const std::string& domain, 70 const std::string& path, 71 const base::Time& expiration, 72 const std::set<int>& ports) 73 : text_(dictionary_text, offset), 74 client_hash_(client_hash), 75 url_(gurl), 76 domain_(domain), 77 path_(path), 78 expiration_(expiration), 79 ports_(ports) { 80 } 81 82 SdchManager::Dictionary::~Dictionary() { 83 } 84 85 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) { 86 /* The specific rules of when a dictionary should be advertised in an 87 Avail-Dictionary header are modeled after the rules for cookie scoping. The 88 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A 89 dictionary may be advertised in the Avail-Dictionaries header exactly when 90 all of the following are true: 91 1. The server's effective host name domain-matches the Domain attribute of 92 the dictionary. 93 2. If the dictionary has a Port attribute, the request port is one of the 94 ports listed in the Port attribute. 95 3. The request URI path-matches the path header of the dictionary. 96 4. The request is not an HTTPS request. 97 We can override (ignore) item (4) only when we have explicitly enabled 98 HTTPS support AND the dictionary acquisition scheme matches the target 99 url scheme. 100 */ 101 if (!DomainMatch(target_url, domain_)) 102 return false; 103 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort())) 104 return false; 105 if (path_.size() && !PathMatch(target_url.path(), path_)) 106 return false; 107 if (!SdchManager::secure_scheme_supported() && target_url.SchemeIsSecure()) 108 return false; 109 if (target_url.SchemeIsSecure() != url_.SchemeIsSecure()) 110 return false; 111 if (base::Time::Now() > expiration_) 112 return false; 113 return true; 114 } 115 116 //------------------------------------------------------------------------------ 117 // Security functions restricting loads and use of dictionaries. 118 119 // static 120 bool SdchManager::Dictionary::CanSet(const std::string& domain, 121 const std::string& path, 122 const std::set<int>& ports, 123 const GURL& dictionary_url) { 124 /* 125 A dictionary is invalid and must not be stored if any of the following are 126 true: 127 1. The dictionary has no Domain attribute. 128 2. The effective host name that derives from the referer URL host name does 129 not domain-match the Domain attribute. 130 3. The Domain attribute is a top level domain. 131 4. The referer URL host is a host domain name (not IP address) and has the 132 form HD, where D is the value of the Domain attribute, and H is a string 133 that contains one or more dots. 134 5. If the dictionary has a Port attribute and the referer URL's port was not 135 in the list. 136 */ 137 138 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic, 139 // and hence the conservative approach is to not allow any redirects (if there 140 // were any... then don't allow the dictionary to be set). 141 142 if (domain.empty()) { 143 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER); 144 return false; // Domain is required. 145 } 146 if (registry_controlled_domains::GetDomainAndRegistry( 147 domain, 148 registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES).empty()) { 149 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN); 150 return false; // domain was a TLD. 151 } 152 if (!Dictionary::DomainMatch(dictionary_url, domain)) { 153 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL); 154 return false; 155 } 156 157 std::string referrer_url_host = dictionary_url.host(); 158 size_t postfix_domain_index = referrer_url_host.rfind(domain); 159 // See if it is indeed a postfix, or just an internal string. 160 if (referrer_url_host.size() == postfix_domain_index + domain.size()) { 161 // It is a postfix... so check to see if there's a dot in the prefix. 162 size_t end_of_host_index = referrer_url_host.find_first_of('.'); 163 if (referrer_url_host.npos != end_of_host_index && 164 end_of_host_index < postfix_domain_index) { 165 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX); 166 return false; 167 } 168 } 169 170 if (!ports.empty() 171 && 0 == ports.count(dictionary_url.EffectiveIntPort())) { 172 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL); 173 return false; 174 } 175 return true; 176 } 177 178 // static 179 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) { 180 /* 181 1. The request URL's host name domain-matches the Domain attribute of the 182 dictionary. 183 2. If the dictionary has a Port attribute, the request port is one of the 184 ports listed in the Port attribute. 185 3. The request URL path-matches the path attribute of the dictionary. 186 4. The request is not an HTTPS request. 187 We can override (ignore) item (4) only when we have explicitly enabled 188 HTTPS support AND the dictionary acquisition scheme matches the target 189 url scheme. 190 */ 191 if (!DomainMatch(referring_url, domain_)) { 192 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN); 193 return false; 194 } 195 if (!ports_.empty() 196 && 0 == ports_.count(referring_url.EffectiveIntPort())) { 197 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST); 198 return false; 199 } 200 if (path_.size() && !PathMatch(referring_url.path(), path_)) { 201 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH); 202 return false; 203 } 204 if (!SdchManager::secure_scheme_supported() && 205 referring_url.SchemeIsSecure()) { 206 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME); 207 return false; 208 } 209 if (referring_url.SchemeIsSecure() != url_.SchemeIsSecure()) { 210 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME); 211 return false; 212 } 213 214 // TODO(jar): Remove overly restrictive failsafe test (added per security 215 // review) when we have a need to be more general. 216 if (!referring_url.SchemeIsHTTPOrHTTPS()) { 217 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA); 218 return false; 219 } 220 221 return true; 222 } 223 224 bool SdchManager::Dictionary::PathMatch(const std::string& path, 225 const std::string& restriction) { 226 /* Must be either: 227 1. P2 is equal to P1 228 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the 229 character following P2 in P1 is "/". 230 */ 231 if (path == restriction) 232 return true; 233 size_t prefix_length = restriction.size(); 234 if (prefix_length > path.size()) 235 return false; // Can't be a prefix. 236 if (0 != path.compare(0, prefix_length, restriction)) 237 return false; 238 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/'; 239 } 240 241 // static 242 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl, 243 const std::string& restriction) { 244 // TODO(jar): This is not precisely a domain match definition. 245 return gurl.DomainIs(restriction.data(), restriction.size()); 246 } 247 248 //------------------------------------------------------------------------------ 249 SdchManager::SdchManager() 250 : fetches_count_for_testing_(0) { 251 DCHECK(CalledOnValidThread()); 252 } 253 254 SdchManager::~SdchManager() { 255 DCHECK(CalledOnValidThread()); 256 while (!dictionaries_.empty()) { 257 DictionaryMap::iterator it = dictionaries_.begin(); 258 dictionaries_.erase(it->first); 259 } 260 } 261 262 void SdchManager::ClearData() { 263 blacklisted_domains_.clear(); 264 allow_latency_experiment_.clear(); 265 if (fetcher_.get()) 266 fetcher_->Cancel(); 267 268 // Note that this may result in not having dictionaries we've advertised 269 // for incoming responses. The window is relatively small (as ClearData() 270 // is not expected to be called frequently), so we rely on meta-refresh 271 // to handle this case. 272 dictionaries_.clear(); 273 } 274 275 // static 276 void SdchManager::SdchErrorRecovery(ProblemCodes problem) { 277 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE); 278 } 279 280 void SdchManager::set_sdch_fetcher(scoped_ptr<SdchFetcher> fetcher) { 281 DCHECK(CalledOnValidThread()); 282 fetcher_ = fetcher.Pass(); 283 } 284 285 // static 286 void SdchManager::EnableSdchSupport(bool enabled) { 287 g_sdch_enabled_ = enabled; 288 } 289 290 // static 291 void SdchManager::EnableSecureSchemeSupport(bool enabled) { 292 g_secure_scheme_supported_ = enabled; 293 } 294 295 void SdchManager::BlacklistDomain(const GURL& url, 296 ProblemCodes blacklist_reason) { 297 SetAllowLatencyExperiment(url, false); 298 299 BlacklistInfo* blacklist_info = 300 &blacklisted_domains_[base::StringToLowerASCII(url.host())]; 301 302 if (blacklist_info->count > 0) 303 return; // Domain is already blacklisted. 304 305 if (blacklist_info->exponential_count > (INT_MAX - 1) / 2) { 306 blacklist_info->exponential_count = INT_MAX; 307 } else { 308 blacklist_info->exponential_count = 309 blacklist_info->exponential_count * 2 + 1; 310 } 311 312 blacklist_info->count = blacklist_info->exponential_count; 313 blacklist_info->reason = blacklist_reason; 314 } 315 316 void SdchManager::BlacklistDomainForever(const GURL& url, 317 ProblemCodes blacklist_reason) { 318 SetAllowLatencyExperiment(url, false); 319 320 BlacklistInfo* blacklist_info = 321 &blacklisted_domains_[base::StringToLowerASCII(url.host())]; 322 blacklist_info->count = INT_MAX; 323 blacklist_info->exponential_count = INT_MAX; 324 blacklist_info->reason = blacklist_reason; 325 } 326 327 void SdchManager::ClearBlacklistings() { 328 blacklisted_domains_.clear(); 329 } 330 331 void SdchManager::ClearDomainBlacklisting(const std::string& domain) { 332 BlacklistInfo* blacklist_info = &blacklisted_domains_[ 333 base::StringToLowerASCII(domain)]; 334 blacklist_info->count = 0; 335 blacklist_info->reason = MIN_PROBLEM_CODE; 336 } 337 338 int SdchManager::BlackListDomainCount(const std::string& domain) { 339 std::string domain_lower(base::StringToLowerASCII(domain)); 340 341 if (blacklisted_domains_.end() == blacklisted_domains_.find(domain_lower)) 342 return 0; 343 return blacklisted_domains_[domain_lower].count; 344 } 345 346 int SdchManager::BlacklistDomainExponential(const std::string& domain) { 347 std::string domain_lower(base::StringToLowerASCII(domain)); 348 349 if (blacklisted_domains_.end() == blacklisted_domains_.find(domain_lower)) 350 return 0; 351 return blacklisted_domains_[domain_lower].exponential_count; 352 } 353 354 bool SdchManager::IsInSupportedDomain(const GURL& url) { 355 DCHECK(CalledOnValidThread()); 356 if (!g_sdch_enabled_ ) 357 return false; 358 359 if (!secure_scheme_supported() && url.SchemeIsSecure()) 360 return false; 361 362 if (blacklisted_domains_.empty()) 363 return true; 364 365 DomainBlacklistInfo::iterator it = 366 blacklisted_domains_.find(base::StringToLowerASCII(url.host())); 367 if (blacklisted_domains_.end() == it || it->second.count == 0) 368 return true; 369 370 UMA_HISTOGRAM_ENUMERATION("Sdch3.BlacklistReason", it->second.reason, 371 MAX_PROBLEM_CODE); 372 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET); 373 374 int count = it->second.count - 1; 375 if (count > 0) { 376 it->second.count = count; 377 } else { 378 it->second.count = 0; 379 it->second.reason = MIN_PROBLEM_CODE; 380 } 381 382 return false; 383 } 384 385 void SdchManager::FetchDictionary(const GURL& request_url, 386 const GURL& dictionary_url) { 387 DCHECK(CalledOnValidThread()); 388 if (CanFetchDictionary(request_url, dictionary_url) && fetcher_.get()) { 389 ++fetches_count_for_testing_; 390 fetcher_->Schedule(dictionary_url); 391 } 392 } 393 394 bool SdchManager::CanFetchDictionary(const GURL& referring_url, 395 const GURL& dictionary_url) const { 396 DCHECK(CalledOnValidThread()); 397 /* The user agent may retrieve a dictionary from the dictionary URL if all of 398 the following are true: 399 1 The dictionary URL host name matches the referrer URL host name and 400 scheme. 401 2 The dictionary URL host name domain matches the parent domain of the 402 referrer URL host name 403 3 The parent domain of the referrer URL host name is not a top level 404 domain 405 4 The dictionary URL is not an HTTPS URL. 406 */ 407 // Item (1) above implies item (2). Spec should be updated. 408 // I take "host name match" to be "is identical to" 409 if (referring_url.host() != dictionary_url.host() || 410 referring_url.scheme() != dictionary_url.scheme()) { 411 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST); 412 return false; 413 } 414 if (!secure_scheme_supported() && referring_url.SchemeIsSecure()) { 415 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL); 416 return false; 417 } 418 419 // TODO(jar): Remove this failsafe conservative hack which is more restrictive 420 // than current SDCH spec when needed, and justified by security audit. 421 if (!referring_url.SchemeIsHTTPOrHTTPS()) { 422 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP); 423 return false; 424 } 425 426 return true; 427 } 428 429 void SdchManager::GetVcdiffDictionary( 430 const std::string& server_hash, 431 const GURL& referring_url, 432 scoped_refptr<Dictionary>* dictionary) { 433 DCHECK(CalledOnValidThread()); 434 *dictionary = NULL; 435 DictionaryMap::iterator it = dictionaries_.find(server_hash); 436 if (it == dictionaries_.end()) { 437 return; 438 } 439 scoped_refptr<Dictionary> matching_dictionary = it->second; 440 if (!IsInSupportedDomain(referring_url)) 441 return; 442 if (!matching_dictionary->CanUse(referring_url)) 443 return; 444 *dictionary = matching_dictionary; 445 } 446 447 // TODO(jar): If we have evictions from the dictionaries_, then we need to 448 // change this interface to return a list of reference counted Dictionary 449 // instances that can be used if/when a server specifies one. 450 void SdchManager::GetAvailDictionaryList(const GURL& target_url, 451 std::string* list) { 452 DCHECK(CalledOnValidThread()); 453 int count = 0; 454 for (DictionaryMap::iterator it = dictionaries_.begin(); 455 it != dictionaries_.end(); ++it) { 456 if (!IsInSupportedDomain(target_url)) 457 continue; 458 if (!it->second->CanAdvertise(target_url)) 459 continue; 460 ++count; 461 if (!list->empty()) 462 list->append(","); 463 list->append(it->second->client_hash()); 464 } 465 // Watch to see if we have corrupt or numerous dictionaries. 466 if (count > 0) 467 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count); 468 } 469 470 // static 471 void SdchManager::GenerateHash(const std::string& dictionary_text, 472 std::string* client_hash, std::string* server_hash) { 473 char binary_hash[32]; 474 crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash)); 475 476 std::string first_48_bits(&binary_hash[0], 6); 477 std::string second_48_bits(&binary_hash[6], 6); 478 UrlSafeBase64Encode(first_48_bits, client_hash); 479 UrlSafeBase64Encode(second_48_bits, server_hash); 480 481 DCHECK_EQ(server_hash->length(), 8u); 482 DCHECK_EQ(client_hash->length(), 8u); 483 } 484 485 //------------------------------------------------------------------------------ 486 // Methods for supporting latency experiments. 487 488 bool SdchManager::AllowLatencyExperiment(const GURL& url) const { 489 DCHECK(CalledOnValidThread()); 490 return allow_latency_experiment_.end() != 491 allow_latency_experiment_.find(url.host()); 492 } 493 494 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) { 495 DCHECK(CalledOnValidThread()); 496 if (enable) { 497 allow_latency_experiment_.insert(url.host()); 498 return; 499 } 500 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host()); 501 if (allow_latency_experiment_.end() == it) 502 return; // It was already erased, or never allowed. 503 SdchErrorRecovery(LATENCY_TEST_DISALLOWED); 504 allow_latency_experiment_.erase(it); 505 } 506 507 void SdchManager::AddSdchDictionary(const std::string& dictionary_text, 508 const GURL& dictionary_url) { 509 DCHECK(CalledOnValidThread()); 510 std::string client_hash; 511 std::string server_hash; 512 GenerateHash(dictionary_text, &client_hash, &server_hash); 513 if (dictionaries_.find(server_hash) != dictionaries_.end()) { 514 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED); 515 return; // Already loaded. 516 } 517 518 std::string domain, path; 519 std::set<int> ports; 520 base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30)); 521 522 if (dictionary_text.empty()) { 523 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT); 524 return; // Missing header. 525 } 526 527 size_t header_end = dictionary_text.find("\n\n"); 528 if (std::string::npos == header_end) { 529 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER); 530 return; // Missing header. 531 } 532 size_t line_start = 0; // Start of line being parsed. 533 while (1) { 534 size_t line_end = dictionary_text.find('\n', line_start); 535 DCHECK(std::string::npos != line_end); 536 DCHECK_LE(line_end, header_end); 537 538 size_t colon_index = dictionary_text.find(':', line_start); 539 if (std::string::npos == colon_index) { 540 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON); 541 return; // Illegal line missing a colon. 542 } 543 544 if (colon_index > line_end) 545 break; 546 547 size_t value_start = dictionary_text.find_first_not_of(" \t", 548 colon_index + 1); 549 if (std::string::npos != value_start) { 550 if (value_start >= line_end) 551 break; 552 std::string name(dictionary_text, line_start, colon_index - line_start); 553 std::string value(dictionary_text, value_start, line_end - value_start); 554 name = base::StringToLowerASCII(name); 555 if (name == "domain") { 556 domain = value; 557 } else if (name == "path") { 558 path = value; 559 } else if (name == "format-version") { 560 if (value != "1.0") 561 return; 562 } else if (name == "max-age") { 563 int64 seconds; 564 base::StringToInt64(value, &seconds); 565 expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds); 566 } else if (name == "port") { 567 int port; 568 base::StringToInt(value, &port); 569 if (port >= 0) 570 ports.insert(port); 571 } 572 } 573 574 if (line_end >= header_end) 575 break; 576 line_start = line_end + 1; 577 } 578 579 // Narrow fix for http://crbug.com/389451. 580 GURL dictionary_url_normalized(dictionary_url); 581 StripTrailingDot(&dictionary_url_normalized); 582 583 if (!IsInSupportedDomain(dictionary_url_normalized)) 584 return; 585 586 if (!Dictionary::CanSet(domain, path, ports, dictionary_url_normalized)) 587 return; 588 589 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of 590 // useless dictionaries. We should probably have a cache eviction plan, 591 // instead of just blocking additions. For now, with the spec in flux, it 592 // is probably not worth doing eviction handling. 593 if (kMaxDictionarySize < dictionary_text.size()) { 594 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE); 595 return; 596 } 597 if (kMaxDictionaryCount <= dictionaries_.size()) { 598 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED); 599 return; 600 } 601 602 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size()); 603 DVLOG(1) << "Loaded dictionary with client hash " << client_hash 604 << " and server hash " << server_hash; 605 Dictionary* dictionary = 606 new Dictionary(dictionary_text, header_end + 2, client_hash, 607 dictionary_url_normalized, domain, 608 path, expiration, ports); 609 dictionaries_[server_hash] = dictionary; 610 return; 611 } 612 613 // static 614 void SdchManager::UrlSafeBase64Encode(const std::string& input, 615 std::string* output) { 616 // Since this is only done during a dictionary load, and hashes are only 8 617 // characters, we just do the simple fixup, rather than rewriting the encoder. 618 base::Base64Encode(input, output); 619 std::replace(output->begin(), output->end(), '+', '-'); 620 std::replace(output->begin(), output->end(), '/', '_'); 621 } 622 623 } // namespace net 624