1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/base/sdch_manager.h" 6 7 #include "base/base64.h" 8 #include "base/logging.h" 9 #include "base/metrics/histogram.h" 10 #include "base/string_number_conversions.h" 11 #include "base/string_util.h" 12 #include "crypto/sha2.h" 13 #include "net/base/registry_controlled_domain.h" 14 #include "net/url_request/url_request_http_job.h" 15 16 namespace net { 17 18 //------------------------------------------------------------------------------ 19 // static 20 const size_t SdchManager::kMaxDictionarySize = 1000000; 21 22 // static 23 const size_t SdchManager::kMaxDictionaryCount = 20; 24 25 // static 26 SdchManager* SdchManager::global_; 27 28 //------------------------------------------------------------------------------ 29 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text, 30 size_t offset, 31 const std::string& client_hash, 32 const GURL& gurl, 33 const std::string& domain, 34 const std::string& path, 35 const base::Time& expiration, 36 const std::set<int>& ports) 37 : text_(dictionary_text, offset), 38 client_hash_(client_hash), 39 url_(gurl), 40 domain_(domain), 41 path_(path), 42 expiration_(expiration), 43 ports_(ports) { 44 } 45 46 SdchManager::Dictionary::~Dictionary() { 47 } 48 49 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) { 50 if (!SdchManager::Global()->IsInSupportedDomain(target_url)) 51 return false; 52 /* The specific rules of when a dictionary should be advertised in an 53 Avail-Dictionary header are modeled after the rules for cookie scoping. The 54 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A 55 dictionary may be advertised in the Avail-Dictionaries header exactly when 56 all of the following are true: 57 1. The server's effective host name domain-matches the Domain attribute of 58 the dictionary. 59 2. If the dictionary has a Port attribute, the request port is one of the 60 ports listed in the Port attribute. 61 3. The request URI path-matches the path header of the dictionary. 62 4. The request is not an HTTPS request. 63 */ 64 if (!DomainMatch(target_url, domain_)) 65 return false; 66 if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort())) 67 return false; 68 if (path_.size() && !PathMatch(target_url.path(), path_)) 69 return false; 70 if (target_url.SchemeIsSecure()) 71 return false; 72 if (base::Time::Now() > expiration_) 73 return false; 74 return true; 75 } 76 77 //------------------------------------------------------------------------------ 78 // Security functions restricting loads and use of dictionaries. 79 80 // static 81 bool SdchManager::Dictionary::CanSet(const std::string& domain, 82 const std::string& path, 83 const std::set<int>& ports, 84 const GURL& dictionary_url) { 85 if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url)) 86 return false; 87 /* 88 A dictionary is invalid and must not be stored if any of the following are 89 true: 90 1. The dictionary has no Domain attribute. 91 2. The effective host name that derives from the referer URL host name does 92 not domain-match the Domain attribute. 93 3. The Domain attribute is a top level domain. 94 4. The referer URL host is a host domain name (not IP address) and has the 95 form HD, where D is the value of the Domain attribute, and H is a string 96 that contains one or more dots. 97 5. If the dictionary has a Port attribute and the referer URL's port was not 98 in the list. 99 */ 100 101 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic, 102 // and hence the conservative approach is to not allow any redirects (if there 103 // were any... then don't allow the dictionary to be set). 104 105 if (domain.empty()) { 106 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER); 107 return false; // Domain is required. 108 } 109 if (RegistryControlledDomainService::GetDomainAndRegistry(domain).size() 110 == 0) { 111 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN); 112 return false; // domain was a TLD. 113 } 114 if (!Dictionary::DomainMatch(dictionary_url, domain)) { 115 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL); 116 return false; 117 } 118 119 std::string referrer_url_host = dictionary_url.host(); 120 size_t postfix_domain_index = referrer_url_host.rfind(domain); 121 // See if it is indeed a postfix, or just an internal string. 122 if (referrer_url_host.size() == postfix_domain_index + domain.size()) { 123 // It is a postfix... so check to see if there's a dot in the prefix. 124 size_t end_of_host_index = referrer_url_host.find_first_of('.'); 125 if (referrer_url_host.npos != end_of_host_index && 126 end_of_host_index < postfix_domain_index) { 127 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX); 128 return false; 129 } 130 } 131 132 if (!ports.empty() 133 && 0 == ports.count(dictionary_url.EffectiveIntPort())) { 134 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL); 135 return false; 136 } 137 return true; 138 } 139 140 // static 141 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) { 142 if (!SdchManager::Global()->IsInSupportedDomain(referring_url)) 143 return false; 144 /* 145 1. The request URL's host name domain-matches the Domain attribute of the 146 dictionary. 147 2. If the dictionary has a Port attribute, the request port is one of the 148 ports listed in the Port attribute. 149 3. The request URL path-matches the path attribute of the dictionary. 150 4. The request is not an HTTPS request. 151 */ 152 if (!DomainMatch(referring_url, domain_)) { 153 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN); 154 return false; 155 } 156 if (!ports_.empty() 157 && 0 == ports_.count(referring_url.EffectiveIntPort())) { 158 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST); 159 return false; 160 } 161 if (path_.size() && !PathMatch(referring_url.path(), path_)) { 162 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH); 163 return false; 164 } 165 if (referring_url.SchemeIsSecure()) { 166 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME); 167 return false; 168 } 169 170 // TODO(jar): Remove overly restrictive failsafe test (added per security 171 // review) when we have a need to be more general. 172 if (!referring_url.SchemeIs("http")) { 173 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA); 174 return false; 175 } 176 177 return true; 178 } 179 180 bool SdchManager::Dictionary::PathMatch(const std::string& path, 181 const std::string& restriction) { 182 /* Must be either: 183 1. P2 is equal to P1 184 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the 185 character following P2 in P1 is "/". 186 */ 187 if (path == restriction) 188 return true; 189 size_t prefix_length = restriction.size(); 190 if (prefix_length > path.size()) 191 return false; // Can't be a prefix. 192 if (0 != path.compare(0, prefix_length, restriction)) 193 return false; 194 return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/'; 195 } 196 197 // static 198 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl, 199 const std::string& restriction) { 200 // TODO(jar): This is not precisely a domain match definition. 201 return gurl.DomainIs(restriction.data(), restriction.size()); 202 } 203 204 //------------------------------------------------------------------------------ 205 SdchManager::SdchManager() : sdch_enabled_(false) { 206 DCHECK(!global_); 207 global_ = this; 208 } 209 210 SdchManager::~SdchManager() { 211 DCHECK(global_ == this); 212 while (!dictionaries_.empty()) { 213 DictionaryMap::iterator it = dictionaries_.begin(); 214 it->second->Release(); 215 dictionaries_.erase(it->first); 216 } 217 global_ = NULL; 218 } 219 220 // static 221 void SdchManager::Shutdown() { 222 if (!global_ ) 223 return; 224 global_->fetcher_.reset(NULL); 225 } 226 227 // static 228 SdchManager* SdchManager::Global() { 229 return global_; 230 } 231 232 // static 233 void SdchManager::SdchErrorRecovery(ProblemCodes problem) { 234 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE); 235 } 236 237 void SdchManager::EnableSdchSupport(const std::string& domain) { 238 // We presume that there is a SDCH manager instance. 239 global_->supported_domain_ = domain; 240 global_->sdch_enabled_ = true; 241 } 242 243 // static 244 void SdchManager::BlacklistDomain(const GURL& url) { 245 if (!global_ ) 246 return; 247 global_->SetAllowLatencyExperiment(url, false); 248 249 std::string domain(StringToLowerASCII(url.host())); 250 int count = global_->blacklisted_domains_[domain]; 251 if (count > 0) 252 return; // Domain is already blacklisted. 253 254 count = 1 + 2 * global_->exponential_blacklist_count[domain]; 255 if (count > 0) 256 global_->exponential_blacklist_count[domain] = count; 257 else 258 count = INT_MAX; 259 260 global_->blacklisted_domains_[domain] = count; 261 } 262 263 // static 264 void SdchManager::BlacklistDomainForever(const GURL& url) { 265 if (!global_ ) 266 return; 267 global_->SetAllowLatencyExperiment(url, false); 268 269 std::string domain(StringToLowerASCII(url.host())); 270 global_->exponential_blacklist_count[domain] = INT_MAX; 271 global_->blacklisted_domains_[domain] = INT_MAX; 272 } 273 274 // static 275 void SdchManager::ClearBlacklistings() { 276 Global()->blacklisted_domains_.clear(); 277 Global()->exponential_blacklist_count.clear(); 278 } 279 280 // static 281 void SdchManager::ClearDomainBlacklisting(const std::string& domain) { 282 Global()->blacklisted_domains_.erase(StringToLowerASCII(domain)); 283 } 284 285 // static 286 int SdchManager::BlackListDomainCount(const std::string& domain) { 287 if (Global()->blacklisted_domains_.end() == 288 Global()->blacklisted_domains_.find(domain)) 289 return 0; 290 return Global()->blacklisted_domains_[StringToLowerASCII(domain)]; 291 } 292 293 // static 294 int SdchManager::BlacklistDomainExponential(const std::string& domain) { 295 if (Global()->exponential_blacklist_count.end() == 296 Global()->exponential_blacklist_count.find(domain)) 297 return 0; 298 return Global()->exponential_blacklist_count[StringToLowerASCII(domain)]; 299 } 300 301 bool SdchManager::IsInSupportedDomain(const GURL& url) { 302 if (!sdch_enabled_ ) 303 return false; 304 if (!supported_domain_.empty() && 305 !url.DomainIs(supported_domain_.data(), supported_domain_.size())) 306 return false; // It is not the singular supported domain. 307 308 if (blacklisted_domains_.empty()) 309 return true; 310 311 std::string domain(StringToLowerASCII(url.host())); 312 DomainCounter::iterator it = blacklisted_domains_.find(domain); 313 if (blacklisted_domains_.end() == it) 314 return true; 315 316 int count = it->second - 1; 317 if (count > 0) 318 blacklisted_domains_[domain] = count; 319 else 320 blacklisted_domains_.erase(domain); 321 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET); 322 return false; 323 } 324 325 void SdchManager::FetchDictionary(const GURL& request_url, 326 const GURL& dictionary_url) { 327 if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) && 328 fetcher_.get()) 329 fetcher_->Schedule(dictionary_url); 330 } 331 332 bool SdchManager::CanFetchDictionary(const GURL& referring_url, 333 const GURL& dictionary_url) const { 334 /* The user agent may retrieve a dictionary from the dictionary URL if all of 335 the following are true: 336 1 The dictionary URL host name matches the referrer URL host name 337 2 The dictionary URL host name domain matches the parent domain of the 338 referrer URL host name 339 3 The parent domain of the referrer URL host name is not a top level 340 domain 341 4 The dictionary URL is not an HTTPS URL. 342 */ 343 // Item (1) above implies item (2). Spec should be updated. 344 // I take "host name match" to be "is identical to" 345 if (referring_url.host() != dictionary_url.host()) { 346 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST); 347 return false; 348 } 349 if (referring_url.SchemeIs("https")) { 350 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL); 351 return false; 352 } 353 354 // TODO(jar): Remove this failsafe conservative hack which is more restrictive 355 // than current SDCH spec when needed, and justified by security audit. 356 if (!referring_url.SchemeIs("http")) { 357 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP); 358 return false; 359 } 360 361 return true; 362 } 363 364 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text, 365 const GURL& dictionary_url) { 366 std::string client_hash; 367 std::string server_hash; 368 GenerateHash(dictionary_text, &client_hash, &server_hash); 369 if (dictionaries_.find(server_hash) != dictionaries_.end()) { 370 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED); 371 return false; // Already loaded. 372 } 373 374 std::string domain, path; 375 std::set<int> ports; 376 base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30)); 377 378 if (dictionary_text.empty()) { 379 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT); 380 return false; // Missing header. 381 } 382 383 size_t header_end = dictionary_text.find("\n\n"); 384 if (std::string::npos == header_end) { 385 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER); 386 return false; // Missing header. 387 } 388 size_t line_start = 0; // Start of line being parsed. 389 while (1) { 390 size_t line_end = dictionary_text.find('\n', line_start); 391 DCHECK(std::string::npos != line_end); 392 DCHECK(line_end <= header_end); 393 394 size_t colon_index = dictionary_text.find(':', line_start); 395 if (std::string::npos == colon_index) { 396 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON); 397 return false; // Illegal line missing a colon. 398 } 399 400 if (colon_index > line_end) 401 break; 402 403 size_t value_start = dictionary_text.find_first_not_of(" \t", 404 colon_index + 1); 405 if (std::string::npos != value_start) { 406 if (value_start >= line_end) 407 break; 408 std::string name(dictionary_text, line_start, colon_index - line_start); 409 std::string value(dictionary_text, value_start, line_end - value_start); 410 name = StringToLowerASCII(name); 411 if (name == "domain") { 412 domain = value; 413 } else if (name == "path") { 414 path = value; 415 } else if (name == "format-version") { 416 if (value != "1.0") 417 return false; 418 } else if (name == "max-age") { 419 int64 seconds; 420 base::StringToInt64(value, &seconds); 421 expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds); 422 } else if (name == "port") { 423 int port; 424 base::StringToInt(value, &port); 425 if (port >= 0) 426 ports.insert(port); 427 } 428 } 429 430 if (line_end >= header_end) 431 break; 432 line_start = line_end + 1; 433 } 434 435 if (!Dictionary::CanSet(domain, path, ports, dictionary_url)) 436 return false; 437 438 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of 439 // useless dictionaries. We should probably have a cache eviction plan, 440 // instead of just blocking additions. For now, with the spec in flux, it 441 // is probably not worth doing eviction handling. 442 if (kMaxDictionarySize < dictionary_text.size()) { 443 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE); 444 return false; 445 } 446 if (kMaxDictionaryCount <= dictionaries_.size()) { 447 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED); 448 return false; 449 } 450 451 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size()); 452 DVLOG(1) << "Loaded dictionary with client hash " << client_hash 453 << " and server hash " << server_hash; 454 Dictionary* dictionary = 455 new Dictionary(dictionary_text, header_end + 2, client_hash, 456 dictionary_url, domain, path, expiration, ports); 457 dictionary->AddRef(); 458 dictionaries_[server_hash] = dictionary; 459 return true; 460 } 461 462 void SdchManager::GetVcdiffDictionary(const std::string& server_hash, 463 const GURL& referring_url, Dictionary** dictionary) { 464 *dictionary = NULL; 465 DictionaryMap::iterator it = dictionaries_.find(server_hash); 466 if (it == dictionaries_.end()) { 467 return; 468 } 469 Dictionary* matching_dictionary = it->second; 470 if (!matching_dictionary->CanUse(referring_url)) 471 return; 472 *dictionary = matching_dictionary; 473 } 474 475 // TODO(jar): If we have evictions from the dictionaries_, then we need to 476 // change this interface to return a list of reference counted Dictionary 477 // instances that can be used if/when a server specifies one. 478 void SdchManager::GetAvailDictionaryList(const GURL& target_url, 479 std::string* list) { 480 int count = 0; 481 for (DictionaryMap::iterator it = dictionaries_.begin(); 482 it != dictionaries_.end(); ++it) { 483 if (!it->second->CanAdvertise(target_url)) 484 continue; 485 ++count; 486 if (!list->empty()) 487 list->append(","); 488 list->append(it->second->client_hash()); 489 } 490 // Watch to see if we have corrupt or numerous dictionaries. 491 if (count > 0) 492 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count); 493 } 494 495 // static 496 void SdchManager::GenerateHash(const std::string& dictionary_text, 497 std::string* client_hash, std::string* server_hash) { 498 char binary_hash[32]; 499 crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash)); 500 501 std::string first_48_bits(&binary_hash[0], 6); 502 std::string second_48_bits(&binary_hash[6], 6); 503 UrlSafeBase64Encode(first_48_bits, client_hash); 504 UrlSafeBase64Encode(second_48_bits, server_hash); 505 506 DCHECK_EQ(server_hash->length(), 8u); 507 DCHECK_EQ(client_hash->length(), 8u); 508 } 509 510 //------------------------------------------------------------------------------ 511 // Methods for supporting latency experiments. 512 513 bool SdchManager::AllowLatencyExperiment(const GURL& url) const { 514 return allow_latency_experiment_.end() != 515 allow_latency_experiment_.find(url.host()); 516 } 517 518 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) { 519 if (enable) { 520 allow_latency_experiment_.insert(url.host()); 521 return; 522 } 523 ExperimentSet::iterator it = allow_latency_experiment_.find(url.host()); 524 if (allow_latency_experiment_.end() == it) 525 return; // It was already erased, or never allowed. 526 SdchErrorRecovery(LATENCY_TEST_DISALLOWED); 527 allow_latency_experiment_.erase(it); 528 } 529 530 // static 531 void SdchManager::UrlSafeBase64Encode(const std::string& input, 532 std::string* output) { 533 // Since this is only done during a dictionary load, and hashes are only 8 534 // characters, we just do the simple fixup, rather than rewriting the encoder. 535 base::Base64Encode(input, output); 536 for (size_t i = 0; i < output->size(); ++i) { 537 switch (output->data()[i]) { 538 case '+': 539 (*output)[i] = '-'; 540 continue; 541 case '/': 542 (*output)[i] = '_'; 543 continue; 544 default: 545 continue; 546 } 547 } 548 } 549 550 } // namespace net 551