Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/base64.h"
      6 #include "base/field_trial.h"
      7 #include "base/histogram.h"
      8 #include "base/logging.h"
      9 #include "base/sha2.h"
     10 #include "base/string_util.h"
     11 #include "net/base/registry_controlled_domain.h"
     12 #include "net/base/sdch_manager.h"
     13 #include "net/url_request/url_request_http_job.h"
     14 
     15 using base::Time;
     16 using base::TimeDelta;
     17 
     18 //------------------------------------------------------------------------------
     19 // static
     20 const size_t SdchManager::kMaxDictionarySize = 1000000;
     21 
     22 // static
     23 const size_t SdchManager::kMaxDictionaryCount = 20;
     24 
     25 // static
     26 SdchManager* SdchManager::global_;
     27 
     28 // static
     29 SdchManager* SdchManager::Global() {
     30   return global_;
     31 }
     32 
// static
// Reports |problem| to the "Sdch3.ProblemCodes_4" enumeration histogram, with
// MAX_PROBLEM_CODE passed as the macro's third argument.  Despite its name,
// this function only records the problem; it performs no recovery itself.
void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
  UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
}
     37 
     38 // static
     39 void SdchManager::ClearBlacklistings() {
     40   Global()->blacklisted_domains_.clear();
     41   Global()->exponential_blacklist_count.clear();
     42 }
     43 
     44 // static
     45 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
     46   Global()->blacklisted_domains_.erase(StringToLowerASCII(domain));
     47 }
     48 
     49 // static
     50 int SdchManager::BlackListDomainCount(const std::string& domain) {
     51   if (Global()->blacklisted_domains_.end() ==
     52       Global()->blacklisted_domains_.find(domain))
     53     return 0;
     54   return Global()->blacklisted_domains_[StringToLowerASCII(domain)];
     55 }
     56 
     57 // static
     58 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
     59   if (Global()->exponential_blacklist_count.end() ==
     60       Global()->exponential_blacklist_count.find(domain))
     61     return 0;
     62   return Global()->exponential_blacklist_count[StringToLowerASCII(domain)];
     63 }
     64 
     65 //------------------------------------------------------------------------------
     66 SdchManager::SdchManager() : sdch_enabled_(false) {
     67   DCHECK(!global_);
     68   global_ = this;
     69 }
     70 
     71 SdchManager::~SdchManager() {
     72   DCHECK(global_ == this);
     73   while (!dictionaries_.empty()) {
     74     DictionaryMap::iterator it = dictionaries_.begin();
     75     it->second->Release();
     76     dictionaries_.erase(it->first);
     77   }
     78   global_ = NULL;
     79 }
     80 
     81 // static
     82 void SdchManager::Shutdown() {
     83   if (!global_ )
     84     return;
     85   global_->fetcher_.reset(NULL);
     86 }
     87 
     88 // static
     89 void SdchManager::BlacklistDomain(const GURL& url) {
     90   if (!global_ )
     91     return;
     92   global_->SetAllowLatencyExperiment(url, false);
     93 
     94   std::string domain(StringToLowerASCII(url.host()));
     95   int count = global_->blacklisted_domains_[domain];
     96   if (count > 0)
     97     return;  // Domain is already blacklisted.
     98 
     99   count = 1 + 2 * global_->exponential_blacklist_count[domain];
    100   if (count > 0)
    101     global_->exponential_blacklist_count[domain] = count;
    102   else
    103     count = INT_MAX;
    104 
    105   global_->blacklisted_domains_[domain] = count;
    106 }
    107 
    108 // static
    109 void SdchManager::BlacklistDomainForever(const GURL& url) {
    110   if (!global_ )
    111     return;
    112   global_->SetAllowLatencyExperiment(url, false);
    113 
    114   std::string domain(StringToLowerASCII(url.host()));
    115   global_->exponential_blacklist_count[domain] = INT_MAX;
    116   global_->blacklisted_domains_[domain] = INT_MAX;
    117 }
    118 
    119 void SdchManager::EnableSdchSupport(const std::string& domain) {
    120   // We presume that there is a SDCH manager instance.
    121   global_->supported_domain_ = domain;
    122   global_->sdch_enabled_ = true;
    123 }
    124 
    125 const bool SdchManager::IsInSupportedDomain(const GURL& url) {
    126   if (!sdch_enabled_ )
    127     return false;
    128   if (!supported_domain_.empty() &&
    129       !url.DomainIs(supported_domain_.data(), supported_domain_.size()))
    130      return false;  // It is not the singular supported domain.
    131 
    132   if (blacklisted_domains_.empty())
    133     return true;
    134 
    135   std::string domain(StringToLowerASCII(url.host()));
    136   DomainCounter::iterator it = blacklisted_domains_.find(domain);
    137   if (blacklisted_domains_.end() == it)
    138     return true;
    139 
    140   int count = it->second - 1;
    141   if (count > 0)
    142     blacklisted_domains_[domain] = count;
    143   else
    144     blacklisted_domains_.erase(domain);
    145   SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
    146   return false;
    147 }
    148 
    149 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
    150                                      const GURL& dictionary_url) const {
    151   /* The user agent may retrieve a dictionary from the dictionary URL if all of
    152      the following are true:
    153        1 The dictionary URL host name matches the referrer URL host name
    154        2 The dictionary URL host name domain matches the parent domain of the
    155            referrer URL host name
    156        3 The parent domain of the referrer URL host name is not a top level
    157            domain
    158        4 The dictionary URL is not an HTTPS URL.
    159    */
    160   // Item (1) above implies item (2).  Spec should be updated.
    161   // I take "host name match" to be "is identical to"
    162   if (referring_url.host() != dictionary_url.host()) {
    163     SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
    164     return false;
    165   }
    166   if (referring_url.SchemeIs("https")) {
    167     SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
    168     return false;
    169   }
    170 
    171   // TODO(jar): Remove this failsafe conservative hack which is more restrictive
    172   // than current SDCH spec when needed, and justified by security audit.
    173   if (!referring_url.SchemeIs("http")) {
    174     SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
    175     return false;
    176   }
    177 
    178   return true;
    179 }
    180 
    181 void SdchManager::FetchDictionary(const GURL& request_url,
    182                                   const GURL& dictionary_url) {
    183   if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) &&
    184       fetcher_.get())
    185     fetcher_->Schedule(dictionary_url);
    186 }
    187 
    188 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
    189     const GURL& dictionary_url) {
    190   std::string client_hash;
    191   std::string server_hash;
    192   GenerateHash(dictionary_text, &client_hash, &server_hash);
    193   if (dictionaries_.find(server_hash) != dictionaries_.end()) {
    194     SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
    195     return false;  // Already loaded.
    196   }
    197 
    198   std::string domain, path;
    199   std::set<int> ports;
    200   Time expiration(Time::Now() + TimeDelta::FromDays(30));
    201 
    202   if (dictionary_text.empty()) {
    203     SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
    204     return false;  // Missing header.
    205   }
    206 
    207   size_t header_end = dictionary_text.find("\n\n");
    208   if (std::string::npos == header_end) {
    209     SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
    210     return false;  // Missing header.
    211   }
    212   size_t line_start = 0;  // Start of line being parsed.
    213   while (1) {
    214     size_t line_end = dictionary_text.find('\n', line_start);
    215     DCHECK(std::string::npos != line_end);
    216     DCHECK(line_end <= header_end);
    217 
    218     size_t colon_index = dictionary_text.find(':', line_start);
    219     if (std::string::npos == colon_index) {
    220       SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
    221       return false;  // Illegal line missing a colon.
    222     }
    223 
    224     if (colon_index > line_end)
    225       break;
    226 
    227     size_t value_start = dictionary_text.find_first_not_of(" \t",
    228                                                            colon_index + 1);
    229     if (std::string::npos != value_start) {
    230       if (value_start >= line_end)
    231         break;
    232       std::string name(dictionary_text, line_start, colon_index - line_start);
    233       std::string value(dictionary_text, value_start, line_end - value_start);
    234       name = StringToLowerASCII(name);
    235       if (name == "domain") {
    236         domain = value;
    237       } else if (name == "path") {
    238         path = value;
    239       } else if (name == "format-version") {
    240         if (value != "1.0")
    241           return false;
    242       } else if (name == "max-age") {
    243         expiration = Time::Now() + TimeDelta::FromSeconds(StringToInt64(value));
    244       } else if (name == "port") {
    245         int port = StringToInt(value);
    246         if (port >= 0)
    247           ports.insert(port);
    248       }
    249     }
    250 
    251     if (line_end >= header_end)
    252       break;
    253     line_start = line_end + 1;
    254   }
    255 
    256   if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
    257     return false;
    258 
    259   // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
    260   // useless dictionaries.  We should probably have a cache eviction plan,
    261   // instead of just blocking additions.  For now, with the spec in flux, it
    262   // is probably not worth doing eviction handling.
    263   if (kMaxDictionarySize < dictionary_text.size()) {
    264     SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
    265     return false;
    266   }
    267   if (kMaxDictionaryCount <= dictionaries_.size()) {
    268     SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
    269     return false;
    270   }
    271 
    272   UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
    273   DLOG(INFO) << "Loaded dictionary with client hash " << client_hash <<
    274       " and server hash " << server_hash;
    275   Dictionary* dictionary =
    276       new Dictionary(dictionary_text, header_end + 2, client_hash,
    277                      dictionary_url, domain, path, expiration, ports);
    278   dictionary->AddRef();
    279   dictionaries_[server_hash] = dictionary;
    280   return true;
    281 }
    282 
    283 void SdchManager::GetVcdiffDictionary(const std::string& server_hash,
    284     const GURL& referring_url, Dictionary** dictionary) {
    285   *dictionary = NULL;
    286   DictionaryMap::iterator it = dictionaries_.find(server_hash);
    287   if (it == dictionaries_.end()) {
    288     return;
    289   }
    290   Dictionary* matching_dictionary = it->second;
    291   if (!matching_dictionary->CanUse(referring_url))
    292     return;
    293   *dictionary = matching_dictionary;
    294 }
    295 
    296 // TODO(jar): If we have evictions from the dictionaries_, then we need to
    297 // change this interface to return a list of reference counted Dictionary
    298 // instances that can be used if/when a server specifies one.
    299 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
    300                                          std::string* list) {
    301   int count = 0;
    302   for (DictionaryMap::iterator it = dictionaries_.begin();
    303        it != dictionaries_.end(); ++it) {
    304     if (!it->second->CanAdvertise(target_url))
    305       continue;
    306     ++count;
    307     if (!list->empty())
    308       list->append(",");
    309     list->append(it->second->client_hash());
    310   }
    311   // Watch to see if we have corrupt or numerous dictionaries.
    312   if (count > 0)
    313     UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
    314 }
    315 
// Builds a dictionary record from already-validated header attributes.
//   dictionary_text: the complete downloaded text, header included.
//   offset: index into |dictionary_text| from which text_ is initialized;
//       AddSdchDictionary() passes the position just past the blank line that
//       ends the header.
//   client_hash: hash listed by GetAvailDictionaryList() when advertising.
//   gurl: URL the dictionary was loaded from.
//   domain, path, ports: scoping attributes checked by CanUse() and
//       CanAdvertise(); an empty path or port set imposes no restriction there.
//   expiration: time after which CanAdvertise() stops advertising it.
SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
    size_t offset, const std::string& client_hash, const GURL& gurl,
    const std::string& domain, const std::string& path, const Time& expiration,
    const std::set<int> ports)
      : text_(dictionary_text, offset),
        client_hash_(client_hash),
        url_(gurl),
        domain_(domain),
        path_(path),
        expiration_(expiration),
        ports_(ports) {
}
    328 
    329 // static
    330 void SdchManager::GenerateHash(const std::string& dictionary_text,
    331     std::string* client_hash, std::string* server_hash) {
    332   char binary_hash[32];
    333   base::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
    334 
    335   std::string first_48_bits(&binary_hash[0], 6);
    336   std::string second_48_bits(&binary_hash[6], 6);
    337   UrlSafeBase64Encode(first_48_bits, client_hash);
    338   UrlSafeBase64Encode(second_48_bits, server_hash);
    339 
    340   DCHECK_EQ(server_hash->length(), 8u);
    341   DCHECK_EQ(client_hash->length(), 8u);
    342 }
    343 
    344 // static
    345 void SdchManager::UrlSafeBase64Encode(const std::string& input,
    346                                       std::string* output) {
    347   // Since this is only done during a dictionary load, and hashes are only 8
    348   // characters, we just do the simple fixup, rather than rewriting the encoder.
    349   base::Base64Encode(input, output);
    350   for (size_t i = 0; i < output->size(); ++i) {
    351     switch (output->data()[i]) {
    352       case '+':
    353         (*output)[i] = '-';
    354         continue;
    355       case '/':
    356         (*output)[i] = '_';
    357         continue;
    358       default:
    359         continue;
    360     }
    361   }
    362 }
    363 
    364 //------------------------------------------------------------------------------
    365 // Security functions restricting loads and use of dictionaries.
    366 
    367 // static
    368 bool SdchManager::Dictionary::CanSet(const std::string& domain,
    369                                      const std::string& path,
    370                                      const std::set<int> ports,
    371                                      const GURL& dictionary_url) {
    372   if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url))
    373     return false;
    374   /*
    375   A dictionary is invalid and must not be stored if any of the following are
    376   true:
    377     1. The dictionary has no Domain attribute.
    378     2. The effective host name that derives from the referer URL host name does
    379       not domain-match the Domain attribute.
    380     3. The Domain attribute is a top level domain.
    381     4. The referer URL host is a host domain name (not IP address) and has the
    382       form HD, where D is the value of the Domain attribute, and H is a string
    383       that contains one or more dots.
    384     5. If the dictionary has a Port attribute and the referer URL's port was not
    385       in the list.
    386   */
    387 
    388   // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
    389   // and hence the conservative approach is to not allow any redirects (if there
    390   // were any... then don't allow the dictionary to be set).
    391 
    392   if (domain.empty()) {
    393     SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
    394     return false;  // Domain is required.
    395   }
    396   if (net::RegistryControlledDomainService::GetDomainAndRegistry(domain).size()
    397       == 0) {
    398     SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
    399     return false;  // domain was a TLD.
    400   }
    401   if (!Dictionary::DomainMatch(dictionary_url, domain)) {
    402     SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
    403     return false;
    404   }
    405 
    406   std::string referrer_url_host = dictionary_url.host();
    407   size_t postfix_domain_index = referrer_url_host.rfind(domain);
    408   // See if it is indeed a postfix, or just an internal string.
    409   if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
    410     // It is a postfix... so check to see if there's a dot in the prefix.
    411     size_t end_of_host_index = referrer_url_host.find_first_of('.');
    412     if (referrer_url_host.npos != end_of_host_index  &&
    413         end_of_host_index < postfix_domain_index) {
    414       SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
    415       return false;
    416     }
    417   }
    418 
    419   if (!ports.empty()
    420       && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
    421     SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
    422     return false;
    423   }
    424   return true;
    425 }
    426 
    427 // static
    428 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
    429   if (!SdchManager::Global()->IsInSupportedDomain(referring_url))
    430     return false;
    431   /*
    432     1. The request URL's host name domain-matches the Domain attribute of the
    433       dictionary.
    434     2. If the dictionary has a Port attribute, the request port is one of the
    435       ports listed in the Port attribute.
    436     3. The request URL path-matches the path attribute of the dictionary.
    437     4. The request is not an HTTPS request.
    438 */
    439   if (!DomainMatch(referring_url, domain_)) {
    440     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
    441     return false;
    442   }
    443   if (!ports_.empty()
    444       && 0 == ports_.count(referring_url.EffectiveIntPort())) {
    445     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
    446     return false;
    447   }
    448   if (path_.size() && !PathMatch(referring_url.path(), path_)) {
    449     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
    450     return false;
    451   }
    452   if (referring_url.SchemeIsSecure()) {
    453     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
    454     return false;
    455   }
    456 
    457   // TODO(jar): Remove overly restrictive failsafe test (added per security
    458   // review) when we have a need to be more general.
    459   if (!referring_url.SchemeIs("http")) {
    460     SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
    461     return false;
    462   }
    463 
    464   return true;
    465 }
    466 
    467 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
    468   if (!SdchManager::Global()->IsInSupportedDomain(target_url))
    469     return false;
    470   /* The specific rules of when a dictionary should be advertised in an
    471      Avail-Dictionary header are modeled after the rules for cookie scoping. The
    472      terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
    473      dictionary may be advertised in the Avail-Dictionaries header exactly when
    474      all of the following are true:
    475       1. The server's effective host name domain-matches the Domain attribute of
    476          the dictionary.
    477       2. If the dictionary has a Port attribute, the request port is one of the
    478          ports listed in the Port attribute.
    479       3. The request URI path-matches the path header of the dictionary.
    480       4. The request is not an HTTPS request.
    481     */
    482   if (!DomainMatch(target_url, domain_))
    483     return false;
    484   if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
    485     return false;
    486   if (path_.size() && !PathMatch(target_url.path(), path_))
    487     return false;
    488   if (target_url.SchemeIsSecure())
    489     return false;
    490   if (Time::Now() > expiration_)
    491     return false;
    492   return true;
    493 }
    494 
    495 bool SdchManager::Dictionary::PathMatch(const std::string& path,
    496                                         const std::string& restriction) {
    497   /*  Must be either:
    498   1. P2 is equal to P1
    499   2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
    500       character following P2 in P1 is "/".
    501       */
    502   if (path == restriction)
    503     return true;
    504   size_t prefix_length = restriction.size();
    505   if (prefix_length > path.size())
    506     return false;  // Can't be a prefix.
    507   if (0 != path.compare(0, prefix_length, restriction))
    508     return false;
    509   return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
    510 }
    511 
    512 // static
    513 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
    514                                           const std::string& restriction) {
    515   // TODO(jar): This is not precisely a domain match definition.
    516   return gurl.DomainIs(restriction.data(), restriction.size());
    517 }
    518 
    519 //------------------------------------------------------------------------------
    520 // Methods for supporting latency experiments.
    521 
    522 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
    523   return allow_latency_experiment_.end() !=
    524       allow_latency_experiment_.find(url.host());
    525 }
    526 
    527 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
    528   if (enable) {
    529     allow_latency_experiment_.insert(url.host());
    530     return;
    531   }
    532   ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
    533   if (allow_latency_experiment_.end() == it)
    534     return;  // It was already erased, or never allowed.
    535   SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
    536   allow_latency_experiment_.erase(it);
    537 }
    538