Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/base/sdch_manager.h"
      6 
      7 #include "base/base64.h"
      8 #include "base/logging.h"
      9 #include "base/metrics/histogram.h"
     10 #include "base/string_number_conversions.h"
     11 #include "base/string_util.h"
     12 #include "crypto/sha2.h"
     13 #include "net/base/registry_controlled_domain.h"
     14 #include "net/url_request/url_request_http_job.h"
     15 
     16 namespace net {
     17 
     18 //------------------------------------------------------------------------------
     19 // static
     20 const size_t SdchManager::kMaxDictionarySize = 1000000;
     21 
     22 // static
     23 const size_t SdchManager::kMaxDictionaryCount = 20;
     24 
     25 // static
     26 SdchManager* SdchManager::global_;
     27 
     28 //------------------------------------------------------------------------------
     29 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
     30                                     size_t offset,
     31                                     const std::string& client_hash,
     32                                     const GURL& gurl,
     33                                     const std::string& domain,
     34                                     const std::string& path,
     35                                     const base::Time& expiration,
     36                                     const std::set<int>& ports)
     37     : text_(dictionary_text, offset),
     38       client_hash_(client_hash),
     39       url_(gurl),
     40       domain_(domain),
     41       path_(path),
     42       expiration_(expiration),
     43       ports_(ports) {
     44 }
     45 
     46 SdchManager::Dictionary::~Dictionary() {
     47 }
     48 
     49 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
     50   if (!SdchManager::Global()->IsInSupportedDomain(target_url))
     51     return false;
     52   /* The specific rules of when a dictionary should be advertised in an
     53      Avail-Dictionary header are modeled after the rules for cookie scoping. The
     54      terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
     55      dictionary may be advertised in the Avail-Dictionaries header exactly when
     56      all of the following are true:
     57       1. The server's effective host name domain-matches the Domain attribute of
     58          the dictionary.
     59       2. If the dictionary has a Port attribute, the request port is one of the
     60          ports listed in the Port attribute.
     61       3. The request URI path-matches the path header of the dictionary.
     62       4. The request is not an HTTPS request.
     63     */
     64   if (!DomainMatch(target_url, domain_))
     65     return false;
     66   if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
     67     return false;
     68   if (path_.size() && !PathMatch(target_url.path(), path_))
     69     return false;
     70   if (target_url.SchemeIsSecure())
     71     return false;
     72   if (base::Time::Now() > expiration_)
     73     return false;
     74   return true;
     75 }
     76 
     77 //------------------------------------------------------------------------------
     78 // Security functions restricting loads and use of dictionaries.
     79 
     80 // static
     81 bool SdchManager::Dictionary::CanSet(const std::string& domain,
     82                                      const std::string& path,
     83                                      const std::set<int>& ports,
     84                                      const GURL& dictionary_url) {
     85   if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url))
     86     return false;
     87   /*
     88   A dictionary is invalid and must not be stored if any of the following are
     89   true:
     90     1. The dictionary has no Domain attribute.
     91     2. The effective host name that derives from the referer URL host name does
     92       not domain-match the Domain attribute.
     93     3. The Domain attribute is a top level domain.
     94     4. The referer URL host is a host domain name (not IP address) and has the
     95       form HD, where D is the value of the Domain attribute, and H is a string
     96       that contains one or more dots.
     97     5. If the dictionary has a Port attribute and the referer URL's port was not
     98       in the list.
     99   */
    100 
    101   // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
    102   // and hence the conservative approach is to not allow any redirects (if there
    103   // were any... then don't allow the dictionary to be set).
    104 
    105   if (domain.empty()) {
    106     SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
    107     return false;  // Domain is required.
    108   }
    109   if (RegistryControlledDomainService::GetDomainAndRegistry(domain).size()
    110       == 0) {
    111     SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
    112     return false;  // domain was a TLD.
    113   }
    114   if (!Dictionary::DomainMatch(dictionary_url, domain)) {
    115     SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
    116     return false;
    117   }
    118 
    119   std::string referrer_url_host = dictionary_url.host();
    120   size_t postfix_domain_index = referrer_url_host.rfind(domain);
    121   // See if it is indeed a postfix, or just an internal string.
    122   if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
    123     // It is a postfix... so check to see if there's a dot in the prefix.
    124     size_t end_of_host_index = referrer_url_host.find_first_of('.');
    125     if (referrer_url_host.npos != end_of_host_index  &&
    126         end_of_host_index < postfix_domain_index) {
    127       SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
    128       return false;
    129     }
    130   }
    131 
    132   if (!ports.empty()
    133       && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
    134     SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
    135     return false;
    136   }
    137   return true;
    138 }
    139 
    140 // static
    141 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
    142   if (!SdchManager::Global()->IsInSupportedDomain(referring_url))
    143     return false;
    144   /*
    145     1. The request URL's host name domain-matches the Domain attribute of the
    146       dictionary.
    147     2. If the dictionary has a Port attribute, the request port is one of the
    148       ports listed in the Port attribute.
    149     3. The request URL path-matches the path attribute of the dictionary.
    150     4. The request is not an HTTPS request.
    151 */
    152   if (!DomainMatch(referring_url, domain_)) {
    153     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
    154     return false;
    155   }
    156   if (!ports_.empty()
    157       && 0 == ports_.count(referring_url.EffectiveIntPort())) {
    158     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
    159     return false;
    160   }
    161   if (path_.size() && !PathMatch(referring_url.path(), path_)) {
    162     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
    163     return false;
    164   }
    165   if (referring_url.SchemeIsSecure()) {
    166     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
    167     return false;
    168   }
    169 
    170   // TODO(jar): Remove overly restrictive failsafe test (added per security
    171   // review) when we have a need to be more general.
    172   if (!referring_url.SchemeIs("http")) {
    173     SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
    174     return false;
    175   }
    176 
    177   return true;
    178 }
    179 
    180 bool SdchManager::Dictionary::PathMatch(const std::string& path,
    181                                         const std::string& restriction) {
    182   /*  Must be either:
    183   1. P2 is equal to P1
    184   2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
    185       character following P2 in P1 is "/".
    186       */
    187   if (path == restriction)
    188     return true;
    189   size_t prefix_length = restriction.size();
    190   if (prefix_length > path.size())
    191     return false;  // Can't be a prefix.
    192   if (0 != path.compare(0, prefix_length, restriction))
    193     return false;
    194   return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
    195 }
    196 
    197 // static
    198 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
    199                                           const std::string& restriction) {
    200   // TODO(jar): This is not precisely a domain match definition.
    201   return gurl.DomainIs(restriction.data(), restriction.size());
    202 }
    203 
    204 //------------------------------------------------------------------------------
    205 SdchManager::SdchManager() : sdch_enabled_(false) {
    206   DCHECK(!global_);
    207   global_ = this;
    208 }
    209 
    210 SdchManager::~SdchManager() {
    211   DCHECK(global_ == this);
    212   while (!dictionaries_.empty()) {
    213     DictionaryMap::iterator it = dictionaries_.begin();
    214     it->second->Release();
    215     dictionaries_.erase(it->first);
    216   }
    217   global_ = NULL;
    218 }
    219 
    220 // static
    221 void SdchManager::Shutdown() {
    222   if (!global_ )
    223     return;
    224   global_->fetcher_.reset(NULL);
    225 }
    226 
    227 // static
    228 SdchManager* SdchManager::Global() {
    229   return global_;
    230 }
    231 
    232 // static
    233 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
    234   UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
    235 }
    236 
    237 void SdchManager::EnableSdchSupport(const std::string& domain) {
    238   // We presume that there is a SDCH manager instance.
    239   global_->supported_domain_ = domain;
    240   global_->sdch_enabled_ = true;
    241 }
    242 
    243 // static
    244 void SdchManager::BlacklistDomain(const GURL& url) {
    245   if (!global_ )
    246     return;
    247   global_->SetAllowLatencyExperiment(url, false);
    248 
    249   std::string domain(StringToLowerASCII(url.host()));
    250   int count = global_->blacklisted_domains_[domain];
    251   if (count > 0)
    252     return;  // Domain is already blacklisted.
    253 
    254   count = 1 + 2 * global_->exponential_blacklist_count[domain];
    255   if (count > 0)
    256     global_->exponential_blacklist_count[domain] = count;
    257   else
    258     count = INT_MAX;
    259 
    260   global_->blacklisted_domains_[domain] = count;
    261 }
    262 
    263 // static
    264 void SdchManager::BlacklistDomainForever(const GURL& url) {
    265   if (!global_ )
    266     return;
    267   global_->SetAllowLatencyExperiment(url, false);
    268 
    269   std::string domain(StringToLowerASCII(url.host()));
    270   global_->exponential_blacklist_count[domain] = INT_MAX;
    271   global_->blacklisted_domains_[domain] = INT_MAX;
    272 }
    273 
    274 // static
    275 void SdchManager::ClearBlacklistings() {
    276   Global()->blacklisted_domains_.clear();
    277   Global()->exponential_blacklist_count.clear();
    278 }
    279 
    280 // static
    281 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
    282   Global()->blacklisted_domains_.erase(StringToLowerASCII(domain));
    283 }
    284 
    285 // static
    286 int SdchManager::BlackListDomainCount(const std::string& domain) {
    287   if (Global()->blacklisted_domains_.end() ==
    288       Global()->blacklisted_domains_.find(domain))
    289     return 0;
    290   return Global()->blacklisted_domains_[StringToLowerASCII(domain)];
    291 }
    292 
    293 // static
    294 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
    295   if (Global()->exponential_blacklist_count.end() ==
    296       Global()->exponential_blacklist_count.find(domain))
    297     return 0;
    298   return Global()->exponential_blacklist_count[StringToLowerASCII(domain)];
    299 }
    300 
    301 bool SdchManager::IsInSupportedDomain(const GURL& url) {
    302   if (!sdch_enabled_ )
    303     return false;
    304   if (!supported_domain_.empty() &&
    305       !url.DomainIs(supported_domain_.data(), supported_domain_.size()))
    306      return false;  // It is not the singular supported domain.
    307 
    308   if (blacklisted_domains_.empty())
    309     return true;
    310 
    311   std::string domain(StringToLowerASCII(url.host()));
    312   DomainCounter::iterator it = blacklisted_domains_.find(domain);
    313   if (blacklisted_domains_.end() == it)
    314     return true;
    315 
    316   int count = it->second - 1;
    317   if (count > 0)
    318     blacklisted_domains_[domain] = count;
    319   else
    320     blacklisted_domains_.erase(domain);
    321   SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
    322   return false;
    323 }
    324 
    325 void SdchManager::FetchDictionary(const GURL& request_url,
    326                                   const GURL& dictionary_url) {
    327   if (SdchManager::Global()->CanFetchDictionary(request_url, dictionary_url) &&
    328       fetcher_.get())
    329     fetcher_->Schedule(dictionary_url);
    330 }
    331 
    332 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
    333                                      const GURL& dictionary_url) const {
    334   /* The user agent may retrieve a dictionary from the dictionary URL if all of
    335      the following are true:
    336        1 The dictionary URL host name matches the referrer URL host name
    337        2 The dictionary URL host name domain matches the parent domain of the
    338            referrer URL host name
    339        3 The parent domain of the referrer URL host name is not a top level
    340            domain
    341        4 The dictionary URL is not an HTTPS URL.
    342    */
    343   // Item (1) above implies item (2).  Spec should be updated.
    344   // I take "host name match" to be "is identical to"
    345   if (referring_url.host() != dictionary_url.host()) {
    346     SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
    347     return false;
    348   }
    349   if (referring_url.SchemeIs("https")) {
    350     SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
    351     return false;
    352   }
    353 
    354   // TODO(jar): Remove this failsafe conservative hack which is more restrictive
    355   // than current SDCH spec when needed, and justified by security audit.
    356   if (!referring_url.SchemeIs("http")) {
    357     SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
    358     return false;
    359   }
    360 
    361   return true;
    362 }
    363 
    364 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
    365     const GURL& dictionary_url) {
    366   std::string client_hash;
    367   std::string server_hash;
    368   GenerateHash(dictionary_text, &client_hash, &server_hash);
    369   if (dictionaries_.find(server_hash) != dictionaries_.end()) {
    370     SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
    371     return false;  // Already loaded.
    372   }
    373 
    374   std::string domain, path;
    375   std::set<int> ports;
    376   base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
    377 
    378   if (dictionary_text.empty()) {
    379     SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
    380     return false;  // Missing header.
    381   }
    382 
    383   size_t header_end = dictionary_text.find("\n\n");
    384   if (std::string::npos == header_end) {
    385     SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
    386     return false;  // Missing header.
    387   }
    388   size_t line_start = 0;  // Start of line being parsed.
    389   while (1) {
    390     size_t line_end = dictionary_text.find('\n', line_start);
    391     DCHECK(std::string::npos != line_end);
    392     DCHECK(line_end <= header_end);
    393 
    394     size_t colon_index = dictionary_text.find(':', line_start);
    395     if (std::string::npos == colon_index) {
    396       SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
    397       return false;  // Illegal line missing a colon.
    398     }
    399 
    400     if (colon_index > line_end)
    401       break;
    402 
    403     size_t value_start = dictionary_text.find_first_not_of(" \t",
    404                                                            colon_index + 1);
    405     if (std::string::npos != value_start) {
    406       if (value_start >= line_end)
    407         break;
    408       std::string name(dictionary_text, line_start, colon_index - line_start);
    409       std::string value(dictionary_text, value_start, line_end - value_start);
    410       name = StringToLowerASCII(name);
    411       if (name == "domain") {
    412         domain = value;
    413       } else if (name == "path") {
    414         path = value;
    415       } else if (name == "format-version") {
    416         if (value != "1.0")
    417           return false;
    418       } else if (name == "max-age") {
    419         int64 seconds;
    420         base::StringToInt64(value, &seconds);
    421         expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
    422       } else if (name == "port") {
    423         int port;
    424         base::StringToInt(value, &port);
    425         if (port >= 0)
    426           ports.insert(port);
    427       }
    428     }
    429 
    430     if (line_end >= header_end)
    431       break;
    432     line_start = line_end + 1;
    433   }
    434 
    435   if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
    436     return false;
    437 
    438   // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
    439   // useless dictionaries.  We should probably have a cache eviction plan,
    440   // instead of just blocking additions.  For now, with the spec in flux, it
    441   // is probably not worth doing eviction handling.
    442   if (kMaxDictionarySize < dictionary_text.size()) {
    443     SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
    444     return false;
    445   }
    446   if (kMaxDictionaryCount <= dictionaries_.size()) {
    447     SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
    448     return false;
    449   }
    450 
    451   UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
    452   DVLOG(1) << "Loaded dictionary with client hash " << client_hash
    453            << " and server hash " << server_hash;
    454   Dictionary* dictionary =
    455       new Dictionary(dictionary_text, header_end + 2, client_hash,
    456                      dictionary_url, domain, path, expiration, ports);
    457   dictionary->AddRef();
    458   dictionaries_[server_hash] = dictionary;
    459   return true;
    460 }
    461 
    462 void SdchManager::GetVcdiffDictionary(const std::string& server_hash,
    463     const GURL& referring_url, Dictionary** dictionary) {
    464   *dictionary = NULL;
    465   DictionaryMap::iterator it = dictionaries_.find(server_hash);
    466   if (it == dictionaries_.end()) {
    467     return;
    468   }
    469   Dictionary* matching_dictionary = it->second;
    470   if (!matching_dictionary->CanUse(referring_url))
    471     return;
    472   *dictionary = matching_dictionary;
    473 }
    474 
    475 // TODO(jar): If we have evictions from the dictionaries_, then we need to
    476 // change this interface to return a list of reference counted Dictionary
    477 // instances that can be used if/when a server specifies one.
    478 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
    479                                          std::string* list) {
    480   int count = 0;
    481   for (DictionaryMap::iterator it = dictionaries_.begin();
    482        it != dictionaries_.end(); ++it) {
    483     if (!it->second->CanAdvertise(target_url))
    484       continue;
    485     ++count;
    486     if (!list->empty())
    487       list->append(",");
    488     list->append(it->second->client_hash());
    489   }
    490   // Watch to see if we have corrupt or numerous dictionaries.
    491   if (count > 0)
    492     UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
    493 }
    494 
    495 // static
    496 void SdchManager::GenerateHash(const std::string& dictionary_text,
    497     std::string* client_hash, std::string* server_hash) {
    498   char binary_hash[32];
    499   crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
    500 
    501   std::string first_48_bits(&binary_hash[0], 6);
    502   std::string second_48_bits(&binary_hash[6], 6);
    503   UrlSafeBase64Encode(first_48_bits, client_hash);
    504   UrlSafeBase64Encode(second_48_bits, server_hash);
    505 
    506   DCHECK_EQ(server_hash->length(), 8u);
    507   DCHECK_EQ(client_hash->length(), 8u);
    508 }
    509 
    510 //------------------------------------------------------------------------------
    511 // Methods for supporting latency experiments.
    512 
    513 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
    514   return allow_latency_experiment_.end() !=
    515       allow_latency_experiment_.find(url.host());
    516 }
    517 
    518 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
    519   if (enable) {
    520     allow_latency_experiment_.insert(url.host());
    521     return;
    522   }
    523   ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
    524   if (allow_latency_experiment_.end() == it)
    525     return;  // It was already erased, or never allowed.
    526   SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
    527   allow_latency_experiment_.erase(it);
    528 }
    529 
    530 // static
    531 void SdchManager::UrlSafeBase64Encode(const std::string& input,
    532                                       std::string* output) {
    533   // Since this is only done during a dictionary load, and hashes are only 8
    534   // characters, we just do the simple fixup, rather than rewriting the encoder.
    535   base::Base64Encode(input, output);
    536   for (size_t i = 0; i < output->size(); ++i) {
    537     switch (output->data()[i]) {
    538       case '+':
    539         (*output)[i] = '-';
    540         continue;
    541       case '/':
    542         (*output)[i] = '_';
    543         continue;
    544       default:
    545         continue;
    546     }
    547   }
    548 }
    549 
    550 }  // namespace net
    551