Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/base/sdch_manager.h"
      6 
      7 #include "base/base64.h"
      8 #include "base/logging.h"
      9 #include "base/metrics/histogram.h"
     10 #include "base/strings/string_number_conversions.h"
     11 #include "base/strings/string_util.h"
     12 #include "crypto/sha2.h"
     13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
     14 #include "net/url_request/url_request_http_job.h"
     15 
     16 namespace net {
     17 
     18 //------------------------------------------------------------------------------
     19 // static
     20 
     21 // Adjust SDCH limits downwards for mobile.
     22 #if defined(OS_ANDROID) || defined(OS_IOS)
     23 // static
     24 const size_t SdchManager::kMaxDictionaryCount = 1;
     25 const size_t SdchManager::kMaxDictionarySize = 150 * 1000;
     26 #else
     27 // static
     28 const size_t SdchManager::kMaxDictionaryCount = 20;
     29 const size_t SdchManager::kMaxDictionarySize = 1000 * 1000;
     30 #endif
     31 
     32 // static
     33 bool SdchManager::g_sdch_enabled_ = true;
     34 
     35 // static
     36 bool SdchManager::g_secure_scheme_supported_ = false;
     37 
     38 //------------------------------------------------------------------------------
     39 SdchManager::Dictionary::Dictionary(const std::string& dictionary_text,
     40                                     size_t offset,
     41                                     const std::string& client_hash,
     42                                     const GURL& gurl,
     43                                     const std::string& domain,
     44                                     const std::string& path,
     45                                     const base::Time& expiration,
     46                                     const std::set<int>& ports)
     47     : text_(dictionary_text, offset),
     48       client_hash_(client_hash),
     49       url_(gurl),
     50       domain_(domain),
     51       path_(path),
     52       expiration_(expiration),
     53       ports_(ports) {
     54 }
     55 
     56 SdchManager::Dictionary::~Dictionary() {
     57 }
     58 
     59 bool SdchManager::Dictionary::CanAdvertise(const GURL& target_url) {
     60   /* The specific rules of when a dictionary should be advertised in an
     61      Avail-Dictionary header are modeled after the rules for cookie scoping. The
     62      terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
     63      dictionary may be advertised in the Avail-Dictionaries header exactly when
     64      all of the following are true:
     65       1. The server's effective host name domain-matches the Domain attribute of
     66          the dictionary.
     67       2. If the dictionary has a Port attribute, the request port is one of the
     68          ports listed in the Port attribute.
     69       3. The request URI path-matches the path header of the dictionary.
     70       4. The request is not an HTTPS request.
     71      We can override (ignore) item (4) only when we have explicitly enabled
     72      HTTPS support AND dictionary has been acquired over HTTPS.
     73     */
     74   if (!DomainMatch(target_url, domain_))
     75     return false;
     76   if (!ports_.empty() && 0 == ports_.count(target_url.EffectiveIntPort()))
     77     return false;
     78   if (path_.size() && !PathMatch(target_url.path(), path_))
     79     return false;
     80   if (!SdchManager::secure_scheme_supported() && target_url.SchemeIsSecure())
     81     return false;
     82   if (target_url.SchemeIsSecure() && !url_.SchemeIsSecure())
     83     return false;
     84   if (base::Time::Now() > expiration_)
     85     return false;
     86   return true;
     87 }
     88 
     89 //------------------------------------------------------------------------------
     90 // Security functions restricting loads and use of dictionaries.
     91 
     92 // static
     93 bool SdchManager::Dictionary::CanSet(const std::string& domain,
     94                                      const std::string& path,
     95                                      const std::set<int>& ports,
     96                                      const GURL& dictionary_url) {
     97   /*
     98   A dictionary is invalid and must not be stored if any of the following are
     99   true:
    100     1. The dictionary has no Domain attribute.
    101     2. The effective host name that derives from the referer URL host name does
    102       not domain-match the Domain attribute.
    103     3. The Domain attribute is a top level domain.
    104     4. The referer URL host is a host domain name (not IP address) and has the
    105       form HD, where D is the value of the Domain attribute, and H is a string
    106       that contains one or more dots.
    107     5. If the dictionary has a Port attribute and the referer URL's port was not
    108       in the list.
    109   */
    110 
    111   // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
    112   // and hence the conservative approach is to not allow any redirects (if there
    113   // were any... then don't allow the dictionary to be set).
    114 
    115   if (domain.empty()) {
    116     SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER);
    117     return false;  // Domain is required.
    118   }
    119   if (registry_controlled_domains::GetDomainAndRegistry(
    120         domain,
    121         registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES).empty()) {
    122     SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN);
    123     return false;  // domain was a TLD.
    124   }
    125   if (!Dictionary::DomainMatch(dictionary_url, domain)) {
    126     SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL);
    127     return false;
    128   }
    129 
    130   std::string referrer_url_host = dictionary_url.host();
    131   size_t postfix_domain_index = referrer_url_host.rfind(domain);
    132   // See if it is indeed a postfix, or just an internal string.
    133   if (referrer_url_host.size() == postfix_domain_index + domain.size()) {
    134     // It is a postfix... so check to see if there's a dot in the prefix.
    135     size_t end_of_host_index = referrer_url_host.find_first_of('.');
    136     if (referrer_url_host.npos != end_of_host_index  &&
    137         end_of_host_index < postfix_domain_index) {
    138       SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX);
    139       return false;
    140     }
    141   }
    142 
    143   if (!ports.empty()
    144       && 0 == ports.count(dictionary_url.EffectiveIntPort())) {
    145     SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL);
    146     return false;
    147   }
    148   return true;
    149 }
    150 
    151 // static
    152 bool SdchManager::Dictionary::CanUse(const GURL& referring_url) {
    153   /*
    154     1. The request URL's host name domain-matches the Domain attribute of the
    155       dictionary.
    156     2. If the dictionary has a Port attribute, the request port is one of the
    157       ports listed in the Port attribute.
    158     3. The request URL path-matches the path attribute of the dictionary.
    159     4. The request is not an HTTPS request.
    160     We can override (ignore) item (4) only when we have explicitly enabled
    161     HTTPS support AND dictionary has been acquired over HTTPS.
    162 */
    163   if (!DomainMatch(referring_url, domain_)) {
    164     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN);
    165     return false;
    166   }
    167   if (!ports_.empty()
    168       && 0 == ports_.count(referring_url.EffectiveIntPort())) {
    169     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST);
    170     return false;
    171   }
    172   if (path_.size() && !PathMatch(referring_url.path(), path_)) {
    173     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH);
    174     return false;
    175   }
    176   if (!SdchManager::secure_scheme_supported() &&
    177       referring_url.SchemeIsSecure()) {
    178     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
    179     return false;
    180   }
    181   if (referring_url.SchemeIsSecure() && !url_.SchemeIsSecure()) {
    182     SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME);
    183     return false;
    184   }
    185 
    186   // TODO(jar): Remove overly restrictive failsafe test (added per security
    187   // review) when we have a need to be more general.
    188   if (!referring_url.SchemeIsHTTPOrHTTPS()) {
    189     SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA);
    190     return false;
    191   }
    192 
    193   return true;
    194 }
    195 
    196 bool SdchManager::Dictionary::PathMatch(const std::string& path,
    197                                         const std::string& restriction) {
    198   /*  Must be either:
    199   1. P2 is equal to P1
    200   2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
    201       character following P2 in P1 is "/".
    202       */
    203   if (path == restriction)
    204     return true;
    205   size_t prefix_length = restriction.size();
    206   if (prefix_length > path.size())
    207     return false;  // Can't be a prefix.
    208   if (0 != path.compare(0, prefix_length, restriction))
    209     return false;
    210   return restriction[prefix_length - 1] == '/' || path[prefix_length] == '/';
    211 }
    212 
    213 // static
    214 bool SdchManager::Dictionary::DomainMatch(const GURL& gurl,
    215                                           const std::string& restriction) {
    216   // TODO(jar): This is not precisely a domain match definition.
    217   return gurl.DomainIs(restriction.data(), restriction.size());
    218 }
    219 
    220 //------------------------------------------------------------------------------
    221 SdchManager::SdchManager() {
    222   DCHECK(CalledOnValidThread());
    223 }
    224 
    225 SdchManager::~SdchManager() {
    226   DCHECK(CalledOnValidThread());
    227   while (!dictionaries_.empty()) {
    228     DictionaryMap::iterator it = dictionaries_.begin();
    229     dictionaries_.erase(it->first);
    230   }
    231 }
    232 
    233 void SdchManager::ClearData() {
    234   blacklisted_domains_.clear();
    235   exponential_blacklist_count_.clear();
    236   allow_latency_experiment_.clear();
    237   if (fetcher_.get())
    238     fetcher_->Cancel();
    239 
    240   // Note that this may result in not having dictionaries we've advertised
    241   // for incoming responses.  The window is relatively small (as ClearData()
    242   // is not expected to be called frequently), so we rely on meta-refresh
    243   // to handle this case.
    244   dictionaries_.clear();
    245 }
    246 
    247 // static
    248 void SdchManager::SdchErrorRecovery(ProblemCodes problem) {
    249   UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem, MAX_PROBLEM_CODE);
    250 }
    251 
    252 void SdchManager::set_sdch_fetcher(SdchFetcher* fetcher) {
    253   DCHECK(CalledOnValidThread());
    254   fetcher_.reset(fetcher);
    255 }
    256 
    257 // static
    258 void SdchManager::EnableSdchSupport(bool enabled) {
    259   g_sdch_enabled_ = enabled;
    260 }
    261 
    262 // static
    263 void SdchManager::EnableSecureSchemeSupport(bool enabled) {
    264   g_secure_scheme_supported_ = enabled;
    265 }
    266 
    267 void SdchManager::BlacklistDomain(const GURL& url) {
    268   SetAllowLatencyExperiment(url, false);
    269 
    270   std::string domain(StringToLowerASCII(url.host()));
    271   int count = blacklisted_domains_[domain];
    272   if (count > 0)
    273     return;  // Domain is already blacklisted.
    274 
    275   count = 1 + 2 * exponential_blacklist_count_[domain];
    276   if (count > 0)
    277     exponential_blacklist_count_[domain] = count;
    278   else
    279     count = INT_MAX;
    280 
    281   blacklisted_domains_[domain] = count;
    282 }
    283 
    284 void SdchManager::BlacklistDomainForever(const GURL& url) {
    285   SetAllowLatencyExperiment(url, false);
    286 
    287   std::string domain(StringToLowerASCII(url.host()));
    288   exponential_blacklist_count_[domain] = INT_MAX;
    289   blacklisted_domains_[domain] = INT_MAX;
    290 }
    291 
    292 void SdchManager::ClearBlacklistings() {
    293   blacklisted_domains_.clear();
    294   exponential_blacklist_count_.clear();
    295 }
    296 
    297 void SdchManager::ClearDomainBlacklisting(const std::string& domain) {
    298   blacklisted_domains_.erase(StringToLowerASCII(domain));
    299 }
    300 
    301 int SdchManager::BlackListDomainCount(const std::string& domain) {
    302   if (blacklisted_domains_.end() == blacklisted_domains_.find(domain))
    303     return 0;
    304   return blacklisted_domains_[StringToLowerASCII(domain)];
    305 }
    306 
    307 int SdchManager::BlacklistDomainExponential(const std::string& domain) {
    308   if (exponential_blacklist_count_.end() ==
    309       exponential_blacklist_count_.find(domain))
    310     return 0;
    311   return exponential_blacklist_count_[StringToLowerASCII(domain)];
    312 }
    313 
    314 bool SdchManager::IsInSupportedDomain(const GURL& url) {
    315   DCHECK(CalledOnValidThread());
    316   if (!g_sdch_enabled_ )
    317     return false;
    318 
    319   if (!secure_scheme_supported() && url.SchemeIsSecure())
    320     return false;
    321 
    322   if (blacklisted_domains_.empty())
    323     return true;
    324 
    325   std::string domain(StringToLowerASCII(url.host()));
    326   DomainCounter::iterator it = blacklisted_domains_.find(domain);
    327   if (blacklisted_domains_.end() == it)
    328     return true;
    329 
    330   int count = it->second - 1;
    331   if (count > 0)
    332     blacklisted_domains_[domain] = count;
    333   else
    334     blacklisted_domains_.erase(domain);
    335   SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET);
    336   return false;
    337 }
    338 
    339 void SdchManager::FetchDictionary(const GURL& request_url,
    340                                   const GURL& dictionary_url) {
    341   DCHECK(CalledOnValidThread());
    342   if (CanFetchDictionary(request_url, dictionary_url) && fetcher_.get())
    343     fetcher_->Schedule(dictionary_url);
    344 }
    345 
    346 bool SdchManager::CanFetchDictionary(const GURL& referring_url,
    347                                      const GURL& dictionary_url) const {
    348   DCHECK(CalledOnValidThread());
    349   /* The user agent may retrieve a dictionary from the dictionary URL if all of
    350      the following are true:
    351        1 The dictionary URL host name matches the referrer URL host name and
    352            scheme.
    353        2 The dictionary URL host name domain matches the parent domain of the
    354            referrer URL host name
    355        3 The parent domain of the referrer URL host name is not a top level
    356            domain
    357        4 The dictionary URL is not an HTTPS URL.
    358    */
    359   // Item (1) above implies item (2).  Spec should be updated.
    360   // I take "host name match" to be "is identical to"
    361   if (referring_url.host() != dictionary_url.host() ||
    362       referring_url.scheme() != dictionary_url.scheme()) {
    363     SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST);
    364     return false;
    365   }
    366   if (!secure_scheme_supported() && referring_url.SchemeIsSecure()) {
    367     SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL);
    368     return false;
    369   }
    370 
    371   // TODO(jar): Remove this failsafe conservative hack which is more restrictive
    372   // than current SDCH spec when needed, and justified by security audit.
    373   if (!referring_url.SchemeIsHTTPOrHTTPS()) {
    374     SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP);
    375     return false;
    376   }
    377 
    378   return true;
    379 }
    380 
    381 bool SdchManager::AddSdchDictionary(const std::string& dictionary_text,
    382     const GURL& dictionary_url) {
    383   DCHECK(CalledOnValidThread());
    384   std::string client_hash;
    385   std::string server_hash;
    386   GenerateHash(dictionary_text, &client_hash, &server_hash);
    387   if (dictionaries_.find(server_hash) != dictionaries_.end()) {
    388     SdchErrorRecovery(DICTIONARY_ALREADY_LOADED);
    389     return false;  // Already loaded.
    390   }
    391 
    392   std::string domain, path;
    393   std::set<int> ports;
    394   base::Time expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
    395 
    396   if (dictionary_text.empty()) {
    397     SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT);
    398     return false;  // Missing header.
    399   }
    400 
    401   size_t header_end = dictionary_text.find("\n\n");
    402   if (std::string::npos == header_end) {
    403     SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER);
    404     return false;  // Missing header.
    405   }
    406   size_t line_start = 0;  // Start of line being parsed.
    407   while (1) {
    408     size_t line_end = dictionary_text.find('\n', line_start);
    409     DCHECK(std::string::npos != line_end);
    410     DCHECK_LE(line_end, header_end);
    411 
    412     size_t colon_index = dictionary_text.find(':', line_start);
    413     if (std::string::npos == colon_index) {
    414       SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON);
    415       return false;  // Illegal line missing a colon.
    416     }
    417 
    418     if (colon_index > line_end)
    419       break;
    420 
    421     size_t value_start = dictionary_text.find_first_not_of(" \t",
    422                                                            colon_index + 1);
    423     if (std::string::npos != value_start) {
    424       if (value_start >= line_end)
    425         break;
    426       std::string name(dictionary_text, line_start, colon_index - line_start);
    427       std::string value(dictionary_text, value_start, line_end - value_start);
    428       name = StringToLowerASCII(name);
    429       if (name == "domain") {
    430         domain = value;
    431       } else if (name == "path") {
    432         path = value;
    433       } else if (name == "format-version") {
    434         if (value != "1.0")
    435           return false;
    436       } else if (name == "max-age") {
    437         int64 seconds;
    438         base::StringToInt64(value, &seconds);
    439         expiration = base::Time::Now() + base::TimeDelta::FromSeconds(seconds);
    440       } else if (name == "port") {
    441         int port;
    442         base::StringToInt(value, &port);
    443         if (port >= 0)
    444           ports.insert(port);
    445       }
    446     }
    447 
    448     if (line_end >= header_end)
    449       break;
    450     line_start = line_end + 1;
    451   }
    452 
    453   if (!IsInSupportedDomain(dictionary_url))
    454     return false;
    455 
    456   if (!Dictionary::CanSet(domain, path, ports, dictionary_url))
    457     return false;
    458 
    459   // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
    460   // useless dictionaries.  We should probably have a cache eviction plan,
    461   // instead of just blocking additions.  For now, with the spec in flux, it
    462   // is probably not worth doing eviction handling.
    463   if (kMaxDictionarySize < dictionary_text.size()) {
    464     SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE);
    465     return false;
    466   }
    467   if (kMaxDictionaryCount <= dictionaries_.size()) {
    468     SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED);
    469     return false;
    470   }
    471 
    472   UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text.size());
    473   DVLOG(1) << "Loaded dictionary with client hash " << client_hash
    474            << " and server hash " << server_hash;
    475   Dictionary* dictionary =
    476       new Dictionary(dictionary_text, header_end + 2, client_hash,
    477                      dictionary_url, domain, path, expiration, ports);
    478   dictionaries_[server_hash] = dictionary;
    479   return true;
    480 }
    481 
    482 void SdchManager::GetVcdiffDictionary(
    483     const std::string& server_hash,
    484     const GURL& referring_url,
    485     scoped_refptr<Dictionary>* dictionary) {
    486   DCHECK(CalledOnValidThread());
    487   *dictionary = NULL;
    488   DictionaryMap::iterator it = dictionaries_.find(server_hash);
    489   if (it == dictionaries_.end()) {
    490     return;
    491   }
    492   scoped_refptr<Dictionary> matching_dictionary = it->second;
    493   if (!IsInSupportedDomain(referring_url))
    494     return;
    495   if (!matching_dictionary->CanUse(referring_url))
    496     return;
    497   *dictionary = matching_dictionary;
    498 }
    499 
    500 // TODO(jar): If we have evictions from the dictionaries_, then we need to
    501 // change this interface to return a list of reference counted Dictionary
    502 // instances that can be used if/when a server specifies one.
    503 void SdchManager::GetAvailDictionaryList(const GURL& target_url,
    504                                          std::string* list) {
    505   DCHECK(CalledOnValidThread());
    506   int count = 0;
    507   for (DictionaryMap::iterator it = dictionaries_.begin();
    508        it != dictionaries_.end(); ++it) {
    509     if (!IsInSupportedDomain(target_url))
    510       continue;
    511     if (!it->second->CanAdvertise(target_url))
    512       continue;
    513     ++count;
    514     if (!list->empty())
    515       list->append(",");
    516     list->append(it->second->client_hash());
    517   }
    518   // Watch to see if we have corrupt or numerous dictionaries.
    519   if (count > 0)
    520     UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count);
    521 }
    522 
    523 // static
    524 void SdchManager::GenerateHash(const std::string& dictionary_text,
    525     std::string* client_hash, std::string* server_hash) {
    526   char binary_hash[32];
    527   crypto::SHA256HashString(dictionary_text, binary_hash, sizeof(binary_hash));
    528 
    529   std::string first_48_bits(&binary_hash[0], 6);
    530   std::string second_48_bits(&binary_hash[6], 6);
    531   UrlSafeBase64Encode(first_48_bits, client_hash);
    532   UrlSafeBase64Encode(second_48_bits, server_hash);
    533 
    534   DCHECK_EQ(server_hash->length(), 8u);
    535   DCHECK_EQ(client_hash->length(), 8u);
    536 }
    537 
    538 //------------------------------------------------------------------------------
    539 // Methods for supporting latency experiments.
    540 
    541 bool SdchManager::AllowLatencyExperiment(const GURL& url) const {
    542   DCHECK(CalledOnValidThread());
    543   return allow_latency_experiment_.end() !=
    544       allow_latency_experiment_.find(url.host());
    545 }
    546 
    547 void SdchManager::SetAllowLatencyExperiment(const GURL& url, bool enable) {
    548   DCHECK(CalledOnValidThread());
    549   if (enable) {
    550     allow_latency_experiment_.insert(url.host());
    551     return;
    552   }
    553   ExperimentSet::iterator it = allow_latency_experiment_.find(url.host());
    554   if (allow_latency_experiment_.end() == it)
    555     return;  // It was already erased, or never allowed.
    556   SdchErrorRecovery(LATENCY_TEST_DISALLOWED);
    557   allow_latency_experiment_.erase(it);
    558 }
    559 
    560 // static
    561 void SdchManager::UrlSafeBase64Encode(const std::string& input,
    562                                       std::string* output) {
    563   // Since this is only done during a dictionary load, and hashes are only 8
    564   // characters, we just do the simple fixup, rather than rewriting the encoder.
    565   base::Base64Encode(input, output);
    566   for (size_t i = 0; i < output->size(); ++i) {
    567     switch (output->data()[i]) {
    568       case '+':
    569         (*output)[i] = '-';
    570         continue;
    571       case '/':
    572         (*output)[i] = '_';
    573         continue;
    574       default:
    575         continue;
    576     }
    577   }
    578 }
    579 
    580 }  // namespace net
    581