Home | History | Annotate | Download | only in cookies
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/cookies/cookie_util.h"
      6 
      7 #include <cstdio>
      8 #include <cstdlib>
      9 
     10 #include "base/logging.h"
     11 #include "base/strings/string_tokenizer.h"
     12 #include "base/strings/string_util.h"
     13 #include "build/build_config.h"
     14 #include "net/base/net_util.h"
     15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
     16 #include "url/gurl.h"
     17 
     18 namespace net {
     19 namespace cookie_util {
     20 
     21 bool DomainIsHostOnly(const std::string& domain_string) {
     22   return (domain_string.empty() || domain_string[0] != '.');
     23 }
     24 
     25 std::string GetEffectiveDomain(const std::string& scheme,
     26                                const std::string& host) {
     27   if (scheme == "http" || scheme == "https") {
     28     return registry_controlled_domains::GetDomainAndRegistry(
     29         host,
     30         registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
     31   }
     32 
     33   if (!DomainIsHostOnly(host))
     34     return host.substr(1);
     35   return host;
     36 }
     37 
     38 bool GetCookieDomainWithString(const GURL& url,
     39                                const std::string& domain_string,
     40                                std::string* result) {
     41   const std::string url_host(url.host());
     42 
     43   // If no domain was specified in the domain string, default to a host cookie.
     44   // We match IE/Firefox in allowing a domain=IPADDR if it matches the url
     45   // ip address hostname exactly.  It should be treated as a host cookie.
     46   if (domain_string.empty() ||
     47       (url.HostIsIPAddress() && url_host == domain_string)) {
     48     *result = url_host;
     49     DCHECK(DomainIsHostOnly(*result));
     50     return true;
     51   }
     52 
     53   // Get the normalized domain specified in cookie line.
     54   url::CanonHostInfo ignored;
     55   std::string cookie_domain(CanonicalizeHost(domain_string, &ignored));
     56   if (cookie_domain.empty())
     57     return false;
     58   if (cookie_domain[0] != '.')
     59     cookie_domain = "." + cookie_domain;
     60 
     61   // Ensure |url| and |cookie_domain| have the same domain+registry.
     62   const std::string url_scheme(url.scheme());
     63   const std::string url_domain_and_registry(
     64       GetEffectiveDomain(url_scheme, url_host));
     65   if (url_domain_and_registry.empty())
     66     return false;  // IP addresses/intranet hosts can't set domain cookies.
     67   const std::string cookie_domain_and_registry(
     68       GetEffectiveDomain(url_scheme, cookie_domain));
     69   if (url_domain_and_registry != cookie_domain_and_registry)
     70     return false;  // Can't set a cookie on a different domain + registry.
     71 
     72   // Ensure |url_host| is |cookie_domain| or one of its subdomains.  Given that
     73   // we know the domain+registry are the same from the above checks, this is
     74   // basically a simple string suffix check.
     75   const bool is_suffix = (url_host.length() < cookie_domain.length()) ?
     76       (cookie_domain != ("." + url_host)) :
     77       (url_host.compare(url_host.length() - cookie_domain.length(),
     78                         cookie_domain.length(), cookie_domain) != 0);
     79   if (is_suffix)
     80     return false;
     81 
     82   *result = cookie_domain;
     83   return true;
     84 }
     85 
     86 // Parse a cookie expiration time.  We try to be lenient, but we need to
     87 // assume some order to distinguish the fields.  The basic rules:
     88 //  - The month name must be present and prefix the first 3 letters of the
     89 //    full month name (jan for January, jun for June).
     90 //  - If the year is <= 2 digits, it must occur after the day of month.
     91 //  - The time must be of the format hh:mm:ss.
     92 // An average cookie expiration will look something like this:
     93 //   Sat, 15-Apr-17 21:01:22 GMT
     94 base::Time ParseCookieTime(const std::string& time_string) {
     95   static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun",
     96                                    "jul", "aug", "sep", "oct", "nov", "dec" };
     97   static const int kMonthsLen = arraysize(kMonths);
     98   // We want to be pretty liberal, and support most non-ascii and non-digit
     99   // characters as a delimiter.  We can't treat : as a delimiter, because it
    100   // is the delimiter for hh:mm:ss, and we want to keep this field together.
    101   // We make sure to include - and +, since they could prefix numbers.
    102   // If the cookie attribute came in in quotes (ex expires="XXX"), the quotes
    103   // will be preserved, and we will get them here.  So we make sure to include
    104   // quote characters, and also \ for anything that was internally escaped.
    105   static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~";
    106 
    107   base::Time::Exploded exploded = {0};
    108 
    109   base::StringTokenizer tokenizer(time_string, kDelimiters);
    110 
    111   bool found_day_of_month = false;
    112   bool found_month = false;
    113   bool found_time = false;
    114   bool found_year = false;
    115 
    116   while (tokenizer.GetNext()) {
    117     const std::string token = tokenizer.token();
    118     DCHECK(!token.empty());
    119     bool numerical = IsAsciiDigit(token[0]);
    120 
    121     // String field
    122     if (!numerical) {
    123       if (!found_month) {
    124         for (int i = 0; i < kMonthsLen; ++i) {
    125           // Match prefix, so we could match January, etc
    126           if (base::strncasecmp(token.c_str(), kMonths[i], 3) == 0) {
    127             exploded.month = i + 1;
    128             found_month = true;
    129             break;
    130           }
    131         }
    132       } else {
    133         // If we've gotten here, it means we've already found and parsed our
    134         // month, and we have another string, which we would expect to be the
    135         // the time zone name.  According to the RFC and my experiments with
    136         // how sites format their expirations, we don't have much of a reason
    137         // to support timezones.  We don't want to ever barf on user input,
    138         // but this DCHECK should pass for well-formed data.
    139         // DCHECK(token == "GMT");
    140       }
    141     // Numeric field w/ a colon
    142     } else if (token.find(':') != std::string::npos) {
    143       if (!found_time &&
    144 #ifdef COMPILER_MSVC
    145           sscanf_s(
    146 #else
    147           sscanf(
    148 #endif
    149                  token.c_str(), "%2u:%2u:%2u", &exploded.hour,
    150                  &exploded.minute, &exploded.second) == 3) {
    151         found_time = true;
    152       } else {
    153         // We should only ever encounter one time-like thing.  If we're here,
    154         // it means we've found a second, which shouldn't happen.  We keep
    155         // the first.  This check should be ok for well-formed input:
    156         // NOTREACHED();
    157       }
    158     // Numeric field
    159     } else {
    160       // Overflow with atoi() is unspecified, so we enforce a max length.
    161       if (!found_day_of_month && token.length() <= 2) {
    162         exploded.day_of_month = atoi(token.c_str());
    163         found_day_of_month = true;
    164       } else if (!found_year && token.length() <= 5) {
    165         exploded.year = atoi(token.c_str());
    166         found_year = true;
    167       } else {
    168         // If we're here, it means we've either found an extra numeric field,
    169         // or a numeric field which was too long.  For well-formed input, the
    170         // following check would be reasonable:
    171         // NOTREACHED();
    172       }
    173     }
    174   }
    175 
    176   if (!found_day_of_month || !found_month || !found_time || !found_year) {
    177     // We didn't find all of the fields we need.  For well-formed input, the
    178     // following check would be reasonable:
    179     // NOTREACHED() << "Cookie parse expiration failed: " << time_string;
    180     return base::Time();
    181   }
    182 
    183   // Normalize the year to expand abbreviated years to the full year.
    184   if (exploded.year >= 69 && exploded.year <= 99)
    185     exploded.year += 1900;
    186   if (exploded.year >= 0 && exploded.year <= 68)
    187     exploded.year += 2000;
    188 
    189   // If our values are within their correct ranges, we got our time.
    190   if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 &&
    191       exploded.month >= 1 && exploded.month <= 12 &&
    192       exploded.year >= 1601 && exploded.year <= 30827 &&
    193       exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) {
    194     return base::Time::FromUTCExploded(exploded);
    195   }
    196 
    197   // One of our values was out of expected range.  For well-formed input,
    198   // the following check would be reasonable:
    199   // NOTREACHED() << "Cookie exploded expiration failed: " << time_string;
    200 
    201   return base::Time();
    202 }
    203 
    204 GURL CookieOriginToURL(const std::string& domain, bool is_https) {
    205   if (domain.empty())
    206     return GURL();
    207 
    208   const std::string scheme = is_https ? "https" : "http";
    209   const std::string host = domain[0] == '.' ? domain.substr(1) : domain;
    210   return GURL(scheme + "://" + host);
    211 }
    212 
    213 }  // namespace cookie_util
    214 }  // namespace net
    215 
    216