1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/cookies/cookie_util.h" 6 7 #include <cstdio> 8 #include <cstdlib> 9 10 #include "base/logging.h" 11 #include "base/strings/string_tokenizer.h" 12 #include "base/strings/string_util.h" 13 #include "build/build_config.h" 14 #include "net/base/net_util.h" 15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 16 #include "url/gurl.h" 17 18 namespace net { 19 namespace cookie_util { 20 21 bool DomainIsHostOnly(const std::string& domain_string) { 22 return (domain_string.empty() || domain_string[0] != '.'); 23 } 24 25 std::string GetEffectiveDomain(const std::string& scheme, 26 const std::string& host) { 27 if (scheme == "http" || scheme == "https") { 28 return registry_controlled_domains::GetDomainAndRegistry( 29 host, 30 registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); 31 } 32 33 if (!DomainIsHostOnly(host)) 34 return host.substr(1); 35 return host; 36 } 37 38 bool GetCookieDomainWithString(const GURL& url, 39 const std::string& domain_string, 40 std::string* result) { 41 const std::string url_host(url.host()); 42 43 // If no domain was specified in the domain string, default to a host cookie. 44 // We match IE/Firefox in allowing a domain=IPADDR if it matches the url 45 // ip address hostname exactly. It should be treated as a host cookie. 46 if (domain_string.empty() || 47 (url.HostIsIPAddress() && url_host == domain_string)) { 48 *result = url_host; 49 DCHECK(DomainIsHostOnly(*result)); 50 return true; 51 } 52 53 // Get the normalized domain specified in cookie line. 54 url::CanonHostInfo ignored; 55 std::string cookie_domain(CanonicalizeHost(domain_string, &ignored)); 56 if (cookie_domain.empty()) 57 return false; 58 if (cookie_domain[0] != '.') 59 cookie_domain = "." + cookie_domain; 60 61 // Ensure |url| and |cookie_domain| have the same domain+registry. 62 const std::string url_scheme(url.scheme()); 63 const std::string url_domain_and_registry( 64 GetEffectiveDomain(url_scheme, url_host)); 65 if (url_domain_and_registry.empty()) 66 return false; // IP addresses/intranet hosts can't set domain cookies. 67 const std::string cookie_domain_and_registry( 68 GetEffectiveDomain(url_scheme, cookie_domain)); 69 if (url_domain_and_registry != cookie_domain_and_registry) 70 return false; // Can't set a cookie on a different domain + registry. 71 72 // Ensure |url_host| is |cookie_domain| or one of its subdomains. Given that 73 // we know the domain+registry are the same from the above checks, this is 74 // basically a simple string suffix check. 75 const bool is_suffix = (url_host.length() < cookie_domain.length()) ? 76 (cookie_domain != ("." + url_host)) : 77 (url_host.compare(url_host.length() - cookie_domain.length(), 78 cookie_domain.length(), cookie_domain) != 0); 79 if (is_suffix) 80 return false; 81 82 *result = cookie_domain; 83 return true; 84 } 85 86 // Parse a cookie expiration time. We try to be lenient, but we need to 87 // assume some order to distinguish the fields. The basic rules: 88 // - The month name must be present and prefix the first 3 letters of the 89 // full month name (jan for January, jun for June). 90 // - If the year is <= 2 digits, it must occur after the day of month. 91 // - The time must be of the format hh:mm:ss. 92 // An average cookie expiration will look something like this: 93 // Sat, 15-Apr-17 21:01:22 GMT 94 base::Time ParseCookieTime(const std::string& time_string) { 95 static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun", 96 "jul", "aug", "sep", "oct", "nov", "dec" }; 97 static const int kMonthsLen = arraysize(kMonths); 98 // We want to be pretty liberal, and support most non-ascii and non-digit 99 // characters as a delimiter. We can't treat : as a delimiter, because it 100 // is the delimiter for hh:mm:ss, and we want to keep this field together. 101 // We make sure to include - and +, since they could prefix numbers. 102 // If the cookie attribute came in in quotes (ex expires="XXX"), the quotes 103 // will be preserved, and we will get them here. So we make sure to include 104 // quote characters, and also \ for anything that was internally escaped. 105 static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~"; 106 107 base::Time::Exploded exploded = {0}; 108 109 base::StringTokenizer tokenizer(time_string, kDelimiters); 110 111 bool found_day_of_month = false; 112 bool found_month = false; 113 bool found_time = false; 114 bool found_year = false; 115 116 while (tokenizer.GetNext()) { 117 const std::string token = tokenizer.token(); 118 DCHECK(!token.empty()); 119 bool numerical = IsAsciiDigit(token[0]); 120 121 // String field 122 if (!numerical) { 123 if (!found_month) { 124 for (int i = 0; i < kMonthsLen; ++i) { 125 // Match prefix, so we could match January, etc 126 if (base::strncasecmp(token.c_str(), kMonths[i], 3) == 0) { 127 exploded.month = i + 1; 128 found_month = true; 129 break; 130 } 131 } 132 } else { 133 // If we've gotten here, it means we've already found and parsed our 134 // month, and we have another string, which we would expect to be the 135 // the time zone name. According to the RFC and my experiments with 136 // how sites format their expirations, we don't have much of a reason 137 // to support timezones. We don't want to ever barf on user input, 138 // but this DCHECK should pass for well-formed data. 139 // DCHECK(token == "GMT"); 140 } 141 // Numeric field w/ a colon 142 } else if (token.find(':') != std::string::npos) { 143 if (!found_time && 144 #ifdef COMPILER_MSVC 145 sscanf_s( 146 #else 147 sscanf( 148 #endif 149 token.c_str(), "%2u:%2u:%2u", &exploded.hour, 150 &exploded.minute, &exploded.second) == 3) { 151 found_time = true; 152 } else { 153 // We should only ever encounter one time-like thing. If we're here, 154 // it means we've found a second, which shouldn't happen. We keep 155 // the first. This check should be ok for well-formed input: 156 // NOTREACHED(); 157 } 158 // Numeric field 159 } else { 160 // Overflow with atoi() is unspecified, so we enforce a max length. 161 if (!found_day_of_month && token.length() <= 2) { 162 exploded.day_of_month = atoi(token.c_str()); 163 found_day_of_month = true; 164 } else if (!found_year && token.length() <= 5) { 165 exploded.year = atoi(token.c_str()); 166 found_year = true; 167 } else { 168 // If we're here, it means we've either found an extra numeric field, 169 // or a numeric field which was too long. For well-formed input, the 170 // following check would be reasonable: 171 // NOTREACHED(); 172 } 173 } 174 } 175 176 if (!found_day_of_month || !found_month || !found_time || !found_year) { 177 // We didn't find all of the fields we need. For well-formed input, the 178 // following check would be reasonable: 179 // NOTREACHED() << "Cookie parse expiration failed: " << time_string; 180 return base::Time(); 181 } 182 183 // Normalize the year to expand abbreviated years to the full year. 184 if (exploded.year >= 69 && exploded.year <= 99) 185 exploded.year += 1900; 186 if (exploded.year >= 0 && exploded.year <= 68) 187 exploded.year += 2000; 188 189 // If our values are within their correct ranges, we got our time. 190 if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 && 191 exploded.month >= 1 && exploded.month <= 12 && 192 exploded.year >= 1601 && exploded.year <= 30827 && 193 exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) { 194 return base::Time::FromUTCExploded(exploded); 195 } 196 197 // One of our values was out of expected range. For well-formed input, 198 // the following check would be reasonable: 199 // NOTREACHED() << "Cookie exploded expiration failed: " << time_string; 200 201 return base::Time(); 202 } 203 204 GURL CookieOriginToURL(const std::string& domain, bool is_https) { 205 if (domain.empty()) 206 return GURL(); 207 208 const std::string scheme = is_https ? "https" : "http"; 209 const std::string host = domain[0] == '.' ? domain.substr(1) : domain; 210 return GURL(scheme + "://" + host); 211 } 212 213 void ParseRequestCookieLine(const std::string& header_value, 214 ParsedRequestCookies* parsed_cookies) { 215 std::string::const_iterator i = header_value.begin(); 216 while (i != header_value.end()) { 217 // Here we are at the beginning of a cookie. 218 219 // Eat whitespace. 220 while (i != header_value.end() && *i == ' ') ++i; 221 if (i == header_value.end()) return; 222 223 // Find cookie name. 224 std::string::const_iterator cookie_name_beginning = i; 225 while (i != header_value.end() && *i != '=') ++i; 226 base::StringPiece cookie_name(cookie_name_beginning, i); 227 228 // Find cookie value. 229 base::StringPiece cookie_value; 230 // Cookies may have no value, in this case '=' may or may not be there. 231 if (i != header_value.end() && i + 1 != header_value.end()) { 232 ++i; // Skip '='. 233 std::string::const_iterator cookie_value_beginning = i; 234 if (*i == '"') { 235 ++i; // Skip '"'. 236 while (i != header_value.end() && *i != '"') ++i; 237 if (i == header_value.end()) return; 238 ++i; // Skip '"'. 239 cookie_value = base::StringPiece(cookie_value_beginning, i); 240 // i points to character after '"', potentially a ';'. 241 } else { 242 while (i != header_value.end() && *i != ';') ++i; 243 cookie_value = base::StringPiece(cookie_value_beginning, i); 244 // i points to ';' or end of string. 245 } 246 } 247 parsed_cookies->push_back(std::make_pair(cookie_name, cookie_value)); 248 // Eat ';'. 249 if (i != header_value.end()) ++i; 250 } 251 } 252 253 std::string SerializeRequestCookieLine( 254 const ParsedRequestCookies& parsed_cookies) { 255 std::string buffer; 256 for (ParsedRequestCookies::const_iterator i = parsed_cookies.begin(); 257 i != parsed_cookies.end(); ++i) { 258 if (!buffer.empty()) 259 buffer.append("; "); 260 buffer.append(i->first.begin(), i->first.end()); 261 buffer.push_back('='); 262 buffer.append(i->second.begin(), i->second.end()); 263 } 264 return buffer; 265 } 266 267 } // namespace cookie_utils 268 } // namespace net 269 270