1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Portions of this code based on Mozilla: 6 // (netwerk/cookie/src/nsCookieService.cpp) 7 /* ***** BEGIN LICENSE BLOCK ***** 8 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 9 * 10 * The contents of this file are subject to the Mozilla Public License Version 11 * 1.1 (the "License"); you may not use this file except in compliance with 12 * the License. You may obtain a copy of the License at 13 * http://www.mozilla.org/MPL/ 14 * 15 * Software distributed under the License is distributed on an "AS IS" basis, 16 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 17 * for the specific language governing rights and limitations under the 18 * License. 19 * 20 * The Original Code is mozilla.org code. 21 * 22 * The Initial Developer of the Original Code is 23 * Netscape Communications Corporation. 24 * Portions created by the Initial Developer are Copyright (C) 2003 25 * the Initial Developer. All Rights Reserved. 26 * 27 * Contributor(s): 28 * Daniel Witte (dwitte (at) stanford.edu) 29 * Michiel van Leeuwen (mvl (at) exedo.nl) 30 * 31 * Alternatively, the contents of this file may be used under the terms of 32 * either the GNU General Public License Version 2 or later (the "GPL"), or 33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 34 * in which case the provisions of the GPL or the LGPL are applicable instead 35 * of those above. If you wish to allow use of your version of this file only 36 * under the terms of either the GPL or the LGPL, and not to allow others to 37 * use your version of this file under the terms of the MPL, indicate your 38 * decision by deleting the provisions above and replace them with the notice 39 * and other provisions required by the GPL or the LGPL. If you do not delete 40 * the provisions above, a recipient may use your version of this file under 41 * the terms of any one of the MPL, the GPL or the LGPL. 42 * 43 * ***** END LICENSE BLOCK ***** */ 44 45 #include "net/base/cookie_monster.h" 46 47 #include <algorithm> 48 49 #include "base/basictypes.h" 50 #include "base/format_macros.h" 51 #include "base/logging.h" 52 #include "base/scoped_ptr.h" 53 #include "base/string_tokenizer.h" 54 #include "base/string_util.h" 55 #include "googleurl/src/gurl.h" 56 #include "net/base/net_util.h" 57 #include "net/base/registry_controlled_domain.h" 58 59 // #define COOKIE_LOGGING_ENABLED 60 #ifdef COOKIE_LOGGING_ENABLED 61 #define COOKIE_DLOG(severity) DLOG_IF(INFO, 1) 62 #else 63 #define COOKIE_DLOG(severity) DLOG_IF(INFO, 0) 64 #endif 65 66 using base::Time; 67 using base::TimeDelta; 68 69 namespace net { 70 71 // Cookie garbage collection thresholds. Based off of the Mozilla defaults. 72 // It might seem scary to have a high purge value, but really it's not. You 73 // just make sure that you increase the max to cover the increase in purge, 74 // and we would have been purging the same amount of cookies. We're just 75 // going through the garbage collection process less often. 76 static const size_t kNumCookiesPerHost = 70; // ~50 cookies 77 static const size_t kNumCookiesPerHostPurge = 20; 78 static const size_t kNumCookiesTotal = 3300; // ~3000 cookies 79 static const size_t kNumCookiesTotalPurge = 300; 80 81 // Default minimum delay after updating a cookie's LastAccessDate before we 82 // will update it again. 83 static const int kDefaultAccessUpdateThresholdSeconds = 60; 84 85 // static 86 bool CookieMonster::enable_file_scheme_ = false; 87 88 // static 89 void CookieMonster::EnableFileScheme() { 90 enable_file_scheme_ = true; 91 } 92 93 CookieMonster::CookieMonster() 94 : initialized_(false), 95 store_(NULL), 96 last_access_threshold_( 97 TimeDelta::FromSeconds(kDefaultAccessUpdateThresholdSeconds)) { 98 SetDefaultCookieableSchemes(); 99 } 100 101 CookieMonster::CookieMonster(PersistentCookieStore* store) 102 : initialized_(false), 103 store_(store), 104 last_access_threshold_( 105 TimeDelta::FromSeconds(kDefaultAccessUpdateThresholdSeconds)) { 106 SetDefaultCookieableSchemes(); 107 } 108 109 CookieMonster::~CookieMonster() { 110 DeleteAll(false); 111 } 112 113 void CookieMonster::InitStore() { 114 DCHECK(store_) << "Store must exist to initialize"; 115 116 // Initialize the store and sync in any saved persistent cookies. We don't 117 // care if it's expired, insert it so it can be garbage collected, removed, 118 // and sync'd. 119 std::vector<KeyedCanonicalCookie> cookies; 120 // Reserve space for the maximum amount of cookies a database should have. 121 // This prevents multiple vector growth / copies as we append cookies. 122 cookies.reserve(kNumCookiesTotal); 123 store_->Load(&cookies); 124 for (std::vector<KeyedCanonicalCookie>::const_iterator it = cookies.begin(); 125 it != cookies.end(); ++it) { 126 InternalInsertCookie(it->first, it->second, false); 127 } 128 } 129 130 void CookieMonster::SetDefaultCookieableSchemes() { 131 // Note: file must be the last scheme. 132 static const char* kDefaultCookieableSchemes[] = { "http", "https", "file" }; 133 int num_schemes = enable_file_scheme_ ? 3 : 2; 134 SetCookieableSchemes(kDefaultCookieableSchemes, num_schemes); 135 } 136 137 // The system resolution is not high enough, so we can have multiple 138 // set cookies that result in the same system time. When this happens, we 139 // increment by one Time unit. Let's hope computers don't get too fast. 140 Time CookieMonster::CurrentTime() { 141 return std::max(Time::Now(), 142 Time::FromInternalValue(last_time_seen_.ToInternalValue() + 1)); 143 } 144 145 // Parse a cookie expiration time. We try to be lenient, but we need to 146 // assume some order to distinguish the fields. The basic rules: 147 // - The month name must be present and prefix the first 3 letters of the 148 // full month name (jan for January, jun for June). 149 // - If the year is <= 2 digits, it must occur after the day of month. 150 // - The time must be of the format hh:mm:ss. 151 // An average cookie expiration will look something like this: 152 // Sat, 15-Apr-17 21:01:22 GMT 153 Time CookieMonster::ParseCookieTime(const std::string& time_string) { 154 static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun", 155 "jul", "aug", "sep", "oct", "nov", "dec" }; 156 static const int kMonthsLen = arraysize(kMonths); 157 // We want to be pretty liberal, and support most non-ascii and non-digit 158 // characters as a delimiter. We can't treat : as a delimiter, because it 159 // is the delimiter for hh:mm:ss, and we want to keep this field together. 160 // We make sure to include - and +, since they could prefix numbers. 161 // If the cookie attribute came in in quotes (ex expires="XXX"), the quotes 162 // will be preserved, and we will get them here. So we make sure to include 163 // quote characters, and also \ for anything that was internally escaped. 164 static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~"; 165 166 Time::Exploded exploded = {0}; 167 168 StringTokenizer tokenizer(time_string, kDelimiters); 169 170 bool found_day_of_month = false; 171 bool found_month = false; 172 bool found_time = false; 173 bool found_year = false; 174 175 while (tokenizer.GetNext()) { 176 const std::string token = tokenizer.token(); 177 DCHECK(!token.empty()); 178 bool numerical = IsAsciiDigit(token[0]); 179 180 // String field 181 if (!numerical) { 182 if (!found_month) { 183 for (int i = 0; i < kMonthsLen; ++i) { 184 // Match prefix, so we could match January, etc 185 if (base::strncasecmp(token.c_str(), kMonths[i], 3) == 0) { 186 exploded.month = i + 1; 187 found_month = true; 188 break; 189 } 190 } 191 } else { 192 // If we've gotten here, it means we've already found and parsed our 193 // month, and we have another string, which we would expect to be the 194 // the time zone name. According to the RFC and my experiments with 195 // how sites format their expirations, we don't have much of a reason 196 // to support timezones. We don't want to ever barf on user input, 197 // but this DCHECK should pass for well-formed data. 198 // DCHECK(token == "GMT"); 199 } 200 // Numeric field w/ a colon 201 } else if (token.find(':') != std::string::npos) { 202 if (!found_time && 203 #ifdef COMPILER_MSVC 204 sscanf_s( 205 #else 206 sscanf( 207 #endif 208 token.c_str(), "%2u:%2u:%2u", &exploded.hour, 209 &exploded.minute, &exploded.second) == 3) { 210 found_time = true; 211 } else { 212 // We should only ever encounter one time-like thing. If we're here, 213 // it means we've found a second, which shouldn't happen. We keep 214 // the first. This check should be ok for well-formed input: 215 // NOTREACHED(); 216 } 217 // Numeric field 218 } else { 219 // Overflow with atoi() is unspecified, so we enforce a max length. 220 if (!found_day_of_month && token.length() <= 2) { 221 exploded.day_of_month = atoi(token.c_str()); 222 found_day_of_month = true; 223 } else if (!found_year && token.length() <= 5) { 224 exploded.year = atoi(token.c_str()); 225 found_year = true; 226 } else { 227 // If we're here, it means we've either found an extra numeric field, 228 // or a numeric field which was too long. For well-formed input, the 229 // following check would be reasonable: 230 // NOTREACHED(); 231 } 232 } 233 } 234 235 if (!found_day_of_month || !found_month || !found_time || !found_year) { 236 // We didn't find all of the fields we need. For well-formed input, the 237 // following check would be reasonable: 238 // NOTREACHED() << "Cookie parse expiration failed: " << time_string; 239 return Time(); 240 } 241 242 // Normalize the year to expand abbreviated years to the full year. 243 if (exploded.year >= 69 && exploded.year <= 99) 244 exploded.year += 1900; 245 if (exploded.year >= 0 && exploded.year <= 68) 246 exploded.year += 2000; 247 248 // If our values are within their correct ranges, we got our time. 249 if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 && 250 exploded.month >= 1 && exploded.month <= 12 && 251 exploded.year >= 1601 && exploded.year <= 30827 && 252 exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) { 253 return Time::FromUTCExploded(exploded); 254 } 255 256 // One of our values was out of expected range. For well-formed input, 257 // the following check would be reasonable: 258 // NOTREACHED() << "Cookie exploded expiration failed: " << time_string; 259 260 return Time(); 261 } 262 263 // Returns the effective TLD+1 for a given host. This only makes sense for http 264 // and https schemes. For other schemes, the host will be returned unchanged 265 // (minus any leading .). 266 static std::string GetEffectiveDomain(const std::string& scheme, 267 const std::string& host) { 268 if (scheme == "http" || scheme == "https") 269 return RegistryControlledDomainService::GetDomainAndRegistry(host); 270 271 if (!host.empty() && host[0] == '.') 272 return host.substr(1); 273 return host; 274 } 275 276 // Determine the cookie domain key to use for setting the specified cookie. 277 // On success returns true, and sets cookie_domain_key to either a 278 // -host cookie key (ex: "google.com") 279 // -domain cookie key (ex: ".google.com") 280 static bool GetCookieDomainKey(const GURL& url, 281 const CookieMonster::ParsedCookie& pc, 282 std::string* cookie_domain_key) { 283 const std::string url_host(url.host()); 284 285 // If no domain was specified in the cookie, default to a host cookie. 286 // We match IE/Firefox in allowing a domain=IPADDR if it matches the url 287 // ip address hostname exactly. It should be treated as a host cookie. 288 if (!pc.HasDomain() || pc.Domain().empty() || 289 (url.HostIsIPAddress() && url_host == pc.Domain())) { 290 *cookie_domain_key = url_host; 291 DCHECK((*cookie_domain_key)[0] != '.'); 292 return true; 293 } 294 295 // Get the normalized domain specified in cookie line. 296 // Note: The RFC says we can reject a cookie if the domain 297 // attribute does not start with a dot. IE/FF/Safari however, allow a cookie 298 // of the form domain=my.domain.com, treating it the same as 299 // domain=.my.domain.com -- for compatibility we do the same here. Firefox 300 // also treats domain=.....my.domain.com like domain=.my.domain.com, but 301 // neither IE nor Safari do this, and we don't either. 302 url_canon::CanonHostInfo ignored; 303 std::string cookie_domain(net::CanonicalizeHost(pc.Domain(), &ignored)); 304 if (cookie_domain.empty()) 305 return false; 306 if (cookie_domain[0] != '.') 307 cookie_domain = "." + cookie_domain; 308 309 // Ensure |url| and |cookie_domain| have the same domain+registry. 310 const std::string url_scheme(url.scheme()); 311 const std::string url_domain_and_registry( 312 GetEffectiveDomain(url_scheme, url_host)); 313 if (url_domain_and_registry.empty()) 314 return false; // IP addresses/intranet hosts can't set domain cookies. 315 const std::string cookie_domain_and_registry( 316 GetEffectiveDomain(url_scheme, cookie_domain)); 317 if (url_domain_and_registry != cookie_domain_and_registry) 318 return false; // Can't set a cookie on a different domain + registry. 319 320 // Ensure |url_host| is |cookie_domain| or one of its subdomains. Given that 321 // we know the domain+registry are the same from the above checks, this is 322 // basically a simple string suffix check. 323 if ((url_host.length() < cookie_domain.length()) ? 324 (cookie_domain != ("." + url_host)) : 325 url_host.compare(url_host.length() - cookie_domain.length(), 326 cookie_domain.length(), cookie_domain)) 327 return false; 328 329 *cookie_domain_key = cookie_domain; 330 return true; 331 } 332 333 static std::string CanonPath(const GURL& url, 334 const CookieMonster::ParsedCookie& pc) { 335 // The RFC says the path should be a prefix of the current URL path. 336 // However, Mozilla allows you to set any path for compatibility with 337 // broken websites. We unfortunately will mimic this behavior. We try 338 // to be generous and accept cookies with an invalid path attribute, and 339 // default the path to something reasonable. 340 341 // The path was supplied in the cookie, we'll take it. 342 if (pc.HasPath() && !pc.Path().empty() && pc.Path()[0] == '/') 343 return pc.Path(); 344 345 // The path was not supplied in the cookie or invalid, we will default 346 // to the current URL path. 347 // """Defaults to the path of the request URL that generated the 348 // Set-Cookie response, up to, but not including, the 349 // right-most /.""" 350 // How would this work for a cookie on /? We will include it then. 351 const std::string& url_path = url.path(); 352 353 size_t idx = url_path.find_last_of('/'); 354 355 // The cookie path was invalid or a single '/'. 356 if (idx == 0 || idx == std::string::npos) 357 return std::string("/"); 358 359 // Return up to the rightmost '/'. 360 return url_path.substr(0, idx); 361 } 362 363 static Time CanonExpiration(const CookieMonster::ParsedCookie& pc, 364 const Time& current) { 365 // First, try the Max-Age attribute. 366 uint64 max_age = 0; 367 if (pc.HasMaxAge() && 368 #ifdef COMPILER_MSVC 369 sscanf_s( 370 #else 371 sscanf( 372 #endif 373 pc.MaxAge().c_str(), " %" PRIu64, &max_age) == 1) { 374 return current + TimeDelta::FromSeconds(max_age); 375 } 376 377 // Try the Expires attribute. 378 if (pc.HasExpires()) 379 return CookieMonster::ParseCookieTime(pc.Expires()); 380 381 // Invalid or no expiration, persistent cookie. 382 return Time(); 383 } 384 385 bool CookieMonster::HasCookieableScheme(const GURL& url) { 386 // Make sure the request is on a cookie-able url scheme. 387 for (size_t i = 0; i < cookieable_schemes_.size(); ++i) { 388 // We matched a scheme. 389 if (url.SchemeIs(cookieable_schemes_[i].c_str())) { 390 // We've matched a supported scheme. 391 return true; 392 } 393 } 394 395 // The scheme didn't match any in our whitelist. 396 COOKIE_DLOG(WARNING) << "Unsupported cookie scheme: " << url.scheme(); 397 return false; 398 } 399 400 void CookieMonster::SetCookieableSchemes( 401 const char* schemes[], size_t num_schemes) { 402 cookieable_schemes_.clear(); 403 cookieable_schemes_.insert(cookieable_schemes_.end(), 404 schemes, schemes + num_schemes); 405 } 406 407 bool CookieMonster::SetCookieWithCreationTimeAndOptions( 408 const GURL& url, 409 const std::string& cookie_line, 410 const Time& creation_time_or_null, 411 const CookieOptions& options) { 412 if (!HasCookieableScheme(url)) { 413 return false; 414 } 415 416 AutoLock autolock(lock_); 417 InitIfNecessary(); 418 419 COOKIE_DLOG(INFO) << "SetCookie() line: " << cookie_line; 420 421 Time creation_time = creation_time_or_null; 422 if (creation_time.is_null()) { 423 creation_time = CurrentTime(); 424 last_time_seen_ = creation_time; 425 } 426 427 // Parse the cookie. 428 ParsedCookie pc(cookie_line); 429 430 if (!pc.IsValid()) { 431 COOKIE_DLOG(WARNING) << "Couldn't parse cookie"; 432 return false; 433 } 434 435 if (options.exclude_httponly() && pc.IsHttpOnly()) { 436 COOKIE_DLOG(INFO) << "SetCookie() not setting httponly cookie"; 437 return false; 438 } 439 440 std::string cookie_domain; 441 if (!GetCookieDomainKey(url, pc, &cookie_domain)) { 442 return false; 443 } 444 445 std::string cookie_path = CanonPath(url, pc); 446 447 scoped_ptr<CanonicalCookie> cc; 448 Time cookie_expires = CanonExpiration(pc, creation_time); 449 450 cc.reset(new CanonicalCookie(pc.Name(), pc.Value(), cookie_path, 451 pc.IsSecure(), pc.IsHttpOnly(), 452 creation_time, creation_time, 453 !cookie_expires.is_null(), cookie_expires)); 454 455 if (!cc.get()) { 456 COOKIE_DLOG(WARNING) << "Failed to allocate CanonicalCookie"; 457 return false; 458 } 459 460 if (DeleteAnyEquivalentCookie(cookie_domain, 461 *cc, 462 options.exclude_httponly())) { 463 COOKIE_DLOG(INFO) << "SetCookie() not clobbering httponly cookie"; 464 return false; 465 } 466 467 COOKIE_DLOG(INFO) << "SetCookie() cc: " << cc->DebugString(); 468 469 // Realize that we might be setting an expired cookie, and the only point 470 // was to delete the cookie which we've already done. 471 if (!cc->IsExpired(creation_time)) 472 InternalInsertCookie(cookie_domain, cc.release(), true); 473 474 // We assume that hopefully setting a cookie will be less common than 475 // querying a cookie. Since setting a cookie can put us over our limits, 476 // make sure that we garbage collect... We can also make the assumption that 477 // if a cookie was set, in the common case it will be used soon after, 478 // and we will purge the expired cookies in GetCookies(). 479 GarbageCollect(creation_time, cookie_domain); 480 481 return true; 482 } 483 484 void CookieMonster::InternalInsertCookie(const std::string& key, 485 CanonicalCookie* cc, 486 bool sync_to_store) { 487 if (cc->IsPersistent() && store_ && sync_to_store) 488 store_->AddCookie(key, *cc); 489 cookies_.insert(CookieMap::value_type(key, cc)); 490 } 491 492 void CookieMonster::InternalUpdateCookieAccessTime(CanonicalCookie* cc) { 493 // Based off the Mozilla code. When a cookie has been accessed recently, 494 // don't bother updating its access time again. This reduces the number of 495 // updates we do during pageload, which in turn reduces the chance our storage 496 // backend will hit its batch thresholds and be forced to update. 497 const Time current = Time::Now(); 498 if ((current - cc->LastAccessDate()) < last_access_threshold_) 499 return; 500 501 cc->SetLastAccessDate(current); 502 if (cc->IsPersistent() && store_) 503 store_->UpdateCookieAccessTime(*cc); 504 } 505 506 void CookieMonster::InternalDeleteCookie(CookieMap::iterator it, 507 bool sync_to_store) { 508 CanonicalCookie* cc = it->second; 509 COOKIE_DLOG(INFO) << "InternalDeleteCookie() cc: " << cc->DebugString(); 510 if (cc->IsPersistent() && store_ && sync_to_store) 511 store_->DeleteCookie(*cc); 512 cookies_.erase(it); 513 delete cc; 514 } 515 516 bool CookieMonster::DeleteAnyEquivalentCookie(const std::string& key, 517 const CanonicalCookie& ecc, 518 bool skip_httponly) { 519 bool found_equivalent_cookie = false; 520 bool skipped_httponly = false; 521 for (CookieMapItPair its = cookies_.equal_range(key); 522 its.first != its.second; ) { 523 CookieMap::iterator curit = its.first; 524 CanonicalCookie* cc = curit->second; 525 ++its.first; 526 527 if (ecc.IsEquivalent(*cc)) { 528 // We should never have more than one equivalent cookie, since they should 529 // overwrite each other. 530 DCHECK(!found_equivalent_cookie) << 531 "Duplicate equivalent cookies found, cookie store is corrupted."; 532 if (skip_httponly && cc->IsHttpOnly()) { 533 skipped_httponly = true; 534 } else { 535 InternalDeleteCookie(curit, true); 536 } 537 found_equivalent_cookie = true; 538 #ifdef NDEBUG 539 // Speed optimization: No point looping through the rest of the cookies 540 // since we're only doing it as a consistency check. 541 break; 542 #endif 543 } 544 } 545 return skipped_httponly; 546 } 547 548 int CookieMonster::GarbageCollect(const Time& current, 549 const std::string& key) { 550 int num_deleted = 0; 551 552 // Collect garbage for this key. 553 if (cookies_.count(key) > kNumCookiesPerHost) { 554 COOKIE_DLOG(INFO) << "GarbageCollect() key: " << key; 555 num_deleted += GarbageCollectRange(current, cookies_.equal_range(key), 556 kNumCookiesPerHost, kNumCookiesPerHostPurge); 557 } 558 559 // Collect garbage for everything. 560 if (cookies_.size() > kNumCookiesTotal) { 561 COOKIE_DLOG(INFO) << "GarbageCollect() everything"; 562 num_deleted += GarbageCollectRange(current, 563 CookieMapItPair(cookies_.begin(), cookies_.end()), kNumCookiesTotal, 564 kNumCookiesTotalPurge); 565 } 566 567 return num_deleted; 568 } 569 570 static bool LRUCookieSorter(const CookieMonster::CookieMap::iterator& it1, 571 const CookieMonster::CookieMap::iterator& it2) { 572 // Cookies accessed less recently should be deleted first. 573 if (it1->second->LastAccessDate() != it2->second->LastAccessDate()) 574 return it1->second->LastAccessDate() < it2->second->LastAccessDate(); 575 576 // In rare cases we might have two cookies with identical last access times. 577 // To preserve the stability of the sort, in these cases prefer to delete 578 // older cookies over newer ones. CreationDate() is guaranteed to be unique. 579 return it1->second->CreationDate() < it2->second->CreationDate(); 580 } 581 582 int CookieMonster::GarbageCollectRange(const Time& current, 583 const CookieMapItPair& itpair, 584 size_t num_max, 585 size_t num_purge) { 586 // First, delete anything that's expired. 587 std::vector<CookieMap::iterator> cookie_its; 588 int num_deleted = GarbageCollectExpired(current, itpair, &cookie_its); 589 590 // If the range still has too many cookies, delete the least recently used. 591 if (cookie_its.size() > num_max) { 592 COOKIE_DLOG(INFO) << "GarbageCollectRange() Deep Garbage Collect."; 593 // Purge down to (|num_max| - |num_purge|) total cookies. 594 DCHECK(num_purge <= num_max); 595 num_purge += cookie_its.size() - num_max; 596 597 std::partial_sort(cookie_its.begin(), cookie_its.begin() + num_purge, 598 cookie_its.end(), LRUCookieSorter); 599 for (size_t i = 0; i < num_purge; ++i) 600 InternalDeleteCookie(cookie_its[i], true); 601 602 num_deleted += num_purge; 603 } 604 605 return num_deleted; 606 } 607 608 int CookieMonster::GarbageCollectExpired( 609 const Time& current, 610 const CookieMapItPair& itpair, 611 std::vector<CookieMap::iterator>* cookie_its) { 612 int num_deleted = 0; 613 for (CookieMap::iterator it = itpair.first, end = itpair.second; it != end;) { 614 CookieMap::iterator curit = it; 615 ++it; 616 617 if (curit->second->IsExpired(current)) { 618 InternalDeleteCookie(curit, true); 619 ++num_deleted; 620 } else if (cookie_its) { 621 cookie_its->push_back(curit); 622 } 623 } 624 625 return num_deleted; 626 } 627 628 int CookieMonster::DeleteAll(bool sync_to_store) { 629 AutoLock autolock(lock_); 630 InitIfNecessary(); 631 632 int num_deleted = 0; 633 for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) { 634 CookieMap::iterator curit = it; 635 ++it; 636 InternalDeleteCookie(curit, sync_to_store); 637 ++num_deleted; 638 } 639 640 return num_deleted; 641 } 642 643 int CookieMonster::DeleteAllCreatedBetween(const Time& delete_begin, 644 const Time& delete_end, 645 bool sync_to_store) { 646 AutoLock autolock(lock_); 647 InitIfNecessary(); 648 649 int num_deleted = 0; 650 for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) { 651 CookieMap::iterator curit = it; 652 CanonicalCookie* cc = curit->second; 653 ++it; 654 655 if (cc->CreationDate() >= delete_begin && 656 (delete_end.is_null() || cc->CreationDate() < delete_end)) { 657 InternalDeleteCookie(curit, sync_to_store); 658 ++num_deleted; 659 } 660 } 661 662 return num_deleted; 663 } 664 665 int CookieMonster::DeleteAllCreatedAfter(const Time& delete_begin, 666 bool sync_to_store) { 667 return DeleteAllCreatedBetween(delete_begin, Time(), sync_to_store); 668 } 669 670 bool CookieMonster::DeleteCookie(const std::string& domain, 671 const CanonicalCookie& cookie, 672 bool sync_to_store) { 673 AutoLock autolock(lock_); 674 InitIfNecessary(); 675 676 for (CookieMapItPair its = cookies_.equal_range(domain); 677 its.first != its.second; ++its.first) { 678 // The creation date acts as our unique index... 679 if (its.first->second->CreationDate() == cookie.CreationDate()) { 680 InternalDeleteCookie(its.first, sync_to_store); 681 return true; 682 } 683 } 684 return false; 685 } 686 687 // Mozilla sorts on the path length (longest first), and then it 688 // sorts by creation time (oldest first). 689 // The RFC says the sort order for the domain attribute is undefined. 690 static bool CookieSorter(CookieMonster::CanonicalCookie* cc1, 691 CookieMonster::CanonicalCookie* cc2) { 692 if (cc1->Path().length() == cc2->Path().length()) 693 return cc1->CreationDate() < cc2->CreationDate(); 694 return cc1->Path().length() > cc2->Path().length(); 695 } 696 697 bool CookieMonster::SetCookieWithOptions(const GURL& url, 698 const std::string& cookie_line, 699 const CookieOptions& options) { 700 return SetCookieWithCreationTimeAndOptions(url, cookie_line, Time(), options); 701 } 702 703 // Currently our cookie datastructure is based on Mozilla's approach. We have a 704 // hash keyed on the cookie's domain, and for any query we walk down the domain 705 // components and probe for cookies until we reach the TLD, where we stop. 706 // For example, a.b.blah.com, we would probe 707 // - a.b.blah.com 708 // - .a.b.blah.com (TODO should we check this first or second?) 709 // - .b.blah.com 710 // - .blah.com 711 // There are some alternative datastructures we could try, like a 712 // search/prefix trie, where we reverse the hostname and query for all 713 // keys that are a prefix of our hostname. I think the hash probing 714 // should be fast and simple enough for now. 715 std::string CookieMonster::GetCookiesWithOptions(const GURL& url, 716 const CookieOptions& options) { 717 if (!HasCookieableScheme(url)) { 718 return std::string(); 719 } 720 721 // Get the cookies for this host and its domain(s). 722 std::vector<CanonicalCookie*> cookies; 723 FindCookiesForHostAndDomain(url, options, &cookies); 724 std::sort(cookies.begin(), cookies.end(), CookieSorter); 725 726 std::string cookie_line; 727 for (std::vector<CanonicalCookie*>::const_iterator it = cookies.begin(); 728 it != cookies.end(); ++it) { 729 if (it != cookies.begin()) 730 cookie_line += "; "; 731 // In Mozilla if you set a cookie like AAAA, it will have an empty token 732 // and a value of AAAA. When it sends the cookie back, it will send AAAA, 733 // so we need to avoid sending =AAAA for a blank token value. 734 if (!(*it)->Name().empty()) 735 cookie_line += (*it)->Name() + "="; 736 cookie_line += (*it)->Value(); 737 } 738 739 COOKIE_DLOG(INFO) << "GetCookies() result: " << cookie_line; 740 741 return cookie_line; 742 } 743 744 void CookieMonster::DeleteCookie(const GURL& url, 745 const std::string& cookie_name) { 746 if (!HasCookieableScheme(url)) 747 return; 748 749 CookieOptions options; 750 options.set_include_httponly(); 751 // Get the cookies for this host and its domain(s). 752 std::vector<CanonicalCookie*> cookies; 753 FindCookiesForHostAndDomain(url, options, &cookies); 754 std::set<CanonicalCookie*> matching_cookies; 755 756 for (std::vector<CanonicalCookie*>::const_iterator it = cookies.begin(); 757 it != cookies.end(); ++it) { 758 if ((*it)->Name() != cookie_name) 759 continue; 760 if (url.path().find((*it)->Path())) 761 continue; 762 matching_cookies.insert(*it); 763 } 764 765 for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end();) { 766 CookieMap::iterator curit = it; 767 ++it; 768 if (matching_cookies.find(curit->second) != matching_cookies.end()) 769 InternalDeleteCookie(curit, true); 770 } 771 } 772 773 CookieMonster::CookieList CookieMonster::GetAllCookies() { 774 AutoLock autolock(lock_); 775 InitIfNecessary(); 776 777 // This function is being called to scrape the cookie list for management UI 778 // or similar. We shouldn't show expired cookies in this list since it will 779 // just be confusing to users, and this function is called rarely enough (and 780 // is already slow enough) that it's OK to take the time to garbage collect 781 // the expired cookies now. 782 // 783 // Note that this does not prune cookies to be below our limits (if we've 784 // exceeded them) the way that calling GarbageCollect() would. 785 GarbageCollectExpired(Time::Now(), 786 CookieMapItPair(cookies_.begin(), cookies_.end()), 787 NULL); 788 789 CookieList cookie_list; 790 for (CookieMap::iterator it = cookies_.begin(); it != cookies_.end(); ++it) 791 cookie_list.push_back(CookieListPair(it->first, *it->second)); 792 793 return cookie_list; 794 } 795 796 CookieMonster::CookieList CookieMonster::GetAllCookiesForURL(const GURL& url) { 797 AutoLock autolock(lock_); 798 InitIfNecessary(); 799 800 // Do not return removed cookies. 801 GarbageCollectExpired(Time::Now(), 802 CookieMapItPair(cookies_.begin(), cookies_.end()), 803 NULL); 804 805 CookieList cookie_list; 806 if (!HasCookieableScheme(url)) 807 return cookie_list; 808 809 bool secure = url.SchemeIsSecure(); 810 811 // Query for the full host, For example: 'a.c.blah.com'. 812 std::string key(url.host()); 813 FindRawCookies(key, secure, &cookie_list); 814 815 // See if we can search for domain cookies, i.e. if the host has a TLD + 1. 816 const std::string domain(GetEffectiveDomain(url.scheme(), key)); 817 if (domain.empty()) 818 return cookie_list; 819 820 // Use same logic as in FindCookiesForHostAndDomain. 821 DCHECK_LE(domain.length(), key.length()); 822 DCHECK_EQ(0, key.compare(key.length() - domain.length(), domain.length(), 823 domain)); 824 for (key = "." + key; key.length() > domain.length(); ) { 825 FindRawCookies(key, secure, &cookie_list); 826 const size_t next_dot = key.find('.', 1); // Skip over leading dot. 827 key.erase(0, next_dot); 828 } 829 return cookie_list; 830 } 831 832 void CookieMonster::FindCookiesForHostAndDomain( 833 const GURL& url, 834 const CookieOptions& options, 835 std::vector<CanonicalCookie*>* cookies) { 836 AutoLock autolock(lock_); 837 InitIfNecessary(); 838 839 const Time current_time(CurrentTime()); 840 841 // Query for the full host, For example: 'a.c.blah.com'. 842 std::string key(url.host()); 843 FindCookiesForKey(key, url, options, current_time, cookies); 844 845 // See if we can search for domain cookies, i.e. if the host has a TLD + 1. 846 const std::string domain(GetEffectiveDomain(url.scheme(), key)); 847 if (domain.empty()) 848 return; 849 DCHECK_LE(domain.length(), key.length()); 850 DCHECK_EQ(0, key.compare(key.length() - domain.length(), domain.length(), 851 domain)); 852 853 // Walk through the string and query at the dot points (GURL should have 854 // canonicalized the dots, so this should be safe). Stop once we reach the 855 // domain + registry; we can't write cookies past this point, and with some 856 // registrars other domains can, in which case we don't want to read their 857 // cookies. 858 for (key = "." + key; key.length() > domain.length(); ) { 859 FindCookiesForKey(key, url, options, current_time, cookies); 860 const size_t next_dot = key.find('.', 1); // Skip over leading dot. 861 key.erase(0, next_dot); 862 } 863 } 864 865 void CookieMonster::FindCookiesForKey( 866 const std::string& key, 867 const GURL& url, 868 const CookieOptions& options, 869 const Time& current, 870 std::vector<CanonicalCookie*>* cookies) { 871 bool secure = url.SchemeIsSecure(); 872 873 for (CookieMapItPair its = cookies_.equal_range(key); 874 its.first != its.second; ) { 875 CookieMap::iterator curit = its.first; 876 CanonicalCookie* cc = curit->second; 877 ++its.first; 878 879 // If the cookie is expired, delete it. 880 if (cc->IsExpired(current)) { 881 InternalDeleteCookie(curit, true); 882 continue; 883 } 884 885 // Filter out HttpOnly cookies, per options. 886 if (options.exclude_httponly() && cc->IsHttpOnly()) 887 continue; 888 889 // Filter out secure cookies unless we're https. 890 if (!secure && cc->IsSecure()) 891 continue; 892 893 if (!cc->IsOnPath(url.path())) 894 continue; 895 896 // Add this cookie to the set of matching cookies. Since we're reading the 897 // cookie, update its last access time. 898 InternalUpdateCookieAccessTime(cc); 899 cookies->push_back(cc); 900 } 901 } 902 903 void CookieMonster::FindRawCookies(const std::string& key, 904 bool include_secure, 905 CookieList* list) { 906 for (CookieMapItPair its = cookies_.equal_range(key); 907 its.first != its.second; ++its.first) { 908 CanonicalCookie* cc = its.first->second; 909 if (include_secure || !cc->IsSecure()) 910 list->push_back(CookieListPair(key, *cc)); 911 } 912 } 913 914 915 CookieMonster::ParsedCookie::ParsedCookie(const std::string& cookie_line) 916 : is_valid_(false), 917 path_index_(0), 918 domain_index_(0), 919 expires_index_(0), 920 maxage_index_(0), 921 secure_index_(0), 922 httponly_index_(0) { 923 924 if (cookie_line.size() > kMaxCookieSize) { 925 LOG(INFO) << "Not parsing cookie, too large: " << cookie_line.size(); 926 return; 927 } 928 929 ParseTokenValuePairs(cookie_line); 930 if (pairs_.size() > 0) { 931 is_valid_ = true; 932 SetupAttributes(); 933 } 934 } 935 936 // Returns true if |c| occurs in |chars| 937 // TODO maybe make this take an iterator, could check for end also? 938 static inline bool CharIsA(const char c, const char* chars) { 939 return strchr(chars, c) != NULL; 940 } 941 // Seek the iterator to the first occurrence of a character in |chars|. 942 // Returns true if it hit the end, false otherwise. 943 static inline bool SeekTo(std::string::const_iterator* it, 944 const std::string::const_iterator& end, 945 const char* chars) { 946 for (; *it != end && !CharIsA(**it, chars); ++(*it)); 947 return *it == end; 948 } 949 // Seek the iterator to the first occurrence of a character not in |chars|. 950 // Returns true if it hit the end, false otherwise. 951 static inline bool SeekPast(std::string::const_iterator* it, 952 const std::string::const_iterator& end, 953 const char* chars) { 954 for (; *it != end && CharIsA(**it, chars); ++(*it)); 955 return *it == end; 956 } 957 static inline bool SeekBackPast(std::string::const_iterator* it, 958 const std::string::const_iterator& end, 959 const char* chars) { 960 for (; *it != end && CharIsA(**it, chars); --(*it)); 961 return *it == end; 962 } 963 964 // Parse all token/value pairs and populate pairs_. 965 void CookieMonster::ParsedCookie::ParseTokenValuePairs( 966 const std::string& cookie_line) { 967 static const char kTerminator[] = "\n\r\0"; 968 static const int kTerminatorLen = sizeof(kTerminator) - 1; 969 static const char kWhitespace[] = " \t"; 970 static const char kValueSeparator[] = ";"; 971 static const char kTokenSeparator[] = ";="; 972 973 pairs_.clear(); 974 975 // Ok, here we go. We should be expecting to be starting somewhere 976 // before the cookie line, not including any header name... 977 std::string::const_iterator start = cookie_line.begin(); 978 std::string::const_iterator end = cookie_line.end(); 979 std::string::const_iterator it = start; 980 981 // TODO Make sure we're stripping \r\n in the network code. Then we 982 // can log any unexpected terminators. 983 size_t term_pos = 984 cookie_line.find_first_of(std::string(kTerminator, kTerminatorLen)); 985 if (term_pos != std::string::npos) { 986 // We found a character we should treat as an end of string. 987 end = start + term_pos; 988 } 989 990 for (int pair_num = 0; pair_num < kMaxPairs && it != end; ++pair_num) { 991 TokenValuePair pair; 992 std::string::const_iterator token_start, token_real_end, token_end; 993 994 // Seek past any whitespace before the "token" (the name). 995 // token_start should point at the first character in the token 996 if (SeekPast(&it, end, kWhitespace)) 997 break; // No token, whitespace or empty. 998 token_start = it; 999 1000 // Seek over the token, to the token separator. 1001 // token_real_end should point at the token separator, i.e. '='. 1002 // If it == end after the seek, we probably have a token-value. 1003 SeekTo(&it, end, kTokenSeparator); 1004 token_real_end = it; 1005 1006 // Ignore any whitespace between the token and the token separator. 1007 // token_end should point after the last interesting token character, 1008 // pointing at either whitespace, or at '=' (and equal to token_real_end). 1009 if (it != token_start) { // We could have an empty token name. 1010 --it; // Go back before the token separator. 1011 // Skip over any whitespace to the first non-whitespace character. 1012 SeekBackPast(&it, token_start, kWhitespace); 1013 // Point after it. 1014 ++it; 1015 } 1016 token_end = it; 1017 1018 // Seek us back to the end of the token. 1019 it = token_real_end; 1020 1021 if (it == end || *it != '=') { 1022 // We have a token-value, we didn't have any token name. 1023 if (pair_num == 0) { 1024 // For the first time around, we want to treat single values 1025 // as a value with an empty name. (Mozilla bug 169091). 1026 // IE seems to also have this behavior, ex "AAA", and "AAA=10" will 1027 // set 2 different cookies, and setting "BBB" will then replace "AAA". 1028 pair.first = ""; 1029 // Rewind to the beginning of what we thought was the token name, 1030 // and let it get parsed as a value. 1031 it = token_start; 1032 } else { 1033 // Any not-first attribute we want to treat a value as a 1034 // name with an empty value... This is so something like 1035 // "secure;" will get parsed as a Token name, and not a value. 1036 pair.first = std::string(token_start, token_end); 1037 } 1038 } else { 1039 // We have a TOKEN=VALUE. 1040 pair.first = std::string(token_start, token_end); 1041 ++it; // Skip past the '='. 1042 } 1043 1044 // OK, now try to parse a value. 1045 std::string::const_iterator value_start, value_end; 1046 1047 // Seek past any whitespace that might in-between the token and value. 1048 SeekPast(&it, end, kWhitespace); 1049 // value_start should point at the first character of the value. 1050 value_start = it; 1051 1052 // It is unclear exactly how quoted string values should be handled. 1053 // Major browsers do different things, for example, Firefox supports 1054 // semicolons embedded in a quoted value, while IE does not. Looking at 1055 // the specs, RFC 2109 and 2965 allow for a quoted-string as the value. 1056 // However, these specs were apparently written after browsers had 1057 // implemented cookies, and they seem very distant from the reality of 1058 // what is actually implemented and used on the web. The original spec 1059 // from Netscape is possibly what is closest to the cookies used today. 1060 // This spec didn't have explicit support for double quoted strings, and 1061 // states that ; is not allowed as part of a value. We had originally 1062 // implement the Firefox behavior (A="B;C"; -> A="B;C";). However, since 1063 // there is no standard that makes sense, we decided to follow the behavior 1064 // of IE and Safari, which is closer to the original Netscape proposal. 1065 // This means that A="B;C" -> A="B;. This also makes the code much simpler 1066 // and reduces the possibility for invalid cookies, where other browsers 1067 // like Opera currently reject those invalid cookies (ex A="B" "C";). 1068 1069 // Just look for ';' to terminate ('=' allowed). 1070 // We can hit the end, maybe they didn't terminate. 1071 SeekTo(&it, end, kValueSeparator); 1072 1073 // Will be pointed at the ; seperator or the end. 1074 value_end = it; 1075 1076 // Ignore any unwanted whitespace after the value. 1077 if (value_end != value_start) { // Could have an empty value 1078 --value_end; 1079 SeekBackPast(&value_end, value_start, kWhitespace); 1080 ++value_end; 1081 } 1082 1083 // OK, we're finished with a Token/Value. 1084 pair.second = std::string(value_start, value_end); 1085 // From RFC2109: "Attributes (names) (attr) are case-insensitive." 1086 if (pair_num != 0) 1087 StringToLowerASCII(&pair.first); 1088 pairs_.push_back(pair); 1089 1090 // We've processed a token/value pair, we're either at the end of 1091 // the string or a ValueSeparator like ';', which we want to skip. 1092 if (it != end) 1093 ++it; 1094 } 1095 } 1096 1097 void CookieMonster::ParsedCookie::SetupAttributes() { 1098 static const char kPathTokenName[] = "path"; 1099 static const char kDomainTokenName[] = "domain"; 1100 static const char kExpiresTokenName[] = "expires"; 1101 static const char kMaxAgeTokenName[] = "max-age"; 1102 static const char kSecureTokenName[] = "secure"; 1103 static const char kHttpOnlyTokenName[] = "httponly"; 1104 1105 // We skip over the first token/value, the user supplied one. 1106 for (size_t i = 1; i < pairs_.size(); ++i) { 1107 if (pairs_[i].first == kPathTokenName) 1108 path_index_ = i; 1109 else if (pairs_[i].first == kDomainTokenName) 1110 domain_index_ = i; 1111 else if (pairs_[i].first == kExpiresTokenName) 1112 expires_index_ = i; 1113 else if (pairs_[i].first == kMaxAgeTokenName) 1114 maxage_index_ = i; 1115 else if (pairs_[i].first == kSecureTokenName) 1116 secure_index_ = i; 1117 else if (pairs_[i].first == kHttpOnlyTokenName) 1118 httponly_index_ = i; 1119 else { /* some attribute we don't know or don't care about. */ } 1120 } 1121 } 1122 1123 // Create a cookie-line for the cookie. For debugging only! 1124 // If we want to use this for something more than debugging, we 1125 // should rewrite it better... 1126 std::string CookieMonster::ParsedCookie::DebugString() const { 1127 std::string out; 1128 for (PairList::const_iterator it = pairs_.begin(); 1129 it != pairs_.end(); ++it) { 1130 out.append(it->first); 1131 out.append("="); 1132 out.append(it->second); 1133 out.append("; "); 1134 } 1135 return out; 1136 } 1137 1138 bool CookieMonster::CanonicalCookie::IsOnPath( 1139 const std::string& url_path) const { 1140 1141 // A zero length would be unsafe for our trailing '/' checks, and 1142 // would also make no sense for our prefix match. The code that 1143 // creates a CanonicalCookie should make sure the path is never zero length, 1144 // but we double check anyway. 1145 if (path_.empty()) 1146 return false; 1147 1148 // The Mozilla code broke it into 3 cases, if it's strings lengths 1149 // are less than, equal, or greater. I think this is simpler: 1150 1151 // Make sure the cookie path is a prefix of the url path. If the 1152 // url path is shorter than the cookie path, then the cookie path 1153 // can't be a prefix. 1154 if (url_path.find(path_) != 0) 1155 return false; 1156 1157 // Now we know that url_path is >= cookie_path, and that cookie_path 1158 // is a prefix of url_path. If they are the are the same length then 1159 // they are identical, otherwise we need an additional check: 1160 1161 // In order to avoid in correctly matching a cookie path of /blah 1162 // with a request path of '/blahblah/', we need to make sure that either 1163 // the cookie path ends in a trailing '/', or that we prefix up to a '/' 1164 // in the url path. Since we know that the url path length is greater 1165 // than the cookie path length, it's safe to index one byte past. 1166 if (path_.length() != url_path.length() && 1167 path_[path_.length() - 1] != '/' && 1168 url_path[path_.length()] != '/') 1169 return false; 1170 1171 return true; 1172 } 1173 1174 std::string CookieMonster::CanonicalCookie::DebugString() const { 1175 return StringPrintf("name: %s value: %s path: %s creation: %" PRId64, 1176 name_.c_str(), value_.c_str(), path_.c_str(), 1177 static_cast<int64>(creation_date_.ToTimeT())); 1178 } 1179 1180 } // namespace 1181