1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Portions of this code based on Mozilla: 6 // (netwerk/cookie/src/nsCookieService.cpp) 7 /* ***** BEGIN LICENSE BLOCK ***** 8 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 9 * 10 * The contents of this file are subject to the Mozilla Public License Version 11 * 1.1 (the "License"); you may not use this file except in compliance with 12 * the License. You may obtain a copy of the License at 13 * http://www.mozilla.org/MPL/ 14 * 15 * Software distributed under the License is distributed on an "AS IS" basis, 16 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 17 * for the specific language governing rights and limitations under the 18 * License. 19 * 20 * The Original Code is mozilla.org code. 21 * 22 * The Initial Developer of the Original Code is 23 * Netscape Communications Corporation. 24 * Portions created by the Initial Developer are Copyright (C) 2003 25 * the Initial Developer. All Rights Reserved. 26 * 27 * Contributor(s): 28 * Daniel Witte (dwitte (at) stanford.edu) 29 * Michiel van Leeuwen (mvl (at) exedo.nl) 30 * 31 * Alternatively, the contents of this file may be used under the terms of 32 * either the GNU General Public License Version 2 or later (the "GPL"), or 33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 34 * in which case the provisions of the GPL or the LGPL are applicable instead 35 * of those above. If you wish to allow use of your version of this file only 36 * under the terms of either the GPL or the LGPL, and not to allow others to 37 * use your version of this file under the terms of the MPL, indicate your 38 * decision by deleting the provisions above and replace them with the notice 39 * and other provisions required by the GPL or the LGPL. If you do not delete 40 * the provisions above, a recipient may use your version of this file under 41 * the terms of any one of the MPL, the GPL or the LGPL. 42 * 43 * ***** END LICENSE BLOCK ***** */ 44 45 #include "net/cookies/parsed_cookie.h" 46 47 #include "base/logging.h" 48 #include "base/metrics/histogram.h" 49 #include "base/strings/string_util.h" 50 51 // TODO(jww): We are collecting several UMA statistics in this file, and they 52 // relate to http://crbug.com/238041. We are measuring stats related to control 53 // characters in cookies because, currently, we allow control characters in a 54 // variety of scenarios where various RFCs theoretically disallow them. These 55 // control characters have the potential to cause problems with certain web 56 // servers that reject HTTP requests that contain cookies with control 57 // characters. We are measuring whether disallowing such cookies would have a 58 // notable impact on our users. We want to collect these stats through 1 stable 59 // release, so these UMA stats should remain at least through the M29 60 // branch-point. 61 62 namespace { 63 64 const char kPathTokenName[] = "path"; 65 const char kDomainTokenName[] = "domain"; 66 const char kExpiresTokenName[] = "expires"; 67 const char kMaxAgeTokenName[] = "max-age"; 68 const char kSecureTokenName[] = "secure"; 69 const char kHttpOnlyTokenName[] = "httponly"; 70 const char kPriorityTokenName[] = "priority"; 71 72 const char kTerminator[] = "\n\r\0"; 73 const int kTerminatorLen = sizeof(kTerminator) - 1; 74 const char kWhitespace[] = " \t"; 75 const char kValueSeparator[] = ";"; 76 const char kTokenSeparator[] = ";="; 77 78 // Returns true if |c| occurs in |chars| 79 // TODO(erikwright): maybe make this take an iterator, could check for end also? 80 inline bool CharIsA(const char c, const char* chars) { 81 return strchr(chars, c) != NULL; 82 } 83 // Seek the iterator to the first occurrence of a character in |chars|. 84 // Returns true if it hit the end, false otherwise. 85 inline bool SeekTo(std::string::const_iterator* it, 86 const std::string::const_iterator& end, 87 const char* chars) { 88 for (; *it != end && !CharIsA(**it, chars); ++(*it)) {} 89 return *it == end; 90 } 91 // Seek the iterator to the first occurrence of a character not in |chars|. 92 // Returns true if it hit the end, false otherwise. 93 inline bool SeekPast(std::string::const_iterator* it, 94 const std::string::const_iterator& end, 95 const char* chars) { 96 for (; *it != end && CharIsA(**it, chars); ++(*it)) {} 97 return *it == end; 98 } 99 inline bool SeekBackPast(std::string::const_iterator* it, 100 const std::string::const_iterator& end, 101 const char* chars) { 102 for (; *it != end && CharIsA(**it, chars); --(*it)) {} 103 return *it == end; 104 } 105 106 // Validate whether |value| is a valid token according to [RFC2616], 107 // Section 2.2. 108 bool IsValidToken(const std::string& value) { 109 if (value.empty()) 110 return false; 111 112 // Check that |value| has no separators. 113 std::string separators = "()<>@,;:\\\"/[]?={} \t"; 114 if (value.find_first_of(separators) != std::string::npos) 115 return false; 116 117 // Check that |value| has no CTLs. 118 for (std::string::const_iterator i = value.begin(); i != value.end(); ++i) { 119 if ((*i >= 0 && *i <= 31) || *i >= 127) 120 return false; 121 } 122 123 return true; 124 } 125 126 // Validate value, which may be according to RFC 6265 127 // cookie-value = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE ) 128 // cookie-octet = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E 129 // ; US-ASCII characters excluding CTLs, 130 // ; whitespace DQUOTE, comma, semicolon, 131 // ; and backslash 132 bool IsValidCookieValue(const std::string& value) { 133 // Number of characters to skip in validation at beginning and end of string. 134 size_t skip = 0; 135 if (value.size() >= 2 && *value.begin() == '"' && *(value.end()-1) == '"') 136 skip = 1; 137 for (std::string::const_iterator i = value.begin() + skip; 138 i != value.end() - skip; ++i) { 139 bool valid_octet = 140 (*i == 0x21 || 141 (*i >= 0x23 && *i <= 0x2B) || 142 (*i >= 0x2D && *i <= 0x3A) || 143 (*i >= 0x3C && *i <= 0x5B) || 144 (*i >= 0x5D && *i <= 0x7E)); 145 if (!valid_octet) 146 return false; 147 } 148 return true; 149 } 150 151 bool IsValidCookieAttributeValue(const std::string& value) { 152 // The greatest common denominator of cookie attribute values is 153 // <any CHAR except CTLs or ";"> according to RFC 6265. 154 for (std::string::const_iterator i = value.begin(); i != value.end(); ++i) { 155 if ((*i >= 0 && *i <= 31) || *i == ';') 156 return false; 157 } 158 return true; 159 } 160 161 } // namespace 162 163 namespace net { 164 165 ParsedCookie::ParsedCookie(const std::string& cookie_line) 166 : path_index_(0), 167 domain_index_(0), 168 expires_index_(0), 169 maxage_index_(0), 170 secure_index_(0), 171 httponly_index_(0), 172 priority_index_(0) { 173 174 if (cookie_line.size() > kMaxCookieSize) { 175 VLOG(1) << "Not parsing cookie, too large: " << cookie_line.size(); 176 return; 177 } 178 179 ParseTokenValuePairs(cookie_line); 180 if (!pairs_.empty()) 181 SetupAttributes(); 182 } 183 184 ParsedCookie::~ParsedCookie() { 185 } 186 187 bool ParsedCookie::IsValid() const { 188 return !pairs_.empty(); 189 } 190 191 CookiePriority ParsedCookie::Priority() const { 192 return (priority_index_ == 0) ? COOKIE_PRIORITY_DEFAULT : 193 StringToCookiePriority(pairs_[priority_index_].second); 194 } 195 196 bool ParsedCookie::SetName(const std::string& name) { 197 bool valid_token = IsValidToken(name); 198 UMA_HISTOGRAM_BOOLEAN("Cookie.SetNameVaildity", valid_token); 199 if (!valid_token) 200 return false; 201 if (pairs_.empty()) 202 pairs_.push_back(std::make_pair("", "")); 203 pairs_[0].first = name; 204 return true; 205 } 206 207 bool ParsedCookie::SetValue(const std::string& value) { 208 bool valid_cookie_value = IsValidCookieValue(value); 209 UMA_HISTOGRAM_BOOLEAN("Cookie.SetValueCookieValueValidity", 210 valid_cookie_value); 211 if (!valid_cookie_value) 212 return false; 213 if (pairs_.empty()) 214 pairs_.push_back(std::make_pair("", "")); 215 pairs_[0].second = value; 216 return true; 217 } 218 219 bool ParsedCookie::SetPath(const std::string& path) { 220 return SetString(&path_index_, kPathTokenName, path); 221 } 222 223 bool ParsedCookie::SetDomain(const std::string& domain) { 224 return SetString(&domain_index_, kDomainTokenName, domain); 225 } 226 227 bool ParsedCookie::SetExpires(const std::string& expires) { 228 return SetString(&expires_index_, kExpiresTokenName, expires); 229 } 230 231 bool ParsedCookie::SetMaxAge(const std::string& maxage) { 232 return SetString(&maxage_index_, kMaxAgeTokenName, maxage); 233 } 234 235 bool ParsedCookie::SetIsSecure(bool is_secure) { 236 return SetBool(&secure_index_, kSecureTokenName, is_secure); 237 } 238 239 bool ParsedCookie::SetIsHttpOnly(bool is_http_only) { 240 return SetBool(&httponly_index_, kHttpOnlyTokenName, is_http_only); 241 } 242 243 bool ParsedCookie::SetPriority(const std::string& priority) { 244 return SetString(&priority_index_, kPriorityTokenName, priority); 245 } 246 247 std::string ParsedCookie::ToCookieLine() const { 248 std::string out; 249 for (PairList::const_iterator it = pairs_.begin(); 250 it != pairs_.end(); ++it) { 251 if (!out.empty()) 252 out.append("; "); 253 out.append(it->first); 254 if (it->first != kSecureTokenName && it->first != kHttpOnlyTokenName) { 255 out.append("="); 256 out.append(it->second); 257 } 258 } 259 return out; 260 } 261 262 std::string::const_iterator ParsedCookie::FindFirstTerminator( 263 const std::string& s) { 264 std::string::const_iterator end = s.end(); 265 size_t term_pos = 266 s.find_first_of(std::string(kTerminator, kTerminatorLen)); 267 if (term_pos != std::string::npos) { 268 // We found a character we should treat as an end of string. 269 end = s.begin() + term_pos; 270 } 271 return end; 272 } 273 274 bool ParsedCookie::ParseToken(std::string::const_iterator* it, 275 const std::string::const_iterator& end, 276 std::string::const_iterator* token_start, 277 std::string::const_iterator* token_end) { 278 DCHECK(it && token_start && token_end); 279 std::string::const_iterator token_real_end; 280 281 // Seek past any whitespace before the "token" (the name). 282 // token_start should point at the first character in the token 283 if (SeekPast(it, end, kWhitespace)) 284 return false; // No token, whitespace or empty. 285 *token_start = *it; 286 287 // Seek over the token, to the token separator. 288 // token_real_end should point at the token separator, i.e. '='. 289 // If it == end after the seek, we probably have a token-value. 290 SeekTo(it, end, kTokenSeparator); 291 token_real_end = *it; 292 293 // Ignore any whitespace between the token and the token separator. 294 // token_end should point after the last interesting token character, 295 // pointing at either whitespace, or at '=' (and equal to token_real_end). 296 if (*it != *token_start) { // We could have an empty token name. 297 --(*it); // Go back before the token separator. 298 // Skip over any whitespace to the first non-whitespace character. 299 SeekBackPast(it, *token_start, kWhitespace); 300 // Point after it. 301 ++(*it); 302 } 303 *token_end = *it; 304 305 // Seek us back to the end of the token. 306 *it = token_real_end; 307 return true; 308 } 309 310 void ParsedCookie::ParseValue(std::string::const_iterator* it, 311 const std::string::const_iterator& end, 312 std::string::const_iterator* value_start, 313 std::string::const_iterator* value_end) { 314 DCHECK(it && value_start && value_end); 315 316 // Seek past any whitespace that might in-between the token and value. 317 SeekPast(it, end, kWhitespace); 318 // value_start should point at the first character of the value. 319 *value_start = *it; 320 321 // Just look for ';' to terminate ('=' allowed). 322 // We can hit the end, maybe they didn't terminate. 323 SeekTo(it, end, kValueSeparator); 324 325 // Will be pointed at the ; seperator or the end. 326 *value_end = *it; 327 328 // Ignore any unwanted whitespace after the value. 329 if (*value_end != *value_start) { // Could have an empty value 330 --(*value_end); 331 SeekBackPast(value_end, *value_start, kWhitespace); 332 ++(*value_end); 333 } 334 } 335 336 std::string ParsedCookie::ParseTokenString(const std::string& token) { 337 std::string::const_iterator it = token.begin(); 338 std::string::const_iterator end = FindFirstTerminator(token); 339 340 std::string::const_iterator token_start, token_end; 341 if (ParseToken(&it, end, &token_start, &token_end)) 342 return std::string(token_start, token_end); 343 return std::string(); 344 } 345 346 std::string ParsedCookie::ParseValueString(const std::string& value) { 347 std::string::const_iterator it = value.begin(); 348 std::string::const_iterator end = FindFirstTerminator(value); 349 350 std::string::const_iterator value_start, value_end; 351 ParseValue(&it, end, &value_start, &value_end); 352 return std::string(value_start, value_end); 353 } 354 355 // Parse all token/value pairs and populate pairs_. 356 void ParsedCookie::ParseTokenValuePairs(const std::string& cookie_line) { 357 enum ParsedCookieStatus { 358 PARSED_COOKIE_STATUS_NOTHING = 0x0, 359 PARSED_COOKIE_STATUS_CONTROL_CHAR = 0x1, 360 PARSED_COOKIE_STATUS_INVALID = 0x2, 361 PARSED_COOKIE_STATUS_BOTH = 362 PARSED_COOKIE_STATUS_CONTROL_CHAR | PARSED_COOKIE_STATUS_INVALID 363 }; 364 int parsed_cookie_status = PARSED_COOKIE_STATUS_NOTHING; 365 366 pairs_.clear(); 367 368 // Ok, here we go. We should be expecting to be starting somewhere 369 // before the cookie line, not including any header name... 370 std::string::const_iterator start = cookie_line.begin(); 371 std::string::const_iterator it = start; 372 373 // TODO(erikwright): Make sure we're stripping \r\n in the network code. 374 // Then we can log any unexpected terminators. 375 std::string::const_iterator end = FindFirstTerminator(cookie_line); 376 377 for (int pair_num = 0; pair_num < kMaxPairs && it != end; ++pair_num) { 378 TokenValuePair pair; 379 380 std::string::const_iterator token_start, token_end; 381 if (!ParseToken(&it, end, &token_start, &token_end)) 382 break; 383 384 if (it == end || *it != '=') { 385 // We have a token-value, we didn't have any token name. 386 if (pair_num == 0) { 387 // For the first time around, we want to treat single values 388 // as a value with an empty name. (Mozilla bug 169091). 389 // IE seems to also have this behavior, ex "AAA", and "AAA=10" will 390 // set 2 different cookies, and setting "BBB" will then replace "AAA". 391 pair.first = ""; 392 // Rewind to the beginning of what we thought was the token name, 393 // and let it get parsed as a value. 394 it = token_start; 395 } else { 396 // Any not-first attribute we want to treat a value as a 397 // name with an empty value... This is so something like 398 // "secure;" will get parsed as a Token name, and not a value. 399 pair.first = std::string(token_start, token_end); 400 } 401 } else { 402 // We have a TOKEN=VALUE. 403 pair.first = std::string(token_start, token_end); 404 ++it; // Skip past the '='. 405 } 406 407 // OK, now try to parse a value. 408 std::string::const_iterator value_start, value_end; 409 ParseValue(&it, end, &value_start, &value_end); 410 // OK, we're finished with a Token/Value. 411 pair.second = std::string(value_start, value_end); 412 413 if (!IsValidCookieAttributeValue(pair.second)) 414 parsed_cookie_status |= PARSED_COOKIE_STATUS_CONTROL_CHAR; 415 if (!IsValidToken(pair.second)) 416 parsed_cookie_status |= PARSED_COOKIE_STATUS_INVALID; 417 418 // From RFC2109: "Attributes (names) (attr) are case-insensitive." 419 if (pair_num != 0) 420 StringToLowerASCII(&pair.first); 421 pairs_.push_back(pair); 422 423 // We've processed a token/value pair, we're either at the end of 424 // the string or a ValueSeparator like ';', which we want to skip. 425 if (it != end) 426 ++it; 427 } 428 429 UMA_HISTOGRAM_ENUMERATION("Cookie.ParsedCookieStatus", parsed_cookie_status, 430 PARSED_COOKIE_STATUS_BOTH + 1); 431 } 432 433 void ParsedCookie::SetupAttributes() { 434 // We skip over the first token/value, the user supplied one. 435 for (size_t i = 1; i < pairs_.size(); ++i) { 436 if (pairs_[i].first == kPathTokenName) { 437 path_index_ = i; 438 } else if (pairs_[i].first == kDomainTokenName) { 439 domain_index_ = i; 440 } else if (pairs_[i].first == kExpiresTokenName) { 441 expires_index_ = i; 442 } else if (pairs_[i].first == kMaxAgeTokenName) { 443 maxage_index_ = i; 444 } else if (pairs_[i].first == kSecureTokenName) { 445 secure_index_ = i; 446 } else if (pairs_[i].first == kHttpOnlyTokenName) { 447 httponly_index_ = i; 448 } else if (pairs_[i].first == kPriorityTokenName) { 449 priority_index_ = i; 450 } else { 451 /* some attribute we don't know or don't care about. */ 452 } 453 } 454 } 455 456 bool ParsedCookie::SetString(size_t* index, 457 const std::string& key, 458 const std::string& value) { 459 if (value.empty()) { 460 ClearAttributePair(*index); 461 return true; 462 } else { 463 return SetAttributePair(index, key, value); 464 } 465 } 466 467 bool ParsedCookie::SetBool(size_t* index, 468 const std::string& key, 469 bool value) { 470 if (!value) { 471 ClearAttributePair(*index); 472 return true; 473 } else { 474 return SetAttributePair(index, key, std::string()); 475 } 476 } 477 478 bool ParsedCookie::SetAttributePair(size_t* index, 479 const std::string& key, 480 const std::string& value) { 481 bool valid_attribute_pair = IsValidToken(key) && 482 IsValidCookieAttributeValue(value); 483 UMA_HISTOGRAM_BOOLEAN("Cookie.SetAttributePairCharsValidity", 484 valid_attribute_pair); 485 if (!valid_attribute_pair) 486 return false; 487 if (!IsValid()) 488 return false; 489 if (*index) { 490 pairs_[*index].second = value; 491 } else { 492 pairs_.push_back(std::make_pair(key, value)); 493 *index = pairs_.size() - 1; 494 } 495 return true; 496 } 497 498 void ParsedCookie::ClearAttributePair(size_t index) { 499 // The first pair (name/value of cookie at pairs_[0]) cannot be cleared. 500 // Cookie attributes that don't have a value at the moment, are represented 501 // with an index being equal to 0. 502 if (index == 0) 503 return; 504 505 size_t* indexes[] = { &path_index_, &domain_index_, &expires_index_, 506 &maxage_index_, &secure_index_, &httponly_index_, 507 &priority_index_ }; 508 for (size_t i = 0; i < arraysize(indexes); ++i) { 509 if (*indexes[i] == index) 510 *indexes[i] = 0; 511 else if (*indexes[i] > index) 512 --*indexes[i]; 513 } 514 pairs_.erase(pairs_.begin() + index); 515 } 516 517 } // namespace 518