Home | History | Annotate | Download | only in cookies
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Portions of this code based on Mozilla:
      6 //   (netwerk/cookie/src/nsCookieService.cpp)
      7 /* ***** BEGIN LICENSE BLOCK *****
      8  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
      9  *
     10  * The contents of this file are subject to the Mozilla Public License Version
     11  * 1.1 (the "License"); you may not use this file except in compliance with
     12  * the License. You may obtain a copy of the License at
     13  * http://www.mozilla.org/MPL/
     14  *
     15  * Software distributed under the License is distributed on an "AS IS" basis,
     16  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
     17  * for the specific language governing rights and limitations under the
     18  * License.
     19  *
     20  * The Original Code is mozilla.org code.
     21  *
     22  * The Initial Developer of the Original Code is
     23  * Netscape Communications Corporation.
     24  * Portions created by the Initial Developer are Copyright (C) 2003
     25  * the Initial Developer. All Rights Reserved.
     26  *
     27  * Contributor(s):
     28  *   Daniel Witte (dwitte (at) stanford.edu)
     29  *   Michiel van Leeuwen (mvl (at) exedo.nl)
     30  *
     31  * Alternatively, the contents of this file may be used under the terms of
     32  * either the GNU General Public License Version 2 or later (the "GPL"), or
     33  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
     34  * in which case the provisions of the GPL or the LGPL are applicable instead
     35  * of those above. If you wish to allow use of your version of this file only
     36  * under the terms of either the GPL or the LGPL, and not to allow others to
     37  * use your version of this file under the terms of the MPL, indicate your
     38  * decision by deleting the provisions above and replace them with the notice
     39  * and other provisions required by the GPL or the LGPL. If you do not delete
     40  * the provisions above, a recipient may use your version of this file under
     41  * the terms of any one of the MPL, the GPL or the LGPL.
     42  *
     43  * ***** END LICENSE BLOCK ***** */
     44 
     45 #include "net/cookies/parsed_cookie.h"
     46 
     47 #include "base/logging.h"
     48 #include "base/metrics/histogram.h"
     49 #include "base/strings/string_util.h"
     50 
     51 // TODO(jww): We are collecting several UMA statistics in this file, and they
     52 // relate to http://crbug.com/238041. We are measuring stats related to control
     53 // characters in cookies because, currently, we allow control characters in a
     54 // variety of scenarios where various RFCs theoretically disallow them. These
     55 // control characters have the potential to cause problems with certain web
     56 // servers that reject HTTP requests that contain cookies with control
     57 // characters. We are measuring whether disallowing such cookies would have a
     58 // notable impact on our users. We want to collect these stats through 1 stable
     59 // release, so these UMA stats should remain at least through the M29
     60 // branch-point.
     61 
     62 namespace {
     63 
     64 const char kPathTokenName[] = "path";
     65 const char kDomainTokenName[] = "domain";
     66 const char kExpiresTokenName[] = "expires";
     67 const char kMaxAgeTokenName[] = "max-age";
     68 const char kSecureTokenName[] = "secure";
     69 const char kHttpOnlyTokenName[] = "httponly";
     70 const char kPriorityTokenName[] = "priority";
     71 
     72 const char kTerminator[] = "\n\r\0";
     73 const int kTerminatorLen = sizeof(kTerminator) - 1;
     74 const char kWhitespace[] = " \t";
     75 const char kValueSeparator[] = ";";
     76 const char kTokenSeparator[] = ";=";
     77 
     78 // Returns true if |c| occurs in |chars|
     79 // TODO(erikwright): maybe make this take an iterator, could check for end also?
     80 inline bool CharIsA(const char c, const char* chars) {
     81   return strchr(chars, c) != NULL;
     82 }
     83 // Seek the iterator to the first occurrence of a character in |chars|.
     84 // Returns true if it hit the end, false otherwise.
     85 inline bool SeekTo(std::string::const_iterator* it,
     86                    const std::string::const_iterator& end,
     87                    const char* chars) {
     88   for (; *it != end && !CharIsA(**it, chars); ++(*it)) {}
     89   return *it == end;
     90 }
     91 // Seek the iterator to the first occurrence of a character not in |chars|.
     92 // Returns true if it hit the end, false otherwise.
     93 inline bool SeekPast(std::string::const_iterator* it,
     94                      const std::string::const_iterator& end,
     95                      const char* chars) {
     96   for (; *it != end && CharIsA(**it, chars); ++(*it)) {}
     97   return *it == end;
     98 }
     99 inline bool SeekBackPast(std::string::const_iterator* it,
    100                          const std::string::const_iterator& end,
    101                          const char* chars) {
    102   for (; *it != end && CharIsA(**it, chars); --(*it)) {}
    103   return *it == end;
    104 }
    105 
    106 // Validate whether |value| is a valid token according to [RFC2616],
    107 // Section 2.2.
    108 bool IsValidToken(const std::string& value) {
    109   if (value.empty())
    110     return false;
    111 
    112   // Check that |value| has no separators.
    113   std::string separators = "()<>@,;:\\\"/[]?={} \t";
    114   if (value.find_first_of(separators) != std::string::npos)
    115     return false;
    116 
    117   // Check that |value| has no CTLs.
    118   for (std::string::const_iterator i = value.begin(); i != value.end(); ++i) {
    119     if ((*i >= 0 && *i <= 31) || *i >= 127)
    120       return false;
    121   }
    122 
    123   return true;
    124 }
    125 
    126 // Validate value, which may be according to RFC 6265
    127 // cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
    128 // cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
    129 //                      ; US-ASCII characters excluding CTLs,
    130 //                      ; whitespace DQUOTE, comma, semicolon,
    131 //                      ; and backslash
    132 bool IsValidCookieValue(const std::string& value) {
    133   // Number of characters to skip in validation at beginning and end of string.
    134   size_t skip = 0;
    135   if (value.size() >= 2 && *value.begin() == '"' && *(value.end()-1) == '"')
    136     skip = 1;
    137   for (std::string::const_iterator i = value.begin() + skip;
    138        i != value.end() - skip; ++i) {
    139     bool valid_octet =
    140         (*i == 0x21 ||
    141          (*i >= 0x23 && *i <= 0x2B) ||
    142          (*i >= 0x2D && *i <= 0x3A) ||
    143          (*i >= 0x3C && *i <= 0x5B) ||
    144          (*i >= 0x5D && *i <= 0x7E));
    145     if (!valid_octet)
    146       return false;
    147   }
    148   return true;
    149 }
    150 
    151 bool IsValidCookieAttributeValue(const std::string& value) {
    152   // The greatest common denominator of cookie attribute values is
    153   // <any CHAR except CTLs or ";"> according to RFC 6265.
    154   for (std::string::const_iterator i = value.begin(); i != value.end(); ++i) {
    155     if ((*i >= 0 && *i <= 31) || *i == ';')
    156       return false;
    157   }
    158   return true;
    159 }
    160 
    161 }  // namespace
    162 
    163 namespace net {
    164 
    165 ParsedCookie::ParsedCookie(const std::string& cookie_line)
    166     : path_index_(0),
    167       domain_index_(0),
    168       expires_index_(0),
    169       maxage_index_(0),
    170       secure_index_(0),
    171       httponly_index_(0),
    172       priority_index_(0) {
    173 
    174   if (cookie_line.size() > kMaxCookieSize) {
    175     VLOG(1) << "Not parsing cookie, too large: " << cookie_line.size();
    176     return;
    177   }
    178 
    179   ParseTokenValuePairs(cookie_line);
    180   if (!pairs_.empty())
    181     SetupAttributes();
    182 }
    183 
    184 ParsedCookie::~ParsedCookie() {
    185 }
    186 
    187 bool ParsedCookie::IsValid() const {
    188   return !pairs_.empty();
    189 }
    190 
    191 CookiePriority ParsedCookie::Priority() const {
    192   return (priority_index_ == 0) ? COOKIE_PRIORITY_DEFAULT :
    193       StringToCookiePriority(pairs_[priority_index_].second);
    194 }
    195 
    196 bool ParsedCookie::SetName(const std::string& name) {
    197   bool valid_token = IsValidToken(name);
    198   UMA_HISTOGRAM_BOOLEAN("Cookie.SetNameVaildity", valid_token);
    199   if (!valid_token)
    200     return false;
    201   if (pairs_.empty())
    202     pairs_.push_back(std::make_pair("", ""));
    203   pairs_[0].first = name;
    204   return true;
    205 }
    206 
    207 bool ParsedCookie::SetValue(const std::string& value) {
    208   bool valid_cookie_value = IsValidCookieValue(value);
    209   UMA_HISTOGRAM_BOOLEAN("Cookie.SetValueCookieValueValidity",
    210     valid_cookie_value);
    211   if (!valid_cookie_value)
    212     return false;
    213   if (pairs_.empty())
    214     pairs_.push_back(std::make_pair("", ""));
    215   pairs_[0].second = value;
    216   return true;
    217 }
    218 
    219 bool ParsedCookie::SetPath(const std::string& path) {
    220   return SetString(&path_index_, kPathTokenName, path);
    221 }
    222 
    223 bool ParsedCookie::SetDomain(const std::string& domain) {
    224   return SetString(&domain_index_, kDomainTokenName, domain);
    225 }
    226 
    227 bool ParsedCookie::SetExpires(const std::string& expires) {
    228   return SetString(&expires_index_, kExpiresTokenName, expires);
    229 }
    230 
    231 bool ParsedCookie::SetMaxAge(const std::string& maxage) {
    232   return SetString(&maxage_index_, kMaxAgeTokenName, maxage);
    233 }
    234 
    235 bool ParsedCookie::SetIsSecure(bool is_secure) {
    236   return SetBool(&secure_index_, kSecureTokenName, is_secure);
    237 }
    238 
    239 bool ParsedCookie::SetIsHttpOnly(bool is_http_only) {
    240   return SetBool(&httponly_index_, kHttpOnlyTokenName, is_http_only);
    241 }
    242 
    243 bool ParsedCookie::SetPriority(const std::string& priority) {
    244   return SetString(&priority_index_, kPriorityTokenName, priority);
    245 }
    246 
    247 std::string ParsedCookie::ToCookieLine() const {
    248   std::string out;
    249   for (PairList::const_iterator it = pairs_.begin();
    250        it != pairs_.end(); ++it) {
    251     if (!out.empty())
    252       out.append("; ");
    253     out.append(it->first);
    254     if (it->first != kSecureTokenName && it->first != kHttpOnlyTokenName) {
    255       out.append("=");
    256       out.append(it->second);
    257     }
    258   }
    259   return out;
    260 }
    261 
    262 std::string::const_iterator ParsedCookie::FindFirstTerminator(
    263     const std::string& s) {
    264   std::string::const_iterator end = s.end();
    265   size_t term_pos =
    266       s.find_first_of(std::string(kTerminator, kTerminatorLen));
    267   if (term_pos != std::string::npos) {
    268     // We found a character we should treat as an end of string.
    269     end = s.begin() + term_pos;
    270   }
    271   return end;
    272 }
    273 
    274 bool ParsedCookie::ParseToken(std::string::const_iterator* it,
    275                               const std::string::const_iterator& end,
    276                               std::string::const_iterator* token_start,
    277                               std::string::const_iterator* token_end) {
    278   DCHECK(it && token_start && token_end);
    279   std::string::const_iterator token_real_end;
    280 
    281   // Seek past any whitespace before the "token" (the name).
    282   // token_start should point at the first character in the token
    283   if (SeekPast(it, end, kWhitespace))
    284     return false;  // No token, whitespace or empty.
    285   *token_start = *it;
    286 
    287   // Seek over the token, to the token separator.
    288   // token_real_end should point at the token separator, i.e. '='.
    289   // If it == end after the seek, we probably have a token-value.
    290   SeekTo(it, end, kTokenSeparator);
    291   token_real_end = *it;
    292 
    293   // Ignore any whitespace between the token and the token separator.
    294   // token_end should point after the last interesting token character,
    295   // pointing at either whitespace, or at '=' (and equal to token_real_end).
    296   if (*it != *token_start) {  // We could have an empty token name.
    297     --(*it);  // Go back before the token separator.
    298     // Skip over any whitespace to the first non-whitespace character.
    299     SeekBackPast(it, *token_start, kWhitespace);
    300     // Point after it.
    301     ++(*it);
    302   }
    303   *token_end = *it;
    304 
    305   // Seek us back to the end of the token.
    306   *it = token_real_end;
    307   return true;
    308 }
    309 
    310 void ParsedCookie::ParseValue(std::string::const_iterator* it,
    311                               const std::string::const_iterator& end,
    312                               std::string::const_iterator* value_start,
    313                               std::string::const_iterator* value_end) {
    314   DCHECK(it && value_start && value_end);
    315 
    316   // Seek past any whitespace that might in-between the token and value.
    317   SeekPast(it, end, kWhitespace);
    318   // value_start should point at the first character of the value.
    319   *value_start = *it;
    320 
    321   // Just look for ';' to terminate ('=' allowed).
    322   // We can hit the end, maybe they didn't terminate.
    323   SeekTo(it, end, kValueSeparator);
    324 
    325   // Will be pointed at the ; seperator or the end.
    326   *value_end = *it;
    327 
    328   // Ignore any unwanted whitespace after the value.
    329   if (*value_end != *value_start) {  // Could have an empty value
    330     --(*value_end);
    331     SeekBackPast(value_end, *value_start, kWhitespace);
    332     ++(*value_end);
    333   }
    334 }
    335 
    336 std::string ParsedCookie::ParseTokenString(const std::string& token) {
    337   std::string::const_iterator it = token.begin();
    338   std::string::const_iterator end = FindFirstTerminator(token);
    339 
    340   std::string::const_iterator token_start, token_end;
    341   if (ParseToken(&it, end, &token_start, &token_end))
    342     return std::string(token_start, token_end);
    343   return std::string();
    344 }
    345 
    346 std::string ParsedCookie::ParseValueString(const std::string& value) {
    347   std::string::const_iterator it = value.begin();
    348   std::string::const_iterator end = FindFirstTerminator(value);
    349 
    350   std::string::const_iterator value_start, value_end;
    351   ParseValue(&it, end, &value_start, &value_end);
    352   return std::string(value_start, value_end);
    353 }
    354 
    355 // Parse all token/value pairs and populate pairs_.
    356 void ParsedCookie::ParseTokenValuePairs(const std::string& cookie_line) {
    357   enum ParsedCookieStatus {
    358     PARSED_COOKIE_STATUS_NOTHING = 0x0,
    359     PARSED_COOKIE_STATUS_CONTROL_CHAR = 0x1,
    360     PARSED_COOKIE_STATUS_INVALID = 0x2,
    361     PARSED_COOKIE_STATUS_BOTH =
    362       PARSED_COOKIE_STATUS_CONTROL_CHAR | PARSED_COOKIE_STATUS_INVALID
    363   };
    364   int parsed_cookie_status = PARSED_COOKIE_STATUS_NOTHING;
    365 
    366   pairs_.clear();
    367 
    368   // Ok, here we go.  We should be expecting to be starting somewhere
    369   // before the cookie line, not including any header name...
    370   std::string::const_iterator start = cookie_line.begin();
    371   std::string::const_iterator it = start;
    372 
    373   // TODO(erikwright): Make sure we're stripping \r\n in the network code.
    374   // Then we can log any unexpected terminators.
    375   std::string::const_iterator end = FindFirstTerminator(cookie_line);
    376 
    377   for (int pair_num = 0; pair_num < kMaxPairs && it != end; ++pair_num) {
    378     TokenValuePair pair;
    379 
    380     std::string::const_iterator token_start, token_end;
    381     if (!ParseToken(&it, end, &token_start, &token_end))
    382       break;
    383 
    384     if (it == end || *it != '=') {
    385       // We have a token-value, we didn't have any token name.
    386       if (pair_num == 0) {
    387         // For the first time around, we want to treat single values
    388         // as a value with an empty name. (Mozilla bug 169091).
    389         // IE seems to also have this behavior, ex "AAA", and "AAA=10" will
    390         // set 2 different cookies, and setting "BBB" will then replace "AAA".
    391         pair.first = "";
    392         // Rewind to the beginning of what we thought was the token name,
    393         // and let it get parsed as a value.
    394         it = token_start;
    395       } else {
    396         // Any not-first attribute we want to treat a value as a
    397         // name with an empty value...  This is so something like
    398         // "secure;" will get parsed as a Token name, and not a value.
    399         pair.first = std::string(token_start, token_end);
    400       }
    401     } else {
    402       // We have a TOKEN=VALUE.
    403       pair.first = std::string(token_start, token_end);
    404       ++it;  // Skip past the '='.
    405     }
    406 
    407     // OK, now try to parse a value.
    408     std::string::const_iterator value_start, value_end;
    409     ParseValue(&it, end, &value_start, &value_end);
    410     // OK, we're finished with a Token/Value.
    411     pair.second = std::string(value_start, value_end);
    412 
    413     if (!IsValidCookieAttributeValue(pair.second))
    414       parsed_cookie_status |= PARSED_COOKIE_STATUS_CONTROL_CHAR;
    415     if (!IsValidToken(pair.second))
    416       parsed_cookie_status |= PARSED_COOKIE_STATUS_INVALID;
    417 
    418     // From RFC2109: "Attributes (names) (attr) are case-insensitive."
    419     if (pair_num != 0)
    420       StringToLowerASCII(&pair.first);
    421     pairs_.push_back(pair);
    422 
    423     // We've processed a token/value pair, we're either at the end of
    424     // the string or a ValueSeparator like ';', which we want to skip.
    425     if (it != end)
    426       ++it;
    427   }
    428 
    429   UMA_HISTOGRAM_ENUMERATION("Cookie.ParsedCookieStatus", parsed_cookie_status,
    430     PARSED_COOKIE_STATUS_BOTH + 1);
    431 }
    432 
    433 void ParsedCookie::SetupAttributes() {
    434   // We skip over the first token/value, the user supplied one.
    435   for (size_t i = 1; i < pairs_.size(); ++i) {
    436     if (pairs_[i].first == kPathTokenName) {
    437       path_index_ = i;
    438     } else if (pairs_[i].first == kDomainTokenName) {
    439       domain_index_ = i;
    440     } else if (pairs_[i].first == kExpiresTokenName) {
    441       expires_index_ = i;
    442     } else if (pairs_[i].first == kMaxAgeTokenName) {
    443       maxage_index_ = i;
    444     } else if (pairs_[i].first == kSecureTokenName) {
    445       secure_index_ = i;
    446     } else if (pairs_[i].first == kHttpOnlyTokenName) {
    447       httponly_index_ = i;
    448     } else if (pairs_[i].first == kPriorityTokenName) {
    449       priority_index_ = i;
    450     } else {
    451       /* some attribute we don't know or don't care about. */
    452     }
    453   }
    454 }
    455 
    456 bool ParsedCookie::SetString(size_t* index,
    457                              const std::string& key,
    458                              const std::string& value) {
    459   if (value.empty()) {
    460     ClearAttributePair(*index);
    461     return true;
    462   } else {
    463     return SetAttributePair(index, key, value);
    464   }
    465 }
    466 
    467 bool ParsedCookie::SetBool(size_t* index,
    468                            const std::string& key,
    469                            bool value) {
    470   if (!value) {
    471     ClearAttributePair(*index);
    472     return true;
    473   } else {
    474     return SetAttributePair(index, key, std::string());
    475   }
    476 }
    477 
    478 bool ParsedCookie::SetAttributePair(size_t* index,
    479                                     const std::string& key,
    480                                     const std::string& value) {
    481   bool valid_attribute_pair = IsValidToken(key) &&
    482                               IsValidCookieAttributeValue(value);
    483   UMA_HISTOGRAM_BOOLEAN("Cookie.SetAttributePairCharsValidity",
    484     valid_attribute_pair);
    485   if (!valid_attribute_pair)
    486     return false;
    487   if (!IsValid())
    488     return false;
    489   if (*index) {
    490     pairs_[*index].second = value;
    491   } else {
    492     pairs_.push_back(std::make_pair(key, value));
    493     *index = pairs_.size() - 1;
    494   }
    495   return true;
    496 }
    497 
    498 void ParsedCookie::ClearAttributePair(size_t index) {
    499   // The first pair (name/value of cookie at pairs_[0]) cannot be cleared.
    500   // Cookie attributes that don't have a value at the moment, are represented
    501   // with an index being equal to 0.
    502   if (index == 0)
    503     return;
    504 
    505   size_t* indexes[] = { &path_index_, &domain_index_, &expires_index_,
    506                         &maxage_index_, &secure_index_, &httponly_index_,
    507                         &priority_index_ };
    508   for (size_t i = 0; i < arraysize(indexes); ++i) {
    509     if (*indexes[i] == index)
    510       *indexes[i] = 0;
    511     else if (*indexes[i] > index)
    512       --*indexes[i];
    513   }
    514   pairs_.erase(pairs_.begin() + index);
    515 }
    516 
    517 }  // namespace
    518