Home | History | Annotate | Download | only in http
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // The rules for parsing content-types were borrowed from Firefox:
      6 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
      7 
      8 #include "net/http/http_util.h"
      9 
     10 #include <algorithm>
     11 
     12 #include "base/basictypes.h"
     13 #include "base/logging.h"
     14 #include "base/strings/string_number_conversions.h"
     15 #include "base/strings/string_piece.h"
     16 #include "base/strings/string_tokenizer.h"
     17 #include "base/strings/string_util.h"
     18 #include "base/strings/stringprintf.h"
     19 #include "base/time/time.h"
     20 
     21 using std::string;
     22 
     23 namespace net {
     24 
     25 //-----------------------------------------------------------------------------
     26 
     27 // Return the index of the closing quote of the string, if any.
     28 static size_t FindStringEnd(const string& line, size_t start, char delim) {
     29   DCHECK(start < line.length() && line[start] == delim &&
     30          (delim == '"' || delim == '\''));
     31 
     32   const char set[] = { delim, '\\', '\0' };
     33   for (;;) {
     34     // start points to either the start quote or the last
     35     // escaped char (the char following a '\\')
     36 
     37     size_t end = line.find_first_of(set, start + 1);
     38     if (end == string::npos)
     39       return line.length();
     40 
     41     if (line[end] == '\\') {
     42       // Hit a backslash-escaped char.  Need to skip over it.
     43       start = end + 1;
     44       if (start == line.length())
     45         return start;
     46 
     47       // Go back to looking for the next escape or the string end
     48       continue;
     49     }
     50 
     51     return end;
     52   }
     53 
     54   NOTREACHED();
     55   return line.length();
     56 }
     57 
     58 //-----------------------------------------------------------------------------
     59 
     60 // static
     61 size_t HttpUtil::FindDelimiter(const string& line, size_t search_start,
     62                                char delimiter) {
     63   do {
     64     // search_start points to the spot from which we should start looking
     65     // for the delimiter.
     66     const char delim_str[] = { delimiter, '"', '\'', '\0' };
     67     size_t cur_delim_pos = line.find_first_of(delim_str, search_start);
     68     if (cur_delim_pos == string::npos)
     69       return line.length();
     70 
     71     char ch = line[cur_delim_pos];
     72     if (ch == delimiter) {
     73       // Found delimiter
     74       return cur_delim_pos;
     75     }
     76 
     77     // We hit the start of a quoted string.  Look for its end.
     78     search_start = FindStringEnd(line, cur_delim_pos, ch);
     79     if (search_start == line.length())
     80       return search_start;
     81 
     82     ++search_start;
     83 
     84     // search_start now points to the first char after the end of the
     85     // string, so just go back to the top of the loop and look for
     86     // |delimiter| again.
     87   } while (true);
     88 
     89   NOTREACHED();
     90   return line.length();
     91 }
     92 
     93 // static
     94 void HttpUtil::ParseContentType(const string& content_type_str,
     95                                 string* mime_type,
     96                                 string* charset,
     97                                 bool* had_charset,
     98                                 string* boundary) {
     99   const string::const_iterator begin = content_type_str.begin();
    100 
    101   // Trim leading and trailing whitespace from type.  We include '(' in
    102   // the trailing trim set to catch media-type comments, which are not at all
    103   // standard, but may occur in rare cases.
    104   size_t type_val = content_type_str.find_first_not_of(HTTP_LWS);
    105   type_val = std::min(type_val, content_type_str.length());
    106   size_t type_end = content_type_str.find_first_of(HTTP_LWS ";(", type_val);
    107   if (string::npos == type_end)
    108     type_end = content_type_str.length();
    109 
    110   size_t charset_val = 0;
    111   size_t charset_end = 0;
    112   bool type_has_charset = false;
    113 
    114   // Iterate over parameters
    115   size_t param_start = content_type_str.find_first_of(';', type_end);
    116   if (param_start != string::npos) {
    117     base::StringTokenizer tokenizer(begin + param_start, content_type_str.end(),
    118                                     ";");
    119     tokenizer.set_quote_chars("\"");
    120     while (tokenizer.GetNext()) {
    121       string::const_iterator equals_sign =
    122           std::find(tokenizer.token_begin(), tokenizer.token_end(), '=');
    123       if (equals_sign == tokenizer.token_end())
    124         continue;
    125 
    126       string::const_iterator param_name_begin = tokenizer.token_begin();
    127       string::const_iterator param_name_end = equals_sign;
    128       TrimLWS(&param_name_begin, &param_name_end);
    129 
    130       string::const_iterator param_value_begin = equals_sign + 1;
    131       string::const_iterator param_value_end = tokenizer.token_end();
    132       DCHECK(param_value_begin <= tokenizer.token_end());
    133       TrimLWS(&param_value_begin, &param_value_end);
    134 
    135       if (LowerCaseEqualsASCII(param_name_begin, param_name_end, "charset")) {
    136         // TODO(abarth): Refactor this function to consistently use iterators.
    137         charset_val = param_value_begin - begin;
    138         charset_end = param_value_end - begin;
    139         type_has_charset = true;
    140       } else if (LowerCaseEqualsASCII(param_name_begin, param_name_end,
    141                                       "boundary")) {
    142         if (boundary)
    143           boundary->assign(param_value_begin, param_value_end);
    144       }
    145     }
    146   }
    147 
    148   if (type_has_charset) {
    149     // Trim leading and trailing whitespace from charset_val.  We include
    150     // '(' in the trailing trim set to catch media-type comments, which are
    151     // not at all standard, but may occur in rare cases.
    152     charset_val = content_type_str.find_first_not_of(HTTP_LWS, charset_val);
    153     charset_val = std::min(charset_val, charset_end);
    154     char first_char = content_type_str[charset_val];
    155     if (first_char == '"' || first_char == '\'') {
    156       charset_end = FindStringEnd(content_type_str, charset_val, first_char);
    157       ++charset_val;
    158       DCHECK(charset_end >= charset_val);
    159     } else {
    160       charset_end = std::min(content_type_str.find_first_of(HTTP_LWS ";(",
    161                                                             charset_val),
    162                              charset_end);
    163     }
    164   }
    165 
    166   // if the server sent "*/*", it is meaningless, so do not store it.
    167   // also, if type_val is the same as mime_type, then just update the
    168   // charset.  however, if charset is empty and mime_type hasn't
    169   // changed, then don't wipe-out an existing charset.  We
    170   // also want to reject a mime-type if it does not include a slash.
    171   // some servers give junk after the charset parameter, which may
    172   // include a comma, so this check makes us a bit more tolerant.
    173   if (content_type_str.length() != 0 &&
    174       content_type_str != "*/*" &&
    175       content_type_str.find_first_of('/') != string::npos) {
    176     // Common case here is that mime_type is empty
    177     bool eq = !mime_type->empty() && LowerCaseEqualsASCII(begin + type_val,
    178                                                           begin + type_end,
    179                                                           mime_type->data());
    180     if (!eq) {
    181       mime_type->assign(begin + type_val, begin + type_end);
    182       StringToLowerASCII(mime_type);
    183     }
    184     if ((!eq && *had_charset) || type_has_charset) {
    185       *had_charset = true;
    186       charset->assign(begin + charset_val, begin + charset_end);
    187       StringToLowerASCII(charset);
    188     }
    189   }
    190 }
    191 
    192 // static
    193 // Parse the Range header according to RFC 2616 14.35.1
    194 // ranges-specifier = byte-ranges-specifier
    195 // byte-ranges-specifier = bytes-unit "=" byte-range-set
    196 // byte-range-set  = 1#( byte-range-spec | suffix-byte-range-spec )
    197 // byte-range-spec = first-byte-pos "-" [last-byte-pos]
    198 // first-byte-pos  = 1*DIGIT
    199 // last-byte-pos   = 1*DIGIT
    200 bool HttpUtil::ParseRanges(const std::string& headers,
    201                            std::vector<HttpByteRange>* ranges) {
    202   std::string ranges_specifier;
    203   HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\r\n");
    204 
    205   while (it.GetNext()) {
    206     // Look for "Range" header.
    207     if (!LowerCaseEqualsASCII(it.name(), "range"))
    208       continue;
    209     ranges_specifier = it.values();
    210     // We just care about the first "Range" header, so break here.
    211     break;
    212   }
    213 
    214   if (ranges_specifier.empty())
    215     return false;
    216 
    217   return ParseRangeHeader(ranges_specifier, ranges);
    218 }
    219 
    220 // static
    221 bool HttpUtil::ParseRangeHeader(const std::string& ranges_specifier,
    222                                 std::vector<HttpByteRange>* ranges) {
    223   size_t equal_char_offset = ranges_specifier.find('=');
    224   if (equal_char_offset == std::string::npos)
    225     return false;
    226 
    227   // Try to extract bytes-unit part.
    228   std::string::const_iterator bytes_unit_begin = ranges_specifier.begin();
    229   std::string::const_iterator bytes_unit_end = bytes_unit_begin +
    230                                                equal_char_offset;
    231   std::string::const_iterator byte_range_set_begin = bytes_unit_end + 1;
    232   std::string::const_iterator byte_range_set_end = ranges_specifier.end();
    233 
    234   TrimLWS(&bytes_unit_begin, &bytes_unit_end);
    235   // "bytes" unit identifier is not found.
    236   if (!LowerCaseEqualsASCII(bytes_unit_begin, bytes_unit_end, "bytes"))
    237     return false;
    238 
    239   ValuesIterator byte_range_set_iterator(byte_range_set_begin,
    240                                          byte_range_set_end, ',');
    241   while (byte_range_set_iterator.GetNext()) {
    242     size_t minus_char_offset = byte_range_set_iterator.value().find('-');
    243     // If '-' character is not found, reports failure.
    244     if (minus_char_offset == std::string::npos)
    245       return false;
    246 
    247     std::string::const_iterator first_byte_pos_begin =
    248         byte_range_set_iterator.value_begin();
    249     std::string::const_iterator first_byte_pos_end =
    250         first_byte_pos_begin +  minus_char_offset;
    251     TrimLWS(&first_byte_pos_begin, &first_byte_pos_end);
    252     std::string first_byte_pos(first_byte_pos_begin, first_byte_pos_end);
    253 
    254     HttpByteRange range;
    255     // Try to obtain first-byte-pos.
    256     if (!first_byte_pos.empty()) {
    257       int64 first_byte_position = -1;
    258       if (!base::StringToInt64(first_byte_pos, &first_byte_position))
    259         return false;
    260       range.set_first_byte_position(first_byte_position);
    261     }
    262 
    263     std::string::const_iterator last_byte_pos_begin =
    264         byte_range_set_iterator.value_begin() + minus_char_offset + 1;
    265     std::string::const_iterator last_byte_pos_end =
    266         byte_range_set_iterator.value_end();
    267     TrimLWS(&last_byte_pos_begin, &last_byte_pos_end);
    268     std::string last_byte_pos(last_byte_pos_begin, last_byte_pos_end);
    269 
    270     // We have last-byte-pos or suffix-byte-range-spec in this case.
    271     if (!last_byte_pos.empty()) {
    272       int64 last_byte_position;
    273       if (!base::StringToInt64(last_byte_pos, &last_byte_position))
    274         return false;
    275       if (range.HasFirstBytePosition())
    276         range.set_last_byte_position(last_byte_position);
    277       else
    278         range.set_suffix_length(last_byte_position);
    279     } else if (!range.HasFirstBytePosition()) {
    280       return false;
    281     }
    282 
    283     // Do a final check on the HttpByteRange object.
    284     if (!range.IsValid())
    285       return false;
    286     ranges->push_back(range);
    287   }
    288   return !ranges->empty();
    289 }
    290 
    291 // static
    292 bool HttpUtil::HasHeader(const std::string& headers, const char* name) {
    293   size_t name_len = strlen(name);
    294   string::const_iterator it =
    295       std::search(headers.begin(),
    296                   headers.end(),
    297                   name,
    298                   name + name_len,
    299                   base::CaseInsensitiveCompareASCII<char>());
    300   if (it == headers.end())
    301     return false;
    302 
    303   // ensure match is prefixed by newline
    304   if (it != headers.begin() && it[-1] != '\n')
    305     return false;
    306 
    307   // ensure match is suffixed by colon
    308   if (it + name_len >= headers.end() || it[name_len] != ':')
    309     return false;
    310 
    311   return true;
    312 }
    313 
namespace {
// A header string containing any of the following fields will cause
// an error. The list comes from the XMLHttpRequest standard.
// http://www.w3.org/TR/XMLHttpRequest/#the-setrequestheader-method
//
// Entries are spelled in lower case: HttpUtil::IsSafeHeader() lower-cases the
// candidate name and compares it against these strings exactly.
const char* const kForbiddenHeaderFields[] = {
  "accept-charset",
  "accept-encoding",
  "access-control-request-headers",
  "access-control-request-method",
  "connection",
  "content-length",
  "cookie",
  "cookie2",
  "content-transfer-encoding",
  "date",
  "expect",
  "host",
  "keep-alive",
  "origin",
  "referer",
  "te",
  "trailer",
  "transfer-encoding",
  "upgrade",
  "user-agent",
  "via",
};
}  // anonymous namespace
    342 
    343 // static
    344 bool HttpUtil::IsSafeHeader(const std::string& name) {
    345   std::string lower_name(StringToLowerASCII(name));
    346   if (StartsWithASCII(lower_name, "proxy-", true) ||
    347       StartsWithASCII(lower_name, "sec-", true))
    348     return false;
    349   for (size_t i = 0; i < arraysize(kForbiddenHeaderFields); ++i) {
    350     if (lower_name == kForbiddenHeaderFields[i])
    351       return false;
    352   }
    353   return true;
    354 }
    355 
    356 // static
    357 std::string HttpUtil::StripHeaders(const std::string& headers,
    358                                    const char* const headers_to_remove[],
    359                                    size_t headers_to_remove_len) {
    360   std::string stripped_headers;
    361   net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\r\n");
    362 
    363   while (it.GetNext()) {
    364     bool should_remove = false;
    365     for (size_t i = 0; i < headers_to_remove_len; ++i) {
    366       if (LowerCaseEqualsASCII(it.name_begin(), it.name_end(),
    367                                headers_to_remove[i])) {
    368         should_remove = true;
    369         break;
    370       }
    371     }
    372     if (!should_remove) {
    373       // Assume that name and values are on the same line.
    374       stripped_headers.append(it.name_begin(), it.values_end());
    375       stripped_headers.append("\r\n");
    376     }
    377   }
    378   return stripped_headers;
    379 }
    380 
    381 // static
    382 bool HttpUtil::IsNonCoalescingHeader(string::const_iterator name_begin,
    383                                      string::const_iterator name_end) {
    384   // NOTE: "set-cookie2" headers do not support expires attributes, so we don't
    385   // have to list them here.
    386   const char* kNonCoalescingHeaders[] = {
    387     "date",
    388     "expires",
    389     "last-modified",
    390     "location",  // See bug 1050541 for details
    391     "retry-after",
    392     "set-cookie",
    393     // The format of auth-challenges mixes both space separated tokens and
    394     // comma separated properties, so coalescing on comma won't work.
    395     "www-authenticate",
    396     "proxy-authenticate",
    397     // STS specifies that UAs must not process any STS headers after the first
    398     // one.
    399     "strict-transport-security"
    400   };
    401   for (size_t i = 0; i < arraysize(kNonCoalescingHeaders); ++i) {
    402     if (LowerCaseEqualsASCII(name_begin, name_end, kNonCoalescingHeaders[i]))
    403       return true;
    404   }
    405   return false;
    406 }
    407 
    408 bool HttpUtil::IsLWS(char c) {
    409   return strchr(HTTP_LWS, c) != NULL;
    410 }
    411 
    412 void HttpUtil::TrimLWS(string::const_iterator* begin,
    413                        string::const_iterator* end) {
    414   // leading whitespace
    415   while (*begin < *end && IsLWS((*begin)[0]))
    416     ++(*begin);
    417 
    418   // trailing whitespace
    419   while (*begin < *end && IsLWS((*end)[-1]))
    420     --(*end);
    421 }
    422 
    423 bool HttpUtil::IsQuote(char c) {
    424   // Single quote mark isn't actually part of quoted-text production,
    425   // but apparently some servers rely on this.
    426   return c == '"' || c == '\'';
    427 }
    428 
    429 // See RFC 2616 Sec 2.2 for the definition of |token|.
    430 bool HttpUtil::IsToken(string::const_iterator begin,
    431                        string::const_iterator end) {
    432   if (begin == end)
    433     return false;
    434   for (std::string::const_iterator iter = begin; iter != end; ++iter) {
    435     unsigned char c = *iter;
    436     if (c >= 0x80 || c <= 0x1F || c == 0x7F ||
    437         c == '(' || c == ')' || c == '<' || c == '>' || c == '@' ||
    438         c == ',' || c == ';' || c == ':' || c == '\\' || c == '"' ||
    439         c == '/' || c == '[' || c == ']' || c == '?' || c == '=' ||
    440         c == '{' || c == '}' || c == ' ' || c == '\t')
    441       return false;
    442   }
    443   return true;
    444 }
    445 
    446 std::string HttpUtil::Unquote(std::string::const_iterator begin,
    447                               std::string::const_iterator end) {
    448   // Empty string
    449   if (begin == end)
    450     return std::string();
    451 
    452   // Nothing to unquote.
    453   if (!IsQuote(*begin))
    454     return std::string(begin, end);
    455 
    456   // No terminal quote mark.
    457   if (end - begin < 2 || *begin != *(end - 1))
    458     return std::string(begin, end);
    459 
    460   // Strip quotemarks
    461   ++begin;
    462   --end;
    463 
    464   // Unescape quoted-pair (defined in RFC 2616 section 2.2)
    465   std::string unescaped;
    466   bool prev_escape = false;
    467   for (; begin != end; ++begin) {
    468     char c = *begin;
    469     if (c == '\\' && !prev_escape) {
    470       prev_escape = true;
    471       continue;
    472     }
    473     prev_escape = false;
    474     unescaped.push_back(c);
    475   }
    476   return unescaped;
    477 }
    478 
    479 // static
    480 std::string HttpUtil::Unquote(const std::string& str) {
    481   return Unquote(str.begin(), str.end());
    482 }
    483 
    484 // static
    485 std::string HttpUtil::Quote(const std::string& str) {
    486   std::string escaped;
    487   escaped.reserve(2 + str.size());
    488 
    489   std::string::const_iterator begin = str.begin();
    490   std::string::const_iterator end = str.end();
    491 
    492   // Esape any backslashes or quotemarks within the string, and
    493   // then surround with quotes.
    494   escaped.push_back('"');
    495   for (; begin != end; ++begin) {
    496     char c = *begin;
    497     if (c == '"' || c == '\\')
    498       escaped.push_back('\\');
    499     escaped.push_back(c);
    500   }
    501   escaped.push_back('"');
    502   return escaped;
    503 }
    504 
    505 // Find the "http" substring in a status line. This allows for
    506 // some slop at the start. If the "http" string could not be found
    507 // then returns -1.
    508 // static
    509 int HttpUtil::LocateStartOfStatusLine(const char* buf, int buf_len) {
    510   const int slop = 4;
    511   const int http_len = 4;
    512 
    513   if (buf_len >= http_len) {
    514     int i_max = std::min(buf_len - http_len, slop);
    515     for (int i = 0; i <= i_max; ++i) {
    516       if (LowerCaseEqualsASCII(buf + i, buf + i + http_len, "http"))
    517         return i;
    518     }
    519   }
    520   return -1;  // Not found
    521 }
    522 
    523 int HttpUtil::LocateEndOfHeaders(const char* buf, int buf_len, int i) {
    524   bool was_lf = false;
    525   char last_c = '\0';
    526   for (; i < buf_len; ++i) {
    527     char c = buf[i];
    528     if (c == '\n') {
    529       if (was_lf)
    530         return i + 1;
    531       was_lf = true;
    532     } else if (c != '\r' || last_c != '\n') {
    533       was_lf = false;
    534     }
    535     last_c = c;
    536   }
    537   return -1;
    538 }
    539 
    540 // In order for a line to be continuable, it must specify a
    541 // non-blank header-name. Line continuations are specifically for
    542 // header values -- do not allow headers names to span lines.
    543 static bool IsLineSegmentContinuable(const char* begin, const char* end) {
    544   if (begin == end)
    545     return false;
    546 
    547   const char* colon = std::find(begin, end, ':');
    548   if (colon == end)
    549     return false;
    550 
    551   const char* name_begin = begin;
    552   const char* name_end = colon;
    553 
    554   // Name can't be empty.
    555   if (name_begin == name_end)
    556     return false;
    557 
    558   // Can't start with LWS (this would imply the segment is a continuation)
    559   if (HttpUtil::IsLWS(*name_begin))
    560     return false;
    561 
    562   return true;
    563 }
    564 
    565 // Helper used by AssembleRawHeaders, to find the end of the status line.
    566 static const char* FindStatusLineEnd(const char* begin, const char* end) {
    567   size_t i = base::StringPiece(begin, end - begin).find_first_of("\r\n");
    568   if (i == base::StringPiece::npos)
    569     return end;
    570   return begin + i;
    571 }
    572 
    573 // Helper used by AssembleRawHeaders, to skip past leading LWS.
    574 static const char* FindFirstNonLWS(const char* begin, const char* end) {
    575   for (const char* cur = begin; cur != end; ++cur) {
    576     if (!HttpUtil::IsLWS(*cur))
    577       return cur;
    578   }
    579   return end;  // Not found.
    580 }
    581 
    582 std::string HttpUtil::AssembleRawHeaders(const char* input_begin,
    583                                          int input_len) {
    584   std::string raw_headers;
    585   raw_headers.reserve(input_len);
    586 
    587   const char* input_end = input_begin + input_len;
    588 
    589   // Skip any leading slop, since the consumers of this output
    590   // (HttpResponseHeaders) don't deal with it.
    591   int status_begin_offset = LocateStartOfStatusLine(input_begin, input_len);
    592   if (status_begin_offset != -1)
    593     input_begin += status_begin_offset;
    594 
    595   // Copy the status line.
    596   const char* status_line_end = FindStatusLineEnd(input_begin, input_end);
    597   raw_headers.append(input_begin, status_line_end);
    598 
    599   // After the status line, every subsequent line is a header line segment.
    600   // Should a segment start with LWS, it is a continuation of the previous
    601   // line's field-value.
    602 
    603   // TODO(ericroman): is this too permissive? (delimits on [\r\n]+)
    604   base::CStringTokenizer lines(status_line_end, input_end, "\r\n");
    605 
    606   // This variable is true when the previous line was continuable.
    607   bool prev_line_continuable = false;
    608 
    609   while (lines.GetNext()) {
    610     const char* line_begin = lines.token_begin();
    611     const char* line_end = lines.token_end();
    612 
    613     if (prev_line_continuable && IsLWS(*line_begin)) {
    614       // Join continuation; reduce the leading LWS to a single SP.
    615       raw_headers.push_back(' ');
    616       raw_headers.append(FindFirstNonLWS(line_begin, line_end), line_end);
    617     } else {
    618       // Terminate the previous line.
    619       raw_headers.push_back('\n');
    620 
    621       // Copy the raw data to output.
    622       raw_headers.append(line_begin, line_end);
    623 
    624       // Check if the current line can be continued.
    625       prev_line_continuable = IsLineSegmentContinuable(line_begin, line_end);
    626     }
    627   }
    628 
    629   raw_headers.append("\n\n", 2);
    630 
    631   // Use '\0' as the canonical line terminator. If the input already contained
    632   // any embeded '\0' characters we will strip them first to avoid interpreting
    633   // them as line breaks.
    634   raw_headers.erase(std::remove(raw_headers.begin(), raw_headers.end(), '\0'),
    635                     raw_headers.end());
    636   std::replace(raw_headers.begin(), raw_headers.end(), '\n', '\0');
    637 
    638   return raw_headers;
    639 }
    640 
    641 std::string HttpUtil::ConvertHeadersBackToHTTPResponse(const std::string& str) {
    642   std::string disassembled_headers;
    643   base::StringTokenizer tokenizer(str, std::string(1, '\0'));
    644   while (tokenizer.GetNext()) {
    645     disassembled_headers.append(tokenizer.token_begin(), tokenizer.token_end());
    646     disassembled_headers.append("\r\n");
    647   }
    648   disassembled_headers.append("\r\n");
    649 
    650   return disassembled_headers;
    651 }
    652 
    653 // TODO(jungshik): 1. If the list is 'fr-CA,fr-FR,en,de', we have to add
    654 // 'fr' after 'fr-CA' with the same q-value as 'fr-CA' because
    655 // web servers, in general, do not fall back to 'fr' and may end up picking
    656 // 'en' which has a lower preference than 'fr-CA' and 'fr-FR'.
    657 // 2. This function assumes that the input is a comma separated list
    658 // without any whitespace. As long as it comes from the preference and
    659 // a user does not manually edit the preference file, it's the case. Still,
    660 // we may have to make it more robust.
    661 std::string HttpUtil::GenerateAcceptLanguageHeader(
    662     const std::string& raw_language_list) {
    663   // We use integers for qvalue and qvalue decrement that are 10 times
    664   // larger than actual values to avoid a problem with comparing
    665   // two floating point numbers.
    666   const unsigned int kQvalueDecrement10 = 2;
    667   unsigned int qvalue10 = 10;
    668   base::StringTokenizer t(raw_language_list, ",");
    669   std::string lang_list_with_q;
    670   while (t.GetNext()) {
    671     std::string language = t.token();
    672     if (qvalue10 == 10) {
    673       // q=1.0 is implicit.
    674       lang_list_with_q = language;
    675     } else {
    676       DCHECK_LT(qvalue10, 10U);
    677       base::StringAppendF(&lang_list_with_q, ",%s;q=0.%d", language.c_str(),
    678                           qvalue10);
    679     }
    680     // It does not make sense to have 'q=0'.
    681     if (qvalue10 > kQvalueDecrement10)
    682       qvalue10 -= kQvalueDecrement10;
    683   }
    684   return lang_list_with_q;
    685 }
    686 
    687 void HttpUtil::AppendHeaderIfMissing(const char* header_name,
    688                                      const std::string& header_value,
    689                                      std::string* headers) {
    690   if (header_value.empty())
    691     return;
    692   if (net::HttpUtil::HasHeader(*headers, header_name))
    693     return;
    694   *headers += std::string(header_name) + ": " + header_value + "\r\n";
    695 }
    696 
    697 bool HttpUtil::HasStrongValidators(HttpVersion version,
    698                                    const std::string& etag_header,
    699                                    const std::string& last_modified_header,
    700                                    const std::string& date_header) {
    701   if (version < HttpVersion(1, 1))
    702     return false;
    703 
    704   if (!etag_header.empty()) {
    705     size_t slash = etag_header.find('/');
    706     if (slash == std::string::npos || slash == 0)
    707       return true;
    708 
    709     std::string::const_iterator i = etag_header.begin();
    710     std::string::const_iterator j = etag_header.begin() + slash;
    711     TrimLWS(&i, &j);
    712     if (!LowerCaseEqualsASCII(i, j, "w"))
    713       return true;
    714   }
    715 
    716   base::Time last_modified;
    717   if (!base::Time::FromString(last_modified_header.c_str(), &last_modified))
    718     return false;
    719 
    720   base::Time date;
    721   if (!base::Time::FromString(date_header.c_str(), &date))
    722     return false;
    723 
    724   return ((date - last_modified).InSeconds() >= 60);
    725 }
    726 
    727 // Functions for histogram initialization.  The code 0 is put in the map to
    728 // track status codes that are invalid.
    729 // TODO(gavinp): Greatly prune the collected codes once we learn which
    730 // ones are not sent in practice, to reduce upload size & memory use.
    731 
// Inclusive bounds of the status codes that get their own histogram bucket.
// GetStatusCodesForHistogram() enumerates this range and
// MapStatusCodeForHistogram() maps anything outside it to 0.
enum {
  HISTOGRAM_MIN_HTTP_STATUS_CODE = 100,
  HISTOGRAM_MAX_HTTP_STATUS_CODE = 599,
};
    736 
    737 // static
    738 std::vector<int> HttpUtil::GetStatusCodesForHistogram() {
    739   std::vector<int> codes;
    740   codes.reserve(
    741       HISTOGRAM_MAX_HTTP_STATUS_CODE - HISTOGRAM_MIN_HTTP_STATUS_CODE + 2);
    742   codes.push_back(0);
    743   for (int i = HISTOGRAM_MIN_HTTP_STATUS_CODE;
    744        i <= HISTOGRAM_MAX_HTTP_STATUS_CODE; ++i)
    745     codes.push_back(i);
    746   return codes;
    747 }
    748 
    749 // static
    750 int HttpUtil::MapStatusCodeForHistogram(int code) {
    751   if (HISTOGRAM_MIN_HTTP_STATUS_CODE <= code &&
    752       code <= HISTOGRAM_MAX_HTTP_STATUS_CODE)
    753     return code;
    754   return 0;
    755 }
    756 
    757 // BNF from section 4.2 of RFC 2616:
    758 //
    759 //   message-header = field-name ":" [ field-value ]
    760 //   field-name     = token
    761 //   field-value    = *( field-content | LWS )
    762 //   field-content  = <the OCTETs making up the field-value
    763 //                     and consisting of either *TEXT or combinations
    764 //                     of token, separators, and quoted-string>
    765 //
    766 
// Sets up iteration over the header text in [headers_begin, headers_end).
// The range and |line_delimiter| are handed to the |lines_| tokenizer, which
// GetNext() uses to step from one header line to the next.
HttpUtil::HeadersIterator::HeadersIterator(string::const_iterator headers_begin,
                                           string::const_iterator headers_end,
                                           const std::string& line_delimiter)
    : lines_(headers_begin, headers_end, line_delimiter) {
}
    772 
    773 HttpUtil::HeadersIterator::~HeadersIterator() {
    774 }
    775 
    776 bool HttpUtil::HeadersIterator::GetNext() {
    777   while (lines_.GetNext()) {
    778     name_begin_ = lines_.token_begin();
    779     values_end_ = lines_.token_end();
    780 
    781     string::const_iterator colon = std::find(name_begin_, values_end_, ':');
    782     if (colon == values_end_)
    783       continue;  // skip malformed header
    784 
    785     name_end_ = colon;
    786 
    787     // If the name starts with LWS, it is an invalid line.
    788     // Leading LWS implies a line continuation, and these should have
    789     // already been joined by AssembleRawHeaders().
    790     if (name_begin_ == name_end_ || IsLWS(*name_begin_))
    791       continue;
    792 
    793     TrimLWS(&name_begin_, &name_end_);
    794     if (name_begin_ == name_end_)
    795       continue;  // skip malformed header
    796 
    797     values_begin_ = colon + 1;
    798     TrimLWS(&values_begin_, &values_end_);
    799 
    800     // if we got a header name, then we are done.
    801     return true;
    802   }
    803   return false;
    804 }
    805 
    806 bool HttpUtil::HeadersIterator::AdvanceTo(const char* name) {
    807   DCHECK(name != NULL);
    808   DCHECK_EQ(0, StringToLowerASCII<std::string>(name).compare(name))
    809       << "the header name must be in all lower case";
    810 
    811   while (GetNext()) {
    812     if (LowerCaseEqualsASCII(name_begin_, name_end_, name)) {
    813       return true;
    814     }
    815   }
    816 
    817   return false;
    818 }
    819 
    820 HttpUtil::ValuesIterator::ValuesIterator(
    821     string::const_iterator values_begin,
    822     string::const_iterator values_end,
    823     char delimiter)
    824     : values_(values_begin, values_end, string(1, delimiter)) {
    825   values_.set_quote_chars("\'\"");
    826 }
    827 
    828 HttpUtil::ValuesIterator::~ValuesIterator() {
    829 }
    830 
    831 bool HttpUtil::ValuesIterator::GetNext() {
    832   while (values_.GetNext()) {
    833     value_begin_ = values_.token_begin();
    834     value_end_ = values_.token_end();
    835     TrimLWS(&value_begin_, &value_end_);
    836 
    837     // bypass empty values.
    838     if (value_begin_ != value_end_)
    839       return true;
    840   }
    841   return false;
    842 }
    843 
    844 HttpUtil::NameValuePairsIterator::NameValuePairsIterator(
    845     string::const_iterator begin,
    846     string::const_iterator end,
    847     char delimiter)
    848     : props_(begin, end, delimiter),
    849       valid_(true),
    850       name_begin_(end),
    851       name_end_(end),
    852       value_begin_(end),
    853       value_end_(end),
    854       value_is_quoted_(false) {
    855 }
    856 
    857 HttpUtil::NameValuePairsIterator::~NameValuePairsIterator() {}
    858 
    859 // We expect properties to be formatted as one of:
    860 //   name="value"
    861 //   name='value'
    862 //   name='\'value\''
    863 //   name=value
    864 //   name = value
    865 //   name=
    866 // Due to buggy implementations found in some embedded devices, we also
    867 // accept values with missing close quotemark (http://crbug.com/39836):
    868 //   name="value
    869 bool HttpUtil::NameValuePairsIterator::GetNext() {
    870   if (!props_.GetNext())
    871     return false;
    872 
    873   // Set the value as everything. Next we will split out the name.
    874   value_begin_ = props_.value_begin();
    875   value_end_ = props_.value_end();
    876   name_begin_ = name_end_ = value_end_;
    877 
    878   // Scan for the equals sign.
    879   std::string::const_iterator equals = std::find(value_begin_, value_end_, '=');
    880   if (equals == value_end_ || equals == value_begin_)
    881     return valid_ = false;  // Malformed, no equals sign
    882 
    883   // Verify that the equals sign we found wasn't inside of quote marks.
    884   for (std::string::const_iterator it = value_begin_; it != equals; ++it) {
    885     if (HttpUtil::IsQuote(*it))
    886       return valid_ = false;  // Malformed, quote appears before equals sign
    887   }
    888 
    889   name_begin_ = value_begin_;
    890   name_end_ = equals;
    891   value_begin_ = equals + 1;
    892 
    893   TrimLWS(&name_begin_, &name_end_);
    894   TrimLWS(&value_begin_, &value_end_);
    895   value_is_quoted_ = false;
    896   unquoted_value_.clear();
    897 
    898   if (value_begin_ == value_end_)
    899     return valid_ = false;  // Malformed, value is empty
    900 
    901   if (HttpUtil::IsQuote(*value_begin_)) {
    902     // Trim surrounding quotemarks off the value
    903     if (*value_begin_ != *(value_end_ - 1) || value_begin_ + 1 == value_end_) {
    904       // NOTE: This is not as graceful as it sounds:
    905       // * quoted-pairs will no longer be unquoted
    906       //   (["\"hello] should give ["hello]).
    907       // * Does not detect when the final quote is escaped
    908       //   (["value\"] should give [value"])
    909       ++value_begin_;  // Gracefully recover from mismatching quotes.
    910     } else {
    911       value_is_quoted_ = true;
    912       // Do not store iterators into this. See declaration of unquoted_value_.
    913       unquoted_value_ = HttpUtil::Unquote(value_begin_, value_end_);
    914     }
    915   }
    916 
    917   return true;
    918 }
    919 
    920 }  // namespace net
    921