Home | History | Annotate | Download | only in http
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // The rules for parsing content-types were borrowed from Firefox:
      6 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
      7 
      8 #include "net/http/http_util.h"
      9 
     10 #include <algorithm>
     11 
     12 #include "base/basictypes.h"
     13 #include "base/logging.h"
     14 #include "base/strings/string_number_conversions.h"
     15 #include "base/strings/string_piece.h"
     16 #include "base/strings/string_tokenizer.h"
     17 #include "base/strings/string_util.h"
     18 #include "base/strings/stringprintf.h"
     19 #include "base/time/time.h"
     20 
     21 
     22 namespace net {
     23 
     24 // Helpers --------------------------------------------------------------------
     25 
     26 // Returns the index of the closing quote of the string, if any.  |start| points
     27 // at the opening quote.
     28 static size_t FindStringEnd(const std::string& line, size_t start, char delim) {
     29   DCHECK_LT(start, line.length());
     30   DCHECK_EQ(line[start], delim);
     31   DCHECK((delim == '"') || (delim == '\''));
     32 
     33   const char set[] = { delim, '\\', '\0' };
     34   for (size_t end = line.find_first_of(set, start + 1);
     35        end != std::string::npos; end = line.find_first_of(set, end + 2)) {
     36     if (line[end] != '\\')
     37       return end;
     38   }
     39   return line.length();
     40 }
     41 
     42 
     43 // HttpUtil -------------------------------------------------------------------
     44 
     45 // static
     46 size_t HttpUtil::FindDelimiter(const std::string& line,
     47                                size_t search_start,
     48                                char delimiter) {
     49   do {
     50     // search_start points to the spot from which we should start looking
     51     // for the delimiter.
     52     const char delim_str[] = { delimiter, '"', '\'', '\0' };
     53     size_t cur_delim_pos = line.find_first_of(delim_str, search_start);
     54     if (cur_delim_pos == std::string::npos)
     55       return line.length();
     56 
     57     char ch = line[cur_delim_pos];
     58     if (ch == delimiter) {
     59       // Found delimiter
     60       return cur_delim_pos;
     61     }
     62 
     63     // We hit the start of a quoted string.  Look for its end.
     64     search_start = FindStringEnd(line, cur_delim_pos, ch);
     65     if (search_start == line.length())
     66       return search_start;
     67 
     68     ++search_start;
     69 
     70     // search_start now points to the first char after the end of the
     71     // string, so just go back to the top of the loop and look for
     72     // |delimiter| again.
     73   } while (true);
     74 
     75   NOTREACHED();
     76   return line.length();
     77 }
     78 
     79 // static
     80 void HttpUtil::ParseContentType(const std::string& content_type_str,
     81                                 std::string* mime_type,
     82                                 std::string* charset,
     83                                 bool* had_charset,
     84                                 std::string* boundary) {
     85   const std::string::const_iterator begin = content_type_str.begin();
     86 
     87   // Trim leading and trailing whitespace from type.  We include '(' in
     88   // the trailing trim set to catch media-type comments, which are not at all
     89   // standard, but may occur in rare cases.
     90   size_t type_val = content_type_str.find_first_not_of(HTTP_LWS);
     91   type_val = std::min(type_val, content_type_str.length());
     92   size_t type_end = content_type_str.find_first_of(HTTP_LWS ";(", type_val);
     93   if (type_end == std::string::npos)
     94     type_end = content_type_str.length();
     95 
     96   size_t charset_val = 0;
     97   size_t charset_end = 0;
     98   bool type_has_charset = false;
     99 
    100   // Iterate over parameters
    101   size_t param_start = content_type_str.find_first_of(';', type_end);
    102   if (param_start != std::string::npos) {
    103     base::StringTokenizer tokenizer(begin + param_start, content_type_str.end(),
    104                                     ";");
    105     tokenizer.set_quote_chars("\"");
    106     while (tokenizer.GetNext()) {
    107       std::string::const_iterator equals_sign =
    108           std::find(tokenizer.token_begin(), tokenizer.token_end(), '=');
    109       if (equals_sign == tokenizer.token_end())
    110         continue;
    111 
    112       std::string::const_iterator param_name_begin = tokenizer.token_begin();
    113       std::string::const_iterator param_name_end = equals_sign;
    114       TrimLWS(&param_name_begin, &param_name_end);
    115 
    116       std::string::const_iterator param_value_begin = equals_sign + 1;
    117       std::string::const_iterator param_value_end = tokenizer.token_end();
    118       DCHECK(param_value_begin <= tokenizer.token_end());
    119       TrimLWS(&param_value_begin, &param_value_end);
    120 
    121       if (LowerCaseEqualsASCII(param_name_begin, param_name_end, "charset")) {
    122         // TODO(abarth): Refactor this function to consistently use iterators.
    123         charset_val = param_value_begin - begin;
    124         charset_end = param_value_end - begin;
    125         type_has_charset = true;
    126       } else if (LowerCaseEqualsASCII(param_name_begin, param_name_end,
    127                                       "boundary")) {
    128         if (boundary)
    129           boundary->assign(param_value_begin, param_value_end);
    130       }
    131     }
    132   }
    133 
    134   if (type_has_charset) {
    135     // Trim leading and trailing whitespace from charset_val.  We include
    136     // '(' in the trailing trim set to catch media-type comments, which are
    137     // not at all standard, but may occur in rare cases.
    138     charset_val = content_type_str.find_first_not_of(HTTP_LWS, charset_val);
    139     charset_val = std::min(charset_val, charset_end);
    140     char first_char = content_type_str[charset_val];
    141     if (first_char == '"' || first_char == '\'') {
    142       charset_end = FindStringEnd(content_type_str, charset_val, first_char);
    143       ++charset_val;
    144       DCHECK(charset_end >= charset_val);
    145     } else {
    146       charset_end = std::min(content_type_str.find_first_of(HTTP_LWS ";(",
    147                                                             charset_val),
    148                              charset_end);
    149     }
    150   }
    151 
    152   // if the server sent "*/*", it is meaningless, so do not store it.
    153   // also, if type_val is the same as mime_type, then just update the
    154   // charset.  however, if charset is empty and mime_type hasn't
    155   // changed, then don't wipe-out an existing charset.  We
    156   // also want to reject a mime-type if it does not include a slash.
    157   // some servers give junk after the charset parameter, which may
    158   // include a comma, so this check makes us a bit more tolerant.
    159   if (content_type_str.length() != 0 &&
    160       content_type_str != "*/*" &&
    161       content_type_str.find_first_of('/') != std::string::npos) {
    162     // Common case here is that mime_type is empty
    163     bool eq = !mime_type->empty() && LowerCaseEqualsASCII(begin + type_val,
    164                                                           begin + type_end,
    165                                                           mime_type->data());
    166     if (!eq) {
    167       mime_type->assign(begin + type_val, begin + type_end);
    168       base::StringToLowerASCII(mime_type);
    169     }
    170     if ((!eq && *had_charset) || type_has_charset) {
    171       *had_charset = true;
    172       charset->assign(begin + charset_val, begin + charset_end);
    173       base::StringToLowerASCII(charset);
    174     }
    175   }
    176 }
    177 
    178 // static
    179 // Parse the Range header according to RFC 2616 14.35.1
    180 // ranges-specifier = byte-ranges-specifier
    181 // byte-ranges-specifier = bytes-unit "=" byte-range-set
    182 // byte-range-set  = 1#( byte-range-spec | suffix-byte-range-spec )
    183 // byte-range-spec = first-byte-pos "-" [last-byte-pos]
    184 // first-byte-pos  = 1*DIGIT
    185 // last-byte-pos   = 1*DIGIT
    186 bool HttpUtil::ParseRanges(const std::string& headers,
    187                            std::vector<HttpByteRange>* ranges) {
    188   std::string ranges_specifier;
    189   HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\r\n");
    190 
    191   while (it.GetNext()) {
    192     // Look for "Range" header.
    193     if (!LowerCaseEqualsASCII(it.name(), "range"))
    194       continue;
    195     ranges_specifier = it.values();
    196     // We just care about the first "Range" header, so break here.
    197     break;
    198   }
    199 
    200   if (ranges_specifier.empty())
    201     return false;
    202 
    203   return ParseRangeHeader(ranges_specifier, ranges);
    204 }
    205 
    206 // static
    207 bool HttpUtil::ParseRangeHeader(const std::string& ranges_specifier,
    208                                 std::vector<HttpByteRange>* ranges) {
    209   size_t equal_char_offset = ranges_specifier.find('=');
    210   if (equal_char_offset == std::string::npos)
    211     return false;
    212 
    213   // Try to extract bytes-unit part.
    214   std::string::const_iterator bytes_unit_begin = ranges_specifier.begin();
    215   std::string::const_iterator bytes_unit_end = bytes_unit_begin +
    216                                                equal_char_offset;
    217   std::string::const_iterator byte_range_set_begin = bytes_unit_end + 1;
    218   std::string::const_iterator byte_range_set_end = ranges_specifier.end();
    219 
    220   TrimLWS(&bytes_unit_begin, &bytes_unit_end);
    221   // "bytes" unit identifier is not found.
    222   if (!LowerCaseEqualsASCII(bytes_unit_begin, bytes_unit_end, "bytes"))
    223     return false;
    224 
    225   ValuesIterator byte_range_set_iterator(byte_range_set_begin,
    226                                          byte_range_set_end, ',');
    227   while (byte_range_set_iterator.GetNext()) {
    228     size_t minus_char_offset = byte_range_set_iterator.value().find('-');
    229     // If '-' character is not found, reports failure.
    230     if (minus_char_offset == std::string::npos)
    231       return false;
    232 
    233     std::string::const_iterator first_byte_pos_begin =
    234         byte_range_set_iterator.value_begin();
    235     std::string::const_iterator first_byte_pos_end =
    236         first_byte_pos_begin +  minus_char_offset;
    237     TrimLWS(&first_byte_pos_begin, &first_byte_pos_end);
    238     std::string first_byte_pos(first_byte_pos_begin, first_byte_pos_end);
    239 
    240     HttpByteRange range;
    241     // Try to obtain first-byte-pos.
    242     if (!first_byte_pos.empty()) {
    243       int64 first_byte_position = -1;
    244       if (!base::StringToInt64(first_byte_pos, &first_byte_position))
    245         return false;
    246       range.set_first_byte_position(first_byte_position);
    247     }
    248 
    249     std::string::const_iterator last_byte_pos_begin =
    250         byte_range_set_iterator.value_begin() + minus_char_offset + 1;
    251     std::string::const_iterator last_byte_pos_end =
    252         byte_range_set_iterator.value_end();
    253     TrimLWS(&last_byte_pos_begin, &last_byte_pos_end);
    254     std::string last_byte_pos(last_byte_pos_begin, last_byte_pos_end);
    255 
    256     // We have last-byte-pos or suffix-byte-range-spec in this case.
    257     if (!last_byte_pos.empty()) {
    258       int64 last_byte_position;
    259       if (!base::StringToInt64(last_byte_pos, &last_byte_position))
    260         return false;
    261       if (range.HasFirstBytePosition())
    262         range.set_last_byte_position(last_byte_position);
    263       else
    264         range.set_suffix_length(last_byte_position);
    265     } else if (!range.HasFirstBytePosition()) {
    266       return false;
    267     }
    268 
    269     // Do a final check on the HttpByteRange object.
    270     if (!range.IsValid())
    271       return false;
    272     ranges->push_back(range);
    273   }
    274   return !ranges->empty();
    275 }
    276 
    277 // static
    278 bool HttpUtil::HasHeader(const std::string& headers, const char* name) {
    279   size_t name_len = strlen(name);
    280   std::string::const_iterator it =
    281       std::search(headers.begin(),
    282                   headers.end(),
    283                   name,
    284                   name + name_len,
    285                   base::CaseInsensitiveCompareASCII<char>());
    286   if (it == headers.end())
    287     return false;
    288 
    289   // ensure match is prefixed by newline
    290   if (it != headers.begin() && it[-1] != '\n')
    291     return false;
    292 
    293   // ensure match is suffixed by colon
    294   if (it + name_len >= headers.end() || it[name_len] != ':')
    295     return false;
    296 
    297   return true;
    298 }
    299 
namespace {
// A header string containing any of the following fields will cause
// an error. The list comes from the XMLHttpRequest standard.
// http://www.w3.org/TR/XMLHttpRequest/#the-setrequestheader-method
//
// Entries must be lower case: IsSafeHeader() lower-cases the candidate name
// and compares it against each entry for exact equality.
const char* const kForbiddenHeaderFields[] = {
  "accept-charset",
  "accept-encoding",
  "access-control-request-headers",
  "access-control-request-method",
  "connection",
  "content-length",
  "cookie",
  "cookie2",
  "content-transfer-encoding",
  "date",
  "expect",
  "host",
  "keep-alive",
  "origin",
  "referer",
  "te",
  "trailer",
  "transfer-encoding",
  "upgrade",
  "user-agent",
  "via",
};
}  // anonymous namespace
    328 
    329 // static
    330 bool HttpUtil::IsSafeHeader(const std::string& name) {
    331   std::string lower_name(base::StringToLowerASCII(name));
    332   if (StartsWithASCII(lower_name, "proxy-", true) ||
    333       StartsWithASCII(lower_name, "sec-", true))
    334     return false;
    335   for (size_t i = 0; i < arraysize(kForbiddenHeaderFields); ++i) {
    336     if (lower_name == kForbiddenHeaderFields[i])
    337       return false;
    338   }
    339   return true;
    340 }
    341 
    342 // static
    343 bool HttpUtil::IsValidHeaderName(const std::string& name) {
    344   // Check whether the header name is RFC 2616-compliant.
    345   return HttpUtil::IsToken(name);
    346 }
    347 
    348 // static
    349 bool HttpUtil::IsValidHeaderValue(const std::string& value) {
    350   // Just a sanity check: disallow NUL and CRLF.
    351   return value.find('\0') == std::string::npos &&
    352       value.find("\r\n") == std::string::npos;
    353 }
    354 
    355 // static
    356 std::string HttpUtil::StripHeaders(const std::string& headers,
    357                                    const char* const headers_to_remove[],
    358                                    size_t headers_to_remove_len) {
    359   std::string stripped_headers;
    360   net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\r\n");
    361 
    362   while (it.GetNext()) {
    363     bool should_remove = false;
    364     for (size_t i = 0; i < headers_to_remove_len; ++i) {
    365       if (LowerCaseEqualsASCII(it.name_begin(), it.name_end(),
    366                                headers_to_remove[i])) {
    367         should_remove = true;
    368         break;
    369       }
    370     }
    371     if (!should_remove) {
    372       // Assume that name and values are on the same line.
    373       stripped_headers.append(it.name_begin(), it.values_end());
    374       stripped_headers.append("\r\n");
    375     }
    376   }
    377   return stripped_headers;
    378 }
    379 
    380 // static
    381 bool HttpUtil::IsNonCoalescingHeader(std::string::const_iterator name_begin,
    382                                      std::string::const_iterator name_end) {
    383   // NOTE: "set-cookie2" headers do not support expires attributes, so we don't
    384   // have to list them here.
    385   const char* kNonCoalescingHeaders[] = {
    386     "date",
    387     "expires",
    388     "last-modified",
    389     "location",  // See bug 1050541 for details
    390     "retry-after",
    391     "set-cookie",
    392     // The format of auth-challenges mixes both space separated tokens and
    393     // comma separated properties, so coalescing on comma won't work.
    394     "www-authenticate",
    395     "proxy-authenticate",
    396     // STS specifies that UAs must not process any STS headers after the first
    397     // one.
    398     "strict-transport-security"
    399   };
    400   for (size_t i = 0; i < arraysize(kNonCoalescingHeaders); ++i) {
    401     if (LowerCaseEqualsASCII(name_begin, name_end, kNonCoalescingHeaders[i]))
    402       return true;
    403   }
    404   return false;
    405 }
    406 
    407 bool HttpUtil::IsLWS(char c) {
    408   return strchr(HTTP_LWS, c) != NULL;
    409 }
    410 
    411 void HttpUtil::TrimLWS(std::string::const_iterator* begin,
    412                        std::string::const_iterator* end) {
    413   // leading whitespace
    414   while (*begin < *end && IsLWS((*begin)[0]))
    415     ++(*begin);
    416 
    417   // trailing whitespace
    418   while (*begin < *end && IsLWS((*end)[-1]))
    419     --(*end);
    420 }
    421 
    422 bool HttpUtil::IsQuote(char c) {
    423   // Single quote mark isn't actually part of quoted-text production,
    424   // but apparently some servers rely on this.
    425   return c == '"' || c == '\'';
    426 }
    427 
    428 // See RFC 2616 Sec 2.2 for the definition of |token|.
    429 bool HttpUtil::IsToken(std::string::const_iterator begin,
    430                        std::string::const_iterator end) {
    431   if (begin == end)
    432     return false;
    433   for (std::string::const_iterator iter = begin; iter != end; ++iter) {
    434     unsigned char c = *iter;
    435     if (c >= 0x80 || c <= 0x1F || c == 0x7F ||
    436         c == '(' || c == ')' || c == '<' || c == '>' || c == '@' ||
    437         c == ',' || c == ';' || c == ':' || c == '\\' || c == '"' ||
    438         c == '/' || c == '[' || c == ']' || c == '?' || c == '=' ||
    439         c == '{' || c == '}' || c == ' ' || c == '\t')
    440       return false;
    441   }
    442   return true;
    443 }
    444 
    445 std::string HttpUtil::Unquote(std::string::const_iterator begin,
    446                               std::string::const_iterator end) {
    447   // Empty string
    448   if (begin == end)
    449     return std::string();
    450 
    451   // Nothing to unquote.
    452   if (!IsQuote(*begin))
    453     return std::string(begin, end);
    454 
    455   // No terminal quote mark.
    456   if (end - begin < 2 || *begin != *(end - 1))
    457     return std::string(begin, end);
    458 
    459   // Strip quotemarks
    460   ++begin;
    461   --end;
    462 
    463   // Unescape quoted-pair (defined in RFC 2616 section 2.2)
    464   std::string unescaped;
    465   bool prev_escape = false;
    466   for (; begin != end; ++begin) {
    467     char c = *begin;
    468     if (c == '\\' && !prev_escape) {
    469       prev_escape = true;
    470       continue;
    471     }
    472     prev_escape = false;
    473     unescaped.push_back(c);
    474   }
    475   return unescaped;
    476 }
    477 
    478 // static
    479 std::string HttpUtil::Unquote(const std::string& str) {
    480   return Unquote(str.begin(), str.end());
    481 }
    482 
    483 // static
    484 std::string HttpUtil::Quote(const std::string& str) {
    485   std::string escaped;
    486   escaped.reserve(2 + str.size());
    487 
    488   std::string::const_iterator begin = str.begin();
    489   std::string::const_iterator end = str.end();
    490 
    491   // Esape any backslashes or quotemarks within the string, and
    492   // then surround with quotes.
    493   escaped.push_back('"');
    494   for (; begin != end; ++begin) {
    495     char c = *begin;
    496     if (c == '"' || c == '\\')
    497       escaped.push_back('\\');
    498     escaped.push_back(c);
    499   }
    500   escaped.push_back('"');
    501   return escaped;
    502 }
    503 
    504 // Find the "http" substring in a status line. This allows for
    505 // some slop at the start. If the "http" string could not be found
    506 // then returns -1.
    507 // static
    508 int HttpUtil::LocateStartOfStatusLine(const char* buf, int buf_len) {
    509   const int slop = 4;
    510   const int http_len = 4;
    511 
    512   if (buf_len >= http_len) {
    513     int i_max = std::min(buf_len - http_len, slop);
    514     for (int i = 0; i <= i_max; ++i) {
    515       if (LowerCaseEqualsASCII(buf + i, buf + i + http_len, "http"))
    516         return i;
    517     }
    518   }
    519   return -1;  // Not found
    520 }
    521 
    522 int HttpUtil::LocateEndOfHeaders(const char* buf, int buf_len, int i) {
    523   bool was_lf = false;
    524   char last_c = '\0';
    525   for (; i < buf_len; ++i) {
    526     char c = buf[i];
    527     if (c == '\n') {
    528       if (was_lf)
    529         return i + 1;
    530       was_lf = true;
    531     } else if (c != '\r' || last_c != '\n') {
    532       was_lf = false;
    533     }
    534     last_c = c;
    535   }
    536   return -1;
    537 }
    538 
    539 // In order for a line to be continuable, it must specify a
    540 // non-blank header-name. Line continuations are specifically for
    541 // header values -- do not allow headers names to span lines.
    542 static bool IsLineSegmentContinuable(const char* begin, const char* end) {
    543   if (begin == end)
    544     return false;
    545 
    546   const char* colon = std::find(begin, end, ':');
    547   if (colon == end)
    548     return false;
    549 
    550   const char* name_begin = begin;
    551   const char* name_end = colon;
    552 
    553   // Name can't be empty.
    554   if (name_begin == name_end)
    555     return false;
    556 
    557   // Can't start with LWS (this would imply the segment is a continuation)
    558   if (HttpUtil::IsLWS(*name_begin))
    559     return false;
    560 
    561   return true;
    562 }
    563 
    564 // Helper used by AssembleRawHeaders, to find the end of the status line.
    565 static const char* FindStatusLineEnd(const char* begin, const char* end) {
    566   size_t i = base::StringPiece(begin, end - begin).find_first_of("\r\n");
    567   if (i == base::StringPiece::npos)
    568     return end;
    569   return begin + i;
    570 }
    571 
    572 // Helper used by AssembleRawHeaders, to skip past leading LWS.
    573 static const char* FindFirstNonLWS(const char* begin, const char* end) {
    574   for (const char* cur = begin; cur != end; ++cur) {
    575     if (!HttpUtil::IsLWS(*cur))
    576       return cur;
    577   }
    578   return end;  // Not found.
    579 }
    580 
    581 std::string HttpUtil::AssembleRawHeaders(const char* input_begin,
    582                                          int input_len) {
    583   std::string raw_headers;
    584   raw_headers.reserve(input_len);
    585 
    586   const char* input_end = input_begin + input_len;
    587 
    588   // Skip any leading slop, since the consumers of this output
    589   // (HttpResponseHeaders) don't deal with it.
    590   int status_begin_offset = LocateStartOfStatusLine(input_begin, input_len);
    591   if (status_begin_offset != -1)
    592     input_begin += status_begin_offset;
    593 
    594   // Copy the status line.
    595   const char* status_line_end = FindStatusLineEnd(input_begin, input_end);
    596   raw_headers.append(input_begin, status_line_end);
    597 
    598   // After the status line, every subsequent line is a header line segment.
    599   // Should a segment start with LWS, it is a continuation of the previous
    600   // line's field-value.
    601 
    602   // TODO(ericroman): is this too permissive? (delimits on [\r\n]+)
    603   base::CStringTokenizer lines(status_line_end, input_end, "\r\n");
    604 
    605   // This variable is true when the previous line was continuable.
    606   bool prev_line_continuable = false;
    607 
    608   while (lines.GetNext()) {
    609     const char* line_begin = lines.token_begin();
    610     const char* line_end = lines.token_end();
    611 
    612     if (prev_line_continuable && IsLWS(*line_begin)) {
    613       // Join continuation; reduce the leading LWS to a single SP.
    614       raw_headers.push_back(' ');
    615       raw_headers.append(FindFirstNonLWS(line_begin, line_end), line_end);
    616     } else {
    617       // Terminate the previous line.
    618       raw_headers.push_back('\n');
    619 
    620       // Copy the raw data to output.
    621       raw_headers.append(line_begin, line_end);
    622 
    623       // Check if the current line can be continued.
    624       prev_line_continuable = IsLineSegmentContinuable(line_begin, line_end);
    625     }
    626   }
    627 
    628   raw_headers.append("\n\n", 2);
    629 
    630   // Use '\0' as the canonical line terminator. If the input already contained
    631   // any embeded '\0' characters we will strip them first to avoid interpreting
    632   // them as line breaks.
    633   raw_headers.erase(std::remove(raw_headers.begin(), raw_headers.end(), '\0'),
    634                     raw_headers.end());
    635   std::replace(raw_headers.begin(), raw_headers.end(), '\n', '\0');
    636 
    637   return raw_headers;
    638 }
    639 
    640 std::string HttpUtil::ConvertHeadersBackToHTTPResponse(const std::string& str) {
    641   std::string disassembled_headers;
    642   base::StringTokenizer tokenizer(str, std::string(1, '\0'));
    643   while (tokenizer.GetNext()) {
    644     disassembled_headers.append(tokenizer.token_begin(), tokenizer.token_end());
    645     disassembled_headers.append("\r\n");
    646   }
    647   disassembled_headers.append("\r\n");
    648 
    649   return disassembled_headers;
    650 }
    651 
    652 // TODO(jungshik): 1. If the list is 'fr-CA,fr-FR,en,de', we have to add
    653 // 'fr' after 'fr-CA' with the same q-value as 'fr-CA' because
    654 // web servers, in general, do not fall back to 'fr' and may end up picking
    655 // 'en' which has a lower preference than 'fr-CA' and 'fr-FR'.
    656 // 2. This function assumes that the input is a comma separated list
    657 // without any whitespace. As long as it comes from the preference and
    658 // a user does not manually edit the preference file, it's the case. Still,
    659 // we may have to make it more robust.
    660 std::string HttpUtil::GenerateAcceptLanguageHeader(
    661     const std::string& raw_language_list) {
    662   // We use integers for qvalue and qvalue decrement that are 10 times
    663   // larger than actual values to avoid a problem with comparing
    664   // two floating point numbers.
    665   const unsigned int kQvalueDecrement10 = 2;
    666   unsigned int qvalue10 = 10;
    667   base::StringTokenizer t(raw_language_list, ",");
    668   std::string lang_list_with_q;
    669   while (t.GetNext()) {
    670     std::string language = t.token();
    671     if (qvalue10 == 10) {
    672       // q=1.0 is implicit.
    673       lang_list_with_q = language;
    674     } else {
    675       DCHECK_LT(qvalue10, 10U);
    676       base::StringAppendF(&lang_list_with_q, ",%s;q=0.%d", language.c_str(),
    677                           qvalue10);
    678     }
    679     // It does not make sense to have 'q=0'.
    680     if (qvalue10 > kQvalueDecrement10)
    681       qvalue10 -= kQvalueDecrement10;
    682   }
    683   return lang_list_with_q;
    684 }
    685 
    686 void HttpUtil::AppendHeaderIfMissing(const char* header_name,
    687                                      const std::string& header_value,
    688                                      std::string* headers) {
    689   if (header_value.empty())
    690     return;
    691   if (net::HttpUtil::HasHeader(*headers, header_name))
    692     return;
    693   *headers += std::string(header_name) + ": " + header_value + "\r\n";
    694 }
    695 
    696 bool HttpUtil::HasStrongValidators(HttpVersion version,
    697                                    const std::string& etag_header,
    698                                    const std::string& last_modified_header,
    699                                    const std::string& date_header) {
    700   if (version < HttpVersion(1, 1))
    701     return false;
    702 
    703   if (!etag_header.empty()) {
    704     size_t slash = etag_header.find('/');
    705     if (slash == std::string::npos || slash == 0)
    706       return true;
    707 
    708     std::string::const_iterator i = etag_header.begin();
    709     std::string::const_iterator j = etag_header.begin() + slash;
    710     TrimLWS(&i, &j);
    711     if (!LowerCaseEqualsASCII(i, j, "w"))
    712       return true;
    713   }
    714 
    715   base::Time last_modified;
    716   if (!base::Time::FromString(last_modified_header.c_str(), &last_modified))
    717     return false;
    718 
    719   base::Time date;
    720   if (!base::Time::FromString(date_header.c_str(), &date))
    721     return false;
    722 
    723   return ((date - last_modified).InSeconds() >= 60);
    724 }
    725 
    726 // Functions for histogram initialization.  The code 0 is put in the map to
    727 // track status codes that are invalid.
    728 // TODO(gavinp): Greatly prune the collected codes once we learn which
    729 // ones are not sent in practice, to reduce upload size & memory use.
    730 
// Inclusive bounds of the status codes that get their own histogram value.
// GetStatusCodesForHistogram() lists every code in this range and
// MapStatusCodeForHistogram() maps any code outside it to 0.
enum {
  HISTOGRAM_MIN_HTTP_STATUS_CODE = 100,
  HISTOGRAM_MAX_HTTP_STATUS_CODE = 599,
};
    735 
    736 // static
    737 std::vector<int> HttpUtil::GetStatusCodesForHistogram() {
    738   std::vector<int> codes;
    739   codes.reserve(
    740       HISTOGRAM_MAX_HTTP_STATUS_CODE - HISTOGRAM_MIN_HTTP_STATUS_CODE + 2);
    741   codes.push_back(0);
    742   for (int i = HISTOGRAM_MIN_HTTP_STATUS_CODE;
    743        i <= HISTOGRAM_MAX_HTTP_STATUS_CODE; ++i)
    744     codes.push_back(i);
    745   return codes;
    746 }
    747 
    748 // static
    749 int HttpUtil::MapStatusCodeForHistogram(int code) {
    750   if (HISTOGRAM_MIN_HTTP_STATUS_CODE <= code &&
    751       code <= HISTOGRAM_MAX_HTTP_STATUS_CODE)
    752     return code;
    753   return 0;
    754 }
    755 
    756 // BNF from section 4.2 of RFC 2616:
    757 //
    758 //   message-header = field-name ":" [ field-value ]
    759 //   field-name     = token
    760 //   field-value    = *( field-content | LWS )
    761 //   field-content  = <the OCTETs making up the field-value
    762 //                     and consisting of either *TEXT or combinations
    763 //                     of token, separators, and quoted-string>
    764 //
    765 
    766 HttpUtil::HeadersIterator::HeadersIterator(
    767     std::string::const_iterator headers_begin,
    768     std::string::const_iterator headers_end,
    769     const std::string& line_delimiter)
    770     : lines_(headers_begin, headers_end, line_delimiter) {
    771 }
    772 
    773 HttpUtil::HeadersIterator::~HeadersIterator() {
    774 }
    775 
    776 bool HttpUtil::HeadersIterator::GetNext() {
    777   while (lines_.GetNext()) {
    778     name_begin_ = lines_.token_begin();
    779     values_end_ = lines_.token_end();
    780 
    781     std::string::const_iterator colon(std::find(name_begin_, values_end_, ':'));
    782     if (colon == values_end_)
    783       continue;  // skip malformed header
    784 
    785     name_end_ = colon;
    786 
    787     // If the name starts with LWS, it is an invalid line.
    788     // Leading LWS implies a line continuation, and these should have
    789     // already been joined by AssembleRawHeaders().
    790     if (name_begin_ == name_end_ || IsLWS(*name_begin_))
    791       continue;
    792 
    793     TrimLWS(&name_begin_, &name_end_);
    794     if (name_begin_ == name_end_)
    795       continue;  // skip malformed header
    796 
    797     values_begin_ = colon + 1;
    798     TrimLWS(&values_begin_, &values_end_);
    799 
    800     // if we got a header name, then we are done.
    801     return true;
    802   }
    803   return false;
    804 }
    805 
    806 bool HttpUtil::HeadersIterator::AdvanceTo(const char* name) {
    807   DCHECK(name != NULL);
    808   DCHECK_EQ(0, base::StringToLowerASCII<std::string>(name).compare(name))
    809       << "the header name must be in all lower case";
    810 
    811   while (GetNext()) {
    812     if (LowerCaseEqualsASCII(name_begin_, name_end_, name)) {
    813       return true;
    814     }
    815   }
    816 
    817   return false;
    818 }
    819 
    820 HttpUtil::ValuesIterator::ValuesIterator(
    821     std::string::const_iterator values_begin,
    822     std::string::const_iterator values_end,
    823     char delimiter)
    824     : values_(values_begin, values_end, std::string(1, delimiter)) {
    825   values_.set_quote_chars("\'\"");
    826 }
    827 
    828 HttpUtil::ValuesIterator::~ValuesIterator() {
    829 }
    830 
    831 bool HttpUtil::ValuesIterator::GetNext() {
    832   while (values_.GetNext()) {
    833     value_begin_ = values_.token_begin();
    834     value_end_ = values_.token_end();
    835     TrimLWS(&value_begin_, &value_end_);
    836 
    837     // bypass empty values.
    838     if (value_begin_ != value_end_)
    839       return true;
    840   }
    841   return false;
    842 }
    843 
    844 HttpUtil::NameValuePairsIterator::NameValuePairsIterator(
    845     std::string::const_iterator begin,
    846     std::string::const_iterator end,
    847     char delimiter)
    848     : props_(begin, end, delimiter),
    849       valid_(true),
    850       name_begin_(end),
    851       name_end_(end),
    852       value_begin_(end),
    853       value_end_(end),
    854       value_is_quoted_(false) {
    855 }
    856 
    857 HttpUtil::NameValuePairsIterator::~NameValuePairsIterator() {}
    858 
    859 // We expect properties to be formatted as one of:
    860 //   name="value"
    861 //   name='value'
    862 //   name='\'value\''
    863 //   name=value
    864 //   name = value
    865 //   name=
    866 // Due to buggy implementations found in some embedded devices, we also
    867 // accept values with missing close quotemark (http://crbug.com/39836):
    868 //   name="value
    869 bool HttpUtil::NameValuePairsIterator::GetNext() {
    870   if (!props_.GetNext())
    871     return false;
    872 
    873   // Set the value as everything. Next we will split out the name.
    874   value_begin_ = props_.value_begin();
    875   value_end_ = props_.value_end();
    876   name_begin_ = name_end_ = value_end_;
    877 
    878   // Scan for the equals sign.
    879   std::string::const_iterator equals = std::find(value_begin_, value_end_, '=');
    880   if (equals == value_end_ || equals == value_begin_)
    881     return valid_ = false;  // Malformed, no equals sign
    882 
    883   // Verify that the equals sign we found wasn't inside of quote marks.
    884   for (std::string::const_iterator it = value_begin_; it != equals; ++it) {
    885     if (HttpUtil::IsQuote(*it))
    886       return valid_ = false;  // Malformed, quote appears before equals sign
    887   }
    888 
    889   name_begin_ = value_begin_;
    890   name_end_ = equals;
    891   value_begin_ = equals + 1;
    892 
    893   TrimLWS(&name_begin_, &name_end_);
    894   TrimLWS(&value_begin_, &value_end_);
    895   value_is_quoted_ = false;
    896   unquoted_value_.clear();
    897 
    898   if (value_begin_ == value_end_)
    899     return valid_ = false;  // Malformed, value is empty
    900 
    901   if (HttpUtil::IsQuote(*value_begin_)) {
    902     // Trim surrounding quotemarks off the value
    903     if (*value_begin_ != *(value_end_ - 1) || value_begin_ + 1 == value_end_) {
    904       // NOTE: This is not as graceful as it sounds:
    905       // * quoted-pairs will no longer be unquoted
    906       //   (["\"hello] should give ["hello]).
    907       // * Does not detect when the final quote is escaped
    908       //   (["value\"] should give [value"])
    909       ++value_begin_;  // Gracefully recover from mismatching quotes.
    910     } else {
    911       value_is_quoted_ = true;
    912       // Do not store iterators into this. See declaration of unquoted_value_.
    913       unquoted_value_ = HttpUtil::Unquote(value_begin_, value_end_);
    914     }
    915   }
    916 
    917   return true;
    918 }
    919 
    920 }  // namespace net
    921