Home | History | Annotate | Download | only in http
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // The rules for parsing content-types were borrowed from Firefox:
      6 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
      7 
      8 #include "net/http/http_util.h"
      9 
     10 #include <algorithm>
     11 
     12 #include "base/basictypes.h"
     13 #include "base/logging.h"
     14 #include "base/strings/string_number_conversions.h"
     15 #include "base/strings/string_piece.h"
     16 #include "base/strings/string_tokenizer.h"
     17 #include "base/strings/string_util.h"
     18 #include "base/strings/stringprintf.h"
     19 #include "base/time/time.h"
     20 
     21 
     22 namespace net {
     23 
     24 // Helpers --------------------------------------------------------------------
     25 
     26 // Returns the index of the closing quote of the string, if any.  |start| points
     27 // at the opening quote.
     28 static size_t FindStringEnd(const std::string& line, size_t start, char delim) {
     29   DCHECK_LT(start, line.length());
     30   DCHECK_EQ(line[start], delim);
     31   DCHECK((delim == '"') || (delim == '\''));
     32 
     33   const char set[] = { delim, '\\', '\0' };
     34   for (size_t end = line.find_first_of(set, start + 1);
     35        end != std::string::npos; end = line.find_first_of(set, end + 2)) {
     36     if (line[end] != '\\')
     37       return end;
     38   }
     39   return line.length();
     40 }
     41 
     42 
     43 // HttpUtil -------------------------------------------------------------------
     44 
     45 // static
     46 size_t HttpUtil::FindDelimiter(const std::string& line,
     47                                size_t search_start,
     48                                char delimiter) {
     49   do {
     50     // search_start points to the spot from which we should start looking
     51     // for the delimiter.
     52     const char delim_str[] = { delimiter, '"', '\'', '\0' };
     53     size_t cur_delim_pos = line.find_first_of(delim_str, search_start);
     54     if (cur_delim_pos == std::string::npos)
     55       return line.length();
     56 
     57     char ch = line[cur_delim_pos];
     58     if (ch == delimiter) {
     59       // Found delimiter
     60       return cur_delim_pos;
     61     }
     62 
     63     // We hit the start of a quoted string.  Look for its end.
     64     search_start = FindStringEnd(line, cur_delim_pos, ch);
     65     if (search_start == line.length())
     66       return search_start;
     67 
     68     ++search_start;
     69 
     70     // search_start now points to the first char after the end of the
     71     // string, so just go back to the top of the loop and look for
     72     // |delimiter| again.
     73   } while (true);
     74 
     75   NOTREACHED();
     76   return line.length();
     77 }
     78 
     79 // static
     80 void HttpUtil::ParseContentType(const std::string& content_type_str,
     81                                 std::string* mime_type,
     82                                 std::string* charset,
     83                                 bool* had_charset,
     84                                 std::string* boundary) {
     85   const std::string::const_iterator begin = content_type_str.begin();
     86 
     87   // Trim leading and trailing whitespace from type.  We include '(' in
     88   // the trailing trim set to catch media-type comments, which are not at all
     89   // standard, but may occur in rare cases.
     90   size_t type_val = content_type_str.find_first_not_of(HTTP_LWS);
     91   type_val = std::min(type_val, content_type_str.length());
     92   size_t type_end = content_type_str.find_first_of(HTTP_LWS ";(", type_val);
     93   if (type_end == std::string::npos)
     94     type_end = content_type_str.length();
     95 
     96   size_t charset_val = 0;
     97   size_t charset_end = 0;
     98   bool type_has_charset = false;
     99 
    100   // Iterate over parameters
    101   size_t param_start = content_type_str.find_first_of(';', type_end);
    102   if (param_start != std::string::npos) {
    103     base::StringTokenizer tokenizer(begin + param_start, content_type_str.end(),
    104                                     ";");
    105     tokenizer.set_quote_chars("\"");
    106     while (tokenizer.GetNext()) {
    107       std::string::const_iterator equals_sign =
    108           std::find(tokenizer.token_begin(), tokenizer.token_end(), '=');
    109       if (equals_sign == tokenizer.token_end())
    110         continue;
    111 
    112       std::string::const_iterator param_name_begin = tokenizer.token_begin();
    113       std::string::const_iterator param_name_end = equals_sign;
    114       TrimLWS(&param_name_begin, &param_name_end);
    115 
    116       std::string::const_iterator param_value_begin = equals_sign + 1;
    117       std::string::const_iterator param_value_end = tokenizer.token_end();
    118       DCHECK(param_value_begin <= tokenizer.token_end());
    119       TrimLWS(&param_value_begin, &param_value_end);
    120 
    121       if (LowerCaseEqualsASCII(param_name_begin, param_name_end, "charset")) {
    122         // TODO(abarth): Refactor this function to consistently use iterators.
    123         charset_val = param_value_begin - begin;
    124         charset_end = param_value_end - begin;
    125         type_has_charset = true;
    126       } else if (LowerCaseEqualsASCII(param_name_begin, param_name_end,
    127                                       "boundary")) {
    128         if (boundary)
    129           boundary->assign(param_value_begin, param_value_end);
    130       }
    131     }
    132   }
    133 
    134   if (type_has_charset) {
    135     // Trim leading and trailing whitespace from charset_val.  We include
    136     // '(' in the trailing trim set to catch media-type comments, which are
    137     // not at all standard, but may occur in rare cases.
    138     charset_val = content_type_str.find_first_not_of(HTTP_LWS, charset_val);
    139     charset_val = std::min(charset_val, charset_end);
    140     char first_char = content_type_str[charset_val];
    141     if (first_char == '"' || first_char == '\'') {
    142       charset_end = FindStringEnd(content_type_str, charset_val, first_char);
    143       ++charset_val;
    144       DCHECK(charset_end >= charset_val);
    145     } else {
    146       charset_end = std::min(content_type_str.find_first_of(HTTP_LWS ";(",
    147                                                             charset_val),
    148                              charset_end);
    149     }
    150   }
    151 
    152   // if the server sent "*/*", it is meaningless, so do not store it.
    153   // also, if type_val is the same as mime_type, then just update the
    154   // charset.  however, if charset is empty and mime_type hasn't
    155   // changed, then don't wipe-out an existing charset.  We
    156   // also want to reject a mime-type if it does not include a slash.
    157   // some servers give junk after the charset parameter, which may
    158   // include a comma, so this check makes us a bit more tolerant.
    159   if (content_type_str.length() != 0 &&
    160       content_type_str != "*/*" &&
    161       content_type_str.find_first_of('/') != std::string::npos) {
    162     // Common case here is that mime_type is empty
    163     bool eq = !mime_type->empty() && LowerCaseEqualsASCII(begin + type_val,
    164                                                           begin + type_end,
    165                                                           mime_type->data());
    166     if (!eq) {
    167       mime_type->assign(begin + type_val, begin + type_end);
    168       StringToLowerASCII(mime_type);
    169     }
    170     if ((!eq && *had_charset) || type_has_charset) {
    171       *had_charset = true;
    172       charset->assign(begin + charset_val, begin + charset_end);
    173       StringToLowerASCII(charset);
    174     }
    175   }
    176 }
    177 
    178 // static
    179 // Parse the Range header according to RFC 2616 14.35.1
    180 // ranges-specifier = byte-ranges-specifier
    181 // byte-ranges-specifier = bytes-unit "=" byte-range-set
    182 // byte-range-set  = 1#( byte-range-spec | suffix-byte-range-spec )
    183 // byte-range-spec = first-byte-pos "-" [last-byte-pos]
    184 // first-byte-pos  = 1*DIGIT
    185 // last-byte-pos   = 1*DIGIT
    186 bool HttpUtil::ParseRanges(const std::string& headers,
    187                            std::vector<HttpByteRange>* ranges) {
    188   std::string ranges_specifier;
    189   HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\r\n");
    190 
    191   while (it.GetNext()) {
    192     // Look for "Range" header.
    193     if (!LowerCaseEqualsASCII(it.name(), "range"))
    194       continue;
    195     ranges_specifier = it.values();
    196     // We just care about the first "Range" header, so break here.
    197     break;
    198   }
    199 
    200   if (ranges_specifier.empty())
    201     return false;
    202 
    203   return ParseRangeHeader(ranges_specifier, ranges);
    204 }
    205 
    206 // static
    207 bool HttpUtil::ParseRangeHeader(const std::string& ranges_specifier,
    208                                 std::vector<HttpByteRange>* ranges) {
    209   size_t equal_char_offset = ranges_specifier.find('=');
    210   if (equal_char_offset == std::string::npos)
    211     return false;
    212 
    213   // Try to extract bytes-unit part.
    214   std::string::const_iterator bytes_unit_begin = ranges_specifier.begin();
    215   std::string::const_iterator bytes_unit_end = bytes_unit_begin +
    216                                                equal_char_offset;
    217   std::string::const_iterator byte_range_set_begin = bytes_unit_end + 1;
    218   std::string::const_iterator byte_range_set_end = ranges_specifier.end();
    219 
    220   TrimLWS(&bytes_unit_begin, &bytes_unit_end);
    221   // "bytes" unit identifier is not found.
    222   if (!LowerCaseEqualsASCII(bytes_unit_begin, bytes_unit_end, "bytes"))
    223     return false;
    224 
    225   ValuesIterator byte_range_set_iterator(byte_range_set_begin,
    226                                          byte_range_set_end, ',');
    227   while (byte_range_set_iterator.GetNext()) {
    228     size_t minus_char_offset = byte_range_set_iterator.value().find('-');
    229     // If '-' character is not found, reports failure.
    230     if (minus_char_offset == std::string::npos)
    231       return false;
    232 
    233     std::string::const_iterator first_byte_pos_begin =
    234         byte_range_set_iterator.value_begin();
    235     std::string::const_iterator first_byte_pos_end =
    236         first_byte_pos_begin +  minus_char_offset;
    237     TrimLWS(&first_byte_pos_begin, &first_byte_pos_end);
    238     std::string first_byte_pos(first_byte_pos_begin, first_byte_pos_end);
    239 
    240     HttpByteRange range;
    241     // Try to obtain first-byte-pos.
    242     if (!first_byte_pos.empty()) {
    243       int64 first_byte_position = -1;
    244       if (!base::StringToInt64(first_byte_pos, &first_byte_position))
    245         return false;
    246       range.set_first_byte_position(first_byte_position);
    247     }
    248 
    249     std::string::const_iterator last_byte_pos_begin =
    250         byte_range_set_iterator.value_begin() + minus_char_offset + 1;
    251     std::string::const_iterator last_byte_pos_end =
    252         byte_range_set_iterator.value_end();
    253     TrimLWS(&last_byte_pos_begin, &last_byte_pos_end);
    254     std::string last_byte_pos(last_byte_pos_begin, last_byte_pos_end);
    255 
    256     // We have last-byte-pos or suffix-byte-range-spec in this case.
    257     if (!last_byte_pos.empty()) {
    258       int64 last_byte_position;
    259       if (!base::StringToInt64(last_byte_pos, &last_byte_position))
    260         return false;
    261       if (range.HasFirstBytePosition())
    262         range.set_last_byte_position(last_byte_position);
    263       else
    264         range.set_suffix_length(last_byte_position);
    265     } else if (!range.HasFirstBytePosition()) {
    266       return false;
    267     }
    268 
    269     // Do a final check on the HttpByteRange object.
    270     if (!range.IsValid())
    271       return false;
    272     ranges->push_back(range);
    273   }
    274   return !ranges->empty();
    275 }
    276 
    277 // static
    278 bool HttpUtil::HasHeader(const std::string& headers, const char* name) {
    279   size_t name_len = strlen(name);
    280   std::string::const_iterator it =
    281       std::search(headers.begin(),
    282                   headers.end(),
    283                   name,
    284                   name + name_len,
    285                   base::CaseInsensitiveCompareASCII<char>());
    286   if (it == headers.end())
    287     return false;
    288 
    289   // ensure match is prefixed by newline
    290   if (it != headers.begin() && it[-1] != '\n')
    291     return false;
    292 
    293   // ensure match is suffixed by colon
    294   if (it + name_len >= headers.end() || it[name_len] != ':')
    295     return false;
    296 
    297   return true;
    298 }
    299 
    300 namespace {
    301 // A header string containing any of the following fields will cause
    302 // an error. The list comes from the XMLHttpRequest standard.
    303 // http://www.w3.org/TR/XMLHttpRequest/#the-setrequestheader-method
    304 const char* const kForbiddenHeaderFields[] = {
    305   "accept-charset",
    306   "accept-encoding",
    307   "access-control-request-headers",
    308   "access-control-request-method",
    309   "connection",
    310   "content-length",
    311   "cookie",
    312   "cookie2",
    313   "content-transfer-encoding",
    314   "date",
    315   "expect",
    316   "host",
    317   "keep-alive",
    318   "origin",
    319   "referer",
    320   "te",
    321   "trailer",
    322   "transfer-encoding",
    323   "upgrade",
    324   "user-agent",
    325   "via",
    326 };
    327 }  // anonymous namespace
    328 
    329 // static
    330 bool HttpUtil::IsSafeHeader(const std::string& name) {
    331   std::string lower_name(StringToLowerASCII(name));
    332   if (StartsWithASCII(lower_name, "proxy-", true) ||
    333       StartsWithASCII(lower_name, "sec-", true))
    334     return false;
    335   for (size_t i = 0; i < arraysize(kForbiddenHeaderFields); ++i) {
    336     if (lower_name == kForbiddenHeaderFields[i])
    337       return false;
    338   }
    339   return true;
    340 }
    341 
    342 // static
    343 std::string HttpUtil::StripHeaders(const std::string& headers,
    344                                    const char* const headers_to_remove[],
    345                                    size_t headers_to_remove_len) {
    346   std::string stripped_headers;
    347   net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\r\n");
    348 
    349   while (it.GetNext()) {
    350     bool should_remove = false;
    351     for (size_t i = 0; i < headers_to_remove_len; ++i) {
    352       if (LowerCaseEqualsASCII(it.name_begin(), it.name_end(),
    353                                headers_to_remove[i])) {
    354         should_remove = true;
    355         break;
    356       }
    357     }
    358     if (!should_remove) {
    359       // Assume that name and values are on the same line.
    360       stripped_headers.append(it.name_begin(), it.values_end());
    361       stripped_headers.append("\r\n");
    362     }
    363   }
    364   return stripped_headers;
    365 }
    366 
    367 // static
    368 bool HttpUtil::IsNonCoalescingHeader(std::string::const_iterator name_begin,
    369                                      std::string::const_iterator name_end) {
    370   // NOTE: "set-cookie2" headers do not support expires attributes, so we don't
    371   // have to list them here.
    372   const char* kNonCoalescingHeaders[] = {
    373     "date",
    374     "expires",
    375     "last-modified",
    376     "location",  // See bug 1050541 for details
    377     "retry-after",
    378     "set-cookie",
    379     // The format of auth-challenges mixes both space separated tokens and
    380     // comma separated properties, so coalescing on comma won't work.
    381     "www-authenticate",
    382     "proxy-authenticate",
    383     // STS specifies that UAs must not process any STS headers after the first
    384     // one.
    385     "strict-transport-security"
    386   };
    387   for (size_t i = 0; i < arraysize(kNonCoalescingHeaders); ++i) {
    388     if (LowerCaseEqualsASCII(name_begin, name_end, kNonCoalescingHeaders[i]))
    389       return true;
    390   }
    391   return false;
    392 }
    393 
    394 bool HttpUtil::IsLWS(char c) {
    395   return strchr(HTTP_LWS, c) != NULL;
    396 }
    397 
    398 void HttpUtil::TrimLWS(std::string::const_iterator* begin,
    399                        std::string::const_iterator* end) {
    400   // leading whitespace
    401   while (*begin < *end && IsLWS((*begin)[0]))
    402     ++(*begin);
    403 
    404   // trailing whitespace
    405   while (*begin < *end && IsLWS((*end)[-1]))
    406     --(*end);
    407 }
    408 
    409 bool HttpUtil::IsQuote(char c) {
    410   // Single quote mark isn't actually part of quoted-text production,
    411   // but apparently some servers rely on this.
    412   return c == '"' || c == '\'';
    413 }
    414 
    415 // See RFC 2616 Sec 2.2 for the definition of |token|.
    416 bool HttpUtil::IsToken(std::string::const_iterator begin,
    417                        std::string::const_iterator end) {
    418   if (begin == end)
    419     return false;
    420   for (std::string::const_iterator iter = begin; iter != end; ++iter) {
    421     unsigned char c = *iter;
    422     if (c >= 0x80 || c <= 0x1F || c == 0x7F ||
    423         c == '(' || c == ')' || c == '<' || c == '>' || c == '@' ||
    424         c == ',' || c == ';' || c == ':' || c == '\\' || c == '"' ||
    425         c == '/' || c == '[' || c == ']' || c == '?' || c == '=' ||
    426         c == '{' || c == '}' || c == ' ' || c == '\t')
    427       return false;
    428   }
    429   return true;
    430 }
    431 
    432 std::string HttpUtil::Unquote(std::string::const_iterator begin,
    433                               std::string::const_iterator end) {
    434   // Empty string
    435   if (begin == end)
    436     return std::string();
    437 
    438   // Nothing to unquote.
    439   if (!IsQuote(*begin))
    440     return std::string(begin, end);
    441 
    442   // No terminal quote mark.
    443   if (end - begin < 2 || *begin != *(end - 1))
    444     return std::string(begin, end);
    445 
    446   // Strip quotemarks
    447   ++begin;
    448   --end;
    449 
    450   // Unescape quoted-pair (defined in RFC 2616 section 2.2)
    451   std::string unescaped;
    452   bool prev_escape = false;
    453   for (; begin != end; ++begin) {
    454     char c = *begin;
    455     if (c == '\\' && !prev_escape) {
    456       prev_escape = true;
    457       continue;
    458     }
    459     prev_escape = false;
    460     unescaped.push_back(c);
    461   }
    462   return unescaped;
    463 }
    464 
    465 // static
    466 std::string HttpUtil::Unquote(const std::string& str) {
    467   return Unquote(str.begin(), str.end());
    468 }
    469 
    470 // static
    471 std::string HttpUtil::Quote(const std::string& str) {
    472   std::string escaped;
    473   escaped.reserve(2 + str.size());
    474 
    475   std::string::const_iterator begin = str.begin();
    476   std::string::const_iterator end = str.end();
    477 
    478   // Esape any backslashes or quotemarks within the string, and
    479   // then surround with quotes.
    480   escaped.push_back('"');
    481   for (; begin != end; ++begin) {
    482     char c = *begin;
    483     if (c == '"' || c == '\\')
    484       escaped.push_back('\\');
    485     escaped.push_back(c);
    486   }
    487   escaped.push_back('"');
    488   return escaped;
    489 }
    490 
    491 // Find the "http" substring in a status line. This allows for
    492 // some slop at the start. If the "http" string could not be found
    493 // then returns -1.
    494 // static
    495 int HttpUtil::LocateStartOfStatusLine(const char* buf, int buf_len) {
    496   const int slop = 4;
    497   const int http_len = 4;
    498 
    499   if (buf_len >= http_len) {
    500     int i_max = std::min(buf_len - http_len, slop);
    501     for (int i = 0; i <= i_max; ++i) {
    502       if (LowerCaseEqualsASCII(buf + i, buf + i + http_len, "http"))
    503         return i;
    504     }
    505   }
    506   return -1;  // Not found
    507 }
    508 
    509 int HttpUtil::LocateEndOfHeaders(const char* buf, int buf_len, int i) {
    510   bool was_lf = false;
    511   char last_c = '\0';
    512   for (; i < buf_len; ++i) {
    513     char c = buf[i];
    514     if (c == '\n') {
    515       if (was_lf)
    516         return i + 1;
    517       was_lf = true;
    518     } else if (c != '\r' || last_c != '\n') {
    519       was_lf = false;
    520     }
    521     last_c = c;
    522   }
    523   return -1;
    524 }
    525 
    526 // In order for a line to be continuable, it must specify a
    527 // non-blank header-name. Line continuations are specifically for
    528 // header values -- do not allow headers names to span lines.
    529 static bool IsLineSegmentContinuable(const char* begin, const char* end) {
    530   if (begin == end)
    531     return false;
    532 
    533   const char* colon = std::find(begin, end, ':');
    534   if (colon == end)
    535     return false;
    536 
    537   const char* name_begin = begin;
    538   const char* name_end = colon;
    539 
    540   // Name can't be empty.
    541   if (name_begin == name_end)
    542     return false;
    543 
    544   // Can't start with LWS (this would imply the segment is a continuation)
    545   if (HttpUtil::IsLWS(*name_begin))
    546     return false;
    547 
    548   return true;
    549 }
    550 
    551 // Helper used by AssembleRawHeaders, to find the end of the status line.
    552 static const char* FindStatusLineEnd(const char* begin, const char* end) {
    553   size_t i = base::StringPiece(begin, end - begin).find_first_of("\r\n");
    554   if (i == base::StringPiece::npos)
    555     return end;
    556   return begin + i;
    557 }
    558 
    559 // Helper used by AssembleRawHeaders, to skip past leading LWS.
    560 static const char* FindFirstNonLWS(const char* begin, const char* end) {
    561   for (const char* cur = begin; cur != end; ++cur) {
    562     if (!HttpUtil::IsLWS(*cur))
    563       return cur;
    564   }
    565   return end;  // Not found.
    566 }
    567 
    568 std::string HttpUtil::AssembleRawHeaders(const char* input_begin,
    569                                          int input_len) {
    570   std::string raw_headers;
    571   raw_headers.reserve(input_len);
    572 
    573   const char* input_end = input_begin + input_len;
    574 
    575   // Skip any leading slop, since the consumers of this output
    576   // (HttpResponseHeaders) don't deal with it.
    577   int status_begin_offset = LocateStartOfStatusLine(input_begin, input_len);
    578   if (status_begin_offset != -1)
    579     input_begin += status_begin_offset;
    580 
    581   // Copy the status line.
    582   const char* status_line_end = FindStatusLineEnd(input_begin, input_end);
    583   raw_headers.append(input_begin, status_line_end);
    584 
    585   // After the status line, every subsequent line is a header line segment.
    586   // Should a segment start with LWS, it is a continuation of the previous
    587   // line's field-value.
    588 
    589   // TODO(ericroman): is this too permissive? (delimits on [\r\n]+)
    590   base::CStringTokenizer lines(status_line_end, input_end, "\r\n");
    591 
    592   // This variable is true when the previous line was continuable.
    593   bool prev_line_continuable = false;
    594 
    595   while (lines.GetNext()) {
    596     const char* line_begin = lines.token_begin();
    597     const char* line_end = lines.token_end();
    598 
    599     if (prev_line_continuable && IsLWS(*line_begin)) {
    600       // Join continuation; reduce the leading LWS to a single SP.
    601       raw_headers.push_back(' ');
    602       raw_headers.append(FindFirstNonLWS(line_begin, line_end), line_end);
    603     } else {
    604       // Terminate the previous line.
    605       raw_headers.push_back('\n');
    606 
    607       // Copy the raw data to output.
    608       raw_headers.append(line_begin, line_end);
    609 
    610       // Check if the current line can be continued.
    611       prev_line_continuable = IsLineSegmentContinuable(line_begin, line_end);
    612     }
    613   }
    614 
    615   raw_headers.append("\n\n", 2);
    616 
    617   // Use '\0' as the canonical line terminator. If the input already contained
    618   // any embeded '\0' characters we will strip them first to avoid interpreting
    619   // them as line breaks.
    620   raw_headers.erase(std::remove(raw_headers.begin(), raw_headers.end(), '\0'),
    621                     raw_headers.end());
    622   std::replace(raw_headers.begin(), raw_headers.end(), '\n', '\0');
    623 
    624   return raw_headers;
    625 }
    626 
    627 std::string HttpUtil::ConvertHeadersBackToHTTPResponse(const std::string& str) {
    628   std::string disassembled_headers;
    629   base::StringTokenizer tokenizer(str, std::string(1, '\0'));
    630   while (tokenizer.GetNext()) {
    631     disassembled_headers.append(tokenizer.token_begin(), tokenizer.token_end());
    632     disassembled_headers.append("\r\n");
    633   }
    634   disassembled_headers.append("\r\n");
    635 
    636   return disassembled_headers;
    637 }
    638 
    639 // TODO(jungshik): 1. If the list is 'fr-CA,fr-FR,en,de', we have to add
    640 // 'fr' after 'fr-CA' with the same q-value as 'fr-CA' because
    641 // web servers, in general, do not fall back to 'fr' and may end up picking
    642 // 'en' which has a lower preference than 'fr-CA' and 'fr-FR'.
    643 // 2. This function assumes that the input is a comma separated list
    644 // without any whitespace. As long as it comes from the preference and
    645 // a user does not manually edit the preference file, it's the case. Still,
    646 // we may have to make it more robust.
    647 std::string HttpUtil::GenerateAcceptLanguageHeader(
    648     const std::string& raw_language_list) {
    649   // We use integers for qvalue and qvalue decrement that are 10 times
    650   // larger than actual values to avoid a problem with comparing
    651   // two floating point numbers.
    652   const unsigned int kQvalueDecrement10 = 2;
    653   unsigned int qvalue10 = 10;
    654   base::StringTokenizer t(raw_language_list, ",");
    655   std::string lang_list_with_q;
    656   while (t.GetNext()) {
    657     std::string language = t.token();
    658     if (qvalue10 == 10) {
    659       // q=1.0 is implicit.
    660       lang_list_with_q = language;
    661     } else {
    662       DCHECK_LT(qvalue10, 10U);
    663       base::StringAppendF(&lang_list_with_q, ",%s;q=0.%d", language.c_str(),
    664                           qvalue10);
    665     }
    666     // It does not make sense to have 'q=0'.
    667     if (qvalue10 > kQvalueDecrement10)
    668       qvalue10 -= kQvalueDecrement10;
    669   }
    670   return lang_list_with_q;
    671 }
    672 
    673 void HttpUtil::AppendHeaderIfMissing(const char* header_name,
    674                                      const std::string& header_value,
    675                                      std::string* headers) {
    676   if (header_value.empty())
    677     return;
    678   if (net::HttpUtil::HasHeader(*headers, header_name))
    679     return;
    680   *headers += std::string(header_name) + ": " + header_value + "\r\n";
    681 }
    682 
    683 bool HttpUtil::HasStrongValidators(HttpVersion version,
    684                                    const std::string& etag_header,
    685                                    const std::string& last_modified_header,
    686                                    const std::string& date_header) {
    687   if (version < HttpVersion(1, 1))
    688     return false;
    689 
    690   if (!etag_header.empty()) {
    691     size_t slash = etag_header.find('/');
    692     if (slash == std::string::npos || slash == 0)
    693       return true;
    694 
    695     std::string::const_iterator i = etag_header.begin();
    696     std::string::const_iterator j = etag_header.begin() + slash;
    697     TrimLWS(&i, &j);
    698     if (!LowerCaseEqualsASCII(i, j, "w"))
    699       return true;
    700   }
    701 
    702   base::Time last_modified;
    703   if (!base::Time::FromString(last_modified_header.c_str(), &last_modified))
    704     return false;
    705 
    706   base::Time date;
    707   if (!base::Time::FromString(date_header.c_str(), &date))
    708     return false;
    709 
    710   return ((date - last_modified).InSeconds() >= 60);
    711 }
    712 
    713 // Functions for histogram initialization.  The code 0 is put in the map to
    714 // track status codes that are invalid.
    715 // TODO(gavinp): Greatly prune the collected codes once we learn which
    716 // ones are not sent in practice, to reduce upload size & memory use.
    717 
    718 enum {
    719   HISTOGRAM_MIN_HTTP_STATUS_CODE = 100,
    720   HISTOGRAM_MAX_HTTP_STATUS_CODE = 599,
    721 };
    722 
    723 // static
    724 std::vector<int> HttpUtil::GetStatusCodesForHistogram() {
    725   std::vector<int> codes;
    726   codes.reserve(
    727       HISTOGRAM_MAX_HTTP_STATUS_CODE - HISTOGRAM_MIN_HTTP_STATUS_CODE + 2);
    728   codes.push_back(0);
    729   for (int i = HISTOGRAM_MIN_HTTP_STATUS_CODE;
    730        i <= HISTOGRAM_MAX_HTTP_STATUS_CODE; ++i)
    731     codes.push_back(i);
    732   return codes;
    733 }
    734 
    735 // static
    736 int HttpUtil::MapStatusCodeForHistogram(int code) {
    737   if (HISTOGRAM_MIN_HTTP_STATUS_CODE <= code &&
    738       code <= HISTOGRAM_MAX_HTTP_STATUS_CODE)
    739     return code;
    740   return 0;
    741 }
    742 
    743 // BNF from section 4.2 of RFC 2616:
    744 //
    745 //   message-header = field-name ":" [ field-value ]
    746 //   field-name     = token
    747 //   field-value    = *( field-content | LWS )
    748 //   field-content  = <the OCTETs making up the field-value
    749 //                     and consisting of either *TEXT or combinations
    750 //                     of token, separators, and quoted-string>
    751 //
    752 
    753 HttpUtil::HeadersIterator::HeadersIterator(
    754     std::string::const_iterator headers_begin,
    755     std::string::const_iterator headers_end,
    756     const std::string& line_delimiter)
    757     : lines_(headers_begin, headers_end, line_delimiter) {
    758 }
    759 
    760 HttpUtil::HeadersIterator::~HeadersIterator() {
    761 }
    762 
    763 bool HttpUtil::HeadersIterator::GetNext() {
    764   while (lines_.GetNext()) {
    765     name_begin_ = lines_.token_begin();
    766     values_end_ = lines_.token_end();
    767 
    768     std::string::const_iterator colon(std::find(name_begin_, values_end_, ':'));
    769     if (colon == values_end_)
    770       continue;  // skip malformed header
    771 
    772     name_end_ = colon;
    773 
    774     // If the name starts with LWS, it is an invalid line.
    775     // Leading LWS implies a line continuation, and these should have
    776     // already been joined by AssembleRawHeaders().
    777     if (name_begin_ == name_end_ || IsLWS(*name_begin_))
    778       continue;
    779 
    780     TrimLWS(&name_begin_, &name_end_);
    781     if (name_begin_ == name_end_)
    782       continue;  // skip malformed header
    783 
    784     values_begin_ = colon + 1;
    785     TrimLWS(&values_begin_, &values_end_);
    786 
    787     // if we got a header name, then we are done.
    788     return true;
    789   }
    790   return false;
    791 }
    792 
    793 bool HttpUtil::HeadersIterator::AdvanceTo(const char* name) {
    794   DCHECK(name != NULL);
    795   DCHECK_EQ(0, StringToLowerASCII<std::string>(name).compare(name))
    796       << "the header name must be in all lower case";
    797 
    798   while (GetNext()) {
    799     if (LowerCaseEqualsASCII(name_begin_, name_end_, name)) {
    800       return true;
    801     }
    802   }
    803 
    804   return false;
    805 }
    806 
    807 HttpUtil::ValuesIterator::ValuesIterator(
    808     std::string::const_iterator values_begin,
    809     std::string::const_iterator values_end,
    810     char delimiter)
    811     : values_(values_begin, values_end, std::string(1, delimiter)) {
    812   values_.set_quote_chars("\'\"");
    813 }
    814 
    815 HttpUtil::ValuesIterator::~ValuesIterator() {
    816 }
    817 
    818 bool HttpUtil::ValuesIterator::GetNext() {
    819   while (values_.GetNext()) {
    820     value_begin_ = values_.token_begin();
    821     value_end_ = values_.token_end();
    822     TrimLWS(&value_begin_, &value_end_);
    823 
    824     // bypass empty values.
    825     if (value_begin_ != value_end_)
    826       return true;
    827   }
    828   return false;
    829 }
    830 
    831 HttpUtil::NameValuePairsIterator::NameValuePairsIterator(
    832     std::string::const_iterator begin,
    833     std::string::const_iterator end,
    834     char delimiter)
    835     : props_(begin, end, delimiter),
    836       valid_(true),
    837       name_begin_(end),
    838       name_end_(end),
    839       value_begin_(end),
    840       value_end_(end),
    841       value_is_quoted_(false) {
    842 }
    843 
    844 HttpUtil::NameValuePairsIterator::~NameValuePairsIterator() {}
    845 
    846 // We expect properties to be formatted as one of:
    847 //   name="value"
    848 //   name='value'
    849 //   name='\'value\''
    850 //   name=value
    851 //   name = value
    852 //   name=
    853 // Due to buggy implementations found in some embedded devices, we also
    854 // accept values with missing close quotemark (http://crbug.com/39836):
    855 //   name="value
    856 bool HttpUtil::NameValuePairsIterator::GetNext() {
    857   if (!props_.GetNext())
    858     return false;
    859 
    860   // Set the value as everything. Next we will split out the name.
    861   value_begin_ = props_.value_begin();
    862   value_end_ = props_.value_end();
    863   name_begin_ = name_end_ = value_end_;
    864 
    865   // Scan for the equals sign.
    866   std::string::const_iterator equals = std::find(value_begin_, value_end_, '=');
    867   if (equals == value_end_ || equals == value_begin_)
    868     return valid_ = false;  // Malformed, no equals sign
    869 
    870   // Verify that the equals sign we found wasn't inside of quote marks.
    871   for (std::string::const_iterator it = value_begin_; it != equals; ++it) {
    872     if (HttpUtil::IsQuote(*it))
    873       return valid_ = false;  // Malformed, quote appears before equals sign
    874   }
    875 
    876   name_begin_ = value_begin_;
    877   name_end_ = equals;
    878   value_begin_ = equals + 1;
    879 
    880   TrimLWS(&name_begin_, &name_end_);
    881   TrimLWS(&value_begin_, &value_end_);
    882   value_is_quoted_ = false;
    883   unquoted_value_.clear();
    884 
    885   if (value_begin_ == value_end_)
    886     return valid_ = false;  // Malformed, value is empty
    887 
    888   if (HttpUtil::IsQuote(*value_begin_)) {
    889     // Trim surrounding quotemarks off the value
    890     if (*value_begin_ != *(value_end_ - 1) || value_begin_ + 1 == value_end_) {
    891       // NOTE: This is not as graceful as it sounds:
    892       // * quoted-pairs will no longer be unquoted
    893       //   (["\"hello] should give ["hello]).
    894       // * Does not detect when the final quote is escaped
    895       //   (["value\"] should give [value"])
    896       ++value_begin_;  // Gracefully recover from mismatching quotes.
    897     } else {
    898       value_is_quoted_ = true;
    899       // Do not store iterators into this. See declaration of unquoted_value_.
    900       unquoted_value_ = HttpUtil::Unquote(value_begin_, value_end_);
    901     }
    902   }
    903 
    904   return true;
    905 }
    906 
    907 }  // namespace net
    908