1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // The rules for parsing content-types were borrowed from Firefox: 6 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834 7 8 #include "net/http/http_util.h" 9 10 #include <algorithm> 11 12 #include "base/basictypes.h" 13 #include "base/logging.h" 14 #include "base/strings/string_number_conversions.h" 15 #include "base/strings/string_piece.h" 16 #include "base/strings/string_tokenizer.h" 17 #include "base/strings/string_util.h" 18 #include "base/strings/stringprintf.h" 19 #include "base/time/time.h" 20 21 using std::string; 22 23 namespace net { 24 25 //----------------------------------------------------------------------------- 26 27 // Return the index of the closing quote of the string, if any. 28 static size_t FindStringEnd(const string& line, size_t start, char delim) { 29 DCHECK(start < line.length() && line[start] == delim && 30 (delim == '"' || delim == '\'')); 31 32 const char set[] = { delim, '\\', '\0' }; 33 for (;;) { 34 // start points to either the start quote or the last 35 // escaped char (the char following a '\\') 36 37 size_t end = line.find_first_of(set, start + 1); 38 if (end == string::npos) 39 return line.length(); 40 41 if (line[end] == '\\') { 42 // Hit a backslash-escaped char. Need to skip over it. 43 start = end + 1; 44 if (start == line.length()) 45 return start; 46 47 // Go back to looking for the next escape or the string end 48 continue; 49 } 50 51 return end; 52 } 53 54 NOTREACHED(); 55 return line.length(); 56 } 57 58 //----------------------------------------------------------------------------- 59 60 // static 61 size_t HttpUtil::FindDelimiter(const string& line, size_t search_start, 62 char delimiter) { 63 do { 64 // search_start points to the spot from which we should start looking 65 // for the delimiter. 66 const char delim_str[] = { delimiter, '"', '\'', '\0' }; 67 size_t cur_delim_pos = line.find_first_of(delim_str, search_start); 68 if (cur_delim_pos == string::npos) 69 return line.length(); 70 71 char ch = line[cur_delim_pos]; 72 if (ch == delimiter) { 73 // Found delimiter 74 return cur_delim_pos; 75 } 76 77 // We hit the start of a quoted string. Look for its end. 78 search_start = FindStringEnd(line, cur_delim_pos, ch); 79 if (search_start == line.length()) 80 return search_start; 81 82 ++search_start; 83 84 // search_start now points to the first char after the end of the 85 // string, so just go back to the top of the loop and look for 86 // |delimiter| again. 87 } while (true); 88 89 NOTREACHED(); 90 return line.length(); 91 } 92 93 // static 94 void HttpUtil::ParseContentType(const string& content_type_str, 95 string* mime_type, 96 string* charset, 97 bool* had_charset, 98 string* boundary) { 99 const string::const_iterator begin = content_type_str.begin(); 100 101 // Trim leading and trailing whitespace from type. We include '(' in 102 // the trailing trim set to catch media-type comments, which are not at all 103 // standard, but may occur in rare cases. 104 size_t type_val = content_type_str.find_first_not_of(HTTP_LWS); 105 type_val = std::min(type_val, content_type_str.length()); 106 size_t type_end = content_type_str.find_first_of(HTTP_LWS ";(", type_val); 107 if (string::npos == type_end) 108 type_end = content_type_str.length(); 109 110 size_t charset_val = 0; 111 size_t charset_end = 0; 112 bool type_has_charset = false; 113 114 // Iterate over parameters 115 size_t param_start = content_type_str.find_first_of(';', type_end); 116 if (param_start != string::npos) { 117 base::StringTokenizer tokenizer(begin + param_start, content_type_str.end(), 118 ";"); 119 tokenizer.set_quote_chars("\""); 120 while (tokenizer.GetNext()) { 121 string::const_iterator equals_sign = 122 std::find(tokenizer.token_begin(), tokenizer.token_end(), '='); 123 if (equals_sign == tokenizer.token_end()) 124 continue; 125 126 string::const_iterator param_name_begin = tokenizer.token_begin(); 127 string::const_iterator param_name_end = equals_sign; 128 TrimLWS(¶m_name_begin, ¶m_name_end); 129 130 string::const_iterator param_value_begin = equals_sign + 1; 131 string::const_iterator param_value_end = tokenizer.token_end(); 132 DCHECK(param_value_begin <= tokenizer.token_end()); 133 TrimLWS(¶m_value_begin, ¶m_value_end); 134 135 if (LowerCaseEqualsASCII(param_name_begin, param_name_end, "charset")) { 136 // TODO(abarth): Refactor this function to consistently use iterators. 137 charset_val = param_value_begin - begin; 138 charset_end = param_value_end - begin; 139 type_has_charset = true; 140 } else if (LowerCaseEqualsASCII(param_name_begin, param_name_end, 141 "boundary")) { 142 if (boundary) 143 boundary->assign(param_value_begin, param_value_end); 144 } 145 } 146 } 147 148 if (type_has_charset) { 149 // Trim leading and trailing whitespace from charset_val. We include 150 // '(' in the trailing trim set to catch media-type comments, which are 151 // not at all standard, but may occur in rare cases. 152 charset_val = content_type_str.find_first_not_of(HTTP_LWS, charset_val); 153 charset_val = std::min(charset_val, charset_end); 154 char first_char = content_type_str[charset_val]; 155 if (first_char == '"' || first_char == '\'') { 156 charset_end = FindStringEnd(content_type_str, charset_val, first_char); 157 ++charset_val; 158 DCHECK(charset_end >= charset_val); 159 } else { 160 charset_end = std::min(content_type_str.find_first_of(HTTP_LWS ";(", 161 charset_val), 162 charset_end); 163 } 164 } 165 166 // if the server sent "*/*", it is meaningless, so do not store it. 167 // also, if type_val is the same as mime_type, then just update the 168 // charset. however, if charset is empty and mime_type hasn't 169 // changed, then don't wipe-out an existing charset. We 170 // also want to reject a mime-type if it does not include a slash. 171 // some servers give junk after the charset parameter, which may 172 // include a comma, so this check makes us a bit more tolerant. 173 if (content_type_str.length() != 0 && 174 content_type_str != "*/*" && 175 content_type_str.find_first_of('/') != string::npos) { 176 // Common case here is that mime_type is empty 177 bool eq = !mime_type->empty() && LowerCaseEqualsASCII(begin + type_val, 178 begin + type_end, 179 mime_type->data()); 180 if (!eq) { 181 mime_type->assign(begin + type_val, begin + type_end); 182 StringToLowerASCII(mime_type); 183 } 184 if ((!eq && *had_charset) || type_has_charset) { 185 *had_charset = true; 186 charset->assign(begin + charset_val, begin + charset_end); 187 StringToLowerASCII(charset); 188 } 189 } 190 } 191 192 // static 193 // Parse the Range header according to RFC 2616 14.35.1 194 // ranges-specifier = byte-ranges-specifier 195 // byte-ranges-specifier = bytes-unit "=" byte-range-set 196 // byte-range-set = 1#( byte-range-spec | suffix-byte-range-spec ) 197 // byte-range-spec = first-byte-pos "-" [last-byte-pos] 198 // first-byte-pos = 1*DIGIT 199 // last-byte-pos = 1*DIGIT 200 bool HttpUtil::ParseRanges(const std::string& headers, 201 std::vector<HttpByteRange>* ranges) { 202 std::string ranges_specifier; 203 HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\r\n"); 204 205 while (it.GetNext()) { 206 // Look for "Range" header. 207 if (!LowerCaseEqualsASCII(it.name(), "range")) 208 continue; 209 ranges_specifier = it.values(); 210 // We just care about the first "Range" header, so break here. 211 break; 212 } 213 214 if (ranges_specifier.empty()) 215 return false; 216 217 return ParseRangeHeader(ranges_specifier, ranges); 218 } 219 220 // static 221 bool HttpUtil::ParseRangeHeader(const std::string& ranges_specifier, 222 std::vector<HttpByteRange>* ranges) { 223 size_t equal_char_offset = ranges_specifier.find('='); 224 if (equal_char_offset == std::string::npos) 225 return false; 226 227 // Try to extract bytes-unit part. 228 std::string::const_iterator bytes_unit_begin = ranges_specifier.begin(); 229 std::string::const_iterator bytes_unit_end = bytes_unit_begin + 230 equal_char_offset; 231 std::string::const_iterator byte_range_set_begin = bytes_unit_end + 1; 232 std::string::const_iterator byte_range_set_end = ranges_specifier.end(); 233 234 TrimLWS(&bytes_unit_begin, &bytes_unit_end); 235 // "bytes" unit identifier is not found. 236 if (!LowerCaseEqualsASCII(bytes_unit_begin, bytes_unit_end, "bytes")) 237 return false; 238 239 ValuesIterator byte_range_set_iterator(byte_range_set_begin, 240 byte_range_set_end, ','); 241 while (byte_range_set_iterator.GetNext()) { 242 size_t minus_char_offset = byte_range_set_iterator.value().find('-'); 243 // If '-' character is not found, reports failure. 244 if (minus_char_offset == std::string::npos) 245 return false; 246 247 std::string::const_iterator first_byte_pos_begin = 248 byte_range_set_iterator.value_begin(); 249 std::string::const_iterator first_byte_pos_end = 250 first_byte_pos_begin + minus_char_offset; 251 TrimLWS(&first_byte_pos_begin, &first_byte_pos_end); 252 std::string first_byte_pos(first_byte_pos_begin, first_byte_pos_end); 253 254 HttpByteRange range; 255 // Try to obtain first-byte-pos. 256 if (!first_byte_pos.empty()) { 257 int64 first_byte_position = -1; 258 if (!base::StringToInt64(first_byte_pos, &first_byte_position)) 259 return false; 260 range.set_first_byte_position(first_byte_position); 261 } 262 263 std::string::const_iterator last_byte_pos_begin = 264 byte_range_set_iterator.value_begin() + minus_char_offset + 1; 265 std::string::const_iterator last_byte_pos_end = 266 byte_range_set_iterator.value_end(); 267 TrimLWS(&last_byte_pos_begin, &last_byte_pos_end); 268 std::string last_byte_pos(last_byte_pos_begin, last_byte_pos_end); 269 270 // We have last-byte-pos or suffix-byte-range-spec in this case. 271 if (!last_byte_pos.empty()) { 272 int64 last_byte_position; 273 if (!base::StringToInt64(last_byte_pos, &last_byte_position)) 274 return false; 275 if (range.HasFirstBytePosition()) 276 range.set_last_byte_position(last_byte_position); 277 else 278 range.set_suffix_length(last_byte_position); 279 } else if (!range.HasFirstBytePosition()) { 280 return false; 281 } 282 283 // Do a final check on the HttpByteRange object. 284 if (!range.IsValid()) 285 return false; 286 ranges->push_back(range); 287 } 288 return !ranges->empty(); 289 } 290 291 // static 292 bool HttpUtil::HasHeader(const std::string& headers, const char* name) { 293 size_t name_len = strlen(name); 294 string::const_iterator it = 295 std::search(headers.begin(), 296 headers.end(), 297 name, 298 name + name_len, 299 base::CaseInsensitiveCompareASCII<char>()); 300 if (it == headers.end()) 301 return false; 302 303 // ensure match is prefixed by newline 304 if (it != headers.begin() && it[-1] != '\n') 305 return false; 306 307 // ensure match is suffixed by colon 308 if (it + name_len >= headers.end() || it[name_len] != ':') 309 return false; 310 311 return true; 312 } 313 314 namespace { 315 // A header string containing any of the following fields will cause 316 // an error. The list comes from the XMLHttpRequest standard. 317 // http://www.w3.org/TR/XMLHttpRequest/#the-setrequestheader-method 318 const char* const kForbiddenHeaderFields[] = { 319 "accept-charset", 320 "accept-encoding", 321 "access-control-request-headers", 322 "access-control-request-method", 323 "connection", 324 "content-length", 325 "cookie", 326 "cookie2", 327 "content-transfer-encoding", 328 "date", 329 "expect", 330 "host", 331 "keep-alive", 332 "origin", 333 "referer", 334 "te", 335 "trailer", 336 "transfer-encoding", 337 "upgrade", 338 "user-agent", 339 "via", 340 }; 341 } // anonymous namespace 342 343 // static 344 bool HttpUtil::IsSafeHeader(const std::string& name) { 345 std::string lower_name(StringToLowerASCII(name)); 346 if (StartsWithASCII(lower_name, "proxy-", true) || 347 StartsWithASCII(lower_name, "sec-", true)) 348 return false; 349 for (size_t i = 0; i < arraysize(kForbiddenHeaderFields); ++i) { 350 if (lower_name == kForbiddenHeaderFields[i]) 351 return false; 352 } 353 return true; 354 } 355 356 // static 357 std::string HttpUtil::StripHeaders(const std::string& headers, 358 const char* const headers_to_remove[], 359 size_t headers_to_remove_len) { 360 std::string stripped_headers; 361 net::HttpUtil::HeadersIterator it(headers.begin(), headers.end(), "\r\n"); 362 363 while (it.GetNext()) { 364 bool should_remove = false; 365 for (size_t i = 0; i < headers_to_remove_len; ++i) { 366 if (LowerCaseEqualsASCII(it.name_begin(), it.name_end(), 367 headers_to_remove[i])) { 368 should_remove = true; 369 break; 370 } 371 } 372 if (!should_remove) { 373 // Assume that name and values are on the same line. 374 stripped_headers.append(it.name_begin(), it.values_end()); 375 stripped_headers.append("\r\n"); 376 } 377 } 378 return stripped_headers; 379 } 380 381 // static 382 bool HttpUtil::IsNonCoalescingHeader(string::const_iterator name_begin, 383 string::const_iterator name_end) { 384 // NOTE: "set-cookie2" headers do not support expires attributes, so we don't 385 // have to list them here. 386 const char* kNonCoalescingHeaders[] = { 387 "date", 388 "expires", 389 "last-modified", 390 "location", // See bug 1050541 for details 391 "retry-after", 392 "set-cookie", 393 // The format of auth-challenges mixes both space separated tokens and 394 // comma separated properties, so coalescing on comma won't work. 395 "www-authenticate", 396 "proxy-authenticate", 397 // STS specifies that UAs must not process any STS headers after the first 398 // one. 399 "strict-transport-security" 400 }; 401 for (size_t i = 0; i < arraysize(kNonCoalescingHeaders); ++i) { 402 if (LowerCaseEqualsASCII(name_begin, name_end, kNonCoalescingHeaders[i])) 403 return true; 404 } 405 return false; 406 } 407 408 bool HttpUtil::IsLWS(char c) { 409 return strchr(HTTP_LWS, c) != NULL; 410 } 411 412 void HttpUtil::TrimLWS(string::const_iterator* begin, 413 string::const_iterator* end) { 414 // leading whitespace 415 while (*begin < *end && IsLWS((*begin)[0])) 416 ++(*begin); 417 418 // trailing whitespace 419 while (*begin < *end && IsLWS((*end)[-1])) 420 --(*end); 421 } 422 423 bool HttpUtil::IsQuote(char c) { 424 // Single quote mark isn't actually part of quoted-text production, 425 // but apparently some servers rely on this. 426 return c == '"' || c == '\''; 427 } 428 429 // See RFC 2616 Sec 2.2 for the definition of |token|. 430 bool HttpUtil::IsToken(string::const_iterator begin, 431 string::const_iterator end) { 432 if (begin == end) 433 return false; 434 for (std::string::const_iterator iter = begin; iter != end; ++iter) { 435 unsigned char c = *iter; 436 if (c >= 0x80 || c <= 0x1F || c == 0x7F || 437 c == '(' || c == ')' || c == '<' || c == '>' || c == '@' || 438 c == ',' || c == ';' || c == ':' || c == '\\' || c == '"' || 439 c == '/' || c == '[' || c == ']' || c == '?' || c == '=' || 440 c == '{' || c == '}' || c == ' ' || c == '\t') 441 return false; 442 } 443 return true; 444 } 445 446 std::string HttpUtil::Unquote(std::string::const_iterator begin, 447 std::string::const_iterator end) { 448 // Empty string 449 if (begin == end) 450 return std::string(); 451 452 // Nothing to unquote. 453 if (!IsQuote(*begin)) 454 return std::string(begin, end); 455 456 // No terminal quote mark. 457 if (end - begin < 2 || *begin != *(end - 1)) 458 return std::string(begin, end); 459 460 // Strip quotemarks 461 ++begin; 462 --end; 463 464 // Unescape quoted-pair (defined in RFC 2616 section 2.2) 465 std::string unescaped; 466 bool prev_escape = false; 467 for (; begin != end; ++begin) { 468 char c = *begin; 469 if (c == '\\' && !prev_escape) { 470 prev_escape = true; 471 continue; 472 } 473 prev_escape = false; 474 unescaped.push_back(c); 475 } 476 return unescaped; 477 } 478 479 // static 480 std::string HttpUtil::Unquote(const std::string& str) { 481 return Unquote(str.begin(), str.end()); 482 } 483 484 // static 485 std::string HttpUtil::Quote(const std::string& str) { 486 std::string escaped; 487 escaped.reserve(2 + str.size()); 488 489 std::string::const_iterator begin = str.begin(); 490 std::string::const_iterator end = str.end(); 491 492 // Esape any backslashes or quotemarks within the string, and 493 // then surround with quotes. 494 escaped.push_back('"'); 495 for (; begin != end; ++begin) { 496 char c = *begin; 497 if (c == '"' || c == '\\') 498 escaped.push_back('\\'); 499 escaped.push_back(c); 500 } 501 escaped.push_back('"'); 502 return escaped; 503 } 504 505 // Find the "http" substring in a status line. This allows for 506 // some slop at the start. If the "http" string could not be found 507 // then returns -1. 508 // static 509 int HttpUtil::LocateStartOfStatusLine(const char* buf, int buf_len) { 510 const int slop = 4; 511 const int http_len = 4; 512 513 if (buf_len >= http_len) { 514 int i_max = std::min(buf_len - http_len, slop); 515 for (int i = 0; i <= i_max; ++i) { 516 if (LowerCaseEqualsASCII(buf + i, buf + i + http_len, "http")) 517 return i; 518 } 519 } 520 return -1; // Not found 521 } 522 523 int HttpUtil::LocateEndOfHeaders(const char* buf, int buf_len, int i) { 524 bool was_lf = false; 525 char last_c = '\0'; 526 for (; i < buf_len; ++i) { 527 char c = buf[i]; 528 if (c == '\n') { 529 if (was_lf) 530 return i + 1; 531 was_lf = true; 532 } else if (c != '\r' || last_c != '\n') { 533 was_lf = false; 534 } 535 last_c = c; 536 } 537 return -1; 538 } 539 540 // In order for a line to be continuable, it must specify a 541 // non-blank header-name. Line continuations are specifically for 542 // header values -- do not allow headers names to span lines. 543 static bool IsLineSegmentContinuable(const char* begin, const char* end) { 544 if (begin == end) 545 return false; 546 547 const char* colon = std::find(begin, end, ':'); 548 if (colon == end) 549 return false; 550 551 const char* name_begin = begin; 552 const char* name_end = colon; 553 554 // Name can't be empty. 555 if (name_begin == name_end) 556 return false; 557 558 // Can't start with LWS (this would imply the segment is a continuation) 559 if (HttpUtil::IsLWS(*name_begin)) 560 return false; 561 562 return true; 563 } 564 565 // Helper used by AssembleRawHeaders, to find the end of the status line. 566 static const char* FindStatusLineEnd(const char* begin, const char* end) { 567 size_t i = base::StringPiece(begin, end - begin).find_first_of("\r\n"); 568 if (i == base::StringPiece::npos) 569 return end; 570 return begin + i; 571 } 572 573 // Helper used by AssembleRawHeaders, to skip past leading LWS. 574 static const char* FindFirstNonLWS(const char* begin, const char* end) { 575 for (const char* cur = begin; cur != end; ++cur) { 576 if (!HttpUtil::IsLWS(*cur)) 577 return cur; 578 } 579 return end; // Not found. 580 } 581 582 std::string HttpUtil::AssembleRawHeaders(const char* input_begin, 583 int input_len) { 584 std::string raw_headers; 585 raw_headers.reserve(input_len); 586 587 const char* input_end = input_begin + input_len; 588 589 // Skip any leading slop, since the consumers of this output 590 // (HttpResponseHeaders) don't deal with it. 591 int status_begin_offset = LocateStartOfStatusLine(input_begin, input_len); 592 if (status_begin_offset != -1) 593 input_begin += status_begin_offset; 594 595 // Copy the status line. 596 const char* status_line_end = FindStatusLineEnd(input_begin, input_end); 597 raw_headers.append(input_begin, status_line_end); 598 599 // After the status line, every subsequent line is a header line segment. 600 // Should a segment start with LWS, it is a continuation of the previous 601 // line's field-value. 602 603 // TODO(ericroman): is this too permissive? (delimits on [\r\n]+) 604 base::CStringTokenizer lines(status_line_end, input_end, "\r\n"); 605 606 // This variable is true when the previous line was continuable. 607 bool prev_line_continuable = false; 608 609 while (lines.GetNext()) { 610 const char* line_begin = lines.token_begin(); 611 const char* line_end = lines.token_end(); 612 613 if (prev_line_continuable && IsLWS(*line_begin)) { 614 // Join continuation; reduce the leading LWS to a single SP. 615 raw_headers.push_back(' '); 616 raw_headers.append(FindFirstNonLWS(line_begin, line_end), line_end); 617 } else { 618 // Terminate the previous line. 619 raw_headers.push_back('\n'); 620 621 // Copy the raw data to output. 622 raw_headers.append(line_begin, line_end); 623 624 // Check if the current line can be continued. 625 prev_line_continuable = IsLineSegmentContinuable(line_begin, line_end); 626 } 627 } 628 629 raw_headers.append("\n\n", 2); 630 631 // Use '\0' as the canonical line terminator. If the input already contained 632 // any embeded '\0' characters we will strip them first to avoid interpreting 633 // them as line breaks. 634 raw_headers.erase(std::remove(raw_headers.begin(), raw_headers.end(), '\0'), 635 raw_headers.end()); 636 std::replace(raw_headers.begin(), raw_headers.end(), '\n', '\0'); 637 638 return raw_headers; 639 } 640 641 std::string HttpUtil::ConvertHeadersBackToHTTPResponse(const std::string& str) { 642 std::string disassembled_headers; 643 base::StringTokenizer tokenizer(str, std::string(1, '\0')); 644 while (tokenizer.GetNext()) { 645 disassembled_headers.append(tokenizer.token_begin(), tokenizer.token_end()); 646 disassembled_headers.append("\r\n"); 647 } 648 disassembled_headers.append("\r\n"); 649 650 return disassembled_headers; 651 } 652 653 // TODO(jungshik): 1. If the list is 'fr-CA,fr-FR,en,de', we have to add 654 // 'fr' after 'fr-CA' with the same q-value as 'fr-CA' because 655 // web servers, in general, do not fall back to 'fr' and may end up picking 656 // 'en' which has a lower preference than 'fr-CA' and 'fr-FR'. 657 // 2. This function assumes that the input is a comma separated list 658 // without any whitespace. As long as it comes from the preference and 659 // a user does not manually edit the preference file, it's the case. Still, 660 // we may have to make it more robust. 661 std::string HttpUtil::GenerateAcceptLanguageHeader( 662 const std::string& raw_language_list) { 663 // We use integers for qvalue and qvalue decrement that are 10 times 664 // larger than actual values to avoid a problem with comparing 665 // two floating point numbers. 666 const unsigned int kQvalueDecrement10 = 2; 667 unsigned int qvalue10 = 10; 668 base::StringTokenizer t(raw_language_list, ","); 669 std::string lang_list_with_q; 670 while (t.GetNext()) { 671 std::string language = t.token(); 672 if (qvalue10 == 10) { 673 // q=1.0 is implicit. 674 lang_list_with_q = language; 675 } else { 676 DCHECK_LT(qvalue10, 10U); 677 base::StringAppendF(&lang_list_with_q, ",%s;q=0.%d", language.c_str(), 678 qvalue10); 679 } 680 // It does not make sense to have 'q=0'. 681 if (qvalue10 > kQvalueDecrement10) 682 qvalue10 -= kQvalueDecrement10; 683 } 684 return lang_list_with_q; 685 } 686 687 void HttpUtil::AppendHeaderIfMissing(const char* header_name, 688 const std::string& header_value, 689 std::string* headers) { 690 if (header_value.empty()) 691 return; 692 if (net::HttpUtil::HasHeader(*headers, header_name)) 693 return; 694 *headers += std::string(header_name) + ": " + header_value + "\r\n"; 695 } 696 697 bool HttpUtil::HasStrongValidators(HttpVersion version, 698 const std::string& etag_header, 699 const std::string& last_modified_header, 700 const std::string& date_header) { 701 if (version < HttpVersion(1, 1)) 702 return false; 703 704 if (!etag_header.empty()) { 705 size_t slash = etag_header.find('/'); 706 if (slash == std::string::npos || slash == 0) 707 return true; 708 709 std::string::const_iterator i = etag_header.begin(); 710 std::string::const_iterator j = etag_header.begin() + slash; 711 TrimLWS(&i, &j); 712 if (!LowerCaseEqualsASCII(i, j, "w")) 713 return true; 714 } 715 716 base::Time last_modified; 717 if (!base::Time::FromString(last_modified_header.c_str(), &last_modified)) 718 return false; 719 720 base::Time date; 721 if (!base::Time::FromString(date_header.c_str(), &date)) 722 return false; 723 724 return ((date - last_modified).InSeconds() >= 60); 725 } 726 727 // Functions for histogram initialization. The code 0 is put in the map to 728 // track status codes that are invalid. 729 // TODO(gavinp): Greatly prune the collected codes once we learn which 730 // ones are not sent in practice, to reduce upload size & memory use. 731 732 enum { 733 HISTOGRAM_MIN_HTTP_STATUS_CODE = 100, 734 HISTOGRAM_MAX_HTTP_STATUS_CODE = 599, 735 }; 736 737 // static 738 std::vector<int> HttpUtil::GetStatusCodesForHistogram() { 739 std::vector<int> codes; 740 codes.reserve( 741 HISTOGRAM_MAX_HTTP_STATUS_CODE - HISTOGRAM_MIN_HTTP_STATUS_CODE + 2); 742 codes.push_back(0); 743 for (int i = HISTOGRAM_MIN_HTTP_STATUS_CODE; 744 i <= HISTOGRAM_MAX_HTTP_STATUS_CODE; ++i) 745 codes.push_back(i); 746 return codes; 747 } 748 749 // static 750 int HttpUtil::MapStatusCodeForHistogram(int code) { 751 if (HISTOGRAM_MIN_HTTP_STATUS_CODE <= code && 752 code <= HISTOGRAM_MAX_HTTP_STATUS_CODE) 753 return code; 754 return 0; 755 } 756 757 // BNF from section 4.2 of RFC 2616: 758 // 759 // message-header = field-name ":" [ field-value ] 760 // field-name = token 761 // field-value = *( field-content | LWS ) 762 // field-content = <the OCTETs making up the field-value 763 // and consisting of either *TEXT or combinations 764 // of token, separators, and quoted-string> 765 // 766 767 HttpUtil::HeadersIterator::HeadersIterator(string::const_iterator headers_begin, 768 string::const_iterator headers_end, 769 const std::string& line_delimiter) 770 : lines_(headers_begin, headers_end, line_delimiter) { 771 } 772 773 HttpUtil::HeadersIterator::~HeadersIterator() { 774 } 775 776 bool HttpUtil::HeadersIterator::GetNext() { 777 while (lines_.GetNext()) { 778 name_begin_ = lines_.token_begin(); 779 values_end_ = lines_.token_end(); 780 781 string::const_iterator colon = std::find(name_begin_, values_end_, ':'); 782 if (colon == values_end_) 783 continue; // skip malformed header 784 785 name_end_ = colon; 786 787 // If the name starts with LWS, it is an invalid line. 788 // Leading LWS implies a line continuation, and these should have 789 // already been joined by AssembleRawHeaders(). 790 if (name_begin_ == name_end_ || IsLWS(*name_begin_)) 791 continue; 792 793 TrimLWS(&name_begin_, &name_end_); 794 if (name_begin_ == name_end_) 795 continue; // skip malformed header 796 797 values_begin_ = colon + 1; 798 TrimLWS(&values_begin_, &values_end_); 799 800 // if we got a header name, then we are done. 801 return true; 802 } 803 return false; 804 } 805 806 bool HttpUtil::HeadersIterator::AdvanceTo(const char* name) { 807 DCHECK(name != NULL); 808 DCHECK_EQ(0, StringToLowerASCII<std::string>(name).compare(name)) 809 << "the header name must be in all lower case"; 810 811 while (GetNext()) { 812 if (LowerCaseEqualsASCII(name_begin_, name_end_, name)) { 813 return true; 814 } 815 } 816 817 return false; 818 } 819 820 HttpUtil::ValuesIterator::ValuesIterator( 821 string::const_iterator values_begin, 822 string::const_iterator values_end, 823 char delimiter) 824 : values_(values_begin, values_end, string(1, delimiter)) { 825 values_.set_quote_chars("\'\""); 826 } 827 828 HttpUtil::ValuesIterator::~ValuesIterator() { 829 } 830 831 bool HttpUtil::ValuesIterator::GetNext() { 832 while (values_.GetNext()) { 833 value_begin_ = values_.token_begin(); 834 value_end_ = values_.token_end(); 835 TrimLWS(&value_begin_, &value_end_); 836 837 // bypass empty values. 838 if (value_begin_ != value_end_) 839 return true; 840 } 841 return false; 842 } 843 844 HttpUtil::NameValuePairsIterator::NameValuePairsIterator( 845 string::const_iterator begin, 846 string::const_iterator end, 847 char delimiter) 848 : props_(begin, end, delimiter), 849 valid_(true), 850 name_begin_(end), 851 name_end_(end), 852 value_begin_(end), 853 value_end_(end), 854 value_is_quoted_(false) { 855 } 856 857 HttpUtil::NameValuePairsIterator::~NameValuePairsIterator() {} 858 859 // We expect properties to be formatted as one of: 860 // name="value" 861 // name='value' 862 // name='\'value\'' 863 // name=value 864 // name = value 865 // name= 866 // Due to buggy implementations found in some embedded devices, we also 867 // accept values with missing close quotemark (http://crbug.com/39836): 868 // name="value 869 bool HttpUtil::NameValuePairsIterator::GetNext() { 870 if (!props_.GetNext()) 871 return false; 872 873 // Set the value as everything. Next we will split out the name. 874 value_begin_ = props_.value_begin(); 875 value_end_ = props_.value_end(); 876 name_begin_ = name_end_ = value_end_; 877 878 // Scan for the equals sign. 879 std::string::const_iterator equals = std::find(value_begin_, value_end_, '='); 880 if (equals == value_end_ || equals == value_begin_) 881 return valid_ = false; // Malformed, no equals sign 882 883 // Verify that the equals sign we found wasn't inside of quote marks. 884 for (std::string::const_iterator it = value_begin_; it != equals; ++it) { 885 if (HttpUtil::IsQuote(*it)) 886 return valid_ = false; // Malformed, quote appears before equals sign 887 } 888 889 name_begin_ = value_begin_; 890 name_end_ = equals; 891 value_begin_ = equals + 1; 892 893 TrimLWS(&name_begin_, &name_end_); 894 TrimLWS(&value_begin_, &value_end_); 895 value_is_quoted_ = false; 896 unquoted_value_.clear(); 897 898 if (value_begin_ == value_end_) 899 return valid_ = false; // Malformed, value is empty 900 901 if (HttpUtil::IsQuote(*value_begin_)) { 902 // Trim surrounding quotemarks off the value 903 if (*value_begin_ != *(value_end_ - 1) || value_begin_ + 1 == value_end_) { 904 // NOTE: This is not as graceful as it sounds: 905 // * quoted-pairs will no longer be unquoted 906 // (["\"hello] should give ["hello]). 907 // * Does not detect when the final quote is escaped 908 // (["value\"] should give [value"]) 909 ++value_begin_; // Gracefully recover from mismatching quotes. 910 } else { 911 value_is_quoted_ = true; 912 // Do not store iterators into this. See declaration of unquoted_value_. 913 unquoted_value_ = HttpUtil::Unquote(value_begin_, value_end_); 914 } 915 } 916 917 return true; 918 } 919 920 } // namespace net 921