1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // The rules for header parsing were borrowed from Firefox: 6 // http://lxr.mozilla.org/seamonkey/source/netwerk/protocol/http/src/nsHttpResponseHead.cpp 7 // The rules for parsing content-types were also borrowed from Firefox: 8 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834 9 10 #include "net/http/http_response_headers.h" 11 12 #include <algorithm> 13 14 #include "base/logging.h" 15 #include "base/pickle.h" 16 #include "base/string_util.h" 17 #include "base/time.h" 18 #include "net/base/escape.h" 19 #include "net/http/http_util.h" 20 21 using base::Time; 22 using base::TimeDelta; 23 24 namespace net { 25 26 //----------------------------------------------------------------------------- 27 28 namespace { 29 30 // These headers are RFC 2616 hop-by-hop headers; 31 // not to be stored by caches. 32 const char* const kHopByHopResponseHeaders[] = { 33 "connection", 34 "proxy-connection", 35 "keep-alive", 36 "trailer", 37 "transfer-encoding", 38 "upgrade" 39 }; 40 41 // These headers are challenge response headers; 42 // not to be stored by caches. 43 const char* const kChallengeResponseHeaders[] = { 44 "www-authenticate", 45 "proxy-authenticate" 46 }; 47 48 // These headers are cookie setting headers; 49 // not to be stored by caches or disclosed otherwise. 50 const char* const kCookieResponseHeaders[] = { 51 "set-cookie", 52 "set-cookie2" 53 }; 54 55 // These response headers are not copied from a 304/206 response to the cached 56 // response headers. This list is based on Mozilla's nsHttpResponseHead.cpp. 57 const char* const kNonUpdatedHeaders[] = { 58 "connection", 59 "proxy-connection", 60 "keep-alive", 61 "www-authenticate", 62 "proxy-authenticate", 63 "trailer", 64 "transfer-encoding", 65 "upgrade", 66 // these should never change: 67 "content-location", 68 "content-md5", 69 "etag", 70 // assume cache-control: no-transform 71 "content-encoding", 72 "content-range", 73 "content-type", 74 // some broken microsoft servers send 'content-length: 0' with 304s 75 "content-length" 76 }; 77 78 bool ShouldUpdateHeader(const std::string::const_iterator& name_begin, 79 const std::string::const_iterator& name_end) { 80 for (size_t i = 0; i < arraysize(kNonUpdatedHeaders); ++i) { 81 if (LowerCaseEqualsASCII(name_begin, name_end, kNonUpdatedHeaders[i])) 82 return false; 83 } 84 return true; 85 } 86 87 } // namespace 88 89 //----------------------------------------------------------------------------- 90 91 HttpResponseHeaders::HttpResponseHeaders(const std::string& raw_input) 92 : response_code_(-1) { 93 Parse(raw_input); 94 } 95 96 HttpResponseHeaders::HttpResponseHeaders(const Pickle& pickle, void** iter) 97 : response_code_(-1) { 98 std::string raw_input; 99 if (pickle.ReadString(iter, &raw_input)) 100 Parse(raw_input); 101 } 102 103 void HttpResponseHeaders::Persist(Pickle* pickle, PersistOptions options) { 104 if (options == PERSIST_RAW) { 105 pickle->WriteString(raw_headers_); 106 return; // Done. 107 } 108 109 HeaderSet filter_headers; 110 111 // Construct set of headers to filter out based on options. 112 if ((options & PERSIST_SANS_NON_CACHEABLE) == PERSIST_SANS_NON_CACHEABLE) 113 AddNonCacheableHeaders(&filter_headers); 114 115 if ((options & PERSIST_SANS_COOKIES) == PERSIST_SANS_COOKIES) 116 AddCookieHeaders(&filter_headers); 117 118 if ((options & PERSIST_SANS_CHALLENGES) == PERSIST_SANS_CHALLENGES) 119 AddChallengeHeaders(&filter_headers); 120 121 if ((options & PERSIST_SANS_HOP_BY_HOP) == PERSIST_SANS_HOP_BY_HOP) 122 AddHopByHopHeaders(&filter_headers); 123 124 if ((options & PERSIST_SANS_RANGES) == PERSIST_SANS_RANGES) 125 AddHopContentRangeHeaders(&filter_headers); 126 127 std::string blob; 128 blob.reserve(raw_headers_.size()); 129 130 // This copies the status line w/ terminator null. 131 // Note raw_headers_ has embedded nulls instead of \n, 132 // so this just copies the first header line. 133 blob.assign(raw_headers_.c_str(), strlen(raw_headers_.c_str()) + 1); 134 135 for (size_t i = 0; i < parsed_.size(); ++i) { 136 DCHECK(!parsed_[i].is_continuation()); 137 138 // Locate the start of the next header. 139 size_t k = i; 140 while (++k < parsed_.size() && parsed_[k].is_continuation()); 141 --k; 142 143 std::string header_name(parsed_[i].name_begin, parsed_[i].name_end); 144 StringToLowerASCII(&header_name); 145 146 if (filter_headers.find(header_name) == filter_headers.end()) { 147 // Make sure there is a null after the value. 148 blob.append(parsed_[i].name_begin, parsed_[k].value_end); 149 blob.push_back('\0'); 150 } 151 152 i = k; 153 } 154 blob.push_back('\0'); 155 156 pickle->WriteString(blob); 157 } 158 159 void HttpResponseHeaders::Update(const HttpResponseHeaders& new_headers) { 160 DCHECK(new_headers.response_code() == 304 || 161 new_headers.response_code() == 206); 162 163 // Copy up to the null byte. This just copies the status line. 164 std::string new_raw_headers(raw_headers_.c_str()); 165 new_raw_headers.push_back('\0'); 166 167 HeaderSet updated_headers; 168 169 // NOTE: we write the new headers then the old headers for convenience. The 170 // order should not matter. 171 172 // Figure out which headers we want to take from new_headers: 173 for (size_t i = 0; i < new_headers.parsed_.size(); ++i) { 174 const HeaderList& new_parsed = new_headers.parsed_; 175 176 DCHECK(!new_parsed[i].is_continuation()); 177 178 // Locate the start of the next header. 179 size_t k = i; 180 while (++k < new_parsed.size() && new_parsed[k].is_continuation()); 181 --k; 182 183 const std::string::const_iterator& name_begin = new_parsed[i].name_begin; 184 const std::string::const_iterator& name_end = new_parsed[i].name_end; 185 if (ShouldUpdateHeader(name_begin, name_end)) { 186 std::string name(name_begin, name_end); 187 StringToLowerASCII(&name); 188 updated_headers.insert(name); 189 190 // Preserve this header line in the merged result, making sure there is 191 // a null after the value. 192 new_raw_headers.append(name_begin, new_parsed[k].value_end); 193 new_raw_headers.push_back('\0'); 194 } 195 196 i = k; 197 } 198 199 // Now, build the new raw headers. 200 MergeWithHeaders(new_raw_headers, updated_headers); 201 } 202 203 void HttpResponseHeaders::MergeWithHeaders(const std::string& raw_headers, 204 const HeaderSet& headers_to_remove) { 205 std::string new_raw_headers(raw_headers); 206 for (size_t i = 0; i < parsed_.size(); ++i) { 207 DCHECK(!parsed_[i].is_continuation()); 208 209 // Locate the start of the next header. 210 size_t k = i; 211 while (++k < parsed_.size() && parsed_[k].is_continuation()); 212 --k; 213 214 std::string name(parsed_[i].name_begin, parsed_[i].name_end); 215 StringToLowerASCII(&name); 216 if (headers_to_remove.find(name) == headers_to_remove.end()) { 217 // It's ok to preserve this header in the final result. 218 new_raw_headers.append(parsed_[i].name_begin, parsed_[k].value_end); 219 new_raw_headers.push_back('\0'); 220 } 221 222 i = k; 223 } 224 new_raw_headers.push_back('\0'); 225 226 // Make this object hold the new data. 227 raw_headers_.clear(); 228 parsed_.clear(); 229 Parse(new_raw_headers); 230 } 231 232 void HttpResponseHeaders::RemoveHeader(const std::string& name) { 233 // Copy up to the null byte. This just copies the status line. 234 std::string new_raw_headers(raw_headers_.c_str()); 235 new_raw_headers.push_back('\0'); 236 237 std::string lowercase_name(name); 238 StringToLowerASCII(&lowercase_name); 239 HeaderSet to_remove; 240 to_remove.insert(lowercase_name); 241 MergeWithHeaders(new_raw_headers, to_remove); 242 } 243 244 void HttpResponseHeaders::AddHeader(const std::string& header) { 245 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 2]); 246 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 1]); 247 // Don't copy the last null. 248 std::string new_raw_headers(raw_headers_, 0, raw_headers_.size() - 1); 249 new_raw_headers.append(header); 250 new_raw_headers.push_back('\0'); 251 new_raw_headers.push_back('\0'); 252 253 // Make this object hold the new data. 254 raw_headers_.clear(); 255 parsed_.clear(); 256 Parse(new_raw_headers); 257 } 258 259 void HttpResponseHeaders::ReplaceStatusLine(const std::string& new_status) { 260 // Copy up to the null byte. This just copies the status line. 261 std::string new_raw_headers(new_status); 262 new_raw_headers.push_back('\0'); 263 264 HeaderSet empty_to_remove; 265 MergeWithHeaders(new_raw_headers, empty_to_remove); 266 } 267 268 void HttpResponseHeaders::Parse(const std::string& raw_input) { 269 raw_headers_.reserve(raw_input.size()); 270 271 // ParseStatusLine adds a normalized status line to raw_headers_ 272 std::string::const_iterator line_begin = raw_input.begin(); 273 std::string::const_iterator line_end = 274 find(line_begin, raw_input.end(), '\0'); 275 // has_headers = true, if there is any data following the status line. 276 // Used by ParseStatusLine() to decide if a HTTP/0.9 is really a HTTP/1.0. 277 bool has_headers = line_end != raw_input.end() && 278 (line_end + 1) != raw_input.end() && *(line_end + 1) != '\0'; 279 ParseStatusLine(line_begin, line_end, has_headers); 280 281 if (line_end == raw_input.end()) { 282 raw_headers_.push_back('\0'); 283 return; 284 } 285 286 // Including a terminating null byte. 287 size_t status_line_len = raw_headers_.size(); 288 289 // Now, we add the rest of the raw headers to raw_headers_, and begin parsing 290 // it (to populate our parsed_ vector). 291 raw_headers_.append(line_end + 1, raw_input.end()); 292 293 // Adjust to point at the null byte following the status line 294 line_end = raw_headers_.begin() + status_line_len - 1; 295 296 HttpUtil::HeadersIterator headers(line_end + 1, raw_headers_.end(), 297 std::string(1, '\0')); 298 while (headers.GetNext()) { 299 AddHeader(headers.name_begin(), 300 headers.name_end(), 301 headers.values_begin(), 302 headers.values_end()); 303 } 304 } 305 306 // Append all of our headers to the final output string. 307 void HttpResponseHeaders::GetNormalizedHeaders(std::string* output) const { 308 // copy up to the null byte. this just copies the status line. 309 output->assign(raw_headers_.c_str()); 310 311 // headers may appear multiple times (not necessarily in succession) in the 312 // header data, so we build a map from header name to generated header lines. 313 // to preserve the order of the original headers, the actual values are kept 314 // in a separate list. finally, the list of headers is flattened to form 315 // the normalized block of headers. 316 // 317 // NOTE: We take special care to preserve the whitespace around any commas 318 // that may occur in the original response headers. Because our consumer may 319 // be a web app, we cannot be certain of the semantics of commas despite the 320 // fact that RFC 2616 says that they should be regarded as value separators. 321 // 322 typedef base::hash_map<std::string, size_t> HeadersMap; 323 HeadersMap headers_map; 324 HeadersMap::iterator iter = headers_map.end(); 325 326 std::vector<std::string> headers; 327 328 for (size_t i = 0; i < parsed_.size(); ++i) { 329 DCHECK(!parsed_[i].is_continuation()); 330 331 std::string name(parsed_[i].name_begin, parsed_[i].name_end); 332 std::string lower_name = StringToLowerASCII(name); 333 334 iter = headers_map.find(lower_name); 335 if (iter == headers_map.end()) { 336 iter = headers_map.insert( 337 HeadersMap::value_type(lower_name, headers.size())).first; 338 headers.push_back(name + ": "); 339 } else { 340 headers[iter->second].append(", "); 341 } 342 343 std::string::const_iterator value_begin = parsed_[i].value_begin; 344 std::string::const_iterator value_end = parsed_[i].value_end; 345 while (++i < parsed_.size() && parsed_[i].is_continuation()) 346 value_end = parsed_[i].value_end; 347 --i; 348 349 headers[iter->second].append(value_begin, value_end); 350 } 351 352 for (size_t i = 0; i < headers.size(); ++i) { 353 output->push_back('\n'); 354 output->append(headers[i]); 355 } 356 357 output->push_back('\n'); 358 } 359 360 bool HttpResponseHeaders::GetNormalizedHeader(const std::string& name, 361 std::string* value) const { 362 // If you hit this assertion, please use EnumerateHeader instead! 363 DCHECK(!HttpUtil::IsNonCoalescingHeader(name)); 364 365 value->clear(); 366 367 bool found = false; 368 size_t i = 0; 369 while (i < parsed_.size()) { 370 i = FindHeader(i, name); 371 if (i == std::string::npos) 372 break; 373 374 found = true; 375 376 if (!value->empty()) 377 value->append(", "); 378 379 std::string::const_iterator value_begin = parsed_[i].value_begin; 380 std::string::const_iterator value_end = parsed_[i].value_end; 381 while (++i < parsed_.size() && parsed_[i].is_continuation()) 382 value_end = parsed_[i].value_end; 383 value->append(value_begin, value_end); 384 } 385 386 return found; 387 } 388 389 std::string HttpResponseHeaders::GetStatusLine() const { 390 // copy up to the null byte. 391 return std::string(raw_headers_.c_str()); 392 } 393 394 std::string HttpResponseHeaders::GetStatusText() const { 395 // GetStatusLine() is already normalized, so it has the format: 396 // <http_version> SP <response_code> SP <status_text> 397 std::string status_text = GetStatusLine(); 398 std::string::const_iterator begin = status_text.begin(); 399 std::string::const_iterator end = status_text.end(); 400 for (int i = 0; i < 2; ++i) 401 begin = find(begin, end, ' ') + 1; 402 return std::string(begin, end); 403 } 404 405 bool HttpResponseHeaders::EnumerateHeaderLines(void** iter, 406 std::string* name, 407 std::string* value) const { 408 size_t i = reinterpret_cast<size_t>(*iter); 409 if (i == parsed_.size()) 410 return false; 411 412 DCHECK(!parsed_[i].is_continuation()); 413 414 name->assign(parsed_[i].name_begin, parsed_[i].name_end); 415 416 std::string::const_iterator value_begin = parsed_[i].value_begin; 417 std::string::const_iterator value_end = parsed_[i].value_end; 418 while (++i < parsed_.size() && parsed_[i].is_continuation()) 419 value_end = parsed_[i].value_end; 420 421 value->assign(value_begin, value_end); 422 423 *iter = reinterpret_cast<void*>(i); 424 return true; 425 } 426 427 bool HttpResponseHeaders::EnumerateHeader(void** iter, const std::string& name, 428 std::string* value) const { 429 size_t i; 430 if (!iter || !*iter) { 431 i = FindHeader(0, name); 432 } else { 433 i = reinterpret_cast<size_t>(*iter); 434 if (i >= parsed_.size()) { 435 i = std::string::npos; 436 } else if (!parsed_[i].is_continuation()) { 437 i = FindHeader(i, name); 438 } 439 } 440 441 if (i == std::string::npos) { 442 value->clear(); 443 return false; 444 } 445 446 if (iter) 447 *iter = reinterpret_cast<void*>(i + 1); 448 value->assign(parsed_[i].value_begin, parsed_[i].value_end); 449 return true; 450 } 451 452 bool HttpResponseHeaders::HasHeaderValue(const std::string& name, 453 const std::string& value) const { 454 // The value has to be an exact match. This is important since 455 // 'cache-control: no-cache' != 'cache-control: no-cache="foo"' 456 void* iter = NULL; 457 std::string temp; 458 while (EnumerateHeader(&iter, name, &temp)) { 459 if (value.size() == temp.size() && 460 std::equal(temp.begin(), temp.end(), value.begin(), 461 CaseInsensitiveCompare<char>())) 462 return true; 463 } 464 return false; 465 } 466 467 // Note: this implementation implicitly assumes that line_end points at a valid 468 // sentinel character (such as '\0'). 469 // static 470 HttpVersion HttpResponseHeaders::ParseVersion( 471 std::string::const_iterator line_begin, 472 std::string::const_iterator line_end) { 473 std::string::const_iterator p = line_begin; 474 475 // RFC2616 sec 3.1: HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT 476 // TODO: (1*DIGIT apparently means one or more digits, but we only handle 1). 477 // TODO: handle leading zeros, which is allowed by the rfc1616 sec 3.1. 478 479 if ((line_end - p < 4) || !LowerCaseEqualsASCII(p, p + 4, "http")) { 480 DLOG(INFO) << "missing status line"; 481 return HttpVersion(); 482 } 483 484 p += 4; 485 486 if (p >= line_end || *p != '/') { 487 DLOG(INFO) << "missing version"; 488 return HttpVersion(); 489 } 490 491 std::string::const_iterator dot = find(p, line_end, '.'); 492 if (dot == line_end) { 493 DLOG(INFO) << "malformed version"; 494 return HttpVersion(); 495 } 496 497 ++p; // from / to first digit. 498 ++dot; // from . to second digit. 499 500 if (!(*p >= '0' && *p <= '9' && *dot >= '0' && *dot <= '9')) { 501 DLOG(INFO) << "malformed version number"; 502 return HttpVersion(); 503 } 504 505 uint16 major = *p - '0'; 506 uint16 minor = *dot - '0'; 507 508 return HttpVersion(major, minor); 509 } 510 511 // Note: this implementation implicitly assumes that line_end points at a valid 512 // sentinel character (such as '\0'). 513 void HttpResponseHeaders::ParseStatusLine( 514 std::string::const_iterator line_begin, 515 std::string::const_iterator line_end, 516 bool has_headers) { 517 // Extract the version number 518 parsed_http_version_ = ParseVersion(line_begin, line_end); 519 520 // Clamp the version number to one of: {0.9, 1.0, 1.1} 521 if (parsed_http_version_ == HttpVersion(0, 9) && !has_headers) { 522 http_version_ = HttpVersion(0, 9); 523 raw_headers_ = "HTTP/0.9"; 524 } else if (parsed_http_version_ >= HttpVersion(1, 1)) { 525 http_version_ = HttpVersion(1, 1); 526 raw_headers_ = "HTTP/1.1"; 527 } else { 528 // Treat everything else like HTTP 1.0 529 http_version_ = HttpVersion(1, 0); 530 raw_headers_ = "HTTP/1.0"; 531 } 532 if (parsed_http_version_ != http_version_) { 533 DLOG(INFO) << "assuming HTTP/" << http_version_.major_value() << "." 534 << http_version_.minor_value(); 535 } 536 537 // TODO(eroman): this doesn't make sense if ParseVersion failed. 538 std::string::const_iterator p = find(line_begin, line_end, ' '); 539 540 if (p == line_end) { 541 DLOG(INFO) << "missing response status; assuming 200 OK"; 542 raw_headers_.append(" 200 OK"); 543 raw_headers_.push_back('\0'); 544 response_code_ = 200; 545 return; 546 } 547 548 // Skip whitespace. 549 while (*p == ' ') 550 ++p; 551 552 std::string::const_iterator code = p; 553 while (*p >= '0' && *p <= '9') 554 ++p; 555 556 if (p == code) { 557 DLOG(INFO) << "missing response status number; assuming 200"; 558 raw_headers_.append(" 200 OK"); 559 response_code_ = 200; 560 return; 561 } 562 raw_headers_.push_back(' '); 563 raw_headers_.append(code, p); 564 raw_headers_.push_back(' '); 565 response_code_ = static_cast<int>(StringToInt64(std::string(code, p))); 566 567 // Skip whitespace. 568 while (*p == ' ') 569 ++p; 570 571 // Trim trailing whitespace. 572 while (line_end > p && line_end[-1] == ' ') 573 --line_end; 574 575 if (p == line_end) { 576 DLOG(INFO) << "missing response status text; assuming OK"; 577 // Not super critical what we put here. Just use "OK" 578 // even if it isn't descriptive of response_code_. 579 raw_headers_.append("OK"); 580 } else { 581 raw_headers_.append(p, line_end); 582 } 583 584 raw_headers_.push_back('\0'); 585 } 586 587 size_t HttpResponseHeaders::FindHeader(size_t from, 588 const std::string& search) const { 589 for (size_t i = from; i < parsed_.size(); ++i) { 590 if (parsed_[i].is_continuation()) 591 continue; 592 const std::string::const_iterator& name_begin = parsed_[i].name_begin; 593 const std::string::const_iterator& name_end = parsed_[i].name_end; 594 if (static_cast<size_t>(name_end - name_begin) == search.size() && 595 std::equal(name_begin, name_end, search.begin(), 596 CaseInsensitiveCompare<char>())) 597 return i; 598 } 599 600 return std::string::npos; 601 } 602 603 void HttpResponseHeaders::AddHeader(std::string::const_iterator name_begin, 604 std::string::const_iterator name_end, 605 std::string::const_iterator values_begin, 606 std::string::const_iterator values_end) { 607 // If the header can be coalesced, then we should split it up. 608 if (values_begin == values_end || 609 HttpUtil::IsNonCoalescingHeader(name_begin, name_end)) { 610 AddToParsed(name_begin, name_end, values_begin, values_end); 611 } else { 612 HttpUtil::ValuesIterator it(values_begin, values_end, ','); 613 while (it.GetNext()) { 614 AddToParsed(name_begin, name_end, it.value_begin(), it.value_end()); 615 // clobber these so that subsequent values are treated as continuations 616 name_begin = name_end = raw_headers_.end(); 617 } 618 } 619 } 620 621 void HttpResponseHeaders::AddToParsed(std::string::const_iterator name_begin, 622 std::string::const_iterator name_end, 623 std::string::const_iterator value_begin, 624 std::string::const_iterator value_end) { 625 ParsedHeader header; 626 header.name_begin = name_begin; 627 header.name_end = name_end; 628 header.value_begin = value_begin; 629 header.value_end = value_end; 630 parsed_.push_back(header); 631 } 632 633 void HttpResponseHeaders::AddNonCacheableHeaders(HeaderSet* result) const { 634 // Add server specified transients. Any 'cache-control: no-cache="foo,bar"' 635 // headers present in the response specify additional headers that we should 636 // not store in the cache. 637 const std::string kCacheControl = "cache-control"; 638 const std::string kPrefix = "no-cache=\""; 639 std::string value; 640 void* iter = NULL; 641 while (EnumerateHeader(&iter, kCacheControl, &value)) { 642 if (value.size() > kPrefix.size() && 643 value.compare(0, kPrefix.size(), kPrefix) == 0) { 644 // if it doesn't end with a quote, then treat as malformed 645 if (value[value.size()-1] != '\"') 646 continue; 647 648 // trim off leading and trailing bits 649 size_t len = value.size() - kPrefix.size() - 1; 650 TrimString(value.substr(kPrefix.size(), len), HTTP_LWS, &value); 651 652 size_t begin_pos = 0; 653 for (;;) { 654 // find the end of this header name 655 size_t comma_pos = value.find(',', begin_pos); 656 if (comma_pos == std::string::npos) 657 comma_pos = value.size(); 658 size_t end = comma_pos; 659 while (end > begin_pos && strchr(HTTP_LWS, value[end - 1])) 660 end--; 661 662 // assuming the header is not emtpy, lowercase and insert into set 663 if (end > begin_pos) { 664 std::string name = value.substr(begin_pos, end - begin_pos); 665 StringToLowerASCII(&name); 666 result->insert(name); 667 } 668 669 // repeat 670 begin_pos = comma_pos + 1; 671 while (begin_pos < value.size() && strchr(HTTP_LWS, value[begin_pos])) 672 begin_pos++; 673 if (begin_pos >= value.size()) 674 break; 675 } 676 } 677 } 678 } 679 680 void HttpResponseHeaders::AddHopByHopHeaders(HeaderSet* result) { 681 for (size_t i = 0; i < arraysize(kHopByHopResponseHeaders); ++i) 682 result->insert(std::string(kHopByHopResponseHeaders[i])); 683 } 684 685 void HttpResponseHeaders::AddCookieHeaders(HeaderSet* result) { 686 for (size_t i = 0; i < arraysize(kCookieResponseHeaders); ++i) 687 result->insert(std::string(kCookieResponseHeaders[i])); 688 } 689 690 void HttpResponseHeaders::AddChallengeHeaders(HeaderSet* result) { 691 for (size_t i = 0; i < arraysize(kChallengeResponseHeaders); ++i) 692 result->insert(std::string(kChallengeResponseHeaders[i])); 693 } 694 695 void HttpResponseHeaders::AddHopContentRangeHeaders(HeaderSet* result) { 696 result->insert("content-range"); 697 } 698 699 void HttpResponseHeaders::GetMimeTypeAndCharset(std::string* mime_type, 700 std::string* charset) const { 701 mime_type->clear(); 702 charset->clear(); 703 704 std::string name = "content-type"; 705 std::string value; 706 707 bool had_charset = false; 708 709 void* iter = NULL; 710 while (EnumerateHeader(&iter, name, &value)) 711 HttpUtil::ParseContentType(value, mime_type, charset, &had_charset); 712 } 713 714 bool HttpResponseHeaders::GetMimeType(std::string* mime_type) const { 715 std::string unused; 716 GetMimeTypeAndCharset(mime_type, &unused); 717 return !mime_type->empty(); 718 } 719 720 bool HttpResponseHeaders::GetCharset(std::string* charset) const { 721 std::string unused; 722 GetMimeTypeAndCharset(&unused, charset); 723 return !charset->empty(); 724 } 725 726 bool HttpResponseHeaders::IsRedirect(std::string* location) const { 727 if (!IsRedirectResponseCode(response_code_)) 728 return false; 729 730 // If we lack a Location header, then we can't treat this as a redirect. 731 // We assume that the first non-empty location value is the target URL that 732 // we want to follow. TODO(darin): Is this consistent with other browsers? 733 size_t i = std::string::npos; 734 do { 735 i = FindHeader(++i, "location"); 736 if (i == std::string::npos) 737 return false; 738 // If the location value is empty, then it doesn't count. 739 } while (parsed_[i].value_begin == parsed_[i].value_end); 740 741 if (location) { 742 // Escape any non-ASCII characters to preserve them. The server should 743 // only be returning ASCII here, but for compat we need to do this. 744 *location = EscapeNonASCII( 745 std::string(parsed_[i].value_begin, parsed_[i].value_end)); 746 } 747 748 return true; 749 } 750 751 // static 752 bool HttpResponseHeaders::IsRedirectResponseCode(int response_code) { 753 // Users probably want to see 300 (multiple choice) pages, so we don't count 754 // them as redirects that need to be followed. 755 return (response_code == 301 || 756 response_code == 302 || 757 response_code == 303 || 758 response_code == 307); 759 } 760 761 // From RFC 2616 section 13.2.4: 762 // 763 // The calculation to determine if a response has expired is quite simple: 764 // 765 // response_is_fresh = (freshness_lifetime > current_age) 766 // 767 // Of course, there are other factors that can force a response to always be 768 // validated or re-fetched. 769 // 770 bool HttpResponseHeaders::RequiresValidation(const Time& request_time, 771 const Time& response_time, 772 const Time& current_time) const { 773 TimeDelta lifetime = 774 GetFreshnessLifetime(response_time); 775 if (lifetime == TimeDelta()) 776 return true; 777 778 return lifetime <= GetCurrentAge(request_time, response_time, current_time); 779 } 780 781 // From RFC 2616 section 13.2.4: 782 // 783 // The max-age directive takes priority over Expires, so if max-age is present 784 // in a response, the calculation is simply: 785 // 786 // freshness_lifetime = max_age_value 787 // 788 // Otherwise, if Expires is present in the response, the calculation is: 789 // 790 // freshness_lifetime = expires_value - date_value 791 // 792 // Note that neither of these calculations is vulnerable to clock skew, since 793 // all of the information comes from the origin server. 794 // 795 // Also, if the response does have a Last-Modified time, the heuristic 796 // expiration value SHOULD be no more than some fraction of the interval since 797 // that time. A typical setting of this fraction might be 10%: 798 // 799 // freshness_lifetime = (date_value - last_modified_value) * 0.10 800 // 801 TimeDelta HttpResponseHeaders::GetFreshnessLifetime( 802 const Time& response_time) const { 803 // Check for headers that force a response to never be fresh. For backwards 804 // compat, we treat "Pragma: no-cache" as a synonym for "Cache-Control: 805 // no-cache" even though RFC 2616 does not specify it. 806 if (HasHeaderValue("cache-control", "no-cache") || 807 HasHeaderValue("cache-control", "no-store") || 808 HasHeaderValue("pragma", "no-cache") || 809 HasHeaderValue("vary", "*")) // see RFC 2616 section 13.6 810 return TimeDelta(); // not fresh 811 812 // NOTE: "Cache-Control: max-age" overrides Expires, so we only check the 813 // Expires header after checking for max-age in GetFreshnessLifetime. This 814 // is important since "Expires: <date in the past>" means not fresh, but 815 // it should not trump a max-age value. 816 817 TimeDelta max_age_value; 818 if (GetMaxAgeValue(&max_age_value)) 819 return max_age_value; 820 821 // If there is no Date header, then assume that the server response was 822 // generated at the time when we received the response. 823 Time date_value; 824 if (!GetDateValue(&date_value)) 825 date_value = response_time; 826 827 Time expires_value; 828 if (GetExpiresValue(&expires_value)) { 829 // The expires value can be a date in the past! 830 if (expires_value > date_value) 831 return expires_value - date_value; 832 833 return TimeDelta(); // not fresh 834 } 835 836 // From RFC 2616 section 13.4: 837 // 838 // A response received with a status code of 200, 203, 206, 300, 301 or 410 839 // MAY be stored by a cache and used in reply to a subsequent request, 840 // subject to the expiration mechanism, unless a cache-control directive 841 // prohibits caching. 842 // ... 843 // A response received with any other status code (e.g. status codes 302 844 // and 307) MUST NOT be returned in a reply to a subsequent request unless 845 // there are cache-control directives or another header(s) that explicitly 846 // allow it. 847 // 848 // From RFC 2616 section 14.9.4: 849 // 850 // When the must-revalidate directive is present in a response received by 851 // a cache, that cache MUST NOT use the entry after it becomes stale to 852 // respond to a subsequent request without first revalidating it with the 853 // origin server. (I.e., the cache MUST do an end-to-end revalidation every 854 // time, if, based solely on the origin server's Expires or max-age value, 855 // the cached response is stale.) 856 // 857 if ((response_code_ == 200 || response_code_ == 203 || 858 response_code_ == 206) && 859 !HasHeaderValue("cache-control", "must-revalidate")) { 860 // TODO(darin): Implement a smarter heuristic. 861 Time last_modified_value; 862 if (GetLastModifiedValue(&last_modified_value)) { 863 // The last-modified value can be a date in the past! 864 if (last_modified_value <= date_value) 865 return (date_value - last_modified_value) / 10; 866 } 867 } 868 869 // These responses are implicitly fresh (unless otherwise overruled): 870 if (response_code_ == 300 || response_code_ == 301 || response_code_ == 410) 871 return TimeDelta::FromMicroseconds(kint64max); 872 873 return TimeDelta(); // not fresh 874 } 875 876 // From RFC 2616 section 13.2.3: 877 // 878 // Summary of age calculation algorithm, when a cache receives a response: 879 // 880 // /* 881 // * age_value 882 // * is the value of Age: header received by the cache with 883 // * this response. 884 // * date_value 885 // * is the value of the origin server's Date: header 886 // * request_time 887 // * is the (local) time when the cache made the request 888 // * that resulted in this cached response 889 // * response_time 890 // * is the (local) time when the cache received the 891 // * response 892 // * now 893 // * is the current (local) time 894 // */ 895 // apparent_age = max(0, response_time - date_value); 896 // corrected_received_age = max(apparent_age, age_value); 897 // response_delay = response_time - request_time; 898 // corrected_initial_age = corrected_received_age + response_delay; 899 // resident_time = now - response_time; 900 // current_age = corrected_initial_age + resident_time; 901 // 902 TimeDelta HttpResponseHeaders::GetCurrentAge(const Time& request_time, 903 const Time& response_time, 904 const Time& current_time) const { 905 // If there is no Date header, then assume that the server response was 906 // generated at the time when we received the response. 907 Time date_value; 908 if (!GetDateValue(&date_value)) 909 date_value = response_time; 910 911 // If there is no Age header, then assume age is zero. GetAgeValue does not 912 // modify its out param if the value does not exist. 913 TimeDelta age_value; 914 GetAgeValue(&age_value); 915 916 TimeDelta apparent_age = std::max(TimeDelta(), response_time - date_value); 917 TimeDelta corrected_received_age = std::max(apparent_age, age_value); 918 TimeDelta response_delay = response_time - request_time; 919 TimeDelta corrected_initial_age = corrected_received_age + response_delay; 920 TimeDelta resident_time = current_time - response_time; 921 TimeDelta current_age = corrected_initial_age + resident_time; 922 923 return current_age; 924 } 925 926 bool HttpResponseHeaders::GetMaxAgeValue(TimeDelta* result) const { 927 std::string name = "cache-control"; 928 std::string value; 929 930 const char kMaxAgePrefix[] = "max-age="; 931 const size_t kMaxAgePrefixLen = arraysize(kMaxAgePrefix) - 1; 932 933 void* iter = NULL; 934 while (EnumerateHeader(&iter, name, &value)) { 935 if (value.size() > kMaxAgePrefixLen) { 936 if (LowerCaseEqualsASCII(value.begin(), 937 value.begin() + kMaxAgePrefixLen, 938 kMaxAgePrefix)) { 939 *result = TimeDelta::FromSeconds( 940 StringToInt64(value.substr(kMaxAgePrefixLen))); 941 return true; 942 } 943 } 944 } 945 946 return false; 947 } 948 949 bool HttpResponseHeaders::GetAgeValue(TimeDelta* result) const { 950 std::string value; 951 if (!EnumerateHeader(NULL, "Age", &value)) 952 return false; 953 954 *result = TimeDelta::FromSeconds(StringToInt64(value)); 955 return true; 956 } 957 958 bool HttpResponseHeaders::GetDateValue(Time* result) const { 959 return GetTimeValuedHeader("Date", result); 960 } 961 962 bool HttpResponseHeaders::GetLastModifiedValue(Time* result) const { 963 return GetTimeValuedHeader("Last-Modified", result); 964 } 965 966 bool HttpResponseHeaders::GetExpiresValue(Time* result) const { 967 return GetTimeValuedHeader("Expires", result); 968 } 969 970 bool HttpResponseHeaders::GetTimeValuedHeader(const std::string& name, 971 Time* result) const { 972 std::string value; 973 if (!EnumerateHeader(NULL, name, &value)) 974 return false; 975 976 std::wstring value_wide(value.begin(), value.end()); // inflate ascii 977 return Time::FromString(value_wide.c_str(), result); 978 } 979 980 bool HttpResponseHeaders::IsKeepAlive() const { 981 if (http_version_ < HttpVersion(1, 0)) 982 return false; 983 984 // NOTE: It is perhaps risky to assume that a Proxy-Connection header is 985 // meaningful when we don't know that this response was from a proxy, but 986 // Mozilla also does this, so we'll do the same. 987 std::string connection_val; 988 if (!EnumerateHeader(NULL, "connection", &connection_val)) 989 EnumerateHeader(NULL, "proxy-connection", &connection_val); 990 991 bool keep_alive; 992 993 if (http_version_ == HttpVersion(1, 0)) { 994 // HTTP/1.0 responses default to NOT keep-alive 995 keep_alive = LowerCaseEqualsASCII(connection_val, "keep-alive"); 996 } else { 997 // HTTP/1.1 responses default to keep-alive 998 keep_alive = !LowerCaseEqualsASCII(connection_val, "close"); 999 } 1000 1001 return keep_alive; 1002 } 1003 1004 bool HttpResponseHeaders::HasStrongValidators() const { 1005 std::string etag_value; 1006 EnumerateHeader(NULL, "etag", &etag_value); 1007 if (!etag_value.empty()) { 1008 size_t slash = etag_value.find('/'); 1009 if (slash == std::string::npos || slash == 0) 1010 return true; 1011 1012 std::string::const_iterator i = etag_value.begin(); 1013 std::string::const_iterator j = etag_value.begin() + slash; 1014 HttpUtil::TrimLWS(&i, &j); 1015 if (!LowerCaseEqualsASCII(i, j, "w")) 1016 return true; 1017 } 1018 1019 Time last_modified; 1020 if (!GetLastModifiedValue(&last_modified)) 1021 return false; 1022 1023 Time date; 1024 if (!GetDateValue(&date)) 1025 return false; 1026 1027 return ((date - last_modified).InSeconds() >= 60); 1028 } 1029 1030 // From RFC 2616: 1031 // Content-Length = "Content-Length" ":" 1*DIGIT 1032 int64 HttpResponseHeaders::GetContentLength() const { 1033 void* iter = NULL; 1034 std::string content_length_val; 1035 if (!EnumerateHeader(&iter, "content-length", &content_length_val)) 1036 return -1; 1037 1038 if (content_length_val.empty()) 1039 return -1; 1040 1041 if (content_length_val[0] == '+') 1042 return -1; 1043 1044 int64 result; 1045 bool ok = StringToInt64(content_length_val, &result); 1046 if (!ok || result < 0) 1047 return -1; 1048 1049 return result; 1050 } 1051 1052 // From RFC 2616 14.16: 1053 // content-range-spec = 1054 // bytes-unit SP byte-range-resp-spec "/" ( instance-length | "*" ) 1055 // byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) | "*" 1056 // instance-length = 1*DIGIT 1057 // bytes-unit = "bytes" 1058 bool HttpResponseHeaders::GetContentRange(int64* first_byte_position, 1059 int64* last_byte_position, 1060 int64* instance_length) const { 1061 void* iter = NULL; 1062 std::string content_range_spec; 1063 *first_byte_position = *last_byte_position = *instance_length = -1; 1064 if (!EnumerateHeader(&iter, "content-range", &content_range_spec)) 1065 return false; 1066 1067 // If the header value is empty, we have an invalid header. 1068 if (content_range_spec.empty()) 1069 return false; 1070 1071 size_t space_position = content_range_spec.find(' '); 1072 if (space_position == std::string::npos) 1073 return false; 1074 1075 // Invalid header if it doesn't contain "bytes-unit". 1076 std::string::const_iterator content_range_spec_begin = 1077 content_range_spec.begin(); 1078 std::string::const_iterator content_range_spec_end = 1079 content_range_spec.begin() + space_position; 1080 HttpUtil::TrimLWS(&content_range_spec_begin, &content_range_spec_end); 1081 if (!LowerCaseEqualsASCII(content_range_spec_begin, 1082 content_range_spec_end, 1083 "bytes")) { 1084 return false; 1085 } 1086 1087 size_t slash_position = content_range_spec.find('/', space_position + 1); 1088 if (slash_position == std::string::npos) 1089 return false; 1090 1091 // Obtain the part behind the space and before slash. 1092 std::string::const_iterator byte_range_resp_spec_begin = 1093 content_range_spec.begin() + space_position + 1; 1094 std::string::const_iterator byte_range_resp_spec_end = 1095 content_range_spec.begin() + slash_position; 1096 HttpUtil::TrimLWS(&byte_range_resp_spec_begin, &byte_range_resp_spec_end); 1097 1098 // Parse the byte-range-resp-spec part. 1099 std::string byte_range_resp_spec(byte_range_resp_spec_begin, 1100 byte_range_resp_spec_end); 1101 // If byte-range-resp-spec != "*". 1102 if (!LowerCaseEqualsASCII(byte_range_resp_spec, "*")) { 1103 size_t minus_position = byte_range_resp_spec.find('-'); 1104 if (minus_position != std::string::npos) { 1105 // Obtain first-byte-pos. 1106 std::string::const_iterator first_byte_pos_begin = 1107 byte_range_resp_spec.begin(); 1108 std::string::const_iterator first_byte_pos_end = 1109 byte_range_resp_spec.begin() + minus_position; 1110 HttpUtil::TrimLWS(&first_byte_pos_begin, &first_byte_pos_end); 1111 1112 bool ok = StringToInt64( 1113 std::string(first_byte_pos_begin, first_byte_pos_end), 1114 first_byte_position); 1115 1116 // Obtain last-byte-pos. 1117 std::string::const_iterator last_byte_pos_begin = 1118 byte_range_resp_spec.begin() + minus_position + 1; 1119 std::string::const_iterator last_byte_pos_end = 1120 byte_range_resp_spec.end(); 1121 HttpUtil::TrimLWS(&last_byte_pos_begin, &last_byte_pos_end); 1122 1123 ok &= StringToInt64( 1124 std::string(last_byte_pos_begin, last_byte_pos_end), 1125 last_byte_position); 1126 if (!ok) { 1127 *first_byte_position = *last_byte_position = -1; 1128 return false; 1129 } 1130 if (*first_byte_position < 0 || *last_byte_position < 0 || 1131 *first_byte_position > *last_byte_position) 1132 return false; 1133 } else { 1134 return false; 1135 } 1136 } 1137 1138 // Parse the instance-length part. 1139 // If instance-length == "*". 1140 std::string::const_iterator instance_length_begin = 1141 content_range_spec.begin() + slash_position + 1; 1142 std::string::const_iterator instance_length_end = 1143 content_range_spec.end(); 1144 HttpUtil::TrimLWS(&instance_length_begin, &instance_length_end); 1145 1146 if (LowerCaseEqualsASCII(instance_length_begin, instance_length_end, "*")) { 1147 return false; 1148 } else if (!StringToInt64( 1149 std::string(instance_length_begin, instance_length_end), 1150 instance_length)) { 1151 *instance_length = -1; 1152 return false; 1153 } 1154 1155 // We have all the values; let's verify that they make sense for a 206 1156 // response. 1157 if (*first_byte_position < 0 || *last_byte_position < 0 || 1158 *instance_length < 0 || *instance_length - 1 < *last_byte_position) 1159 return false; 1160 1161 return true; 1162 } 1163 1164 } // namespace net 1165