// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// The rules for header parsing were borrowed from Firefox:
// http://lxr.mozilla.org/seamonkey/source/netwerk/protocol/http/src/nsHttpResponseHead.cpp
// The rules for parsing content-types were also borrowed from Firefox:
// http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834

#include "net/http/http_response_headers.h"

#include <algorithm>

#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/pickle.h"
#include "base/string_number_conversions.h"
#include "base/string_util.h"
#include "base/time.h"
#include "net/base/escape.h"
#include "net/http/http_util.h"

using base::Time;
using base::TimeDelta;

namespace net {

//-----------------------------------------------------------------------------

namespace {

// NOTE: the header names in the tables below are spelled in lowercase.  The
// code in this file lowercases a header name before looking it up in a set
// built from these tables, so new entries must be lowercase as well.

// These headers are RFC 2616 hop-by-hop headers;
// not to be stored by caches.
const char* const kHopByHopResponseHeaders[] = {
  "connection",
  "proxy-connection",
  "keep-alive",
  "trailer",
  "transfer-encoding",
  "upgrade"
};

// These headers are challenge response headers;
// not to be stored by caches.
const char* const kChallengeResponseHeaders[] = {
  "www-authenticate",
  "proxy-authenticate"
};

// These headers are cookie setting headers;
// not to be stored by caches or disclosed otherwise.
const char* const kCookieResponseHeaders[] = {
  "set-cookie",
  "set-cookie2"
};

// These response headers are not copied from a 304/206 response to the cached
// response headers.  This list is based on Mozilla's nsHttpResponseHead.cpp.
59 const char* const kNonUpdatedHeaders[] = { 60 "connection", 61 "proxy-connection", 62 "keep-alive", 63 "www-authenticate", 64 "proxy-authenticate", 65 "trailer", 66 "transfer-encoding", 67 "upgrade", 68 // these should never change: 69 "content-location", 70 "content-md5", 71 "etag", 72 // assume cache-control: no-transform 73 "content-encoding", 74 "content-range", 75 "content-type", 76 // some broken microsoft servers send 'content-length: 0' with 304s 77 "content-length" 78 }; 79 80 bool ShouldUpdateHeader(const std::string::const_iterator& name_begin, 81 const std::string::const_iterator& name_end) { 82 for (size_t i = 0; i < arraysize(kNonUpdatedHeaders); ++i) { 83 if (LowerCaseEqualsASCII(name_begin, name_end, kNonUpdatedHeaders[i])) 84 return false; 85 } 86 return true; 87 } 88 89 // Functions for histogram initialization. The code 0 is put in the 90 // response map to track response codes that are invalid. 91 // TODO(gavinp): Greatly prune the collected codes once we learn which 92 // ones are not sent in practice, to reduce upload size & memory use. 93 94 enum { 95 HISTOGRAM_MIN_HTTP_RESPONSE_CODE = 100, 96 HISTOGRAM_MAX_HTTP_RESPONSE_CODE = 599, 97 }; 98 99 std::vector<int> GetAllHttpResponseCodes() { 100 std::vector<int> codes; 101 codes.reserve( 102 HISTOGRAM_MAX_HTTP_RESPONSE_CODE - HISTOGRAM_MIN_HTTP_RESPONSE_CODE + 2); 103 codes.push_back(0); 104 for (int i = HISTOGRAM_MIN_HTTP_RESPONSE_CODE; 105 i <= HISTOGRAM_MAX_HTTP_RESPONSE_CODE; ++i) 106 codes.push_back(i); 107 return codes; 108 } 109 110 int MapHttpResponseCode(int code) { 111 if (HISTOGRAM_MIN_HTTP_RESPONSE_CODE <= code && 112 code <= HISTOGRAM_MAX_HTTP_RESPONSE_CODE) 113 return code; 114 return 0; 115 } 116 117 } // namespace 118 119 struct HttpResponseHeaders::ParsedHeader { 120 // A header "continuation" contains only a subsequent value for the 121 // preceding header. (Header values are comma separated.) 
122 bool is_continuation() const { return name_begin == name_end; } 123 124 std::string::const_iterator name_begin; 125 std::string::const_iterator name_end; 126 std::string::const_iterator value_begin; 127 std::string::const_iterator value_end; 128 }; 129 130 //----------------------------------------------------------------------------- 131 132 HttpResponseHeaders::HttpResponseHeaders(const std::string& raw_input) 133 : response_code_(-1) { 134 Parse(raw_input); 135 136 // The most important thing to do with this histogram is find out 137 // the existence of unusual HTTP response codes. As it happens 138 // right now, there aren't double-constructions of response headers 139 // using this constructor, so our counts should also be accurate, 140 // without instantiating the histogram in two places. It is also 141 // important that this histogram not collect data in the other 142 // constructor, which rebuilds an histogram from a pickle, since 143 // that would actually create a double call between the original 144 // HttpResponseHeader that was serialized, and initialization of the 145 // new object from that pickle. 146 UMA_HISTOGRAM_CUSTOM_ENUMERATION("Net.HttpResponseCode", 147 MapHttpResponseCode(response_code_), 148 // Note the third argument is only 149 // evaluated once, see macro 150 // definition for details. 151 GetAllHttpResponseCodes()); 152 } 153 154 HttpResponseHeaders::HttpResponseHeaders(const Pickle& pickle, void** iter) 155 : response_code_(-1) { 156 std::string raw_input; 157 if (pickle.ReadString(iter, &raw_input)) 158 Parse(raw_input); 159 } 160 161 void HttpResponseHeaders::Persist(Pickle* pickle, PersistOptions options) { 162 if (options == PERSIST_RAW) { 163 pickle->WriteString(raw_headers_); 164 return; // Done. 165 } 166 167 HeaderSet filter_headers; 168 169 // Construct set of headers to filter out based on options. 
170 if ((options & PERSIST_SANS_NON_CACHEABLE) == PERSIST_SANS_NON_CACHEABLE) 171 AddNonCacheableHeaders(&filter_headers); 172 173 if ((options & PERSIST_SANS_COOKIES) == PERSIST_SANS_COOKIES) 174 AddCookieHeaders(&filter_headers); 175 176 if ((options & PERSIST_SANS_CHALLENGES) == PERSIST_SANS_CHALLENGES) 177 AddChallengeHeaders(&filter_headers); 178 179 if ((options & PERSIST_SANS_HOP_BY_HOP) == PERSIST_SANS_HOP_BY_HOP) 180 AddHopByHopHeaders(&filter_headers); 181 182 if ((options & PERSIST_SANS_RANGES) == PERSIST_SANS_RANGES) 183 AddHopContentRangeHeaders(&filter_headers); 184 185 std::string blob; 186 blob.reserve(raw_headers_.size()); 187 188 // This copies the status line w/ terminator null. 189 // Note raw_headers_ has embedded nulls instead of \n, 190 // so this just copies the first header line. 191 blob.assign(raw_headers_.c_str(), strlen(raw_headers_.c_str()) + 1); 192 193 for (size_t i = 0; i < parsed_.size(); ++i) { 194 DCHECK(!parsed_[i].is_continuation()); 195 196 // Locate the start of the next header. 197 size_t k = i; 198 while (++k < parsed_.size() && parsed_[k].is_continuation()) {} 199 --k; 200 201 std::string header_name(parsed_[i].name_begin, parsed_[i].name_end); 202 StringToLowerASCII(&header_name); 203 204 if (filter_headers.find(header_name) == filter_headers.end()) { 205 // Make sure there is a null after the value. 206 blob.append(parsed_[i].name_begin, parsed_[k].value_end); 207 blob.push_back('\0'); 208 } 209 210 i = k; 211 } 212 blob.push_back('\0'); 213 214 pickle->WriteString(blob); 215 } 216 217 void HttpResponseHeaders::Update(const HttpResponseHeaders& new_headers) { 218 DCHECK(new_headers.response_code() == 304 || 219 new_headers.response_code() == 206); 220 221 // Copy up to the null byte. This just copies the status line. 222 std::string new_raw_headers(raw_headers_.c_str()); 223 new_raw_headers.push_back('\0'); 224 225 HeaderSet updated_headers; 226 227 // NOTE: we write the new headers then the old headers for convenience. 
The 228 // order should not matter. 229 230 // Figure out which headers we want to take from new_headers: 231 for (size_t i = 0; i < new_headers.parsed_.size(); ++i) { 232 const HeaderList& new_parsed = new_headers.parsed_; 233 234 DCHECK(!new_parsed[i].is_continuation()); 235 236 // Locate the start of the next header. 237 size_t k = i; 238 while (++k < new_parsed.size() && new_parsed[k].is_continuation()) {} 239 --k; 240 241 const std::string::const_iterator& name_begin = new_parsed[i].name_begin; 242 const std::string::const_iterator& name_end = new_parsed[i].name_end; 243 if (ShouldUpdateHeader(name_begin, name_end)) { 244 std::string name(name_begin, name_end); 245 StringToLowerASCII(&name); 246 updated_headers.insert(name); 247 248 // Preserve this header line in the merged result, making sure there is 249 // a null after the value. 250 new_raw_headers.append(name_begin, new_parsed[k].value_end); 251 new_raw_headers.push_back('\0'); 252 } 253 254 i = k; 255 } 256 257 // Now, build the new raw headers. 258 MergeWithHeaders(new_raw_headers, updated_headers); 259 } 260 261 void HttpResponseHeaders::MergeWithHeaders(const std::string& raw_headers, 262 const HeaderSet& headers_to_remove) { 263 std::string new_raw_headers(raw_headers); 264 for (size_t i = 0; i < parsed_.size(); ++i) { 265 DCHECK(!parsed_[i].is_continuation()); 266 267 // Locate the start of the next header. 268 size_t k = i; 269 while (++k < parsed_.size() && parsed_[k].is_continuation()) {} 270 --k; 271 272 std::string name(parsed_[i].name_begin, parsed_[i].name_end); 273 StringToLowerASCII(&name); 274 if (headers_to_remove.find(name) == headers_to_remove.end()) { 275 // It's ok to preserve this header in the final result. 276 new_raw_headers.append(parsed_[i].name_begin, parsed_[k].value_end); 277 new_raw_headers.push_back('\0'); 278 } 279 280 i = k; 281 } 282 new_raw_headers.push_back('\0'); 283 284 // Make this object hold the new data. 
285 raw_headers_.clear(); 286 parsed_.clear(); 287 Parse(new_raw_headers); 288 } 289 290 void HttpResponseHeaders::RemoveHeader(const std::string& name) { 291 // Copy up to the null byte. This just copies the status line. 292 std::string new_raw_headers(raw_headers_.c_str()); 293 new_raw_headers.push_back('\0'); 294 295 std::string lowercase_name(name); 296 StringToLowerASCII(&lowercase_name); 297 HeaderSet to_remove; 298 to_remove.insert(lowercase_name); 299 MergeWithHeaders(new_raw_headers, to_remove); 300 } 301 302 void HttpResponseHeaders::AddHeader(const std::string& header) { 303 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 2]); 304 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 1]); 305 // Don't copy the last null. 306 std::string new_raw_headers(raw_headers_, 0, raw_headers_.size() - 1); 307 new_raw_headers.append(header); 308 new_raw_headers.push_back('\0'); 309 new_raw_headers.push_back('\0'); 310 311 // Make this object hold the new data. 312 raw_headers_.clear(); 313 parsed_.clear(); 314 Parse(new_raw_headers); 315 } 316 317 void HttpResponseHeaders::ReplaceStatusLine(const std::string& new_status) { 318 // Copy up to the null byte. This just copies the status line. 319 std::string new_raw_headers(new_status); 320 new_raw_headers.push_back('\0'); 321 322 HeaderSet empty_to_remove; 323 MergeWithHeaders(new_raw_headers, empty_to_remove); 324 } 325 326 void HttpResponseHeaders::Parse(const std::string& raw_input) { 327 raw_headers_.reserve(raw_input.size()); 328 329 // ParseStatusLine adds a normalized status line to raw_headers_ 330 std::string::const_iterator line_begin = raw_input.begin(); 331 std::string::const_iterator line_end = 332 find(line_begin, raw_input.end(), '\0'); 333 // has_headers = true, if there is any data following the status line. 334 // Used by ParseStatusLine() to decide if a HTTP/0.9 is really a HTTP/1.0. 
335 bool has_headers = (line_end != raw_input.end() && 336 (line_end + 1) != raw_input.end() && 337 *(line_end + 1) != '\0'); 338 ParseStatusLine(line_begin, line_end, has_headers); 339 340 if (line_end == raw_input.end()) { 341 raw_headers_.push_back('\0'); 342 return; 343 } 344 345 // Including a terminating null byte. 346 size_t status_line_len = raw_headers_.size(); 347 348 // Now, we add the rest of the raw headers to raw_headers_, and begin parsing 349 // it (to populate our parsed_ vector). 350 raw_headers_.append(line_end + 1, raw_input.end()); 351 352 // Adjust to point at the null byte following the status line 353 line_end = raw_headers_.begin() + status_line_len - 1; 354 355 HttpUtil::HeadersIterator headers(line_end + 1, raw_headers_.end(), 356 std::string(1, '\0')); 357 while (headers.GetNext()) { 358 AddHeader(headers.name_begin(), 359 headers.name_end(), 360 headers.values_begin(), 361 headers.values_end()); 362 } 363 } 364 365 // Append all of our headers to the final output string. 366 void HttpResponseHeaders::GetNormalizedHeaders(std::string* output) const { 367 // copy up to the null byte. this just copies the status line. 368 output->assign(raw_headers_.c_str()); 369 370 // headers may appear multiple times (not necessarily in succession) in the 371 // header data, so we build a map from header name to generated header lines. 372 // to preserve the order of the original headers, the actual values are kept 373 // in a separate list. finally, the list of headers is flattened to form 374 // the normalized block of headers. 375 // 376 // NOTE: We take special care to preserve the whitespace around any commas 377 // that may occur in the original response headers. Because our consumer may 378 // be a web app, we cannot be certain of the semantics of commas despite the 379 // fact that RFC 2616 says that they should be regarded as value separators. 
380 // 381 typedef base::hash_map<std::string, size_t> HeadersMap; 382 HeadersMap headers_map; 383 HeadersMap::iterator iter = headers_map.end(); 384 385 std::vector<std::string> headers; 386 387 for (size_t i = 0; i < parsed_.size(); ++i) { 388 DCHECK(!parsed_[i].is_continuation()); 389 390 std::string name(parsed_[i].name_begin, parsed_[i].name_end); 391 std::string lower_name = StringToLowerASCII(name); 392 393 iter = headers_map.find(lower_name); 394 if (iter == headers_map.end()) { 395 iter = headers_map.insert( 396 HeadersMap::value_type(lower_name, headers.size())).first; 397 headers.push_back(name + ": "); 398 } else { 399 headers[iter->second].append(", "); 400 } 401 402 std::string::const_iterator value_begin = parsed_[i].value_begin; 403 std::string::const_iterator value_end = parsed_[i].value_end; 404 while (++i < parsed_.size() && parsed_[i].is_continuation()) 405 value_end = parsed_[i].value_end; 406 --i; 407 408 headers[iter->second].append(value_begin, value_end); 409 } 410 411 for (size_t i = 0; i < headers.size(); ++i) { 412 output->push_back('\n'); 413 output->append(headers[i]); 414 } 415 416 output->push_back('\n'); 417 } 418 419 bool HttpResponseHeaders::GetNormalizedHeader(const std::string& name, 420 std::string* value) const { 421 // If you hit this assertion, please use EnumerateHeader instead! 
422 DCHECK(!HttpUtil::IsNonCoalescingHeader(name)); 423 424 value->clear(); 425 426 bool found = false; 427 size_t i = 0; 428 while (i < parsed_.size()) { 429 i = FindHeader(i, name); 430 if (i == std::string::npos) 431 break; 432 433 found = true; 434 435 if (!value->empty()) 436 value->append(", "); 437 438 std::string::const_iterator value_begin = parsed_[i].value_begin; 439 std::string::const_iterator value_end = parsed_[i].value_end; 440 while (++i < parsed_.size() && parsed_[i].is_continuation()) 441 value_end = parsed_[i].value_end; 442 value->append(value_begin, value_end); 443 } 444 445 return found; 446 } 447 448 std::string HttpResponseHeaders::GetStatusLine() const { 449 // copy up to the null byte. 450 return std::string(raw_headers_.c_str()); 451 } 452 453 std::string HttpResponseHeaders::GetStatusText() const { 454 // GetStatusLine() is already normalized, so it has the format: 455 // <http_version> SP <response_code> SP <status_text> 456 std::string status_text = GetStatusLine(); 457 std::string::const_iterator begin = status_text.begin(); 458 std::string::const_iterator end = status_text.end(); 459 for (int i = 0; i < 2; ++i) 460 begin = find(begin, end, ' ') + 1; 461 return std::string(begin, end); 462 } 463 464 bool HttpResponseHeaders::EnumerateHeaderLines(void** iter, 465 std::string* name, 466 std::string* value) const { 467 size_t i = reinterpret_cast<size_t>(*iter); 468 if (i == parsed_.size()) 469 return false; 470 471 DCHECK(!parsed_[i].is_continuation()); 472 473 name->assign(parsed_[i].name_begin, parsed_[i].name_end); 474 475 std::string::const_iterator value_begin = parsed_[i].value_begin; 476 std::string::const_iterator value_end = parsed_[i].value_end; 477 while (++i < parsed_.size() && parsed_[i].is_continuation()) 478 value_end = parsed_[i].value_end; 479 480 value->assign(value_begin, value_end); 481 482 *iter = reinterpret_cast<void*>(i); 483 return true; 484 } 485 486 bool HttpResponseHeaders::EnumerateHeader(void** iter, const 
std::string& name, 487 std::string* value) const { 488 size_t i; 489 if (!iter || !*iter) { 490 i = FindHeader(0, name); 491 } else { 492 i = reinterpret_cast<size_t>(*iter); 493 if (i >= parsed_.size()) { 494 i = std::string::npos; 495 } else if (!parsed_[i].is_continuation()) { 496 i = FindHeader(i, name); 497 } 498 } 499 500 if (i == std::string::npos) { 501 value->clear(); 502 return false; 503 } 504 505 if (iter) 506 *iter = reinterpret_cast<void*>(i + 1); 507 value->assign(parsed_[i].value_begin, parsed_[i].value_end); 508 return true; 509 } 510 511 bool HttpResponseHeaders::HasHeaderValue(const std::string& name, 512 const std::string& value) const { 513 // The value has to be an exact match. This is important since 514 // 'cache-control: no-cache' != 'cache-control: no-cache="foo"' 515 void* iter = NULL; 516 std::string temp; 517 while (EnumerateHeader(&iter, name, &temp)) { 518 if (value.size() == temp.size() && 519 std::equal(temp.begin(), temp.end(), value.begin(), 520 base::CaseInsensitiveCompare<char>())) 521 return true; 522 } 523 return false; 524 } 525 526 bool HttpResponseHeaders::HasHeader(const std::string& name) const { 527 return FindHeader(0, name) != std::string::npos; 528 } 529 530 HttpResponseHeaders::HttpResponseHeaders() : response_code_(-1) { 531 } 532 533 HttpResponseHeaders::~HttpResponseHeaders() { 534 } 535 536 // Note: this implementation implicitly assumes that line_end points at a valid 537 // sentinel character (such as '\0'). 538 // static 539 HttpVersion HttpResponseHeaders::ParseVersion( 540 std::string::const_iterator line_begin, 541 std::string::const_iterator line_end) { 542 std::string::const_iterator p = line_begin; 543 544 // RFC2616 sec 3.1: HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT 545 // TODO: (1*DIGIT apparently means one or more digits, but we only handle 1). 546 // TODO: handle leading zeros, which is allowed by the rfc1616 sec 3.1. 
547 548 if ((line_end - p < 4) || !LowerCaseEqualsASCII(p, p + 4, "http")) { 549 DVLOG(1) << "missing status line"; 550 return HttpVersion(); 551 } 552 553 p += 4; 554 555 if (p >= line_end || *p != '/') { 556 DVLOG(1) << "missing version"; 557 return HttpVersion(); 558 } 559 560 std::string::const_iterator dot = find(p, line_end, '.'); 561 if (dot == line_end) { 562 DVLOG(1) << "malformed version"; 563 return HttpVersion(); 564 } 565 566 ++p; // from / to first digit. 567 ++dot; // from . to second digit. 568 569 if (!(*p >= '0' && *p <= '9' && *dot >= '0' && *dot <= '9')) { 570 DVLOG(1) << "malformed version number"; 571 return HttpVersion(); 572 } 573 574 uint16 major = *p - '0'; 575 uint16 minor = *dot - '0'; 576 577 return HttpVersion(major, minor); 578 } 579 580 // Note: this implementation implicitly assumes that line_end points at a valid 581 // sentinel character (such as '\0'). 582 void HttpResponseHeaders::ParseStatusLine( 583 std::string::const_iterator line_begin, 584 std::string::const_iterator line_end, 585 bool has_headers) { 586 // Extract the version number 587 parsed_http_version_ = ParseVersion(line_begin, line_end); 588 589 // Clamp the version number to one of: {0.9, 1.0, 1.1} 590 if (parsed_http_version_ == HttpVersion(0, 9) && !has_headers) { 591 http_version_ = HttpVersion(0, 9); 592 raw_headers_ = "HTTP/0.9"; 593 } else if (parsed_http_version_ >= HttpVersion(1, 1)) { 594 http_version_ = HttpVersion(1, 1); 595 raw_headers_ = "HTTP/1.1"; 596 } else { 597 // Treat everything else like HTTP 1.0 598 http_version_ = HttpVersion(1, 0); 599 raw_headers_ = "HTTP/1.0"; 600 } 601 if (parsed_http_version_ != http_version_) { 602 DVLOG(1) << "assuming HTTP/" << http_version_.major_value() << "." 603 << http_version_.minor_value(); 604 } 605 606 // TODO(eroman): this doesn't make sense if ParseVersion failed. 
607 std::string::const_iterator p = find(line_begin, line_end, ' '); 608 609 if (p == line_end) { 610 DVLOG(1) << "missing response status; assuming 200 OK"; 611 raw_headers_.append(" 200 OK"); 612 raw_headers_.push_back('\0'); 613 response_code_ = 200; 614 return; 615 } 616 617 // Skip whitespace. 618 while (*p == ' ') 619 ++p; 620 621 std::string::const_iterator code = p; 622 while (*p >= '0' && *p <= '9') 623 ++p; 624 625 if (p == code) { 626 DVLOG(1) << "missing response status number; assuming 200"; 627 raw_headers_.append(" 200 OK"); 628 response_code_ = 200; 629 return; 630 } 631 raw_headers_.push_back(' '); 632 raw_headers_.append(code, p); 633 raw_headers_.push_back(' '); 634 base::StringToInt(code, p, &response_code_); 635 636 // Skip whitespace. 637 while (*p == ' ') 638 ++p; 639 640 // Trim trailing whitespace. 641 while (line_end > p && line_end[-1] == ' ') 642 --line_end; 643 644 if (p == line_end) { 645 DVLOG(1) << "missing response status text; assuming OK"; 646 // Not super critical what we put here. Just use "OK" 647 // even if it isn't descriptive of response_code_. 
648 raw_headers_.append("OK"); 649 } else { 650 raw_headers_.append(p, line_end); 651 } 652 653 raw_headers_.push_back('\0'); 654 } 655 656 size_t HttpResponseHeaders::FindHeader(size_t from, 657 const std::string& search) const { 658 for (size_t i = from; i < parsed_.size(); ++i) { 659 if (parsed_[i].is_continuation()) 660 continue; 661 const std::string::const_iterator& name_begin = parsed_[i].name_begin; 662 const std::string::const_iterator& name_end = parsed_[i].name_end; 663 if (static_cast<size_t>(name_end - name_begin) == search.size() && 664 std::equal(name_begin, name_end, search.begin(), 665 base::CaseInsensitiveCompare<char>())) 666 return i; 667 } 668 669 return std::string::npos; 670 } 671 672 void HttpResponseHeaders::AddHeader(std::string::const_iterator name_begin, 673 std::string::const_iterator name_end, 674 std::string::const_iterator values_begin, 675 std::string::const_iterator values_end) { 676 // If the header can be coalesced, then we should split it up. 677 if (values_begin == values_end || 678 HttpUtil::IsNonCoalescingHeader(name_begin, name_end)) { 679 AddToParsed(name_begin, name_end, values_begin, values_end); 680 } else { 681 HttpUtil::ValuesIterator it(values_begin, values_end, ','); 682 while (it.GetNext()) { 683 AddToParsed(name_begin, name_end, it.value_begin(), it.value_end()); 684 // clobber these so that subsequent values are treated as continuations 685 name_begin = name_end = raw_headers_.end(); 686 } 687 } 688 } 689 690 void HttpResponseHeaders::AddToParsed(std::string::const_iterator name_begin, 691 std::string::const_iterator name_end, 692 std::string::const_iterator value_begin, 693 std::string::const_iterator value_end) { 694 ParsedHeader header; 695 header.name_begin = name_begin; 696 header.name_end = name_end; 697 header.value_begin = value_begin; 698 header.value_end = value_end; 699 parsed_.push_back(header); 700 } 701 702 void HttpResponseHeaders::AddNonCacheableHeaders(HeaderSet* result) const { 703 // Add server 
specified transients. Any 'cache-control: no-cache="foo,bar"' 704 // headers present in the response specify additional headers that we should 705 // not store in the cache. 706 const std::string kCacheControl = "cache-control"; 707 const std::string kPrefix = "no-cache=\""; 708 std::string value; 709 void* iter = NULL; 710 while (EnumerateHeader(&iter, kCacheControl, &value)) { 711 if (value.size() > kPrefix.size() && 712 value.compare(0, kPrefix.size(), kPrefix) == 0) { 713 // if it doesn't end with a quote, then treat as malformed 714 if (value[value.size()-1] != '\"') 715 continue; 716 717 // trim off leading and trailing bits 718 size_t len = value.size() - kPrefix.size() - 1; 719 TrimString(value.substr(kPrefix.size(), len), HTTP_LWS, &value); 720 721 size_t begin_pos = 0; 722 for (;;) { 723 // find the end of this header name 724 size_t comma_pos = value.find(',', begin_pos); 725 if (comma_pos == std::string::npos) 726 comma_pos = value.size(); 727 size_t end = comma_pos; 728 while (end > begin_pos && strchr(HTTP_LWS, value[end - 1])) 729 end--; 730 731 // assuming the header is not emtpy, lowercase and insert into set 732 if (end > begin_pos) { 733 std::string name = value.substr(begin_pos, end - begin_pos); 734 StringToLowerASCII(&name); 735 result->insert(name); 736 } 737 738 // repeat 739 begin_pos = comma_pos + 1; 740 while (begin_pos < value.size() && strchr(HTTP_LWS, value[begin_pos])) 741 begin_pos++; 742 if (begin_pos >= value.size()) 743 break; 744 } 745 } 746 } 747 } 748 749 void HttpResponseHeaders::AddHopByHopHeaders(HeaderSet* result) { 750 for (size_t i = 0; i < arraysize(kHopByHopResponseHeaders); ++i) 751 result->insert(std::string(kHopByHopResponseHeaders[i])); 752 } 753 754 void HttpResponseHeaders::AddCookieHeaders(HeaderSet* result) { 755 for (size_t i = 0; i < arraysize(kCookieResponseHeaders); ++i) 756 result->insert(std::string(kCookieResponseHeaders[i])); 757 } 758 759 void HttpResponseHeaders::AddChallengeHeaders(HeaderSet* result) 
{ 760 for (size_t i = 0; i < arraysize(kChallengeResponseHeaders); ++i) 761 result->insert(std::string(kChallengeResponseHeaders[i])); 762 } 763 764 void HttpResponseHeaders::AddHopContentRangeHeaders(HeaderSet* result) { 765 result->insert("content-range"); 766 } 767 768 void HttpResponseHeaders::GetMimeTypeAndCharset(std::string* mime_type, 769 std::string* charset) const { 770 mime_type->clear(); 771 charset->clear(); 772 773 std::string name = "content-type"; 774 std::string value; 775 776 bool had_charset = false; 777 778 void* iter = NULL; 779 while (EnumerateHeader(&iter, name, &value)) 780 HttpUtil::ParseContentType(value, mime_type, charset, &had_charset); 781 } 782 783 bool HttpResponseHeaders::GetMimeType(std::string* mime_type) const { 784 std::string unused; 785 GetMimeTypeAndCharset(mime_type, &unused); 786 return !mime_type->empty(); 787 } 788 789 bool HttpResponseHeaders::GetCharset(std::string* charset) const { 790 std::string unused; 791 GetMimeTypeAndCharset(&unused, charset); 792 return !charset->empty(); 793 } 794 795 bool HttpResponseHeaders::IsRedirect(std::string* location) const { 796 if (!IsRedirectResponseCode(response_code_)) 797 return false; 798 799 // If we lack a Location header, then we can't treat this as a redirect. 800 // We assume that the first non-empty location value is the target URL that 801 // we want to follow. TODO(darin): Is this consistent with other browsers? 802 size_t i = std::string::npos; 803 do { 804 i = FindHeader(++i, "location"); 805 if (i == std::string::npos) 806 return false; 807 // If the location value is empty, then it doesn't count. 808 } while (parsed_[i].value_begin == parsed_[i].value_end); 809 810 if (location) { 811 // Escape any non-ASCII characters to preserve them. The server should 812 // only be returning ASCII here, but for compat we need to do this. 
813 *location = EscapeNonASCII( 814 std::string(parsed_[i].value_begin, parsed_[i].value_end)); 815 } 816 817 return true; 818 } 819 820 // static 821 bool HttpResponseHeaders::IsRedirectResponseCode(int response_code) { 822 // Users probably want to see 300 (multiple choice) pages, so we don't count 823 // them as redirects that need to be followed. 824 return (response_code == 301 || 825 response_code == 302 || 826 response_code == 303 || 827 response_code == 307); 828 } 829 830 // From RFC 2616 section 13.2.4: 831 // 832 // The calculation to determine if a response has expired is quite simple: 833 // 834 // response_is_fresh = (freshness_lifetime > current_age) 835 // 836 // Of course, there are other factors that can force a response to always be 837 // validated or re-fetched. 838 // 839 bool HttpResponseHeaders::RequiresValidation(const Time& request_time, 840 const Time& response_time, 841 const Time& current_time) const { 842 TimeDelta lifetime = 843 GetFreshnessLifetime(response_time); 844 if (lifetime == TimeDelta()) 845 return true; 846 847 return lifetime <= GetCurrentAge(request_time, response_time, current_time); 848 } 849 850 // From RFC 2616 section 13.2.4: 851 // 852 // The max-age directive takes priority over Expires, so if max-age is present 853 // in a response, the calculation is simply: 854 // 855 // freshness_lifetime = max_age_value 856 // 857 // Otherwise, if Expires is present in the response, the calculation is: 858 // 859 // freshness_lifetime = expires_value - date_value 860 // 861 // Note that neither of these calculations is vulnerable to clock skew, since 862 // all of the information comes from the origin server. 863 // 864 // Also, if the response does have a Last-Modified time, the heuristic 865 // expiration value SHOULD be no more than some fraction of the interval since 866 // that time. 
A typical setting of this fraction might be 10%: 867 // 868 // freshness_lifetime = (date_value - last_modified_value) * 0.10 869 // 870 TimeDelta HttpResponseHeaders::GetFreshnessLifetime( 871 const Time& response_time) const { 872 // Check for headers that force a response to never be fresh. For backwards 873 // compat, we treat "Pragma: no-cache" as a synonym for "Cache-Control: 874 // no-cache" even though RFC 2616 does not specify it. 875 if (HasHeaderValue("cache-control", "no-cache") || 876 HasHeaderValue("cache-control", "no-store") || 877 HasHeaderValue("pragma", "no-cache") || 878 HasHeaderValue("vary", "*")) // see RFC 2616 section 13.6 879 return TimeDelta(); // not fresh 880 881 // NOTE: "Cache-Control: max-age" overrides Expires, so we only check the 882 // Expires header after checking for max-age in GetFreshnessLifetime. This 883 // is important since "Expires: <date in the past>" means not fresh, but 884 // it should not trump a max-age value. 885 886 TimeDelta max_age_value; 887 if (GetMaxAgeValue(&max_age_value)) 888 return max_age_value; 889 890 // If there is no Date header, then assume that the server response was 891 // generated at the time when we received the response. 892 Time date_value; 893 if (!GetDateValue(&date_value)) 894 date_value = response_time; 895 896 Time expires_value; 897 if (GetExpiresValue(&expires_value)) { 898 // The expires value can be a date in the past! 899 if (expires_value > date_value) 900 return expires_value - date_value; 901 902 return TimeDelta(); // not fresh 903 } 904 905 // From RFC 2616 section 13.4: 906 // 907 // A response received with a status code of 200, 203, 206, 300, 301 or 410 908 // MAY be stored by a cache and used in reply to a subsequent request, 909 // subject to the expiration mechanism, unless a cache-control directive 910 // prohibits caching. 911 // ... 912 // A response received with any other status code (e.g. 
status codes 302 913 // and 307) MUST NOT be returned in a reply to a subsequent request unless 914 // there are cache-control directives or another header(s) that explicitly 915 // allow it. 916 // 917 // From RFC 2616 section 14.9.4: 918 // 919 // When the must-revalidate directive is present in a response received by 920 // a cache, that cache MUST NOT use the entry after it becomes stale to 921 // respond to a subsequent request without first revalidating it with the 922 // origin server. (I.e., the cache MUST do an end-to-end revalidation every 923 // time, if, based solely on the origin server's Expires or max-age value, 924 // the cached response is stale.) 925 // 926 if ((response_code_ == 200 || response_code_ == 203 || 927 response_code_ == 206) && 928 !HasHeaderValue("cache-control", "must-revalidate")) { 929 // TODO(darin): Implement a smarter heuristic. 930 Time last_modified_value; 931 if (GetLastModifiedValue(&last_modified_value)) { 932 // The last-modified value can be a date in the past! 933 if (last_modified_value <= date_value) 934 return (date_value - last_modified_value) / 10; 935 } 936 } 937 938 // These responses are implicitly fresh (unless otherwise overruled): 939 if (response_code_ == 300 || response_code_ == 301 || response_code_ == 410) 940 return TimeDelta::FromMicroseconds(kint64max); 941 942 return TimeDelta(); // not fresh 943 } 944 945 // From RFC 2616 section 13.2.3: 946 // 947 // Summary of age calculation algorithm, when a cache receives a response: 948 // 949 // /* 950 // * age_value 951 // * is the value of Age: header received by the cache with 952 // * this response. 
953 // * date_value 954 // * is the value of the origin server's Date: header 955 // * request_time 956 // * is the (local) time when the cache made the request 957 // * that resulted in this cached response 958 // * response_time 959 // * is the (local) time when the cache received the 960 // * response 961 // * now 962 // * is the current (local) time 963 // */ 964 // apparent_age = max(0, response_time - date_value); 965 // corrected_received_age = max(apparent_age, age_value); 966 // response_delay = response_time - request_time; 967 // corrected_initial_age = corrected_received_age + response_delay; 968 // resident_time = now - response_time; 969 // current_age = corrected_initial_age + resident_time; 970 // 971 TimeDelta HttpResponseHeaders::GetCurrentAge(const Time& request_time, 972 const Time& response_time, 973 const Time& current_time) const { 974 // If there is no Date header, then assume that the server response was 975 // generated at the time when we received the response. 976 Time date_value; 977 if (!GetDateValue(&date_value)) 978 date_value = response_time; 979 980 // If there is no Age header, then assume age is zero. GetAgeValue does not 981 // modify its out param if the value does not exist. 
982 TimeDelta age_value; 983 GetAgeValue(&age_value); 984 985 TimeDelta apparent_age = std::max(TimeDelta(), response_time - date_value); 986 TimeDelta corrected_received_age = std::max(apparent_age, age_value); 987 TimeDelta response_delay = response_time - request_time; 988 TimeDelta corrected_initial_age = corrected_received_age + response_delay; 989 TimeDelta resident_time = current_time - response_time; 990 TimeDelta current_age = corrected_initial_age + resident_time; 991 992 return current_age; 993 } 994 995 bool HttpResponseHeaders::GetMaxAgeValue(TimeDelta* result) const { 996 std::string name = "cache-control"; 997 std::string value; 998 999 const char kMaxAgePrefix[] = "max-age="; 1000 const size_t kMaxAgePrefixLen = arraysize(kMaxAgePrefix) - 1; 1001 1002 void* iter = NULL; 1003 while (EnumerateHeader(&iter, name, &value)) { 1004 if (value.size() > kMaxAgePrefixLen) { 1005 if (LowerCaseEqualsASCII(value.begin(), 1006 value.begin() + kMaxAgePrefixLen, 1007 kMaxAgePrefix)) { 1008 int64 seconds; 1009 base::StringToInt64(value.begin() + kMaxAgePrefixLen, 1010 value.end(), 1011 &seconds); 1012 *result = TimeDelta::FromSeconds(seconds); 1013 return true; 1014 } 1015 } 1016 } 1017 1018 return false; 1019 } 1020 1021 bool HttpResponseHeaders::GetAgeValue(TimeDelta* result) const { 1022 std::string value; 1023 if (!EnumerateHeader(NULL, "Age", &value)) 1024 return false; 1025 1026 int64 seconds; 1027 base::StringToInt64(value, &seconds); 1028 *result = TimeDelta::FromSeconds(seconds); 1029 return true; 1030 } 1031 1032 bool HttpResponseHeaders::GetDateValue(Time* result) const { 1033 return GetTimeValuedHeader("Date", result); 1034 } 1035 1036 bool HttpResponseHeaders::GetLastModifiedValue(Time* result) const { 1037 return GetTimeValuedHeader("Last-Modified", result); 1038 } 1039 1040 bool HttpResponseHeaders::GetExpiresValue(Time* result) const { 1041 return GetTimeValuedHeader("Expires", result); 1042 } 1043 1044 bool 
HttpResponseHeaders::GetTimeValuedHeader(const std::string& name, 1045 Time* result) const { 1046 std::string value; 1047 if (!EnumerateHeader(NULL, name, &value)) 1048 return false; 1049 1050 std::wstring value_wide(value.begin(), value.end()); // inflate ascii 1051 return Time::FromString(value_wide.c_str(), result); 1052 } 1053 1054 bool HttpResponseHeaders::IsKeepAlive() const { 1055 if (http_version_ < HttpVersion(1, 0)) 1056 return false; 1057 1058 // NOTE: It is perhaps risky to assume that a Proxy-Connection header is 1059 // meaningful when we don't know that this response was from a proxy, but 1060 // Mozilla also does this, so we'll do the same. 1061 std::string connection_val; 1062 if (!EnumerateHeader(NULL, "connection", &connection_val)) 1063 EnumerateHeader(NULL, "proxy-connection", &connection_val); 1064 1065 bool keep_alive; 1066 1067 if (http_version_ == HttpVersion(1, 0)) { 1068 // HTTP/1.0 responses default to NOT keep-alive 1069 keep_alive = LowerCaseEqualsASCII(connection_val, "keep-alive"); 1070 } else { 1071 // HTTP/1.1 responses default to keep-alive 1072 keep_alive = !LowerCaseEqualsASCII(connection_val, "close"); 1073 } 1074 1075 return keep_alive; 1076 } 1077 1078 bool HttpResponseHeaders::HasStrongValidators() const { 1079 std::string etag_value; 1080 EnumerateHeader(NULL, "etag", &etag_value); 1081 if (!etag_value.empty()) { 1082 size_t slash = etag_value.find('/'); 1083 if (slash == std::string::npos || slash == 0) 1084 return true; 1085 1086 std::string::const_iterator i = etag_value.begin(); 1087 std::string::const_iterator j = etag_value.begin() + slash; 1088 HttpUtil::TrimLWS(&i, &j); 1089 if (!LowerCaseEqualsASCII(i, j, "w")) 1090 return true; 1091 } 1092 1093 Time last_modified; 1094 if (!GetLastModifiedValue(&last_modified)) 1095 return false; 1096 1097 Time date; 1098 if (!GetDateValue(&date)) 1099 return false; 1100 1101 return ((date - last_modified).InSeconds() >= 60); 1102 } 1103 1104 // From RFC 2616: 1105 // 
Content-Length = "Content-Length" ":" 1*DIGIT 1106 int64 HttpResponseHeaders::GetContentLength() const { 1107 void* iter = NULL; 1108 std::string content_length_val; 1109 if (!EnumerateHeader(&iter, "content-length", &content_length_val)) 1110 return -1; 1111 1112 if (content_length_val.empty()) 1113 return -1; 1114 1115 if (content_length_val[0] == '+') 1116 return -1; 1117 1118 int64 result; 1119 bool ok = base::StringToInt64(content_length_val, &result); 1120 if (!ok || result < 0) 1121 return -1; 1122 1123 return result; 1124 } 1125 1126 // From RFC 2616 14.16: 1127 // content-range-spec = 1128 // bytes-unit SP byte-range-resp-spec "/" ( instance-length | "*" ) 1129 // byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) | "*" 1130 // instance-length = 1*DIGIT 1131 // bytes-unit = "bytes" 1132 bool HttpResponseHeaders::GetContentRange(int64* first_byte_position, 1133 int64* last_byte_position, 1134 int64* instance_length) const { 1135 void* iter = NULL; 1136 std::string content_range_spec; 1137 *first_byte_position = *last_byte_position = *instance_length = -1; 1138 if (!EnumerateHeader(&iter, "content-range", &content_range_spec)) 1139 return false; 1140 1141 // If the header value is empty, we have an invalid header. 1142 if (content_range_spec.empty()) 1143 return false; 1144 1145 size_t space_position = content_range_spec.find(' '); 1146 if (space_position == std::string::npos) 1147 return false; 1148 1149 // Invalid header if it doesn't contain "bytes-unit". 
1150 std::string::const_iterator content_range_spec_begin = 1151 content_range_spec.begin(); 1152 std::string::const_iterator content_range_spec_end = 1153 content_range_spec.begin() + space_position; 1154 HttpUtil::TrimLWS(&content_range_spec_begin, &content_range_spec_end); 1155 if (!LowerCaseEqualsASCII(content_range_spec_begin, 1156 content_range_spec_end, 1157 "bytes")) { 1158 return false; 1159 } 1160 1161 size_t slash_position = content_range_spec.find('/', space_position + 1); 1162 if (slash_position == std::string::npos) 1163 return false; 1164 1165 // Obtain the part behind the space and before slash. 1166 std::string::const_iterator byte_range_resp_spec_begin = 1167 content_range_spec.begin() + space_position + 1; 1168 std::string::const_iterator byte_range_resp_spec_end = 1169 content_range_spec.begin() + slash_position; 1170 HttpUtil::TrimLWS(&byte_range_resp_spec_begin, &byte_range_resp_spec_end); 1171 1172 // Parse the byte-range-resp-spec part. 1173 std::string byte_range_resp_spec(byte_range_resp_spec_begin, 1174 byte_range_resp_spec_end); 1175 // If byte-range-resp-spec != "*". 1176 if (!LowerCaseEqualsASCII(byte_range_resp_spec, "*")) { 1177 size_t minus_position = byte_range_resp_spec.find('-'); 1178 if (minus_position != std::string::npos) { 1179 // Obtain first-byte-pos. 1180 std::string::const_iterator first_byte_pos_begin = 1181 byte_range_resp_spec.begin(); 1182 std::string::const_iterator first_byte_pos_end = 1183 byte_range_resp_spec.begin() + minus_position; 1184 HttpUtil::TrimLWS(&first_byte_pos_begin, &first_byte_pos_end); 1185 1186 bool ok = base::StringToInt64(first_byte_pos_begin, 1187 first_byte_pos_end, 1188 first_byte_position); 1189 1190 // Obtain last-byte-pos. 
1191 std::string::const_iterator last_byte_pos_begin = 1192 byte_range_resp_spec.begin() + minus_position + 1; 1193 std::string::const_iterator last_byte_pos_end = 1194 byte_range_resp_spec.end(); 1195 HttpUtil::TrimLWS(&last_byte_pos_begin, &last_byte_pos_end); 1196 1197 ok &= base::StringToInt64(last_byte_pos_begin, 1198 last_byte_pos_end, 1199 last_byte_position); 1200 if (!ok) { 1201 *first_byte_position = *last_byte_position = -1; 1202 return false; 1203 } 1204 if (*first_byte_position < 0 || *last_byte_position < 0 || 1205 *first_byte_position > *last_byte_position) 1206 return false; 1207 } else { 1208 return false; 1209 } 1210 } 1211 1212 // Parse the instance-length part. 1213 // If instance-length == "*". 1214 std::string::const_iterator instance_length_begin = 1215 content_range_spec.begin() + slash_position + 1; 1216 std::string::const_iterator instance_length_end = 1217 content_range_spec.end(); 1218 HttpUtil::TrimLWS(&instance_length_begin, &instance_length_end); 1219 1220 if (LowerCaseEqualsASCII(instance_length_begin, instance_length_end, "*")) { 1221 return false; 1222 } else if (!base::StringToInt64(instance_length_begin, 1223 instance_length_end, 1224 instance_length)) { 1225 *instance_length = -1; 1226 return false; 1227 } 1228 1229 // We have all the values; let's verify that they make sense for a 206 1230 // response. 1231 if (*first_byte_position < 0 || *last_byte_position < 0 || 1232 *instance_length < 0 || *instance_length - 1 < *last_byte_position) 1233 return false; 1234 1235 return true; 1236 } 1237 1238 } // namespace net 1239