1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "webkit/glue/multipart_response_delegate.h" 6 7 #include "base/logging.h" 8 #include "base/string_number_conversions.h" 9 #include "base/string_util.h" 10 #include "net/base/net_util.h" 11 #include "net/http/http_util.h" 12 #include "third_party/WebKit/Source/WebKit/chromium/public/WebHTTPHeaderVisitor.h" 13 #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h" 14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" 15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLLoaderClient.h" 16 17 using WebKit::WebHTTPHeaderVisitor; 18 using WebKit::WebString; 19 using WebKit::WebURLLoader; 20 using WebKit::WebURLLoaderClient; 21 using WebKit::WebURLResponse; 22 23 namespace webkit_glue { 24 25 namespace { 26 27 // The list of response headers that we do not copy from the original 28 // response when generating a WebURLResponse for a MIME payload. 29 const char* kReplaceHeaders[] = { 30 "content-type", 31 "content-length", 32 "content-disposition", 33 "content-range", 34 "range", 35 "set-cookie" 36 }; 37 38 class HeaderCopier : public WebHTTPHeaderVisitor { 39 public: 40 HeaderCopier(WebURLResponse* response) 41 : response_(response) { 42 } 43 virtual void visitHeader(const WebString& name, const WebString& value) { 44 const std::string& name_utf8 = name.utf8(); 45 for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) { 46 if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i])) 47 return; 48 } 49 response_->setHTTPHeaderField(name, value); 50 } 51 private: 52 WebURLResponse* response_; 53 }; 54 55 } // namespace 56 57 MultipartResponseDelegate::MultipartResponseDelegate( 58 WebURLLoaderClient* client, 59 WebURLLoader* loader, 60 const WebURLResponse& response, 61 const std::string& boundary) 62 : client_(client), 63 loader_(loader), 64 original_response_(response), 65 encoded_data_length_(0), 66 boundary_("--"), 67 first_received_data_(true), 68 processing_headers_(false), 69 stop_sending_(false), 70 has_sent_first_response_(false) { 71 // Some servers report a boundary prefixed with "--". See bug 5786. 72 if (StartsWithASCII(boundary, "--", true)) { 73 boundary_.assign(boundary); 74 } else { 75 boundary_.append(boundary); 76 } 77 } 78 79 void MultipartResponseDelegate::OnReceivedData(const char* data, 80 int data_len, 81 int encoded_data_length) { 82 // stop_sending_ means that we've already received the final boundary token. 83 // The server should stop sending us data at this point, but if it does, we 84 // just throw it away. 85 if (stop_sending_) 86 return; 87 88 data_.append(data, data_len); 89 encoded_data_length_ += encoded_data_length; 90 if (first_received_data_) { 91 // Some servers don't send a boundary token before the first chunk of 92 // data. We handle this case anyway (Gecko does too). 93 first_received_data_ = false; 94 95 // Eat leading \r\n 96 int pos = PushOverLine(data_, 0); 97 if (pos) 98 data_ = data_.substr(pos); 99 100 if (data_.length() < boundary_.length() + 2) { 101 // We don't have enough data yet to make a boundary token. Just wait 102 // until the next chunk of data arrives. 103 first_received_data_ = true; 104 return; 105 } 106 107 if (0 != data_.compare(0, boundary_.length(), boundary_)) { 108 data_ = boundary_ + "\n" + data_; 109 } 110 } 111 DCHECK(!first_received_data_); 112 113 // Headers 114 if (processing_headers_) { 115 // Eat leading \r\n 116 int pos = PushOverLine(data_, 0); 117 if (pos) 118 data_ = data_.substr(pos); 119 120 if (ParseHeaders()) { 121 // Successfully parsed headers. 122 processing_headers_ = false; 123 } else { 124 // Get more data before trying again. 125 return; 126 } 127 } 128 DCHECK(!processing_headers_); 129 130 size_t boundary_pos; 131 while ((boundary_pos = FindBoundary()) != std::string::npos) { 132 if (client_) { 133 // Strip out trailing \n\r characters in the buffer preceding the 134 // boundary on the same lines as Firefox. 135 size_t data_length = boundary_pos; 136 if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') { 137 data_length--; 138 if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') { 139 data_length--; 140 } 141 } 142 if (data_length > 0) { 143 // Send the last data chunk. 144 client_->didReceiveData(loader_, 145 data_.data(), 146 static_cast<int>(data_length), 147 encoded_data_length_); 148 encoded_data_length_ = 0; 149 } 150 } 151 size_t boundary_end_pos = boundary_pos + boundary_.length(); 152 if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) { 153 // This was the last boundary so we can stop processing. 154 stop_sending_ = true; 155 data_.clear(); 156 return; 157 } 158 159 // We can now throw out data up through the boundary 160 int offset = PushOverLine(data_, boundary_end_pos); 161 data_ = data_.substr(boundary_end_pos + offset); 162 163 // Ok, back to parsing headers 164 if (!ParseHeaders()) { 165 processing_headers_ = true; 166 break; 167 } 168 } 169 170 // At this point, we should send over any data we have, but keep enough data 171 // buffered to handle a boundary that may have been truncated. 172 if (!processing_headers_ && data_.length() > boundary_.length()) { 173 // If the last character is a new line character, go ahead and just send 174 // everything we have buffered. This matches an optimization in Gecko. 175 int send_length = data_.length() - boundary_.length(); 176 if (data_[data_.length() - 1] == '\n') 177 send_length = data_.length(); 178 if (client_) 179 client_->didReceiveData(loader_, 180 data_.data(), 181 send_length, 182 encoded_data_length_); 183 data_ = data_.substr(send_length); 184 encoded_data_length_ = 0; 185 } 186 } 187 188 void MultipartResponseDelegate::OnCompletedRequest() { 189 // If we have any pending data and we're not in a header, go ahead and send 190 // it to WebCore. 191 if (!processing_headers_ && !data_.empty() && !stop_sending_ && client_) { 192 client_->didReceiveData(loader_, 193 data_.data(), 194 static_cast<int>(data_.length()), 195 encoded_data_length_); 196 encoded_data_length_ = 0; 197 } 198 } 199 200 int MultipartResponseDelegate::PushOverLine(const std::string& data, 201 size_t pos) { 202 int offset = 0; 203 if (pos < data.length() && (data[pos] == '\r' || data[pos] == '\n')) { 204 ++offset; 205 if (pos + 1 < data.length() && data[pos + 1] == '\n') 206 ++offset; 207 } 208 return offset; 209 } 210 211 bool MultipartResponseDelegate::ParseHeaders() { 212 int line_feed_increment = 1; 213 214 // Grab the headers being liberal about line endings. 215 size_t line_start_pos = 0; 216 size_t line_end_pos = data_.find('\n'); 217 while (line_end_pos != std::string::npos) { 218 // Handle CRLF 219 if (line_end_pos > line_start_pos && data_[line_end_pos - 1] == '\r') { 220 line_feed_increment = 2; 221 --line_end_pos; 222 } else { 223 line_feed_increment = 1; 224 } 225 if (line_start_pos == line_end_pos) { 226 // A blank line, end of headers 227 line_end_pos += line_feed_increment; 228 break; 229 } 230 // Find the next header line. 231 line_start_pos = line_end_pos + line_feed_increment; 232 line_end_pos = data_.find('\n', line_start_pos); 233 } 234 // Truncated in the middle of a header, stop parsing. 235 if (line_end_pos == std::string::npos) 236 return false; 237 238 // Eat headers 239 std::string headers("\n"); 240 headers.append(data_, 0, line_end_pos); 241 data_ = data_.substr(line_end_pos); 242 243 // Create a WebURLResponse based on the original set of headers + the 244 // replacement headers. We only replace the same few headers that gecko 245 // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp. 246 std::string content_type = net::GetSpecificHeader(headers, "content-type"); 247 std::string mime_type; 248 std::string charset; 249 bool has_charset = false; 250 net::HttpUtil::ParseContentType(content_type, &mime_type, &charset, 251 &has_charset); 252 WebURLResponse response(original_response_.url()); 253 response.setMIMEType(WebString::fromUTF8(mime_type)); 254 response.setTextEncodingName(WebString::fromUTF8(charset)); 255 256 HeaderCopier copier(&response); 257 original_response_.visitHTTPHeaderFields(&copier); 258 259 for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) { 260 std::string name(kReplaceHeaders[i]); 261 std::string value = net::GetSpecificHeader(headers, name); 262 if (!value.empty()) { 263 response.setHTTPHeaderField(WebString::fromUTF8(name), 264 WebString::fromUTF8(value)); 265 } 266 } 267 // To avoid recording every multipart load as a separate visit in 268 // the history database, we want to keep track of whether the response 269 // is part of a multipart payload. We do want to record the first visit, 270 // so we only set isMultipartPayload to true after the first visit. 271 response.setIsMultipartPayload(has_sent_first_response_); 272 has_sent_first_response_ = true; 273 // Send the response! 274 if (client_) 275 client_->didReceiveResponse(loader_, response); 276 277 return true; 278 } 279 280 // Boundaries are supposed to be preceeded with --, but it looks like gecko 281 // doesn't require the dashes to exist. See nsMultiMixedConv::FindToken. 282 size_t MultipartResponseDelegate::FindBoundary() { 283 size_t boundary_pos = data_.find(boundary_); 284 if (boundary_pos != std::string::npos) { 285 // Back up over -- for backwards compat 286 // TODO(tc): Don't we only want to do this once? Gecko code doesn't seem 287 // to care. 288 if (boundary_pos >= 2) { 289 if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) { 290 boundary_pos -= 2; 291 boundary_ = "--" + boundary_; 292 } 293 } 294 } 295 return boundary_pos; 296 } 297 298 bool MultipartResponseDelegate::ReadMultipartBoundary( 299 const WebURLResponse& response, 300 std::string* multipart_boundary) { 301 std::string content_type = 302 response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8(); 303 304 size_t boundary_start_offset = content_type.find("boundary="); 305 if (boundary_start_offset == std::string::npos) 306 return false; 307 308 boundary_start_offset += strlen("boundary="); 309 310 size_t boundary_end_offset = content_type.find(';', boundary_start_offset); 311 312 if (boundary_end_offset == std::string::npos) 313 boundary_end_offset = content_type.length(); 314 315 size_t boundary_length = boundary_end_offset - boundary_start_offset; 316 317 *multipart_boundary = 318 content_type.substr(boundary_start_offset, boundary_length); 319 // The byte range response can have quoted boundary strings. This is legal 320 // as per MIME specifications. Individual data fragements however don't 321 // contain quoted boundary strings. 322 TrimString(*multipart_boundary, "\"", multipart_boundary); 323 return true; 324 } 325 326 bool MultipartResponseDelegate::ReadContentRanges( 327 const WebURLResponse& response, 328 int* content_range_lower_bound, 329 int* content_range_upper_bound, 330 int* content_range_instance_size) { 331 332 std::string content_range = response.httpHeaderField("Content-Range").utf8(); 333 if (content_range.empty()) { 334 content_range = response.httpHeaderField("Range").utf8(); 335 } 336 337 if (content_range.empty()) { 338 DLOG(WARNING) << "Failed to read content range from response."; 339 return false; 340 } 341 342 size_t byte_range_lower_bound_start_offset = content_range.find(" "); 343 if (byte_range_lower_bound_start_offset == std::string::npos) { 344 return false; 345 } 346 347 // Skip over the initial space. 348 byte_range_lower_bound_start_offset++; 349 350 // Find the lower bound. 351 size_t byte_range_lower_bound_end_offset = 352 content_range.find("-", byte_range_lower_bound_start_offset); 353 if (byte_range_lower_bound_end_offset == std::string::npos) { 354 return false; 355 } 356 357 size_t byte_range_lower_bound_characters = 358 byte_range_lower_bound_end_offset - byte_range_lower_bound_start_offset; 359 std::string byte_range_lower_bound = 360 content_range.substr(byte_range_lower_bound_start_offset, 361 byte_range_lower_bound_characters); 362 363 // Find the upper bound. 364 size_t byte_range_upper_bound_start_offset = 365 byte_range_lower_bound_end_offset + 1; 366 367 size_t byte_range_upper_bound_end_offset = 368 content_range.find("/", byte_range_upper_bound_start_offset); 369 if (byte_range_upper_bound_end_offset == std::string::npos) { 370 return false; 371 } 372 373 size_t byte_range_upper_bound_characters = 374 byte_range_upper_bound_end_offset - byte_range_upper_bound_start_offset; 375 std::string byte_range_upper_bound = 376 content_range.substr(byte_range_upper_bound_start_offset, 377 byte_range_upper_bound_characters); 378 379 // Find the instance size. 380 size_t byte_range_instance_size_start_offset = 381 byte_range_upper_bound_end_offset + 1; 382 383 size_t byte_range_instance_size_end_offset = 384 content_range.length(); 385 386 size_t byte_range_instance_size_characters = 387 byte_range_instance_size_end_offset - 388 byte_range_instance_size_start_offset; 389 std::string byte_range_instance_size = 390 content_range.substr(byte_range_instance_size_start_offset, 391 byte_range_instance_size_characters); 392 393 if (!base::StringToInt(byte_range_lower_bound, content_range_lower_bound)) 394 return false; 395 if (!base::StringToInt(byte_range_upper_bound, content_range_upper_bound)) 396 return false; 397 if (!base::StringToInt(byte_range_instance_size, content_range_instance_size)) 398 return false; 399 return true; 400 } 401 402 } // namespace webkit_glue 403