Home | History | Annotate | Download | only in glue
      1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "webkit/glue/multipart_response_delegate.h"
      6 
      7 #include "base/logging.h"
      8 #include "base/string_number_conversions.h"
      9 #include "base/string_util.h"
     10 #include "net/base/net_util.h"
     11 #include "net/http/http_util.h"
     12 #include "third_party/WebKit/Source/WebKit/chromium/public/WebHTTPHeaderVisitor.h"
     13 #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h"
     14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
     15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLLoaderClient.h"
     16 
     17 using WebKit::WebHTTPHeaderVisitor;
     18 using WebKit::WebString;
     19 using WebKit::WebURLLoader;
     20 using WebKit::WebURLLoaderClient;
     21 using WebKit::WebURLResponse;
     22 
     23 namespace webkit_glue {
     24 
     25 namespace {
     26 
     27 // The list of response headers that we do not copy from the original
     28 // response when generating a WebURLResponse for a MIME payload.
     29 const char* kReplaceHeaders[] = {
     30   "content-type",
     31   "content-length",
     32   "content-disposition",
     33   "content-range",
     34   "range",
     35   "set-cookie"
     36 };
     37 
     38 class HeaderCopier : public WebHTTPHeaderVisitor {
     39  public:
     40   HeaderCopier(WebURLResponse* response)
     41       : response_(response) {
     42   }
     43   virtual void visitHeader(const WebString& name, const WebString& value) {
     44     const std::string& name_utf8 = name.utf8();
     45     for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
     46       if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i]))
     47         return;
     48     }
     49     response_->setHTTPHeaderField(name, value);
     50   }
     51  private:
     52   WebURLResponse* response_;
     53 };
     54 
     55 }  // namespace
     56 
     57 MultipartResponseDelegate::MultipartResponseDelegate(
     58     WebURLLoaderClient* client,
     59     WebURLLoader* loader,
     60     const WebURLResponse& response,
     61     const std::string& boundary)
     62     : client_(client),
     63       loader_(loader),
     64       original_response_(response),
     65       encoded_data_length_(0),
     66       boundary_("--"),
     67       first_received_data_(true),
     68       processing_headers_(false),
     69       stop_sending_(false),
     70       has_sent_first_response_(false) {
     71   // Some servers report a boundary prefixed with "--".  See bug 5786.
     72   if (StartsWithASCII(boundary, "--", true)) {
     73     boundary_.assign(boundary);
     74   } else {
     75     boundary_.append(boundary);
     76   }
     77 }
     78 
     79 void MultipartResponseDelegate::OnReceivedData(const char* data,
     80                                                int data_len,
     81                                                int encoded_data_length) {
     82   // stop_sending_ means that we've already received the final boundary token.
     83   // The server should stop sending us data at this point, but if it does, we
     84   // just throw it away.
     85   if (stop_sending_)
     86     return;
     87 
     88   data_.append(data, data_len);
     89   encoded_data_length_ += encoded_data_length;
     90   if (first_received_data_) {
     91     // Some servers don't send a boundary token before the first chunk of
     92     // data.  We handle this case anyway (Gecko does too).
     93     first_received_data_ = false;
     94 
     95     // Eat leading \r\n
     96     int pos = PushOverLine(data_, 0);
     97     if (pos)
     98       data_ = data_.substr(pos);
     99 
    100     if (data_.length() < boundary_.length() + 2) {
    101       // We don't have enough data yet to make a boundary token.  Just wait
    102       // until the next chunk of data arrives.
    103       first_received_data_ = true;
    104       return;
    105     }
    106 
    107     if (0 != data_.compare(0, boundary_.length(), boundary_)) {
    108       data_ = boundary_ + "\n" + data_;
    109     }
    110   }
    111   DCHECK(!first_received_data_);
    112 
    113   // Headers
    114   if (processing_headers_) {
    115     // Eat leading \r\n
    116     int pos = PushOverLine(data_, 0);
    117     if (pos)
    118       data_ = data_.substr(pos);
    119 
    120     if (ParseHeaders()) {
    121       // Successfully parsed headers.
    122       processing_headers_ = false;
    123     } else {
    124       // Get more data before trying again.
    125       return;
    126     }
    127   }
    128   DCHECK(!processing_headers_);
    129 
    130   size_t boundary_pos;
    131   while ((boundary_pos = FindBoundary()) != std::string::npos) {
    132     if (client_) {
    133       // Strip out trailing \n\r characters in the buffer preceding the
    134       // boundary on the same lines as Firefox.
    135       size_t data_length = boundary_pos;
    136       if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') {
    137         data_length--;
    138         if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') {
    139           data_length--;
    140         }
    141       }
    142       if (data_length > 0) {
    143         // Send the last data chunk.
    144         client_->didReceiveData(loader_,
    145                                 data_.data(),
    146                                 static_cast<int>(data_length),
    147                                 encoded_data_length_);
    148         encoded_data_length_ = 0;
    149       }
    150     }
    151     size_t boundary_end_pos = boundary_pos + boundary_.length();
    152     if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) {
    153       // This was the last boundary so we can stop processing.
    154       stop_sending_ = true;
    155       data_.clear();
    156       return;
    157     }
    158 
    159     // We can now throw out data up through the boundary
    160     int offset = PushOverLine(data_, boundary_end_pos);
    161     data_ = data_.substr(boundary_end_pos + offset);
    162 
    163     // Ok, back to parsing headers
    164     if (!ParseHeaders()) {
    165       processing_headers_ = true;
    166       break;
    167     }
    168   }
    169 
    170   // At this point, we should send over any data we have, but keep enough data
    171   // buffered to handle a boundary that may have been truncated.
    172   if (!processing_headers_ && data_.length() > boundary_.length()) {
    173     // If the last character is a new line character, go ahead and just send
    174     // everything we have buffered.  This matches an optimization in Gecko.
    175     int send_length = data_.length() - boundary_.length();
    176     if (data_[data_.length() - 1] == '\n')
    177       send_length = data_.length();
    178     if (client_)
    179       client_->didReceiveData(loader_,
    180                               data_.data(),
    181                               send_length,
    182                               encoded_data_length_);
    183     data_ = data_.substr(send_length);
    184     encoded_data_length_ = 0;
    185   }
    186 }
    187 
    188 void MultipartResponseDelegate::OnCompletedRequest() {
    189   // If we have any pending data and we're not in a header, go ahead and send
    190   // it to WebCore.
    191   if (!processing_headers_ && !data_.empty() && !stop_sending_ && client_) {
    192     client_->didReceiveData(loader_,
    193                             data_.data(),
    194                             static_cast<int>(data_.length()),
    195                             encoded_data_length_);
    196     encoded_data_length_ = 0;
    197   }
    198 }
    199 
    200 int MultipartResponseDelegate::PushOverLine(const std::string& data,
    201                                             size_t pos) {
    202   int offset = 0;
    203   if (pos < data.length() && (data[pos] == '\r' || data[pos] == '\n')) {
    204     ++offset;
    205     if (pos + 1 < data.length() && data[pos + 1] == '\n')
    206       ++offset;
    207   }
    208   return offset;
    209 }
    210 
    211 bool MultipartResponseDelegate::ParseHeaders() {
    212   int line_feed_increment = 1;
    213 
    214   // Grab the headers being liberal about line endings.
    215   size_t line_start_pos = 0;
    216   size_t line_end_pos = data_.find('\n');
    217   while (line_end_pos != std::string::npos) {
    218     // Handle CRLF
    219     if (line_end_pos > line_start_pos && data_[line_end_pos - 1] == '\r') {
    220       line_feed_increment = 2;
    221       --line_end_pos;
    222     } else {
    223       line_feed_increment = 1;
    224     }
    225     if (line_start_pos == line_end_pos) {
    226       // A blank line, end of headers
    227       line_end_pos += line_feed_increment;
    228       break;
    229     }
    230     // Find the next header line.
    231     line_start_pos = line_end_pos + line_feed_increment;
    232     line_end_pos = data_.find('\n', line_start_pos);
    233   }
    234   // Truncated in the middle of a header, stop parsing.
    235   if (line_end_pos == std::string::npos)
    236     return false;
    237 
    238   // Eat headers
    239   std::string headers("\n");
    240   headers.append(data_, 0, line_end_pos);
    241   data_ = data_.substr(line_end_pos);
    242 
    243   // Create a WebURLResponse based on the original set of headers + the
    244   // replacement headers.  We only replace the same few headers that gecko
    245   // does.  See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
    246   std::string content_type = net::GetSpecificHeader(headers, "content-type");
    247   std::string mime_type;
    248   std::string charset;
    249   bool has_charset = false;
    250   net::HttpUtil::ParseContentType(content_type, &mime_type, &charset,
    251                                   &has_charset);
    252   WebURLResponse response(original_response_.url());
    253   response.setMIMEType(WebString::fromUTF8(mime_type));
    254   response.setTextEncodingName(WebString::fromUTF8(charset));
    255 
    256   HeaderCopier copier(&response);
    257   original_response_.visitHTTPHeaderFields(&copier);
    258 
    259   for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
    260     std::string name(kReplaceHeaders[i]);
    261     std::string value = net::GetSpecificHeader(headers, name);
    262     if (!value.empty()) {
    263       response.setHTTPHeaderField(WebString::fromUTF8(name),
    264                                   WebString::fromUTF8(value));
    265     }
    266   }
    267   // To avoid recording every multipart load as a separate visit in
    268   // the history database, we want to keep track of whether the response
    269   // is part of a multipart payload.  We do want to record the first visit,
    270   // so we only set isMultipartPayload to true after the first visit.
    271   response.setIsMultipartPayload(has_sent_first_response_);
    272   has_sent_first_response_ = true;
    273   // Send the response!
    274   if (client_)
    275     client_->didReceiveResponse(loader_, response);
    276 
    277   return true;
    278 }
    279 
    280 // Boundaries are supposed to be preceeded with --, but it looks like gecko
    281 // doesn't require the dashes to exist.  See nsMultiMixedConv::FindToken.
    282 size_t MultipartResponseDelegate::FindBoundary() {
    283   size_t boundary_pos = data_.find(boundary_);
    284   if (boundary_pos != std::string::npos) {
    285     // Back up over -- for backwards compat
    286     // TODO(tc): Don't we only want to do this once?  Gecko code doesn't seem
    287     // to care.
    288     if (boundary_pos >= 2) {
    289       if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) {
    290         boundary_pos -= 2;
    291         boundary_ = "--" + boundary_;
    292       }
    293     }
    294   }
    295   return boundary_pos;
    296 }
    297 
    298 bool MultipartResponseDelegate::ReadMultipartBoundary(
    299     const WebURLResponse& response,
    300     std::string* multipart_boundary) {
    301   std::string content_type =
    302       response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8();
    303 
    304   size_t boundary_start_offset = content_type.find("boundary=");
    305   if (boundary_start_offset == std::string::npos)
    306     return false;
    307 
    308   boundary_start_offset += strlen("boundary=");
    309 
    310   size_t boundary_end_offset = content_type.find(';', boundary_start_offset);
    311 
    312   if (boundary_end_offset == std::string::npos)
    313     boundary_end_offset = content_type.length();
    314 
    315   size_t boundary_length = boundary_end_offset - boundary_start_offset;
    316 
    317   *multipart_boundary =
    318       content_type.substr(boundary_start_offset, boundary_length);
    319   // The byte range response can have quoted boundary strings. This is legal
    320   // as per MIME specifications. Individual data fragements however don't
    321   // contain quoted boundary strings.
    322   TrimString(*multipart_boundary, "\"", multipart_boundary);
    323   return true;
    324 }
    325 
    326 bool MultipartResponseDelegate::ReadContentRanges(
    327     const WebURLResponse& response,
    328     int* content_range_lower_bound,
    329     int* content_range_upper_bound,
    330     int* content_range_instance_size) {
    331 
    332   std::string content_range = response.httpHeaderField("Content-Range").utf8();
    333   if (content_range.empty()) {
    334     content_range = response.httpHeaderField("Range").utf8();
    335   }
    336 
    337   if (content_range.empty()) {
    338     DLOG(WARNING) << "Failed to read content range from response.";
    339     return false;
    340   }
    341 
    342   size_t byte_range_lower_bound_start_offset = content_range.find(" ");
    343   if (byte_range_lower_bound_start_offset == std::string::npos) {
    344     return false;
    345   }
    346 
    347   // Skip over the initial space.
    348   byte_range_lower_bound_start_offset++;
    349 
    350   // Find the lower bound.
    351   size_t byte_range_lower_bound_end_offset =
    352       content_range.find("-", byte_range_lower_bound_start_offset);
    353   if (byte_range_lower_bound_end_offset == std::string::npos) {
    354     return false;
    355   }
    356 
    357   size_t byte_range_lower_bound_characters =
    358       byte_range_lower_bound_end_offset - byte_range_lower_bound_start_offset;
    359   std::string byte_range_lower_bound =
    360       content_range.substr(byte_range_lower_bound_start_offset,
    361                            byte_range_lower_bound_characters);
    362 
    363   // Find the upper bound.
    364   size_t byte_range_upper_bound_start_offset =
    365       byte_range_lower_bound_end_offset + 1;
    366 
    367   size_t byte_range_upper_bound_end_offset =
    368       content_range.find("/", byte_range_upper_bound_start_offset);
    369   if (byte_range_upper_bound_end_offset == std::string::npos) {
    370     return false;
    371   }
    372 
    373   size_t byte_range_upper_bound_characters =
    374       byte_range_upper_bound_end_offset - byte_range_upper_bound_start_offset;
    375   std::string byte_range_upper_bound =
    376       content_range.substr(byte_range_upper_bound_start_offset,
    377                            byte_range_upper_bound_characters);
    378 
    379   // Find the instance size.
    380   size_t byte_range_instance_size_start_offset =
    381       byte_range_upper_bound_end_offset + 1;
    382 
    383   size_t byte_range_instance_size_end_offset =
    384       content_range.length();
    385 
    386   size_t byte_range_instance_size_characters =
    387       byte_range_instance_size_end_offset -
    388       byte_range_instance_size_start_offset;
    389   std::string byte_range_instance_size =
    390       content_range.substr(byte_range_instance_size_start_offset,
    391                            byte_range_instance_size_characters);
    392 
    393   if (!base::StringToInt(byte_range_lower_bound, content_range_lower_bound))
    394     return false;
    395   if (!base::StringToInt(byte_range_upper_bound, content_range_upper_bound))
    396     return false;
    397   if (!base::StringToInt(byte_range_instance_size, content_range_instance_size))
    398     return false;
    399   return true;
    400 }
    401 
    402 }  // namespace webkit_glue
    403