Home | History | Annotate | Download | only in proxy
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/proxy/proxy_script_fetcher_impl.h"
      6 
      7 #include "base/compiler_specific.h"
      8 #include "base/i18n/icu_string_conversions.h"
      9 #include "base/logging.h"
     10 #include "base/message_loop.h"
     11 #include "base/string_util.h"
     12 #include "net/base/data_url.h"
     13 #include "net/base/io_buffer.h"
     14 #include "net/base/load_flags.h"
     15 #include "net/base/net_errors.h"
     16 #include "net/http/http_response_headers.h"
     17 #include "net/url_request/url_request_context.h"
     18 
     19 // TODO(eroman):
     20 //   - Support auth-prompts (http://crbug.com/77366)
     21 
     22 namespace net {
     23 
     24 namespace {
     25 
     26 // The maximum size (in bytes) allowed for a PAC script. Responses exceeding
     27 // this will fail with ERR_FILE_TOO_BIG.
     28 const int kDefaultMaxResponseBytes = 1048576;  // 1 megabyte
     29 
     30 // The maximum duration (in milliseconds) allowed for fetching the PAC script.
     31 // Responses exceeding this will fail with ERR_TIMED_OUT.
     32 const int kDefaultMaxDurationMs = 300000;  // 5 minutes
     33 
     34 // Returns true if |mime_type| is one of the known PAC mime type.
     35 bool IsPacMimeType(const std::string& mime_type) {
     36   static const char * const kSupportedPacMimeTypes[] = {
     37     "application/x-ns-proxy-autoconfig",
     38     "application/x-javascript-config",
     39   };
     40   for (size_t i = 0; i < arraysize(kSupportedPacMimeTypes); ++i) {
     41     if (LowerCaseEqualsASCII(mime_type, kSupportedPacMimeTypes[i]))
     42       return true;
     43   }
     44   return false;
     45 }
     46 
     47 // Converts |bytes| (which is encoded by |charset|) to UTF16, saving the resul
     48 // to |*utf16|.
     49 // If |charset| is empty, then we don't know what it was and guess.
     50 void ConvertResponseToUTF16(const std::string& charset,
     51                             const std::string& bytes,
     52                             string16* utf16) {
     53   const char* codepage;
     54 
     55   if (charset.empty()) {
     56     // Assume ISO-8859-1 if no charset was specified.
     57     codepage = base::kCodepageLatin1;
     58   } else {
     59     // Otherwise trust the charset that was provided.
     60     codepage = charset.c_str();
     61   }
     62 
     63   // We will be generous in the conversion -- if any characters lie
     64   // outside of |charset| (i.e. invalid), then substitute them with
     65   // U+FFFD rather than failing.
     66   base::CodepageToUTF16(bytes, codepage,
     67                         base::OnStringConversionError::SUBSTITUTE,
     68                         utf16);
     69 }
     70 
     71 }  // namespace
     72 
     73 ProxyScriptFetcherImpl::ProxyScriptFetcherImpl(
     74     URLRequestContext* url_request_context)
     75     : ALLOW_THIS_IN_INITIALIZER_LIST(task_factory_(this)),
     76       url_request_context_(url_request_context),
     77       buf_(new IOBuffer(kBufSize)),
     78       next_id_(0),
     79       cur_request_(NULL),
     80       cur_request_id_(0),
     81       callback_(NULL),
     82       result_code_(OK),
     83       result_text_(NULL),
     84       max_response_bytes_(kDefaultMaxResponseBytes),
     85       max_duration_(base::TimeDelta::FromMilliseconds(kDefaultMaxDurationMs)) {
     86   DCHECK(url_request_context);
     87 }
     88 
     89 ProxyScriptFetcherImpl::~ProxyScriptFetcherImpl() {
     90   // The URLRequest's destructor will cancel the outstanding request, and
     91   // ensure that the delegate (this) is not called again.
     92 }
     93 
     94 base::TimeDelta ProxyScriptFetcherImpl::SetTimeoutConstraint(
     95     base::TimeDelta timeout) {
     96   base::TimeDelta prev = max_duration_;
     97   max_duration_ = timeout;
     98   return prev;
     99 }
    100 
    101 size_t ProxyScriptFetcherImpl::SetSizeConstraint(size_t size_bytes) {
    102   size_t prev = max_response_bytes_;
    103   max_response_bytes_ = size_bytes;
    104   return prev;
    105 }
    106 
    107 void ProxyScriptFetcherImpl::OnResponseCompleted(URLRequest* request) {
    108   DCHECK_EQ(request, cur_request_.get());
    109 
    110   // Use |result_code_| as the request's error if we have already set it to
    111   // something specific.
    112   if (result_code_ == OK && !request->status().is_success())
    113     result_code_ = request->status().os_error();
    114 
    115   FetchCompleted();
    116 }
    117 
    118 int ProxyScriptFetcherImpl::Fetch(const GURL& url,
    119                                   string16* text,
    120                                   CompletionCallback* callback) {
    121   // It is invalid to call Fetch() while a request is already in progress.
    122   DCHECK(!cur_request_.get());
    123 
    124   DCHECK(callback);
    125   DCHECK(text);
    126 
    127   // Handle base-64 encoded data-urls that contain custom PAC scripts.
    128   if (url.SchemeIs("data")) {
    129     std::string mime_type;
    130     std::string charset;
    131     std::string data;
    132     if (!DataURL::Parse(url, &mime_type, &charset, &data))
    133       return ERR_FAILED;
    134 
    135     ConvertResponseToUTF16(charset, data, text);
    136     return OK;
    137   }
    138 
    139   cur_request_.reset(new URLRequest(url, this));
    140   cur_request_->set_context(url_request_context_);
    141   cur_request_->set_method("GET");
    142 
    143   // Make sure that the PAC script is downloaded using a direct connection,
    144   // to avoid circular dependencies (fetching is a part of proxy resolution).
    145   // Also disable the use of the disk cache. The cache is disabled so that if
    146   // the user switches networks we don't potentially use the cached response
    147   // from old network when we should in fact be re-fetching on the new network.
    148   cur_request_->set_load_flags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE);
    149 
    150   // Save the caller's info for notification on completion.
    151   callback_ = callback;
    152   result_text_ = text;
    153 
    154   bytes_read_so_far_.clear();
    155 
    156   // Post a task to timeout this request if it takes too long.
    157   cur_request_id_ = ++next_id_;
    158   MessageLoop::current()->PostDelayedTask(FROM_HERE,
    159       task_factory_.NewRunnableMethod(&ProxyScriptFetcherImpl::OnTimeout,
    160                                       cur_request_id_),
    161       static_cast<int>(max_duration_.InMilliseconds()));
    162 
    163   // Start the request.
    164   cur_request_->Start();
    165   return ERR_IO_PENDING;
    166 }
    167 
    168 void ProxyScriptFetcherImpl::Cancel() {
    169   // ResetCurRequestState will free the URLRequest, which will cause
    170   // cancellation.
    171   ResetCurRequestState();
    172 }
    173 
    174 URLRequestContext* ProxyScriptFetcherImpl::GetRequestContext() {
    175   return url_request_context_;
    176 }
    177 
    178 void ProxyScriptFetcherImpl::OnAuthRequired(URLRequest* request,
    179                                             AuthChallengeInfo* auth_info) {
    180   DCHECK_EQ(request, cur_request_.get());
    181   // TODO(eroman): http://crbug.com/77366
    182   LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
    183   result_code_ = ERR_NOT_IMPLEMENTED;
    184   request->CancelAuth();
    185 }
    186 
    187 void ProxyScriptFetcherImpl::OnSSLCertificateError(URLRequest* request,
    188                                                    int cert_error,
    189                                                    X509Certificate* cert) {
    190   DCHECK_EQ(request, cur_request_.get());
    191   LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
    192   // Certificate errors are in same space as net errors.
    193   result_code_ = cert_error;
    194   request->Cancel();
    195 }
    196 
    197 void ProxyScriptFetcherImpl::OnResponseStarted(URLRequest* request) {
    198   DCHECK_EQ(request, cur_request_.get());
    199 
    200   if (!request->status().is_success()) {
    201     OnResponseCompleted(request);
    202     return;
    203   }
    204 
    205   // Require HTTP responses to have a success status code.
    206   if (request->url().SchemeIs("http") || request->url().SchemeIs("https")) {
    207     // NOTE about status codes: We are like Firefox 3 in this respect.
    208     // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
    209     if (request->GetResponseCode() != 200) {
    210       VLOG(1) << "Fetched PAC script had (bad) status line: "
    211               << request->response_headers()->GetStatusLine();
    212       result_code_ = ERR_PAC_STATUS_NOT_OK;
    213       request->Cancel();
    214       return;
    215     }
    216 
    217     // NOTE about mime types: We do not enforce mime types on PAC files.
    218     // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
    219     // however log mismatches to help with debugging.
    220     std::string mime_type;
    221     cur_request_->GetMimeType(&mime_type);
    222     if (!IsPacMimeType(mime_type)) {
    223       VLOG(1) << "Fetched PAC script does not have a proper mime type: "
    224               << mime_type;
    225     }
    226   }
    227 
    228   ReadBody(request);
    229 }
    230 
    231 void ProxyScriptFetcherImpl::OnReadCompleted(URLRequest* request,
    232                                              int num_bytes) {
    233   DCHECK_EQ(request, cur_request_.get());
    234   if (ConsumeBytesRead(request, num_bytes)) {
    235     // Keep reading.
    236     ReadBody(request);
    237   }
    238 }
    239 
    240 void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
    241   // Read as many bytes as are available synchronously.
    242   while (true) {
    243     int num_bytes;
    244     if (!request->Read(buf_, kBufSize, &num_bytes)) {
    245       // Check whether the read failed synchronously.
    246       if (!request->status().is_io_pending())
    247         OnResponseCompleted(request);
    248       return;
    249     }
    250     if (!ConsumeBytesRead(request, num_bytes))
    251       return;
    252   }
    253 }
    254 
    255 bool ProxyScriptFetcherImpl::ConsumeBytesRead(URLRequest* request,
    256                                               int num_bytes) {
    257   if (num_bytes <= 0) {
    258     // Error while reading, or EOF.
    259     OnResponseCompleted(request);
    260     return false;
    261   }
    262 
    263   // Enforce maximum size bound.
    264   if (num_bytes + bytes_read_so_far_.size() >
    265       static_cast<size_t>(max_response_bytes_)) {
    266     result_code_ = ERR_FILE_TOO_BIG;
    267     request->Cancel();
    268     return false;
    269   }
    270 
    271   bytes_read_so_far_.append(buf_->data(), num_bytes);
    272   return true;
    273 }
    274 
    275 void ProxyScriptFetcherImpl::FetchCompleted() {
    276   if (result_code_ == OK) {
    277     // The caller expects the response to be encoded as UTF16.
    278     std::string charset;
    279     cur_request_->GetCharset(&charset);
    280     ConvertResponseToUTF16(charset, bytes_read_so_far_, result_text_);
    281   } else {
    282     // On error, the caller expects empty string for bytes.
    283     result_text_->clear();
    284   }
    285 
    286   int result_code = result_code_;
    287   CompletionCallback* callback = callback_;
    288 
    289   // Hold a reference to the URLRequestContext to prevent re-entrancy from
    290   // ~URLRequestContext.
    291   scoped_refptr<URLRequestContext> context(cur_request_->context());
    292   ResetCurRequestState();
    293 
    294   callback->Run(result_code);
    295 }
    296 
    297 void ProxyScriptFetcherImpl::ResetCurRequestState() {
    298   cur_request_.reset();
    299   cur_request_id_ = 0;
    300   callback_ = NULL;
    301   result_code_ = OK;
    302   result_text_ = NULL;
    303 }
    304 
    305 void ProxyScriptFetcherImpl::OnTimeout(int id) {
    306   // Timeout tasks may outlive the URLRequest they reference. Make sure it
    307   // is still applicable.
    308   if (cur_request_id_ != id)
    309     return;
    310 
    311   DCHECK(cur_request_.get());
    312   result_code_ = ERR_TIMED_OUT;
    313   cur_request_->Cancel();
    314 }
    315 
    316 }  // namespace net
    317