Home | History | Annotate | Download | only in proxy
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/proxy/proxy_script_fetcher_impl.h"
      6 
      7 #include "base/compiler_specific.h"
      8 #include "base/logging.h"
      9 #include "base/message_loop/message_loop.h"
     10 #include "base/strings/string_util.h"
     11 #include "net/base/data_url.h"
     12 #include "net/base/io_buffer.h"
     13 #include "net/base/load_flags.h"
     14 #include "net/base/net_errors.h"
     15 #include "net/base/net_string_util.h"
     16 #include "net/base/request_priority.h"
     17 #include "net/cert/cert_status_flags.h"
     18 #include "net/http/http_response_headers.h"
     19 #include "net/url_request/url_request_context.h"
     20 
     21 // TODO(eroman):
     22 //   - Support auth-prompts (http://crbug.com/77366)
     23 
     24 namespace net {
     25 
     26 namespace {
     27 
     28 // The maximum size (in bytes) allowed for a PAC script. Responses exceeding
     29 // this will fail with ERR_FILE_TOO_BIG.
     30 const int kDefaultMaxResponseBytes = 1048576;  // 1 megabyte
     31 
     32 // The maximum duration (in milliseconds) allowed for fetching the PAC script.
     33 // Responses exceeding this will fail with ERR_TIMED_OUT.
     34 const int kDefaultMaxDurationMs = 300000;  // 5 minutes
     35 
     36 // Returns true if |mime_type| is one of the known PAC mime type.
     37 bool IsPacMimeType(const std::string& mime_type) {
     38   static const char * const kSupportedPacMimeTypes[] = {
     39     "application/x-ns-proxy-autoconfig",
     40     "application/x-javascript-config",
     41   };
     42   for (size_t i = 0; i < arraysize(kSupportedPacMimeTypes); ++i) {
     43     if (LowerCaseEqualsASCII(mime_type, kSupportedPacMimeTypes[i]))
     44       return true;
     45   }
     46   return false;
     47 }
     48 
     49 // Converts |bytes| (which is encoded by |charset|) to UTF16, saving the resul
     50 // to |*utf16|.
     51 // If |charset| is empty, then we don't know what it was and guess.
     52 void ConvertResponseToUTF16(const std::string& charset,
     53                             const std::string& bytes,
     54                             base::string16* utf16) {
     55   const char* codepage;
     56 
     57   if (charset.empty()) {
     58     // Assume ISO-8859-1 if no charset was specified.
     59     codepage = kCharsetLatin1;
     60   } else {
     61     // Otherwise trust the charset that was provided.
     62     codepage = charset.c_str();
     63   }
     64 
     65   // Be generous in the conversion -- if any characters lie outside of |charset|
     66   // (i.e. invalid), then substitute them with U+FFFD rather than failing.
     67   ConvertToUTF16WithSubstitutions(bytes, codepage, utf16);
     68 }
     69 
     70 }  // namespace
     71 
// Creates a fetcher that issues its requests through |url_request_context|,
// which must be non-NULL (DCHECKed). The fetcher starts idle, with the default
// limits of kDefaultMaxResponseBytes for the response size and
// kDefaultMaxDurationMs for the fetch duration.
ProxyScriptFetcherImpl::ProxyScriptFetcherImpl(
    URLRequestContext* url_request_context)
    : url_request_context_(url_request_context),
      // Scratch buffer of kBufSize bytes that each body read lands in.
      buf_(new IOBuffer(kBufSize)),
      // Request ids are handed out by pre-incrementing |next_id_|, so 0 in
      // |cur_request_id_| means "no request in progress".
      next_id_(0),
      cur_request_id_(0),
      result_code_(OK),
      result_text_(NULL),
      max_response_bytes_(kDefaultMaxResponseBytes),
      max_duration_(base::TimeDelta::FromMilliseconds(kDefaultMaxDurationMs)),
      // Supplies the weak pointers that the delayed timeout tasks are bound to.
      weak_factory_(this) {
  DCHECK(url_request_context);
}
     85 
// No explicit teardown is performed here; cleanup is left to the members'
// own destructors.
ProxyScriptFetcherImpl::~ProxyScriptFetcherImpl() {
  // The URLRequest's destructor will cancel the outstanding request, and
  // ensure that the delegate (this) is not called again.
}
     90 
     91 base::TimeDelta ProxyScriptFetcherImpl::SetTimeoutConstraint(
     92     base::TimeDelta timeout) {
     93   base::TimeDelta prev = max_duration_;
     94   max_duration_ = timeout;
     95   return prev;
     96 }
     97 
     98 size_t ProxyScriptFetcherImpl::SetSizeConstraint(size_t size_bytes) {
     99   size_t prev = max_response_bytes_;
    100   max_response_bytes_ = size_bytes;
    101   return prev;
    102 }
    103 
    104 void ProxyScriptFetcherImpl::OnResponseCompleted(URLRequest* request) {
    105   DCHECK_EQ(request, cur_request_.get());
    106 
    107   // Use |result_code_| as the request's error if we have already set it to
    108   // something specific.
    109   if (result_code_ == OK && !request->status().is_success())
    110     result_code_ = request->status().error();
    111 
    112   FetchCompleted();
    113 }
    114 
    115 int ProxyScriptFetcherImpl::Fetch(
    116     const GURL& url, base::string16* text, const CompletionCallback& callback) {
    117   // It is invalid to call Fetch() while a request is already in progress.
    118   DCHECK(!cur_request_.get());
    119   DCHECK(!callback.is_null());
    120   DCHECK(text);
    121 
    122   // Handle base-64 encoded data-urls that contain custom PAC scripts.
    123   if (url.SchemeIs("data")) {
    124     std::string mime_type;
    125     std::string charset;
    126     std::string data;
    127     if (!DataURL::Parse(url, &mime_type, &charset, &data))
    128       return ERR_FAILED;
    129 
    130     ConvertResponseToUTF16(charset, data, text);
    131     return OK;
    132   }
    133 
    134   cur_request_ =
    135       url_request_context_->CreateRequest(url, DEFAULT_PRIORITY, this, NULL);
    136   cur_request_->set_method("GET");
    137 
    138   // Make sure that the PAC script is downloaded using a direct connection,
    139   // to avoid circular dependencies (fetching is a part of proxy resolution).
    140   // Also disable the use of the disk cache. The cache is disabled so that if
    141   // the user switches networks we don't potentially use the cached response
    142   // from old network when we should in fact be re-fetching on the new network.
    143   // If the PAC script is hosted on an HTTPS server we bypass revocation
    144   // checking in order to avoid a circular dependency when attempting to fetch
    145   // the OCSP response or CRL. We could make the revocation check go direct but
    146   // the proxy might be the only way to the outside world.
    147   cur_request_->SetLoadFlags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE |
    148                              LOAD_DISABLE_CERT_REVOCATION_CHECKING);
    149 
    150   // Save the caller's info for notification on completion.
    151   callback_ = callback;
    152   result_text_ = text;
    153 
    154   bytes_read_so_far_.clear();
    155 
    156   // Post a task to timeout this request if it takes too long.
    157   cur_request_id_ = ++next_id_;
    158   base::MessageLoop::current()->PostDelayedTask(
    159       FROM_HERE,
    160       base::Bind(&ProxyScriptFetcherImpl::OnTimeout,
    161                  weak_factory_.GetWeakPtr(),
    162                  cur_request_id_),
    163       max_duration_);
    164 
    165   // Start the request.
    166   cur_request_->Start();
    167   return ERR_IO_PENDING;
    168 }
    169 
// Abandons the fetch in progress, if any. The stored completion callback is
// discarded without being run, and the stored request id is cleared.
void ProxyScriptFetcherImpl::Cancel() {
  // ResetCurRequestState will free the URLRequest, which will cause
  // cancellation.
  ResetCurRequestState();
}
    175 
// Returns the URLRequestContext that was supplied to the constructor and is
// used to create this fetcher's requests.
URLRequestContext* ProxyScriptFetcherImpl::GetRequestContext() const {
  return url_request_context_;
}
    179 
    180 void ProxyScriptFetcherImpl::OnAuthRequired(URLRequest* request,
    181                                             AuthChallengeInfo* auth_info) {
    182   DCHECK_EQ(request, cur_request_.get());
    183   // TODO(eroman): http://crbug.com/77366
    184   LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
    185   result_code_ = ERR_NOT_IMPLEMENTED;
    186   request->CancelAuth();
    187 }
    188 
    189 void ProxyScriptFetcherImpl::OnSSLCertificateError(URLRequest* request,
    190                                                    const SSLInfo& ssl_info,
    191                                                    bool fatal) {
    192   DCHECK_EQ(request, cur_request_.get());
    193   // Revocation check failures are not fatal.
    194   if (IsCertStatusMinorError(ssl_info.cert_status)) {
    195     request->ContinueDespiteLastError();
    196     return;
    197   }
    198   LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
    199   // Certificate errors are in same space as net errors.
    200   result_code_ = MapCertStatusToNetError(ssl_info.cert_status);
    201   request->Cancel();
    202 }
    203 
    204 void ProxyScriptFetcherImpl::OnResponseStarted(URLRequest* request) {
    205   DCHECK_EQ(request, cur_request_.get());
    206 
    207   if (!request->status().is_success()) {
    208     OnResponseCompleted(request);
    209     return;
    210   }
    211 
    212   // Require HTTP responses to have a success status code.
    213   if (request->url().SchemeIsHTTPOrHTTPS()) {
    214     // NOTE about status codes: We are like Firefox 3 in this respect.
    215     // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
    216     if (request->GetResponseCode() != 200) {
    217       VLOG(1) << "Fetched PAC script had (bad) status line: "
    218               << request->response_headers()->GetStatusLine();
    219       result_code_ = ERR_PAC_STATUS_NOT_OK;
    220       request->Cancel();
    221       return;
    222     }
    223 
    224     // NOTE about mime types: We do not enforce mime types on PAC files.
    225     // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
    226     // however log mismatches to help with debugging.
    227     std::string mime_type;
    228     cur_request_->GetMimeType(&mime_type);
    229     if (!IsPacMimeType(mime_type)) {
    230       VLOG(1) << "Fetched PAC script does not have a proper mime type: "
    231               << mime_type;
    232     }
    233   }
    234 
    235   ReadBody(request);
    236 }
    237 
    238 void ProxyScriptFetcherImpl::OnReadCompleted(URLRequest* request,
    239                                              int num_bytes) {
    240   DCHECK_EQ(request, cur_request_.get());
    241   if (ConsumeBytesRead(request, num_bytes)) {
    242     // Keep reading.
    243     ReadBody(request);
    244   }
    245 }
    246 
    247 void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
    248   // Read as many bytes as are available synchronously.
    249   while (true) {
    250     int num_bytes;
    251     if (!request->Read(buf_.get(), kBufSize, &num_bytes)) {
    252       // Check whether the read failed synchronously.
    253       if (!request->status().is_io_pending())
    254         OnResponseCompleted(request);
    255       return;
    256     }
    257     if (!ConsumeBytesRead(request, num_bytes))
    258       return;
    259   }
    260 }
    261 
    262 bool ProxyScriptFetcherImpl::ConsumeBytesRead(URLRequest* request,
    263                                               int num_bytes) {
    264   if (num_bytes <= 0) {
    265     // Error while reading, or EOF.
    266     OnResponseCompleted(request);
    267     return false;
    268   }
    269 
    270   // Enforce maximum size bound.
    271   if (num_bytes + bytes_read_so_far_.size() >
    272       static_cast<size_t>(max_response_bytes_)) {
    273     result_code_ = ERR_FILE_TOO_BIG;
    274     request->Cancel();
    275     return false;
    276   }
    277 
    278   bytes_read_so_far_.append(buf_->data(), num_bytes);
    279   return true;
    280 }
    281 
    282 void ProxyScriptFetcherImpl::FetchCompleted() {
    283   if (result_code_ == OK) {
    284     // The caller expects the response to be encoded as UTF16.
    285     std::string charset;
    286     cur_request_->GetCharset(&charset);
    287     ConvertResponseToUTF16(charset, bytes_read_so_far_, result_text_);
    288   } else {
    289     // On error, the caller expects empty string for bytes.
    290     result_text_->clear();
    291   }
    292 
    293   int result_code = result_code_;
    294   CompletionCallback callback = callback_;
    295 
    296   ResetCurRequestState();
    297 
    298   callback.Run(result_code);
    299 }
    300 
// Returns the fetcher to its idle state by dropping the current request and
// all the per-request bookkeeping. The stored callback is discarded, not run.
void ProxyScriptFetcherImpl::ResetCurRequestState() {
  // Releases the in-flight URLRequest, if there is one.
  cur_request_.reset();
  // Ids handed out by Fetch() come from pre-incrementing a counter that starts
  // at 0, so 0 here is intended to match no pending timeout task.
  cur_request_id_ = 0;
  callback_.Reset();
  result_code_ = OK;
  result_text_ = NULL;
}
    308 
    309 void ProxyScriptFetcherImpl::OnTimeout(int id) {
    310   // Timeout tasks may outlive the URLRequest they reference. Make sure it
    311   // is still applicable.
    312   if (cur_request_id_ != id)
    313     return;
    314 
    315   DCHECK(cur_request_.get());
    316   result_code_ = ERR_TIMED_OUT;
    317   cur_request_->Cancel();
    318 }
    319 
    320 }  // namespace net
    321