Home | History | Annotate | Download | only in proxy
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/proxy/proxy_script_fetcher_impl.h"
      6 
      7 #include "base/compiler_specific.h"
      8 #include "base/i18n/icu_string_conversions.h"
      9 #include "base/logging.h"
     10 #include "base/message_loop/message_loop.h"
     11 #include "base/strings/string_util.h"
     12 #include "net/base/data_url.h"
     13 #include "net/base/io_buffer.h"
     14 #include "net/base/load_flags.h"
     15 #include "net/base/net_errors.h"
     16 #include "net/base/request_priority.h"
     17 #include "net/cert/cert_status_flags.h"
     18 #include "net/http/http_response_headers.h"
     19 #include "net/url_request/url_request_context.h"
     20 
     21 // TODO(eroman):
     22 //   - Support auth-prompts (http://crbug.com/77366)
     23 
     24 namespace net {
     25 
     26 namespace {
     27 
     28 // The maximum size (in bytes) allowed for a PAC script. Responses exceeding
     29 // this will fail with ERR_FILE_TOO_BIG.
     30 const int kDefaultMaxResponseBytes = 1048576;  // 1 megabyte
     31 
     32 // The maximum duration (in milliseconds) allowed for fetching the PAC script.
     33 // Responses exceeding this will fail with ERR_TIMED_OUT.
     34 const int kDefaultMaxDurationMs = 300000;  // 5 minutes
     35 
     36 // Returns true if |mime_type| is one of the known PAC mime type.
     37 bool IsPacMimeType(const std::string& mime_type) {
     38   static const char * const kSupportedPacMimeTypes[] = {
     39     "application/x-ns-proxy-autoconfig",
     40     "application/x-javascript-config",
     41   };
     42   for (size_t i = 0; i < arraysize(kSupportedPacMimeTypes); ++i) {
     43     if (LowerCaseEqualsASCII(mime_type, kSupportedPacMimeTypes[i]))
     44       return true;
     45   }
     46   return false;
     47 }
     48 
     49 // Converts |bytes| (which is encoded by |charset|) to UTF16, saving the resul
     50 // to |*utf16|.
     51 // If |charset| is empty, then we don't know what it was and guess.
     52 void ConvertResponseToUTF16(const std::string& charset,
     53                             const std::string& bytes,
     54                             base::string16* utf16) {
     55   const char* codepage;
     56 
     57   if (charset.empty()) {
     58     // Assume ISO-8859-1 if no charset was specified.
     59     codepage = base::kCodepageLatin1;
     60   } else {
     61     // Otherwise trust the charset that was provided.
     62     codepage = charset.c_str();
     63   }
     64 
     65   // We will be generous in the conversion -- if any characters lie
     66   // outside of |charset| (i.e. invalid), then substitute them with
     67   // U+FFFD rather than failing.
     68   base::CodepageToUTF16(bytes, codepage,
     69                         base::OnStringConversionError::SUBSTITUTE,
     70                         utf16);
     71 }
     72 
     73 }  // namespace
     74 
     75 ProxyScriptFetcherImpl::ProxyScriptFetcherImpl(
     76     URLRequestContext* url_request_context)
     77     : weak_factory_(this),
     78       url_request_context_(url_request_context),
     79       buf_(new IOBuffer(kBufSize)),
     80       next_id_(0),
     81       cur_request_id_(0),
     82       result_code_(OK),
     83       result_text_(NULL),
     84       max_response_bytes_(kDefaultMaxResponseBytes),
     85       max_duration_(base::TimeDelta::FromMilliseconds(kDefaultMaxDurationMs)) {
     86   DCHECK(url_request_context);
     87 }
     88 
     89 ProxyScriptFetcherImpl::~ProxyScriptFetcherImpl() {
     90   // The URLRequest's destructor will cancel the outstanding request, and
     91   // ensure that the delegate (this) is not called again.
     92 }
     93 
     94 base::TimeDelta ProxyScriptFetcherImpl::SetTimeoutConstraint(
     95     base::TimeDelta timeout) {
     96   base::TimeDelta prev = max_duration_;
     97   max_duration_ = timeout;
     98   return prev;
     99 }
    100 
    101 size_t ProxyScriptFetcherImpl::SetSizeConstraint(size_t size_bytes) {
    102   size_t prev = max_response_bytes_;
    103   max_response_bytes_ = size_bytes;
    104   return prev;
    105 }
    106 
    107 void ProxyScriptFetcherImpl::OnResponseCompleted(URLRequest* request) {
    108   DCHECK_EQ(request, cur_request_.get());
    109 
    110   // Use |result_code_| as the request's error if we have already set it to
    111   // something specific.
    112   if (result_code_ == OK && !request->status().is_success())
    113     result_code_ = request->status().error();
    114 
    115   FetchCompleted();
    116 }
    117 
    118 int ProxyScriptFetcherImpl::Fetch(
    119     const GURL& url, base::string16* text, const CompletionCallback& callback) {
    120   // It is invalid to call Fetch() while a request is already in progress.
    121   DCHECK(!cur_request_.get());
    122   DCHECK(!callback.is_null());
    123   DCHECK(text);
    124 
    125   // Handle base-64 encoded data-urls that contain custom PAC scripts.
    126   if (url.SchemeIs("data")) {
    127     std::string mime_type;
    128     std::string charset;
    129     std::string data;
    130     if (!DataURL::Parse(url, &mime_type, &charset, &data))
    131       return ERR_FAILED;
    132 
    133     ConvertResponseToUTF16(charset, data, text);
    134     return OK;
    135   }
    136 
    137   cur_request_ =
    138       url_request_context_->CreateRequest(url, DEFAULT_PRIORITY, this);
    139   cur_request_->set_method("GET");
    140 
    141   // Make sure that the PAC script is downloaded using a direct connection,
    142   // to avoid circular dependencies (fetching is a part of proxy resolution).
    143   // Also disable the use of the disk cache. The cache is disabled so that if
    144   // the user switches networks we don't potentially use the cached response
    145   // from old network when we should in fact be re-fetching on the new network.
    146   // If the PAC script is hosted on an HTTPS server we bypass revocation
    147   // checking in order to avoid a circular dependency when attempting to fetch
    148   // the OCSP response or CRL. We could make the revocation check go direct but
    149   // the proxy might be the only way to the outside world.
    150   cur_request_->SetLoadFlags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE |
    151                              LOAD_DISABLE_CERT_REVOCATION_CHECKING);
    152 
    153   // Save the caller's info for notification on completion.
    154   callback_ = callback;
    155   result_text_ = text;
    156 
    157   bytes_read_so_far_.clear();
    158 
    159   // Post a task to timeout this request if it takes too long.
    160   cur_request_id_ = ++next_id_;
    161   base::MessageLoop::current()->PostDelayedTask(
    162       FROM_HERE,
    163       base::Bind(&ProxyScriptFetcherImpl::OnTimeout,
    164                  weak_factory_.GetWeakPtr(),
    165                  cur_request_id_),
    166       max_duration_);
    167 
    168   // Start the request.
    169   cur_request_->Start();
    170   return ERR_IO_PENDING;
    171 }
    172 
    173 void ProxyScriptFetcherImpl::Cancel() {
    174   // ResetCurRequestState will free the URLRequest, which will cause
    175   // cancellation.
    176   ResetCurRequestState();
    177 }
    178 
    179 URLRequestContext* ProxyScriptFetcherImpl::GetRequestContext() const {
    180   return url_request_context_;
    181 }
    182 
    183 void ProxyScriptFetcherImpl::OnAuthRequired(URLRequest* request,
    184                                             AuthChallengeInfo* auth_info) {
    185   DCHECK_EQ(request, cur_request_.get());
    186   // TODO(eroman): http://crbug.com/77366
    187   LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
    188   result_code_ = ERR_NOT_IMPLEMENTED;
    189   request->CancelAuth();
    190 }
    191 
    192 void ProxyScriptFetcherImpl::OnSSLCertificateError(URLRequest* request,
    193                                                    const SSLInfo& ssl_info,
    194                                                    bool fatal) {
    195   DCHECK_EQ(request, cur_request_.get());
    196   // Revocation check failures are not fatal.
    197   if (IsCertStatusMinorError(ssl_info.cert_status)) {
    198     request->ContinueDespiteLastError();
    199     return;
    200   }
    201   LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
    202   // Certificate errors are in same space as net errors.
    203   result_code_ = MapCertStatusToNetError(ssl_info.cert_status);
    204   request->Cancel();
    205 }
    206 
    207 void ProxyScriptFetcherImpl::OnResponseStarted(URLRequest* request) {
    208   DCHECK_EQ(request, cur_request_.get());
    209 
    210   if (!request->status().is_success()) {
    211     OnResponseCompleted(request);
    212     return;
    213   }
    214 
    215   // Require HTTP responses to have a success status code.
    216   if (request->url().SchemeIsHTTPOrHTTPS()) {
    217     // NOTE about status codes: We are like Firefox 3 in this respect.
    218     // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
    219     if (request->GetResponseCode() != 200) {
    220       VLOG(1) << "Fetched PAC script had (bad) status line: "
    221               << request->response_headers()->GetStatusLine();
    222       result_code_ = ERR_PAC_STATUS_NOT_OK;
    223       request->Cancel();
    224       return;
    225     }
    226 
    227     // NOTE about mime types: We do not enforce mime types on PAC files.
    228     // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
    229     // however log mismatches to help with debugging.
    230     std::string mime_type;
    231     cur_request_->GetMimeType(&mime_type);
    232     if (!IsPacMimeType(mime_type)) {
    233       VLOG(1) << "Fetched PAC script does not have a proper mime type: "
    234               << mime_type;
    235     }
    236   }
    237 
    238   ReadBody(request);
    239 }
    240 
    241 void ProxyScriptFetcherImpl::OnReadCompleted(URLRequest* request,
    242                                              int num_bytes) {
    243   DCHECK_EQ(request, cur_request_.get());
    244   if (ConsumeBytesRead(request, num_bytes)) {
    245     // Keep reading.
    246     ReadBody(request);
    247   }
    248 }
    249 
    250 void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
    251   // Read as many bytes as are available synchronously.
    252   while (true) {
    253     int num_bytes;
    254     if (!request->Read(buf_.get(), kBufSize, &num_bytes)) {
    255       // Check whether the read failed synchronously.
    256       if (!request->status().is_io_pending())
    257         OnResponseCompleted(request);
    258       return;
    259     }
    260     if (!ConsumeBytesRead(request, num_bytes))
    261       return;
    262   }
    263 }
    264 
    265 bool ProxyScriptFetcherImpl::ConsumeBytesRead(URLRequest* request,
    266                                               int num_bytes) {
    267   if (num_bytes <= 0) {
    268     // Error while reading, or EOF.
    269     OnResponseCompleted(request);
    270     return false;
    271   }
    272 
    273   // Enforce maximum size bound.
    274   if (num_bytes + bytes_read_so_far_.size() >
    275       static_cast<size_t>(max_response_bytes_)) {
    276     result_code_ = ERR_FILE_TOO_BIG;
    277     request->Cancel();
    278     return false;
    279   }
    280 
    281   bytes_read_so_far_.append(buf_->data(), num_bytes);
    282   return true;
    283 }
    284 
    285 void ProxyScriptFetcherImpl::FetchCompleted() {
    286   if (result_code_ == OK) {
    287     // The caller expects the response to be encoded as UTF16.
    288     std::string charset;
    289     cur_request_->GetCharset(&charset);
    290     ConvertResponseToUTF16(charset, bytes_read_so_far_, result_text_);
    291   } else {
    292     // On error, the caller expects empty string for bytes.
    293     result_text_->clear();
    294   }
    295 
    296   int result_code = result_code_;
    297   CompletionCallback callback = callback_;
    298 
    299   ResetCurRequestState();
    300 
    301   callback.Run(result_code);
    302 }
    303 
    304 void ProxyScriptFetcherImpl::ResetCurRequestState() {
    305   cur_request_.reset();
    306   cur_request_id_ = 0;
    307   callback_.Reset();
    308   result_code_ = OK;
    309   result_text_ = NULL;
    310 }
    311 
    312 void ProxyScriptFetcherImpl::OnTimeout(int id) {
    313   // Timeout tasks may outlive the URLRequest they reference. Make sure it
    314   // is still applicable.
    315   if (cur_request_id_ != id)
    316     return;
    317 
    318   DCHECK(cur_request_.get());
    319   result_code_ = ERR_TIMED_OUT;
    320   cur_request_->Cancel();
    321 }
    322 
    323 }  // namespace net
    324