Home | History | Annotate | Download | only in proxy
      1 // Copyright (c) 2008 The Chromium Authors. All rights reserved.  Use of this
      2 // source code is governed by a BSD-style license that can be found in the
      3 // LICENSE file.
      4 
      5 #include "net/proxy/proxy_script_fetcher.h"
      6 
      7 #include "base/compiler_specific.h"
      8 #include "base/i18n/icu_string_conversions.h"
      9 #include "base/logging.h"
     10 #include "base/message_loop.h"
     11 #include "base/ref_counted.h"
     12 #include "base/string_util.h"
     13 #include "base/utf_string_conversions.h"
     14 #include "net/base/io_buffer.h"
     15 #include "net/base/load_flags.h"
     16 #include "net/base/net_errors.h"
     17 #include "net/http/http_response_headers.h"
     18 #include "net/url_request/url_request.h"
     19 
     20 // TODO(eroman):
     21 //   - Support auth-prompts.
     22 
     23 namespace net {
     24 
     25 namespace {
     26 
     27 // The maximum size (in bytes) allowed for a PAC script. Responses exceeding
     28 // this will fail with ERR_FILE_TOO_BIG.
     29 int max_response_bytes = 1048576;  // 1 megabyte
     30 
     31 // The maximum duration (in milliseconds) allowed for fetching the PAC script.
     32 // Responses exceeding this will fail with ERR_TIMED_OUT.
     33 int max_duration_ms = 300000;  // 5 minutes
     34 
     35 // Returns true if |mime_type| is one of the known PAC mime type.
     36 bool IsPacMimeType(const std::string& mime_type) {
     37   static const char * const kSupportedPacMimeTypes[] = {
     38     "application/x-ns-proxy-autoconfig",
     39     "application/x-javascript-config",
     40   };
     41   for (size_t i = 0; i < arraysize(kSupportedPacMimeTypes); ++i) {
     42     if (LowerCaseEqualsASCII(mime_type, kSupportedPacMimeTypes[i]))
     43       return true;
     44   }
     45   return false;
     46 }
     47 
     48 // Convert |bytes| (which is encoded by |charset|) in place to UTF8.
     49 // If |charset| is empty, then we don't know what it was and guess.
     50 void ConvertResponseToUTF8(const std::string& charset, std::string* bytes) {
     51   const char* codepage;
     52 
     53   if (charset.empty()) {
     54     // Assume ISO-8859-1 if no charset was specified.
     55     codepage = base::kCodepageLatin1;
     56   } else {
     57     // Otherwise trust the charset that was provided.
     58     codepage = charset.c_str();
     59   }
     60 
     61   // We will be generous in the conversion -- if any characters lie
     62   // outside of |charset| (i.e. invalid), then substitute them with
     63   // U+FFFD rather than failing.
     64   std::wstring tmp_wide;
     65   base::CodepageToWide(*bytes, codepage,
     66                        base::OnStringConversionError::SUBSTITUTE,
     67                        &tmp_wide);
     68   // TODO(eroman): would be nice to have a CodepageToUTF8() function.
     69   *bytes = WideToUTF8(tmp_wide);
     70 }
     71 
     72 }  // namespace
     73 
// ProxyScriptFetcher implementation that downloads the script with a
// URLRequest and acts as that request's delegate. At most one fetch may be in
// progress at a time.
class ProxyScriptFetcherImpl : public ProxyScriptFetcher,
                               public URLRequest::Delegate {
 public:
  // Creates a ProxyScriptFetcher that issues requests through
  // |url_request_context|. |url_request_context| must remain valid for the
  // lifetime of ProxyScriptFetcherImpl.
  explicit ProxyScriptFetcherImpl(URLRequestContext* url_request_context);

  virtual ~ProxyScriptFetcherImpl();

  // ProxyScriptFetcher methods:

  // Starts fetching |url|. Always returns ERR_IO_PENDING; the final result
  // code is passed to |callback|. On success |*bytes| holds the response
  // converted to UTF8, on failure it is left empty.
  virtual int Fetch(const GURL& url, std::string* bytes,
                    CompletionCallback* callback);
  // Drops the current request (if any) along with its saved callback.
  virtual void Cancel();
  // Returns the context passed to the constructor.
  virtual URLRequestContext* GetRequestContext();

  // URLRequest::Delegate methods:

  // Auth is unsupported: records ERR_NOT_IMPLEMENTED and cancels the auth.
  virtual void OnAuthRequired(URLRequest* request,
                              AuthChallengeInfo* auth_info);
  // Records |cert_error| as the result and cancels the request.
  virtual void OnSSLCertificateError(URLRequest* request, int cert_error,
                                     X509Certificate* cert);
  // Validates the response (HTTP status must be 200) and starts reading.
  virtual void OnResponseStarted(URLRequest* request);
  // Appends |num_bytes| freshly read bytes, enforcing the size limit.
  virtual void OnReadCompleted(URLRequest* request, int num_bytes);
  // Settles on the final result code and completes the fetch.
  virtual void OnResponseCompleted(URLRequest* request);

 private:
  // Read more bytes from the response.
  void ReadBody(URLRequest* request);

  // Called once the request has completed to notify the caller of
  // |result_code_| and |result_bytes_|.
  void FetchCompleted();

  // Clear out the state for the current request.
  void ResetCurRequestState();

  // Callback for time-out task of request with id |id|.
  void OnTimeout(int id);

  // Factory for creating the time-out task. This takes care of revoking
  // outstanding tasks when |this| is deleted.
  ScopedRunnableMethodFactory<ProxyScriptFetcherImpl> task_factory_;

  // The context used for making network requests.
  URLRequestContext* url_request_context_;

  // Buffer that URLRequest writes into. |kBufSize| is its size in bytes.
  enum { kBufSize = 4096 };
  scoped_refptr<net::IOBuffer> buf_;

  // The next ID to use for |cur_request_| (monotonically increasing).
  int next_id_;

  // The current (in progress) request, or NULL.
  scoped_ptr<URLRequest> cur_request_;

  // State for current request (only valid when |cur_request_| is not NULL):

  // Unique ID for the current request. Reset to 0 when no request is active.
  int cur_request_id_;

  // Callback to invoke on completion of the fetch.
  CompletionCallback* callback_;

  // Holds the error condition that was hit on the current request, or OK.
  int result_code_;

  // Holds the bytes read so far. Will not exceed |max_response_bytes|. This
  // buffer is owned by the owner of |callback_|.
  std::string* result_bytes_;
};
    147 
    148 ProxyScriptFetcherImpl::ProxyScriptFetcherImpl(
    149     URLRequestContext* url_request_context)
    150     : ALLOW_THIS_IN_INITIALIZER_LIST(task_factory_(this)),
    151       url_request_context_(url_request_context),
    152       buf_(new net::IOBuffer(kBufSize)),
    153       next_id_(0),
    154       cur_request_(NULL),
    155       cur_request_id_(0),
    156       callback_(NULL),
    157       result_code_(OK),
    158       result_bytes_(NULL) {
    159   DCHECK(url_request_context);
    160 }
    161 
    162 ProxyScriptFetcherImpl::~ProxyScriptFetcherImpl() {
    163   // The URLRequest's destructor will cancel the outstanding request, and
    164   // ensure that the delegate (this) is not called again.
    165 }
    166 
    167 int ProxyScriptFetcherImpl::Fetch(const GURL& url,
    168                                   std::string* bytes,
    169                                   CompletionCallback* callback) {
    170   // It is invalid to call Fetch() while a request is already in progress.
    171   DCHECK(!cur_request_.get());
    172 
    173   DCHECK(callback);
    174   DCHECK(bytes);
    175 
    176   cur_request_.reset(new URLRequest(url, this));
    177   cur_request_->set_context(url_request_context_);
    178   cur_request_->set_method("GET");
    179 
    180   // Make sure that the PAC script is downloaded using a direct connection,
    181   // to avoid circular dependencies (fetching is a part of proxy resolution).
    182   // Also disable the use of the disk cache. The cache is disabled so that if
    183   // the user switches networks we don't potentially use the cached response
    184   // from old network when we should in fact be re-fetching on the new network.
    185   cur_request_->set_load_flags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE);
    186 
    187   // Save the caller's info for notification on completion.
    188   callback_ = callback;
    189   result_bytes_ = bytes;
    190   result_bytes_->clear();
    191 
    192   // Post a task to timeout this request if it takes too long.
    193   cur_request_id_ = ++next_id_;
    194   MessageLoop::current()->PostDelayedTask(FROM_HERE,
    195       task_factory_.NewRunnableMethod(&ProxyScriptFetcherImpl::OnTimeout,
    196                                       cur_request_id_),
    197       static_cast<int>(max_duration_ms));
    198 
    199   // Start the request.
    200   cur_request_->Start();
    201   return ERR_IO_PENDING;
    202 }
    203 
    204 void ProxyScriptFetcherImpl::Cancel() {
    205   // ResetCurRequestState will free the URLRequest, which will cause
    206   // cancellation.
    207   ResetCurRequestState();
    208 }
    209 
    210 URLRequestContext* ProxyScriptFetcherImpl::GetRequestContext() {
    211   return url_request_context_;
    212 }
    213 
    214 void ProxyScriptFetcherImpl::OnAuthRequired(URLRequest* request,
    215                                             AuthChallengeInfo* auth_info) {
    216   DCHECK(request == cur_request_.get());
    217   // TODO(eroman):
    218   LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
    219   result_code_ = ERR_NOT_IMPLEMENTED;
    220   request->CancelAuth();
    221 }
    222 
    223 void ProxyScriptFetcherImpl::OnSSLCertificateError(URLRequest* request,
    224                                                    int cert_error,
    225                                                    X509Certificate* cert) {
    226   DCHECK(request == cur_request_.get());
    227   LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
    228   // Certificate errors are in same space as net errors.
    229   result_code_ = cert_error;
    230   request->Cancel();
    231 }
    232 
    233 void ProxyScriptFetcherImpl::OnResponseStarted(URLRequest* request) {
    234   DCHECK(request == cur_request_.get());
    235 
    236   if (!request->status().is_success()) {
    237     OnResponseCompleted(request);
    238     return;
    239   }
    240 
    241   // Require HTTP responses to have a success status code.
    242   if (request->url().SchemeIs("http") || request->url().SchemeIs("https")) {
    243     // NOTE about status codes: We are like Firefox 3 in this respect.
    244     // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
    245     if (request->GetResponseCode() != 200) {
    246       LOG(INFO) << "Fetched PAC script had (bad) status line: "
    247                 << request->response_headers()->GetStatusLine();
    248       result_code_ = ERR_PAC_STATUS_NOT_OK;
    249       request->Cancel();
    250       return;
    251     }
    252 
    253     // NOTE about mime types: We do not enforce mime types on PAC files.
    254     // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
    255     // however log mismatches to help with debugging.
    256     std::string mime_type;
    257     cur_request_->GetMimeType(&mime_type);
    258     if (!IsPacMimeType(mime_type)) {
    259       LOG(INFO) << "Fetched PAC script does not have a proper mime type: "
    260                 << mime_type;
    261     }
    262   }
    263 
    264   ReadBody(request);
    265 }
    266 
    267 void ProxyScriptFetcherImpl::OnReadCompleted(URLRequest* request,
    268                                              int num_bytes) {
    269   DCHECK(request == cur_request_.get());
    270   if (num_bytes > 0) {
    271     // Enforce maximum size bound.
    272     if (num_bytes + result_bytes_->size() >
    273         static_cast<size_t>(max_response_bytes)) {
    274       result_code_ = ERR_FILE_TOO_BIG;
    275       request->Cancel();
    276       return;
    277     }
    278     result_bytes_->append(buf_->data(), num_bytes);
    279     ReadBody(request);
    280   } else {  // Error while reading, or EOF
    281     OnResponseCompleted(request);
    282   }
    283 }
    284 
    285 void ProxyScriptFetcherImpl::OnResponseCompleted(URLRequest* request) {
    286   DCHECK(request == cur_request_.get());
    287 
    288   // Use |result_code_| as the request's error if we have already set it to
    289   // something specific.
    290   if (result_code_ == OK && !request->status().is_success())
    291     result_code_ = request->status().os_error();
    292 
    293   FetchCompleted();
    294 }
    295 
    296 void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
    297   int num_bytes;
    298   if (request->Read(buf_, kBufSize, &num_bytes)) {
    299     OnReadCompleted(request, num_bytes);
    300   } else if (!request->status().is_io_pending()) {
    301     // Read failed synchronously.
    302     OnResponseCompleted(request);
    303   }
    304 }
    305 
    306 void ProxyScriptFetcherImpl::FetchCompleted() {
    307   if (result_code_ == OK) {
    308     // The caller expects the response to be encoded as UTF8.
    309     std::string charset;
    310     cur_request_->GetCharset(&charset);
    311     ConvertResponseToUTF8(charset, result_bytes_);
    312   } else {
    313     // On error, the caller expects empty string for bytes.
    314     result_bytes_->clear();
    315   }
    316 
    317   int result_code = result_code_;
    318   CompletionCallback* callback = callback_;
    319 
    320   ResetCurRequestState();
    321 
    322   callback->Run(result_code);
    323 }
    324 
    325 void ProxyScriptFetcherImpl::ResetCurRequestState() {
    326   cur_request_.reset();
    327   cur_request_id_ = 0;
    328   callback_ = NULL;
    329   result_code_ = OK;
    330   result_bytes_ = NULL;
    331 }
    332 
    333 void ProxyScriptFetcherImpl::OnTimeout(int id) {
    334   // Timeout tasks may outlive the URLRequest they reference. Make sure it
    335   // is still applicable.
    336   if (cur_request_id_ != id)
    337     return;
    338 
    339   DCHECK(cur_request_.get());
    340   result_code_ = ERR_TIMED_OUT;
    341   cur_request_->Cancel();
    342 }
    343 
    344 // static
    345 ProxyScriptFetcher* ProxyScriptFetcher::Create(
    346     URLRequestContext* url_request_context) {
    347   return new ProxyScriptFetcherImpl(url_request_context);
    348 }
    349 
    350 // static
    351 int ProxyScriptFetcher::SetTimeoutConstraintForUnittest(
    352     int timeout_ms) {
    353   int prev = max_duration_ms;
    354   max_duration_ms = timeout_ms;
    355   return prev;
    356 }
    357 
    358 // static
    359 size_t ProxyScriptFetcher::SetSizeConstraintForUnittest(size_t size_bytes) {
    360   size_t prev = max_response_bytes;
    361   max_response_bytes = size_bytes;
    362   return prev;
    363 }
    364 
    365 }  // namespace net
    366