Home | History | Annotate | Download | only in url_request
      1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef NET_URL_REQUEST_URL_REQUEST_JOB_H_
      6 #define NET_URL_REQUEST_URL_REQUEST_JOB_H_
      7 
      8 #include <string>
      9 #include <vector>
     10 
     11 #include "base/ref_counted.h"
     12 #include "base/scoped_ptr.h"
     13 #include "base/time.h"
     14 #include "googleurl/src/gurl.h"
     15 #include "net/base/filter.h"
     16 #include "net/base/load_states.h"
     17 
     18 namespace net {
        // Forward declarations of the net:: types that the declarations in this
        // header name.
     19 class AuthChallengeInfo;
     20 class HttpResponseInfo;
     21 class IOBuffer;
     22 class UploadData;
     23 class X509Certificate;
     24 }  // namespace net
     25 
        // Forward declarations of the global-namespace types that the declarations
        // in this header name.
     26 class URLRequest;
     27 class URLRequestStatus;
     28 class URLRequestJobMetrics;
     29 
     30 // The URLRequestJob is using RefCountedThreadSafe because some sub classes
     31 // can be destroyed on multiple threads. This is the case of the
     32 // UrlRequestFileJob.
     33 class URLRequestJob : public base::RefCountedThreadSafe<URLRequestJob>,
     34                       public FilterContext {
     35  public:
     36   // When histogramming results related to SDCH and/or an SDCH latency test, the
     37   // number of packets for which we need to record arrival times so as to
     38   // calculate interpacket latencies.  We currently are only looking at the
     39   // first few packets, as we're monitoring the impact of the initial TCP
     40   // congestion window on stalling of transmissions.
     41   static const size_t kSdchPacketHistogramCount = 5;
     42 
          // Constructs a job for |request|.  NOTE(review): presumably stored in
          // request_ and returned by request() -- confirm in the .cc file.
     43   explicit URLRequestJob(URLRequest* request);
     44 
     45   // Returns the request that owns this job. THIS POINTER MAY BE NULL if the
     46   // request was destroyed.
     47   URLRequest* request() const {
     48     return request_;
     49   }
     50 
     51   // Sets the upload data, most requests have no upload data, so this is a NOP.
     52   // Job types supporting upload data will override this.
     53   virtual void SetUpload(net::UploadData* upload) { }
     54 
     55   // Sets extra request headers for Job types that support request headers.
     56   virtual void SetExtraRequestHeaders(const std::string& headers) { }
     57 
     58   // If any error occurs while starting the Job, NotifyStartError should be
     59   // called.
     60   // This helps ensure that all errors follow more similar notification code
     61   // paths, which should simplify testing.
     62   virtual void Start() = 0;
     63 
     64   // This function MUST somehow call NotifyDone/NotifyCanceled or some requests
     65   // will get leaked. Certain callers use that message to know when they can
     66   // delete their URLRequest object, even when doing a cancel. The default Kill
     67   // implementation calls NotifyCanceled, so it is recommended that subclasses
     68   // call URLRequestJob::Kill() after doing any additional work.
     69   //
     70   // The job should endeavor to stop working as soon as is convenient, but must
     71   // not send any complete notifications from inside this function. Instead,
     72   // complete notifications (including "canceled") should be sent from a
     73   // callback run from the message loop.
     74   //
     75   // The job is not obliged to immediately stop sending data in response to
     76   // this call, nor is it obliged to fail with "canceled" unless not all data
     77   // was sent as a result. A typical case would be where the job is almost
     78   // complete and can succeed before the canceled notification can be
     79   // dispatched (from the message loop).
     80   //
     81   // The job should be prepared to receive multiple calls to kill it, but only
     82   // one notification must be issued.
     83   virtual void Kill();
     84 
     85   // Called to detach the request from this Job.  Results in the Job being
     86   // killed off eventually. The job must not use the request pointer any more.
     87   void DetachRequest();
     88 
     89   // Called to read post-filtered data from this Job.  This is just the
     90   // backend for URLRequest::Read, see that function for more info.
     91   // NOTE(review): this returns bool and takes a |bytes_read| out-parameter, so
     92   // presumably the count (0 when there is no more data) is reported through
          // |*bytes_read| and errors through a false return, as documented for
          // ReadRawData() below, rather than through a -1 return value -- confirm
          // against the implementation.
     93   bool Read(net::IOBuffer* buf, int buf_size, int *bytes_read);
     94 
     95   // Called to fetch the current load state for the job.
     96   virtual net::LoadState GetLoadState() const { return net::LOAD_STATE_IDLE; }
     97 
     98   // Called to get the upload progress in bytes.
     99   virtual uint64 GetUploadProgress() const { return 0; }
    100 
    101   // Called to fetch the charset for this request.  Only makes sense for some
    102   // types of requests. Returns true on success.  Calling this on a type that
    103   // doesn't have a charset will return false.
    104   virtual bool GetCharset(std::string* charset) { return false; }
    105 
    106   // Called to get response info.
    107   virtual void GetResponseInfo(net::HttpResponseInfo* info) {}
    108 
    109   // Returns the cookie values included in the response, if applicable.
    110   // Returns true if applicable.
    111   // NOTE: This removes the cookies from the job, so it will only return
    112   //       useful results once per job.
    113   virtual bool GetResponseCookies(std::vector<std::string>* cookies) {
    114     return false;
    115   }
    116 
    117   // Called to fetch the encoding types for this request. Only makes sense for
    118   // some types of requests. Returns true on success. Calling this on a request
    119   // that doesn't have or specify an encoding type will return false.
    120   // Returns an array of strings showing the sequential encodings used on the
    121   // content.
    122   // For example, encoding_types[0] = FILTER_TYPE_SDCH and encoding_types[1] =
    123   // FILTER_TYPE_GZIP, means the content was first encoded by sdch, and then the
    124   // result was encoded by gzip.  To decode, a series of filters must be applied
    125   // in the reverse order (in the above example, ungzip first, and then sdch
    126   // expand).
    127   virtual bool GetContentEncodings(
    128       std::vector<Filter::FilterType>* encoding_types) {
    129     return false;
    130   }
    131 
    132   // Find out if this is a download.
    133   virtual bool IsDownload() const;
    134 
    135   // Find out if this is a response to a request that advertised an SDCH
    136   // dictionary.  Only makes sense for some types of requests.
    137   virtual bool IsSdchResponse() const { return false; }
    138 
    139   // Called to setup stream filter for this request. An example of filter is
    140   // content encoding/decoding.
    141   void SetupFilter();
    142 
    143   // Called to determine if this response is a redirect.  Only makes sense
    144   // for some types of requests.  This method returns true if the response
    145   // is a redirect, and fills in the location param with the URL of the
    146   // redirect.  The HTTP status code (e.g., 302) is filled into
    147   // |*http_status_code| to signify the type of redirect.
    148   //
    149   // The caller is responsible for following the redirect by setting up an
    150   // appropriate replacement Job. Note that the redirected location may be
    151   // invalid, the caller should be sure it can handle this.
    152   //
    153   // The default implementation inspects the response_info_.
    154   virtual bool IsRedirectResponse(GURL* location, int* http_status_code);
    155 
    156   // Called to determine if it is okay to redirect this job to the specified
    157   // location.  This may be used to implement protocol-specific restrictions.
    158   // If this function returns false, then the URLRequest will fail reporting
    159   // net::ERR_UNSAFE_REDIRECT.
    160   virtual bool IsSafeRedirect(const GURL& location) {
    161     return true;
    162   }
    163 
    164   // Called to determine if this response is asking for authentication.  Only
    165   // makes sense for some types of requests.  The caller is responsible for
    166   // obtaining the credentials and passing them to SetAuth.
    167   virtual bool NeedsAuth() { return false; }
    168 
    169   // Fills the authentication info with the server's response.
    170   virtual void GetAuthChallengeInfo(
    171       scoped_refptr<net::AuthChallengeInfo>* auth_info);
    172 
    173   // Resend the request with authentication credentials.
    174   virtual void SetAuth(const std::wstring& username,
    175                        const std::wstring& password);
    176 
    177   // Display the error page without asking for credentials again.
    178   virtual void CancelAuth();
    179 
          // Continue processing the request using |client_cert|.  NOTE(review):
          // presumably answers a request for a client certificate -- confirm.
    180   virtual void ContinueWithCertificate(net::X509Certificate* client_cert);
    181 
    182   // Continue processing the request ignoring the last error.
    183   virtual void ContinueDespiteLastError();
    184 
          // Follows a redirect that was deferred earlier.  NOTE(review): presumably
          // uses deferred_redirect_url_ and deferred_redirect_status_code_, which
          // are "set when a redirect is deferred" -- confirm in the .cc file.
    185   void FollowDeferredRedirect();
    186 
    187   // Returns true if the Job is done producing response data and has called
    188   // NotifyDone on the request.
    189   bool is_done() const { return done_; }
    190 
    191   // Returns true if the job is doing performance profiling
    192   bool is_profiling() const { return is_profiling_; }
    193 
    194   // Retrieve the performance measurement of the job. The data is encapsulated
    195   // with a URLRequestJobMetrics object. The caller owns this object from now
    196   // on.
    197   URLRequestJobMetrics* RetrieveMetrics();
    198 
    199   // Get/Set expected content size
    200   int64 expected_content_size() const { return expected_content_size_; }
    201   void set_expected_content_size(const int64& size) {
    202     expected_content_size_ = size;
    203   }
    204 
    205   // Whether we have processed the response for that request yet.
    206   bool has_response_started() const { return has_handled_response_; }
    207 
    208   // FilterContext methods:
    209   // These methods are not applicable to all connections.
    210   virtual bool GetMimeType(std::string* mime_type) const { return false; }
    211   virtual bool GetURL(GURL* gurl) const;
    212   virtual base::Time GetRequestTime() const;
    213   virtual bool IsCachedContent() const { return false; }
    214   virtual int64 GetByteReadCount() const;
    215   virtual int GetResponseCode() const { return -1; }
    216   virtual int GetInputStreamBufferSize() const { return kFilterBufSize; }
    217   virtual void RecordPacketStats(StatisticSelector statistic) const;
    218 
    219  protected:
    220   friend class base::RefCountedThreadSafe<URLRequestJob>;
    221   virtual ~URLRequestJob();
    222 
    223   // Notifies the job that headers have been received.
    224   void NotifyHeadersComplete();
    225 
    226   // Notifies the request that the job has completed a Read operation.
    227   void NotifyReadComplete(int bytes_read);
    228 
    229   // Notifies the request that a start error has occurred.
    230   void NotifyStartError(const URLRequestStatus& status);
    231 
    232   // NotifyDone marks when we are done with a request.  It is really
    233   // a glorified set_status, but also does internal state checking and
    234   // job tracking.  It should be called once per request, when the job is
    235   // finished doing all IO.
    236   void NotifyDone(const URLRequestStatus& status);
    237 
    238   // Some work performed by NotifyDone must be completed on a separate task
    239   // so as to avoid re-entering the delegate.  This method exists to perform
    240   // that work.
    241   void CompleteNotifyDone();
    242 
    243   // Used as an asynchronous callback for Kill to notify the URLRequest that
    244   // we were canceled.
    245   void NotifyCanceled();
    246 
    247   // Notifies the job the request should be restarted.
    248   // Should only be called if the job has not started a response.
    249   void NotifyRestartRequired();
    250 
    251   // Called to read raw (pre-filtered) data from this Job.
    252   // If returning true, data was read from the job.  buf will contain
    253   // the data, and bytes_read will receive the number of bytes read.
    254   // If returning true, and bytes_read is returned as 0, there is no
    255   // additional data to be read.
    256   // If returning false, an error occurred or an async IO is now pending.
    257   // If async IO is pending, the status of the request will be
    258   // URLRequestStatus::IO_PENDING, and buf must remain available until the
    259   // operation is completed.  See comments on URLRequest::Read for more info.
    260   virtual bool ReadRawData(net::IOBuffer* buf, int buf_size, int *bytes_read);
    261 
    262   // Informs the filter that data has been read into its buffer
    263   void FilteredDataRead(int bytes_read);
    264 
    265   // Reads filtered data from the request.  Returns true if successful,
    266   // false otherwise.  Note, if there is not enough data received to
    267   // return data, this call can issue a new async IO request under
    268   // the hood.
    269   bool ReadFilteredData(int *bytes_read);
    270 
    271   // Facilitate histogramming by turning on packet counting.
    272   // If called more than once, the largest value will be used.
    273   void EnablePacketCounting(size_t max_packets_timed);
    274 
    275   // At or near destruction time, a derived class may request that the filters
    276   // be destroyed so that statistics can be gathered while the derived class is
    277   // still present to assist in calculations.  This is used by URLRequestHttpJob
    278   // to get SDCH to emit stats.
    279   void DestroyFilters() { filter_.reset(); }
    280 
    281   // The request that initiated this job. This value MAY BE NULL if the
    282   // request was released by DetachRequest().
    283   URLRequest* request_;
    284 
    285   // Returns the status of the job.
    286   const URLRequestStatus GetStatus();
    287 
    288   // Set the status of the job.
    289   void SetStatus(const URLRequestStatus& status);
    290 
    291   // Whether the job is doing performance profiling
    292   bool is_profiling_;
    293 
    294   // Contains IO performance measurement when profiling is enabled.
    295   scoped_ptr<URLRequestJobMetrics> metrics_;
    296 
    297  private:
    298   // Size of filter input buffers used by this class.
    299   static const int kFilterBufSize;
    300 
    301   // When data filtering is enabled, this function is used to read data
    302   // for the filter.  Returns true if raw data was read.  Returns false if
    303   // an error occurred (or we are waiting for IO to complete).
    304   bool ReadRawDataForFilter(int *bytes_read);
    305 
    306   // Called in response to a redirect that was not canceled to follow the
    307   // redirect. The current job will be replaced with a new job loading the
    308   // given redirect destination.
    309   void FollowRedirect(const GURL& location, int http_status_code);
    310 
    311   // Updates the profiling info and notifies observers that bytes_read bytes
    312   // have been read.
    313   void RecordBytesRead(int bytes_read);
    314 
    315   // Called to query whether there is data available in the filter to be read
    316   // out.
    317   bool FilterHasData();
    318 
    319   // Record packet arrival times for possible use in histograms.
    320   void UpdatePacketReadTimes();
    321 
    322   // Indicates that the job is done producing data, either it has completed
    323   // all the data or an error has been encountered. Set exclusively by
    324   // NotifyDone so that it is kept in sync with the request.
    325   bool done_;
    326 
    327   // Cache the load flags from request_ because it might go away.
    328   int load_flags_;
    329 
    330   // The data stream filter which is enabled on demand.
    331   scoped_ptr<Filter> filter_;
    332 
    333   // If the filter filled its output buffer, then there is a chance that it
    334   // still has internal data to emit, and this flag is set.
    335   bool filter_needs_more_output_space_;
    336 
    337   // When we filter data, we receive data into the filter buffers.  After
    338   // processing the filtered data, we return the data in the caller's buffer.
    339   // While the async IO is in progress, we save the user buffer here, and
    340   // when the IO completes, we fill this in.
    341   net::IOBuffer *read_buffer_;
    342   int read_buffer_len_;
    343 
    344   // Used by HandleResponseIfNecessary to track whether we've sent the
    345   // OnResponseStarted callback and potentially redirect callbacks as well.
    346   bool has_handled_response_;
    347 
    348   // Expected content size
    349   int64 expected_content_size_;
    350 
    351   // Set when a redirect is deferred.
    352   GURL deferred_redirect_url_;
    353   int deferred_redirect_status_code_;
    354 
    355   //----------------------------------------------------------------------------
    356   // Data used for statistics gathering in some instances.  This data is only
    357   // used for histograms etc., and is not required.  It is optionally gathered
    358   // based on the settings of several control variables.
    359 
    360   // Enable recording of packet arrival times for histogramming.
    361   bool packet_timing_enabled_;
    362 
    363   // TODO(jar): improve the quality of the gathered info by gathering most times
    364   // at a lower point in the network stack, assuring we have actual packet
    365   // boundaries, rather than approximations.  Also note that input byte count
    366   // as gathered here is post-SSL, and post-cache-fetch, and does not reflect
    367   // true packet arrival times in such cases.
    368 
    369   // Total number of bytes read from network (or cache) and typically handed
    370   // to filter to process.  Used to histogram compression ratios, and error
    371   // recovery scenarios in filters.
    372   int64 filter_input_byte_count_;
    373 
    374   // The number of bytes that have been accounted for in packets (where some of
    375   // those packets may possibly have had their time of arrival recorded).
    376   int64 bytes_observed_in_packets_;
    377 
    378   // Limit on the size of the array packet_times_.  This can be set to
    379   // zero, and then no packet times will be gathered.
    380   size_t max_packets_timed_;
    381 
    382   // Arrival times for some of the first few packets.
    383   std::vector<base::Time> packet_times_;
    384 
    385   // The request time may not be available when we are being destroyed, so we
    386   // snapshot it early on.
    387   base::Time request_time_snapshot_;
    388 
    389   // Since we don't save all packet times in packet_times_, we save the
    390   // last time for use in histograms.
    391   base::Time final_packet_time_;
    392 
    393   // The count of the number of packets, some of which may not have been timed.
    394   // We're ignoring overflow, as 1430 x 2^31 is a LOT of bytes.
    395   int observed_packet_count_;
    396 
    397   DISALLOW_COPY_AND_ASSIGN(URLRequestJob);
    398 };
    399 
    400 #endif  // NET_URL_REQUEST_URL_REQUEST_JOB_H_
    401