Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Helper class which handles communication with the SafeBrowsing backends for
      6 // client-side phishing detection.  This class can be used to get a file
      7 // descriptor to the client-side phishing model and also to send a ping back to
      8 // Google to verify if a particular site is really phishing or not.
      9 //
     10 // This class is not thread-safe and expects all calls to GetModelFile() and
     11 // SendClientReportPhishingRequest() to be made on the UI thread.  We also
     12 // expect that the calling thread runs a message loop and that there is a FILE
     13 // thread running to execute asynchronous file operations.
     14 
     15 #ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
     16 #define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
     17 #pragma once
     18 
     19 #include <map>
     20 #include <queue>
     21 #include <string>
     22 #include <utility>
     23 #include <vector>
     24 
     25 #include "base/basictypes.h"
     26 #include "base/callback.h"
     27 #include "base/file_path.h"
     28 #include "base/gtest_prod_util.h"
     29 #include "base/memory/linked_ptr.h"
     30 #include "base/memory/ref_counted.h"
     31 #include "base/memory/scoped_callback_factory.h"
     32 #include "base/memory/scoped_ptr.h"
     33 #include "base/platform_file.h"
     34 #include "base/task.h"
     35 #include "base/time.h"
     36 #include "chrome/common/net/url_fetcher.h"
     37 #include "googleurl/src/gurl.h"
     38 #include "net/base/net_util.h"
     39 
     40 namespace net {
     41 class URLRequestContextGetter;
     42 class URLRequestStatus;
     43 }  // namespace net
     44 
     45 namespace safe_browsing {
     46 class ClientPhishingRequest;
     47 
     48 class ClientSideDetectionService : public URLFetcher::Delegate {
     49  public:
          // Callback type accepted by GetModelFile(); it receives the platform file
          // of the model (or kInvalidPlatformFileValue on error, see GetModelFile()).
     50   typedef Callback1<base::PlatformFile>::Type OpenModelDoneCallback;
     51 
          // Callback type accepted by SendClientReportPhishingRequest(); it receives
          // the phishing URL and the "is phishing" verdict.
     52   typedef Callback2<GURL /* phishing URL */, bool /* is phishing */>::Type
     53       ClientReportPhishingRequestCallback;
     54 
     55   virtual ~ClientSideDetectionService();
     56 
     57   // Creates a client-side detection service and starts fetching the client-side
     58   // detection model if necessary.  The model will be stored in |model_path|.
     59   // The caller takes ownership of the object.  This function may return NULL.
     60   static ClientSideDetectionService* Create(
     61       const FilePath& model_path,
     62       net::URLRequestContextGetter* request_context_getter);
     63 
     64   // From the URLFetcher::Delegate interface.
     65   virtual void OnURLFetchComplete(const URLFetcher* source,
     66                                   const GURL& url,
     67                                   const net::URLRequestStatus& status,
     68                                   int response_code,
     69                                   const ResponseCookies& cookies,
     70                                   const std::string& data);
     71 
     72   // Gets the model file descriptor once the model is ready and stored
     73   // on disk.  If there was an error the callback is called and the
     74   // platform file is set to kInvalidPlatformFileValue. The
     75   // ClientSideDetectionService takes ownership of the |callback|.
     76   // The callback is always called after GetModelFile() returns and on the
     77   // same thread as GetModelFile() was called.
     78   void GetModelFile(OpenModelDoneCallback* callback);
     79 
     80   // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest.
     81   // The URL scheme of the |url()| in the request should be HTTP.  This method
     82   // takes ownership of the |verdict| as well as the |callback| and calls the
     83   // callback once the result has come back from the server or if an error
     84   // occurs during the fetch.  If an error occurs the phishing verdict will
     85   // always be false.  The callback is always called after
     86   // SendClientReportPhishingRequest() returns and on the same thread as
     87   // SendClientReportPhishingRequest() was called.
     88   virtual void SendClientReportPhishingRequest(
     89       ClientPhishingRequest* verdict,
     90       ClientReportPhishingRequestCallback* callback);
     91 
     92   // Returns true if the given IP address string falls within a private
     93   // (unroutable) network block.  Pages which are hosted on these IP addresses
     94   // are exempt from client-side phishing detection.  This is called by the
     95   // ClientSideDetectionHost prior to sending the renderer a
     96   // SafeBrowsingMsg_StartPhishingDetection IPC.
     97   //
     98   // ip_address should be a dotted IPv4 address, or an unbracketed IPv6
     99   // address.
    100   virtual bool IsPrivateIPAddress(const std::string& ip_address) const;
    101 
    102   // Returns true and sets |is_phishing| if |url| is in the cache and valid.
    103   virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing);
    104 
    105   // Returns true if the |url| is in the cache.
    106   virtual bool IsInCache(const GURL& url);
    107 
    108   // Returns true if we have sent more than kMaxReportsPerInterval in the last
    109   // kReportsInterval.
    110   virtual bool OverReportLimit();
    111 
    112  protected:
    113   // Use Create() method to create an instance of this object.
    114   ClientSideDetectionService(
    115       const FilePath& model_path,
    116       net::URLRequestContextGetter* request_context_getter);
    117 
    118  private:
          // Gives ClientSideDetectionServiceTest access to the private members below.
    119   friend class ClientSideDetectionServiceTest;
    120 
    121   enum ModelStatus {
    122     // It's unclear whether or not the model was already fetched.
    123     UNKNOWN_STATUS,
    124     // Model is fetched and is stored on disk.
    125     READY_STATUS,
    126     // Error occurred during fetching or writing.
    127     ERROR_STATUS,
    128   };
    129 
    130   // CacheState holds all information necessary to respond to a caller without
    131   // actually making a HTTP request.
    132   struct CacheState {
            // The cached phishing verdict.
    133     bool is_phishing;
            // NOTE(review): presumably the time at which the verdict was cached, used
            // together with the k*CacheInterval constants -- confirm in the .cc file.
    134     base::Time timestamp;
    135 
    136     CacheState(bool phish, base::Time time);
    137   };
          // Maps a URL to its cached state.
    138   typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;
    139 
    140   // A tuple of (IP address block, prefix size) representing a private
    141   // IP address range.
    142   typedef std::pair<net::IPAddressNumber, size_t> AddressRange;
    143 
          // Service URLs plus the report rate-limit and cache-lifetime parameters.
          // Only declared here; the values are defined outside this header.
    144   static const char kClientReportPhishingUrl[];
    145   static const char kClientModelUrl[];
    146   static const int kMaxReportsPerInterval;
    147   static const base::TimeDelta kReportsInterval;
    148   static const base::TimeDelta kNegativeCacheInterval;
    149   static const base::TimeDelta kPositiveCacheInterval;
    150 
    151   // Sets the model status and invokes all the pending callbacks in
    152   // |open_callbacks_| with the current |model_file_| as parameter.
    153   void SetModelStatus(ModelStatus status);
    154 
    155   // Called once the initial open() of the model file is done.  If the file
    156   // exists we're done and we can call all the pending callbacks.  If the
    157   // file doesn't exist this method will asynchronously fetch the model
    158   // from the server by invoking StartFetchingModel().
    159   void OpenModelFileDone(base::PlatformFileError error_code,
    160                          base::PassPlatformFile file,
    161                          bool created);
    162 
    163   // Callback that is invoked once the attempt to create the model
    164   // file on disk is done.  If the file was created successfully we
    165   // start writing the model to disk (asynchronously).  Otherwise, we
    166   // give up and send an invalid platform file to all the pending callbacks.
    167   void CreateModelFileDone(base::PlatformFileError error_code,
    168                            base::PassPlatformFile file,
    169                            bool created);
    170 
    171   // Callback is invoked once we're done writing the model file to disk.
    172   // If everything went well then |model_file_| is a valid platform file
    173   // that can be sent to all the pending callbacks.  If an error occurs
    174   // we give up and send an invalid platform file to all the pending callbacks.
    175   void WriteModelFileDone(base::PlatformFileError error_code,
    176                           int bytes_written);
    177 
    178   // Helper function which closes the |model_file_| if necessary.
    179   void CloseModelFile();
    180 
    181   // Starts sending the request to the client-side detection frontends.
    182   // This method takes ownership of both pointers.
    183   void StartClientReportPhishingRequest(
    184       ClientPhishingRequest* verdict,
    185       ClientReportPhishingRequestCallback* callback);
    186 
    187   // Starts getting the model file.
    188   void StartGetModelFile(OpenModelDoneCallback* callback);
    189 
    190   // Called by OnURLFetchComplete to handle the response from fetching the
    191   // model.
    192   void HandleModelResponse(const URLFetcher* source,
    193                            const GURL& url,
    194                            const net::URLRequestStatus& status,
    195                            int response_code,
    196                            const ResponseCookies& cookies,
    197                            const std::string& data);
    198 
    199   // Called by OnURLFetchComplete to handle the server response from
    200   // sending the client-side phishing request.
    201   void HandlePhishingVerdict(const URLFetcher* source,
    202                              const GURL& url,
    203                              const net::URLRequestStatus& status,
    204                              int response_code,
    205                              const ResponseCookies& cookies,
    206                              const std::string& data);
    207 
    208   // Invalidate cache results which are no longer useful.
    209   void UpdateCache();
    210 
    211   // Get the number of phishing reports that we have sent over kReportsInterval.
    212   int GetNumReports();
    213 
    214   // Initializes the |private_networks_| vector with the network blocks
    215   // that we consider non-public IP addresses.  Returns true on success.
    216   bool InitializePrivateNetworks();
    217 
          // State of the client-side model.  |model_file_| is what SetModelStatus()
          // hands to the pending callbacks stored in |open_callbacks_|.
          // NOTE(review): |model_path_| is presumably the |model_path| given to the
          // constructor, |model_fetcher_| the fetcher used for the model download and
          // |tmp_model_string_| the downloaded model data awaiting the write to disk
          // -- confirm in the .cc file.
    218   FilePath model_path_;
    219   ModelStatus model_status_;
    220   base::PlatformFile model_file_;
    221   scoped_ptr<URLFetcher> model_fetcher_;
    222   scoped_ptr<std::string> tmp_model_string_;
    223   std::vector<OpenModelDoneCallback*> open_callbacks_;
    224 
    225   // Map of client report phishing request to the corresponding callback that
    226   // has to be invoked when the request is done.
    227   struct ClientReportInfo;
    228   std::map<const URLFetcher*, ClientReportInfo*> client_phishing_reports_;
    229 
    230   // Cache of completed requests. Used to satisfy requests for the same urls
    231   // as long as the next request falls within our caching window (which is
    232   // determined by kNegativeCacheInterval and kPositiveCacheInterval). The
    233   // size of this cache is limited by kMaxReportsPerInterval *
    234   // ceil(InDays(max(kNegativeCacheInterval, kPositiveCacheInterval))).
          // NOTE(review): this bound was originally written in terms of
          // kMaxReportsPerDay, which is not declared in this class; it only holds as
          // stated if kReportsInterval is one day -- confirm in the .cc file.
    235   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
    236   PhishingCache cache_;
    237 
    238   // Timestamp of when we sent a phishing request. Used to limit the number
    239   // of phishing requests that we send per kReportsInterval (see
          // OverReportLimit()).
    240   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
    241   std::queue<base::Time> phishing_report_times_;
    242 
    243   // Used to asynchronously call the callbacks for GetModelFile and
    244   // SendClientReportPhishingRequest.
    245   ScopedRunnableMethodFactory<ClientSideDetectionService> method_factory_;
    246 
    247   // The client-side detection service object (this) might go away before some
    248   // of the callbacks are done (e.g., asynchronous file operations).  The
    249   // callback factory will revoke all pending callbacks if this goes away to
    250   // avoid a crash.
    251   base::ScopedCallbackFactory<ClientSideDetectionService> callback_factory_;
    252 
    253   // The context we use to issue network requests.
    254   scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
    255 
    256   // The network blocks that we consider private IP address ranges.
    257   std::vector<AddressRange> private_networks_;
    258 
    259   DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService);
    260 };
    261 
    262 }  // namespace safe_browsing
    263 
    264 #endif  // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
    265