Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Helper class which handles communication with the SafeBrowsing backends for
      6 // client-side phishing detection.  This class is used to fetch the client-side
      7 // model and send it to all renderers.  This class is also used to send a ping
      8 // back to Google to verify if a particular site is really phishing or not.
      9 //
     10 // This class is not thread-safe and expects all calls to be made on the UI
     11 // thread.  We also expect that the calling thread runs a message loop.
     12 
     13 #ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
     14 #define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
     15 
     16 #include <map>
     17 #include <queue>
     18 #include <set>
     19 #include <string>
     20 #include <utility>
     21 #include <vector>
     22 
     23 #include "base/basictypes.h"
     24 #include "base/callback_forward.h"
     25 #include "base/gtest_prod_util.h"
     26 #include "base/memory/linked_ptr.h"
     27 #include "base/memory/ref_counted.h"
     28 #include "base/memory/scoped_ptr.h"
     29 #include "base/memory/weak_ptr.h"
     30 #include "base/time/time.h"
     31 #include "content/public/browser/notification_observer.h"
     32 #include "content/public/browser/notification_registrar.h"
     33 #include "net/base/net_util.h"
     34 #include "net/url_request/url_fetcher_delegate.h"
     35 #include "url/gurl.h"
     36 
     37 class SafeBrowsingService;
     38 
     39 namespace base {
     40 class TimeDelta;
     41 }  // namespace base
     42 
     43 namespace content {
     44 class RenderProcessHost;
     45 }  // namespace content
     46 
     47 namespace net {
     48 class URLFetcher;
     49 class URLRequestContextGetter;
     50 class URLRequestStatus;
     51 typedef std::vector<std::string> ResponseCookies;  // Used by Handle*().
     52 }  // namespace net
     53 
     54 namespace safe_browsing {
     55 class ClientMalwareRequest;
     56 class ClientPhishingRequest;
     57 class ClientPhishingResponse;
     58 class ClientSideModel;
     59 
     60 class ClientSideDetectionService : public net::URLFetcherDelegate,
     61                                    public content::NotificationObserver {
     62  public:
     63   // void(GURL phishing_url, bool is_phishing).
     64   typedef base::Callback<void(GURL, bool)> ClientReportPhishingRequestCallback;
     65   // void(GURL original_url, GURL malware_url, bool is_malware).
     66   typedef base::Callback<void(GURL, GURL, bool)>
     67       ClientReportMalwareRequestCallback;
     68 
     69   virtual ~ClientSideDetectionService();
     70 
     71   // Creates a client-side detection service.  The service is initially
     72   // disabled, use SetEnabledAndRefreshState() to start it.  The caller takes
     73   // ownership of the object.  This function may return NULL.
     74   static ClientSideDetectionService* Create(
     75       net::URLRequestContextGetter* request_context_getter);
     76 
     77   // Enables or disables the service, and refreshes the state of all renderers.
     78   // This is usually called by the SafeBrowsingService, which tracks whether
     79   // any profile uses these services at all.  Disabling cancels any pending
     80   // requests; existing ClientSideDetectionHosts will have their callbacks
     81   // called with "false" verdicts.  Enabling starts downloading the model after
     82   // a delay.  In all cases, each render process is updated to match the state
     83   // of the SafeBrowsing preference for that profile.
     84   void SetEnabledAndRefreshState(bool enabled);
     85 
     86   bool enabled() const {
     87     return enabled_;
     88   }
     89 
     90   // From the net::URLFetcherDelegate interface.
     91   virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
     92 
     93   // content::NotificationObserver overrides:
     94   virtual void Observe(int type,
     95                        const content::NotificationSource& source,
     96                        const content::NotificationDetails& details) OVERRIDE;
     97 
     98   // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest.
     99   // The URL scheme of the |url()| in the request should be HTTP.  This method
    100   // takes ownership of the |verdict| as well as the |callback| and calls the
    101   // callback once the result has come back from the server or if an error
    102   // occurs during the fetch.  If the service is disabled or an error occurs
    103   // the phishing verdict will always be false.  The callback is always called
    104   // after SendClientReportPhishingRequest() returns and on the same thread as
    105   // SendClientReportPhishingRequest() was called.  You may set |callback| to
    106   // NULL if you don't care about the server verdict.
    107   virtual void SendClientReportPhishingRequest(
    108       ClientPhishingRequest* verdict,
    109       const ClientReportPhishingRequestCallback& callback);
    110 
    111   // Same as above, but sends a ClientMalwareRequest instead.
    112   virtual void SendClientReportMalwareRequest(
    113       ClientMalwareRequest* verdict,
    114       const ClientReportMalwareRequestCallback& callback);
    115 
    116   // Returns true if the given IP address string falls within a private
    117   // (unroutable) network block.  Pages which are hosted on these IP addresses
    118   // are exempt from client-side phishing detection.  This is called by the
    119   // ClientSideDetectionHost prior to sending the renderer a
    120   // SafeBrowsingMsg_StartPhishingDetection IPC.
    121   //
    122   // ip_address should be a dotted IPv4 address, or an unbracketed IPv6
    123   // address.
    124   virtual bool IsPrivateIPAddress(const std::string& ip_address) const;
    125 
    126   // Returns true and sets is_phishing if url is in the cache and valid.
    127   virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing);
    128 
    129   // Returns true if the url is in the cache.
    130   virtual bool IsInCache(const GURL& url);
    131 
    132   // Returns true if we have sent more than kMaxReportsPerInterval phishing
    133   // reports in the last kReportsIntervalDays days.
    134   virtual bool OverPhishingReportLimit();
    135 
    136   // Returns true if we have sent more than kMaxReportsPerInterval malware
    137   // reports in the last kReportsIntervalDays days.
    138   virtual bool OverMalwareReportLimit();
    139 
    140  protected:
    141   // Use Create() method to create an instance of this object.
    142   explicit ClientSideDetectionService(
    143       net::URLRequestContextGetter* request_context_getter);
    144 
    145   // Enum used to keep stats about why we fail to get the client model.
    146   enum ClientModelStatus {
    147     MODEL_SUCCESS,
    148     MODEL_NOT_CHANGED,
    149     MODEL_FETCH_FAILED,
    150     MODEL_EMPTY,
    151     MODEL_TOO_LARGE,
    152     MODEL_PARSE_ERROR,
    153     MODEL_MISSING_FIELDS,
    154     MODEL_INVALID_VERSION_NUMBER,
    155     MODEL_BAD_HASH_IDS,
    156     MODEL_STATUS_MAX  // Always add new values before this one.
    157   };
    158 
    159   // Starts fetching the model from the network or the cache.  This method
    160   // is called periodically to check whether a new client model is available
    161   // for download.
    162   void StartFetchModel();
    163 
    164   // Schedules the next fetch of the model.
    165   virtual void ScheduleFetchModel(int64 delay_ms);  // Virtual for testing.
    166 
    167   // This method is called when we're done fetching the model either because
    168   // we hit an error somewhere or because we're actually done fetching and
    169   // validating the model.
    170   virtual void EndFetchModel(ClientModelStatus status);  // Virtual for testing.
    171 
    172  private:
    173   friend class ClientSideDetectionServiceTest;
    174   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, FetchModelTest);
    175   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, SetBadSubnets);
    176   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
    177                            SetEnabledAndRefreshState);
    178   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress);
    179   FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
    180                            ModelHasValidHashIds);
    181 
    182   // CacheState holds all information necessary to respond to a caller without
    183   // actually making an HTTP request.
    184   struct CacheState {
    185     bool is_phishing;
    186     base::Time timestamp;
    187 
    188     CacheState(bool phish, base::Time time);
    189   };
    190   typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;
    191 
    192   // A tuple of (IP address block, prefix size) representing a private
    193   // IP address range.
    194   typedef std::pair<net::IPAddressNumber, size_t> AddressRange;
    195 
    196   // Maps an IPv6 subnet mask to a set of hashed IPv6 subnets.  The IPv6
    197   // subnets are in network order and hashed with sha256.
    198   typedef std::map<std::string /* subnet mask */,
    199                    std::set<std::string /* hashed subnet */> > BadSubnetMap;
    200 
    201   static const char kClientReportMalwareUrl[];
    202   static const char kClientReportPhishingUrl[];
    203   static const char kClientModelUrl[];
    204   static const size_t kMaxModelSizeBytes;
    205   static const int kMaxReportsPerInterval;
    206   static const int kClientModelFetchIntervalMs;
    207   static const int kInitialClientModelFetchDelayMs;
    208   static const int kReportsIntervalDays;
    209   static const int kNegativeCacheIntervalDays;
    210   static const int kPositiveCacheIntervalMinutes;
    211 
    212   // Starts sending the request to the client-side detection frontends.
    213   // Takes ownership of |verdict|; |callback| is passed by const reference.
    214   void StartClientReportPhishingRequest(
    215       ClientPhishingRequest* verdict,
    216       const ClientReportPhishingRequestCallback& callback);
    217 
    218   void StartClientReportMalwareRequest(
    219       ClientMalwareRequest* verdict,
    220       const ClientReportMalwareRequestCallback& callback);
    221 
    222   // Called by OnURLFetchComplete to handle the response from fetching the
    223   // model.
    224   void HandleModelResponse(const net::URLFetcher* source,
    225                            const GURL& url,
    226                            const net::URLRequestStatus& status,
    227                            int response_code,
    228                            const net::ResponseCookies& cookies,
    229                            const std::string& data);
    230 
    231   // Called by OnURLFetchComplete to handle the server response from
    232   // sending the client-side phishing request.
    233   void HandlePhishingVerdict(const net::URLFetcher* source,
    234                              const GURL& url,
    235                              const net::URLRequestStatus& status,
    236                              int response_code,
    237                              const net::ResponseCookies& cookies,
    238                              const std::string& data);
    239 
    240   // Called by OnURLFetchComplete to handle the server response from
    241   // sending the client-side malware request.
    242   void HandleMalwareVerdict(const net::URLFetcher* source,
    243                             const GURL& url,
    244                             const net::URLRequestStatus& status,
    245                             int response_code,
    246                             const net::ResponseCookies& cookies,
    247                             const std::string& data);
    248 
    249   // Invalidate cache results which are no longer useful.
    250   void UpdateCache();
    251 
    252   // Number of malware reports sent within the last kReportsIntervalDays days.
    253   int GetMalwareNumReports();
    254 
    255   // Number of phishing reports sent within the last kReportsIntervalDays days.
    256   int GetPhishingNumReports();
    257 
    258   // Get the number of reports that we have sent over kReportsIntervalDays,
    259   // and trims off the old elements of |report_times|.
    260   int GetNumReports(std::queue<base::Time>* report_times);
    261 
    262   // Initializes the |private_networks_| vector with the network blocks
    263   // that we consider non-public IP addresses.  Returns true on success.
    264   bool InitializePrivateNetworks();
    265 
    266   // Send the model to the given renderer.
    267   void SendModelToProcess(content::RenderProcessHost* process);
    268 
    269   // Same as above but sends the model to all renderers.
    270   void SendModelToRenderers();
    271 
    272   // Reads the bad subnets from the client model and inserts them into
    273   // |bad_subnets| for faster lookups.  This method is static to simplify
    274   // testing.
    275   static void SetBadSubnets(const ClientSideModel& model,
    276                             BadSubnetMap* bad_subnets);
    277 
    278 
    279   // Returns true iff all the hash ids in the client-side model point to
    280   // valid hashes in the model.
    281   static bool ModelHasValidHashIds(const ClientSideModel& model);
    282 
    283   // Returns the URL that will be used for phishing requests.
    284   static GURL GetClientReportUrl(const std::string& report_url);
    285 
    286   // Whether the service is running or not.  When the service is not running,
    287   // it won't download the model nor report detected phishing URLs.
    288   bool enabled_;
    289 
    290   std::string model_str_;
    291   scoped_ptr<ClientSideModel> model_;
    292   scoped_ptr<base::TimeDelta> model_max_age_;
    293   scoped_ptr<net::URLFetcher> model_fetcher_;
    294 
    295   // Map of client report phishing request to the corresponding callback that
    296   // has to be invoked when the request is done.
    297   struct ClientReportInfo;
    298   std::map<const net::URLFetcher*, ClientReportInfo*>
    299       client_phishing_reports_;
    300   // Map of client malware ip request to the corresponding callback that
    301   // has to be invoked when the request is done.
    302   struct ClientMalwareReportInfo;
    303   std::map<const net::URLFetcher*, ClientMalwareReportInfo*>
    304       client_malware_reports_;
    305 
    306   // Cache of completed requests. Used to satisfy requests for the same urls
    307   // as long as the next request falls within our caching window (see
    308   // kNegativeCacheIntervalDays and kPositiveCacheIntervalMinutes).  The size
    309   // of this cache is limited by the report rate limit: kMaxReportsPerInterval
    310   // per kReportsIntervalDays, over the longer of the two caching windows.
    311   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
    312   PhishingCache cache_;
    313 
    314   // Timestamp of when we sent a phishing request. Used to limit the number
    315   // of phishing requests that we send in a day.
    316   // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
    317   std::queue<base::Time> phishing_report_times_;
    318 
    319   // Timestamp of when we sent a malware request. Used to limit the number
    320   // of malware requests that we send in a day.
    321   std::queue<base::Time> malware_report_times_;
    322 
    323   // Used to asynchronously call the callbacks for
    324   // SendClientReportPhishingRequest.
    325   base::WeakPtrFactory<ClientSideDetectionService> weak_factory_;
    326 
    327   // The context we use to issue network requests.
    328   scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
    329 
    330   // The network blocks that we consider private IP address ranges.
    331   std::vector<AddressRange> private_networks_;
    332 
    333   // Map of bad subnets which are copied from the client model and put into
    334   // this map to speed up lookups.
    335   BadSubnetMap bad_subnets_;
    336 
    337   content::NotificationRegistrar registrar_;
    338 
    339   DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService);
    340 };
    341 }  // namespace safe_browsing
    342 
    343 #endif  // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
    344