Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
      6 #define CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
      7 
      8 // A class that implements Chrome's interface with the SafeBrowsing protocol.
      9 // See https://developers.google.com/safe-browsing/developers_guide_v2 for
     10 // protocol details.
     11 //
     12 // The SafeBrowsingProtocolManager handles formatting and making requests of,
     13 // and handling responses from, Google's SafeBrowsing servers. This class uses
     14 // the SafeBrowsingProtocolParser class to do the actual parsing.
     15 
     16 #include <deque>
     17 #include <set>
     18 #include <string>
     19 #include <vector>
     20 
     21 #include "base/containers/hash_tables.h"
     22 #include "base/gtest_prod_util.h"
     23 #include "base/memory/scoped_ptr.h"
     24 #include "base/threading/non_thread_safe.h"
     25 #include "base/time/time.h"
     26 #include "base/timer/timer.h"
     27 #include "chrome/browser/safe_browsing/chunk_range.h"
     28 #include "chrome/browser/safe_browsing/protocol_manager_helper.h"
     29 #include "chrome/browser/safe_browsing/protocol_parser.h"
     30 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
     31 #include "net/url_request/url_fetcher_delegate.h"
     32 #include "url/gurl.h"
     33 
     34 namespace net {
     35 class URLFetcher;
     36 class URLRequestContextGetter;
     37 }  // namespace net
     38 
     39 #if defined(COMPILER_GCC)
     40 // Allows us to use URLFetchers in a hash_map with gcc (MSVC is okay without
     41 // specifying this).
     42 namespace BASE_HASH_NAMESPACE {
        // Hash specialization for net::URLFetcher pointers: the hash value is simply
        // the pointer's address converted to size_t.
     43 template<>
     44 struct hash<const net::URLFetcher*> {
     45   size_t operator()(const net::URLFetcher* fetcher) const {
     46     return reinterpret_cast<size_t>(fetcher);
     47   }
     48 };
     49 }  // namespace BASE_HASH_NAMESPACE
     50 #endif
     51 
     52 class SBProtocolManagerFactory;
     53 class SafeBrowsingProtocolManagerDelegate;
     54 
     55 class SafeBrowsingProtocolManager : public net::URLFetcherDelegate,
     56                                     public base::NonThreadSafe {
     57  public:
     58   // FullHashCallback is invoked when GetFullHash completes.
     59   // Parameters:
     60   //   - The vector of full hash results. If empty, indicates that there
     61   //     were no matches, and that the resource is safe.
     62   //   - The cache lifetime of the result. A lifetime of 0 indicates the results
     63   //     should not be cached.
     64   typedef base::Callback<void(const std::vector<SBFullHashResult>&,
     65                               const base::TimeDelta&)> FullHashCallback;
     66 
     67   virtual ~SafeBrowsingProtocolManager();
     68 
     69   // Makes the passed |factory| the factory used to instantiate
     70   // a SafeBrowsingProtocolManager. Useful for tests.
     71   static void RegisterFactory(SBProtocolManagerFactory* factory) {
     72     factory_ = factory;
     73   }
     74 
     75   // Create an instance of the safe browsing protocol manager.
     76   static SafeBrowsingProtocolManager* Create(
     77       SafeBrowsingProtocolManagerDelegate* delegate,
     78       net::URLRequestContextGetter* request_context_getter,
     79       const SafeBrowsingProtocolConfig& config);
     80 
     81   // Sets up the update schedule and internal state for making periodic requests
     82   // of the Safebrowsing servers.
     83   virtual void Initialize();
     84 
     85   // net::URLFetcherDelegate interface.
     86   virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;
     87 
     88   // Retrieve the full hash for a set of prefixes, and invoke the callback
     89   // argument when the results are retrieved. The callback may be invoked
     90   // synchronously.
     91   virtual void GetFullHash(const std::vector<SBPrefix>& prefixes,
     92                            FullHashCallback callback,
     93                            bool is_download);
     94 
     95   // Forces the start of next update after |interval| time.
     96   void ForceScheduleNextUpdate(base::TimeDelta interval);
     97 
     98   // Scheduled update callback.
     99   void GetNextUpdate();
    100 
    101   // Called by the SafeBrowsingService when our request for a list of all chunks
    102   // for each list is done.  If database_error is true, that means the protocol
    103   // manager shouldn't fetch updates since they can't be written to disk.  It
    104   // should try again later to open the database.
    105   void OnGetChunksComplete(const std::vector<SBListChunkRanges>& list,
    106                            bool database_error);
    107 
    108   // The last time we received an update.
    109   base::Time last_update() const { return last_update_; }
    110 
    111   // Setter for additional_query_. To make sure the additional_query_ won't
    112   // be changed in the middle of an update, caller (e.g.: SafeBrowsingService)
    113   // should call this after callbacks triggered in UpdateFinished() or before
    114   // IssueUpdateRequest().
    115   void set_additional_query(const std::string& query) {
    116     additional_query_ = query;
    117   }
    118   const std::string& additional_query() const {
    119     return additional_query_;
    120   }
    121 
    122   // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
    123   // ORDERING OF THESE VALUES.
    124   enum ResultType {
    125     // 200 response code means that the server recognized the hash
    126     // prefix, while 204 is an empty response indicating that the
    127     // server did not recognize it.
    128     GET_HASH_STATUS_200,
    129     GET_HASH_STATUS_204,
    130 
    131     // Subset of successful responses which returned no full hashes.
    132     // This includes the STATUS_204 case, and the *_ERROR cases.
    133     GET_HASH_FULL_HASH_EMPTY,
    134 
    135     // Subset of successful responses for which one or more of the
    136     // full hashes matched (should lead to an interstitial).
    137     GET_HASH_FULL_HASH_HIT,
    138 
    139     // Subset of successful responses which weren't empty and have no
    140     // matches.  It means that there was a prefix collision which was
    141     // cleared up by the full hashes.
    142     GET_HASH_FULL_HASH_MISS,
    143 
    144     // Subset of successful responses where the response body wasn't parsable.
    145     GET_HASH_PARSE_ERROR,
    146 
    147     // Gethash request failed (network error).
    148     GET_HASH_NETWORK_ERROR,
    149 
    150     // Gethash request returned HTTP result code other than 200 or 204.
    151     GET_HASH_HTTP_ERROR,
    152 
    153     // Gethash attempted during error backoff, no request sent.
    154     GET_HASH_BACKOFF_ERROR,
    155 
    156     // Memory space for histograms is determined by the max.  ALWAYS
    157     // ADD NEW VALUES BEFORE THIS ONE.
    158     GET_HASH_RESULT_MAX
    159   };
    160 
    161   // Record a GetHash result. |is_download| indicates if the get
    162   // hash is triggered by download related lookup.
    163   static void RecordGetHashResult(bool is_download,
    164                                   ResultType result_type);
    165 
    166   // Returns whether another update is currently scheduled.
    167   bool IsUpdateScheduled() const;
    168 
    169   // Called when app changes status of foreground or background.
    170   void SetAppInForeground(bool foreground) {
    171     app_in_foreground_ = foreground;
    172   }
    173 
    174  protected:
    175   // Constructs a SafeBrowsingProtocolManager for |delegate| that issues
    176   // network requests using |request_context_getter|.
    177   SafeBrowsingProtocolManager(
    178       SafeBrowsingProtocolManagerDelegate* delegate,
    179       net::URLRequestContextGetter* request_context_getter,
    180       const SafeBrowsingProtocolConfig& config);
    181 
    182  private:
    183   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestBackOffTimes);
    184   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestChunkStrings);
    185   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestGetHashUrl);
    186   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
    187                            TestGetHashBackOffTimes);
    188   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestNextChunkUrl);
    189   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestUpdateUrl);
    190   friend class SafeBrowsingServerTest;
    191   friend class SBProtocolManagerFactoryImpl;
    192 
    193   // Internal API for fetching information from the SafeBrowsing servers. The
    194   // GetHash requests are higher priority since they can block user requests
    195   // so are handled separately.
    196   enum SafeBrowsingRequestType {
    197     NO_REQUEST = 0,     // No requests in progress
    198     UPDATE_REQUEST,     // Request for redirect URLs
    199     BACKUP_UPDATE_REQUEST, // Request for redirect URLs to a backup URL.
    200     CHUNK_REQUEST,      // Request for a specific chunk
    201   };
    202 
    203   // Which type of backup update request is being used.
    204   enum BackupUpdateReason {
    205     BACKUP_UPDATE_REASON_CONNECT,
    206     BACKUP_UPDATE_REASON_HTTP,
    207     BACKUP_UPDATE_REASON_NETWORK,
    208     BACKUP_UPDATE_REASON_MAX,
    209   };
    210 
    211   // Generates Update URL for querying about the latest set of chunk updates.
    212   GURL UpdateUrl() const;
    213 
    214   // Generates backup Update URL for querying about the latest set of chunk
    215   // updates. |reason| says why the backup is needed (see BackupUpdateReason).
    216   GURL BackupUpdateUrl(BackupUpdateReason reason) const;
    217 
    218   // Generates GetHash request URL for retrieving full hashes.
    219   GURL GetHashUrl() const;
    220   // NOTE(review): orphaned comment; no matching declaration follows it:
          //   "Generates URL for reporting safe browsing hits for UMA users."
    221 
    222   // Composes a ChunkUrl based on input string.
    223   GURL NextChunkUrl(const std::string& input) const;
    224 
    225   // Returns the time for the next update request. If |back_off| is true,
    226   // this increments an error count and returns the appropriate
    227   // next time (see ScheduleNextUpdate below).
    228   base::TimeDelta GetNextUpdateInterval(bool back_off);
    229 
    230   // Worker function for calculating GetHash and Update backoff times (in
    231   // seconds). |multiplier| is doubled for each consecutive error between the
    232   // 2nd and 5th, and |error_count| is incremented with each call.
    233   base::TimeDelta GetNextBackOffInterval(size_t* error_count,
    234                                          size_t* multiplier) const;
    235 
    236   // Manages our update with the next allowable update time. If |back_off| is
    237   // true, we must decrease the frequency of requests of the SafeBrowsing
    238   // service according to section 5 of the protocol specification.
    239   // When disable_auto_update_ is set, ScheduleNextUpdate will do nothing.
    240   // ForceScheduleNextUpdate has to be called to trigger the update.
    241   void ScheduleNextUpdate(bool back_off);
    242 
    243   // Sends a request for a list of chunks we should download to the SafeBrowsing
    244   // servers. In order to format this request, we need to send all the chunk
    245   // numbers for each list that we have to the server. Getting the chunk numbers
    246   // requires a database query (run on the database thread), and the request
    247   // is sent upon completion of that query in OnGetChunksComplete.
    248   void IssueUpdateRequest();
    249 
    250   // Sends a backup request for a list of chunks to download, when the primary
    251   // update request failed. |reason| specifies why the backup is needed. Unlike
    252   // the primary IssueUpdateRequest, this does not need to hit the local
    253   // SafeBrowsing database since the existing chunk numbers are remembered from
    254   // the primary update request. Returns whether the backup request was issued -
    255   // this may be false in cases where there is not a prefix specified.
    256   bool IssueBackupUpdateRequest(BackupUpdateReason reason);
    257 
    258   // Sends a request for a chunk to the SafeBrowsing servers.
    259   void IssueChunkRequest();
    260 
    261   // Runs the protocol parser on received data and updates the
    262   // SafeBrowsingService with the new content. Returns 'true' on successful
    263   // parse, 'false' on error.
    264   bool HandleServiceResponse(const GURL& url, const char* data, size_t length);
    265 
    266   // Updates internal state for each GetHash response error, assuming that the
    267   // current time is |now|.
    268   void HandleGetHashError(const base::Time& now);
    269 
    270   // Helper function for update completion.
    271   void UpdateFinished(bool success);
    272   void UpdateFinished(bool success, bool back_off);
    273 
    274   // A callback that runs if we timeout waiting for a response to an update
    275   // request. We use this to properly set our update state.
    276   void UpdateResponseTimeout();
    277 
    278   // Called after the chunks are added to the database.
    279   void OnAddChunksComplete();
    280 
    281  private:
    282   // Map of GetHash requests to parameters which created it.
    283   struct FullHashDetails {
    284     FullHashDetails();
    285     FullHashDetails(FullHashCallback callback, bool is_download);
    286     ~FullHashDetails();
    287 
    288     FullHashCallback callback;
    289     bool is_download;
    290   };
    291   typedef base::hash_map<const net::URLFetcher*, FullHashDetails> HashRequests;
    292 
    293   // The factory that controls the creation of SafeBrowsingProtocolManager.
    294   // This is used by tests.
    295   static SBProtocolManagerFactory* factory_;
    296 
    297   // Our delegate.
    298   SafeBrowsingProtocolManagerDelegate* delegate_;
    299 
    300   // Current active request (in case we need to cancel) for updates or chunks
    301   // from the SafeBrowsing service. We can only have one of these outstanding
    302   // at any given time unlike GetHash requests, which are tracked separately.
    303   scoped_ptr<net::URLFetcher> request_;
    304 
    305   // The kind of request that is currently in progress.
    306   SafeBrowsingRequestType request_type_;
    307 
    308   // The number of HTTP response errors, used for request backoff timing.
    309   size_t update_error_count_;
    310   size_t gethash_error_count_;
    311 
    312   // Multipliers which double (max == 8) for each error after the second.
    313   size_t update_back_off_mult_;
    314   size_t gethash_back_off_mult_;
    315 
    316   // Multiplier between 0 and 1 to spread clients over an interval.
    317   float back_off_fuzz_;
    318 
    319   // The list for which we are making a request.
    320   std::string list_name_;
    321 
    322   // For managing the next earliest time to query the SafeBrowsing servers for
    323   // updates.
    324   base::TimeDelta next_update_interval_;
    325   base::OneShotTimer<SafeBrowsingProtocolManager> update_timer_;
    326 
    327   // timeout_timer_ is used to interrupt update requests which are taking
    328   // too long.
    329   base::OneShotTimer<SafeBrowsingProtocolManager> timeout_timer_;
    330 
    331   // All chunk requests that need to be made.
    332   std::deque<ChunkUrl> chunk_request_urls_;
    333 
          // GetHash requests, keyed by their URLFetcher (see HashRequests above).
    334   HashRequests hash_requests_;
    335 
    336   // The next scheduled update has special behavior for the first 2 requests.
    337   enum UpdateRequestState {
    338     FIRST_REQUEST = 0,
    339     SECOND_REQUEST,
    340     NORMAL_REQUEST
    341   };
    342   UpdateRequestState update_state_;
    343 
    344   // True if the service has been given an add/sub chunk but it hasn't been
    345   // added to the database yet.
    346   bool chunk_pending_to_write_;
    347 
    348   // The last time we successfully received an update.
    349   base::Time last_update_;
    350 
    351   // While in GetHash backoff, we can't make another GetHash until this time.
    352   base::Time next_gethash_time_;
    353 
    354   // Current product version sent in each request.
    355   std::string version_;
    356 
    357   // Used for measuring chunk request latency.
    358   base::Time chunk_request_start_;
    359 
    360   // Tracks the size of each update (in bytes).
    361   size_t update_size_;
    362 
    363   // The safe browsing client name sent in each request.
    364   std::string client_name_;
    365 
    366   // A string that is appended to the end of URLs for download, gethash,
    367   // safebrowsing hits and chunk update requests.
    368   std::string additional_query_;
    369 
    370   // The context we use to issue network requests.
    371   scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
    372 
    373   // URL prefix where browser fetches safebrowsing chunk updates, and hashes.
    374   std::string url_prefix_;
    375 
    376   // Backup URL prefixes for updates.
    377   std::string backup_url_prefixes_[BACKUP_UPDATE_REASON_MAX];
    378 
    379   // The current reason why the backup update request is happening.
    380   BackupUpdateReason backup_update_reason_;
    381 
    382   // Data to POST when doing an update.
    383   std::string update_list_data_;
    384 
    385   // When true, protocol manager will not start an update unless
    386   // ForceScheduleNextUpdate() is called. This is set for testing purpose.
    387   bool disable_auto_update_;
    388 
    389   // ID for URLFetchers for testing.
    390   int url_fetcher_id_;
    391 
    392   // Whether the app is in foreground or background.
    393   bool app_in_foreground_;
    394 
    395   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingProtocolManager);
    396 };
    397 
    398 // Interface of a factory that creates SafeBrowsingProtocolManager instances.
        // Useful for tests, which can install an implementation through
        // SafeBrowsingProtocolManager::RegisterFactory().
    399 class SBProtocolManagerFactory {
    400  public:
    401   SBProtocolManagerFactory() {}
    402   virtual ~SBProtocolManagerFactory() {}
          // Creates a protocol manager. The parameters mirror those of
          // SafeBrowsingProtocolManager::Create().
    403   virtual SafeBrowsingProtocolManager* CreateProtocolManager(
    404       SafeBrowsingProtocolManagerDelegate* delegate,
    405       net::URLRequestContextGetter* request_context_getter,
    406       const SafeBrowsingProtocolConfig& config) = 0;
    407  private:
    408   DISALLOW_COPY_AND_ASSIGN(SBProtocolManagerFactory);
    409 };
    410 
    411 // Delegate interface for the SafeBrowsingProtocolManager.
    412 class SafeBrowsingProtocolManagerDelegate {
    413  public:
          // Callback handed to GetChunks(). Its signature matches
          // SafeBrowsingProtocolManager::OnGetChunksComplete(), so the bool is
          // presumably the database_error flag -- TODO confirm against the caller.
    414   typedef base::Callback<void(const std::vector<SBListChunkRanges>&, bool)>
    415       GetChunksCallback;
          // Callback handed to AddChunks(); takes no arguments.
    416   typedef base::Callback<void(void)> AddChunksCallback;
    417 
    418   virtual ~SafeBrowsingProtocolManagerDelegate();
    419 
    420   // |UpdateStarted()| is called just before the SafeBrowsing update protocol
    421   // begins.
    422   virtual void UpdateStarted() = 0;
    423 
    424   // |UpdateFinished()| is called just after the SafeBrowsing update protocol
    425   // has completed.
    426   virtual void UpdateFinished(bool success) = 0;
    427 
    428   // Wipe out the local database. The SafeBrowsing server can request this.
    429   virtual void ResetDatabase() = 0;
    430 
    431   // Retrieve all the local database chunks, and invoke |callback| with the
    432   // results. The SafeBrowsingProtocolManagerDelegate must only invoke the
    433   // callback if the SafeBrowsingProtocolManager is still alive. Only one call
    434   // may be made to GetChunks at a time.
    435   virtual void GetChunks(GetChunksCallback callback) = 0;
    436 
    437   // Add new chunks to the database. Invokes |callback| when complete, but must
    438   // invoke it at a later time.
    439   virtual void AddChunks(const std::string& list,
    440                          scoped_ptr<ScopedVector<SBChunkData> > chunks,
    441                          AddChunksCallback callback) = 0;
    442 
    443   // Delete chunks from the database.
    444   virtual void DeleteChunks(
    445       scoped_ptr<std::vector<SBChunkDelete> > chunk_deletes) = 0;
    446 };
    447 
    448 #endif  // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
    449