Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
      6 #define CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
      7 #pragma once
      8 
      9 // A class that implements Chrome's interface with the SafeBrowsing protocol.
     10 // The SafeBrowsingProtocolManager handles formatting and making requests of,
     11 // and handling responses from, Google's SafeBrowsing servers. This class uses
     12 // the SafeBrowsingProtocolParser class to do the actual parsing.
     13 
     14 #include <deque>
     15 #include <set>
     16 #include <string>
     17 #include <vector>
     18 
     19 #include "base/gtest_prod_util.h"
     20 #include "base/hash_tables.h"
     21 #include "base/memory/scoped_ptr.h"
     22 #include "base/time.h"
     23 #include "base/timer.h"
     24 #include "chrome/browser/safe_browsing/chunk_range.h"
     25 #include "chrome/browser/safe_browsing/protocol_parser.h"
     26 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
     27 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
     28 #include "chrome/common/net/url_fetcher.h"
     29 
     30 namespace net {
     31 class URLRequestStatus;
     32 }  // namespace net
     33 
     34 #if defined(COMPILER_GCC)
     35 // Allows us to use URLFetchers in a hash_map with gcc (MSVC is okay without
     36 // specifying this).
     37 namespace __gnu_cxx {
     38 template<>
     39 struct hash<const URLFetcher*> {
          // Hashes a fetcher by identity: the returned value is simply the pointer
          // itself reinterpreted as a size_t. The pointee is never dereferenced.
     40   size_t operator()(const URLFetcher* fetcher) const {
     41     return reinterpret_cast<size_t>(fetcher);
     42   }
     43 };
     44 }  // namespace __gnu_cxx
     45 #endif
     46 
     47 class SafeBrowsingProtocolManager;
     48 // Interface of a factory to create ProtocolManager.  Useful for tests.
     49 class SBProtocolManagerFactory {
     50  public:
     51   SBProtocolManagerFactory() {}
     52   virtual ~SBProtocolManagerFactory() {}
          // Builds a SafeBrowsingProtocolManager. Pure virtual: each concrete factory
          // supplies its own implementation. The parameter list matches
          // SafeBrowsingProtocolManager::Create().
          // NOTE(review): ownership of the returned pointer is not visible from this
          // header -- confirm against callers.
     53   virtual SafeBrowsingProtocolManager* CreateProtocolManager(
     54       SafeBrowsingService* sb_service,
     55       const std::string& client_name,
     56       const std::string& client_key,
     57       const std::string& wrapped_key,
     58       net::URLRequestContextGetter* request_context_getter,
     59       const std::string& info_url_prefix,
     60       const std::string& mackey_url_prefix,
     61       bool disable_auto_update) = 0;
     62  private:
          // Factories are neither copyable nor assignable.
     63   DISALLOW_COPY_AND_ASSIGN(SBProtocolManagerFactory);
     64 };
     65 
     66 class SafeBrowsingProtocolManager : public URLFetcher::Delegate {
          // Tests that are granted access to this class's non-public members.
     67   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestBackOffTimes);
     68   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestChunkStrings);
     69   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestGetHashUrl);
     70   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
     71                            TestGetHashBackOffTimes);
     72   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestMacKeyUrl);
     73   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
     74                            TestSafeBrowsingHitUrl);
     75   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest,
     76                            TestMalwareDetailsUrl);
     77   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestNextChunkUrl);
     78   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingProtocolManagerTest, TestUpdateUrl);
     79   friend class SafeBrowsingServiceTest;
     80 
     81  public:
     82   virtual ~SafeBrowsingProtocolManager();
     83 
     84   // Makes the passed |factory| the factory used to instantiate
     85   // a SafeBrowsingProtocolManager. Useful for tests.
     86   static void RegisterFactory(SBProtocolManagerFactory* factory) {
     87     factory_ = factory;
     88   }
     89 
     90   // Creates an instance of the protocol manager.
     91   static SafeBrowsingProtocolManager* Create(
     92       SafeBrowsingService* sb_service,
     93       const std::string& client_name,
     94       const std::string& client_key,
     95       const std::string& wrapped_key,
     96       net::URLRequestContextGetter* request_context_getter,
     97       const std::string& info_url_prefix,
     98       const std::string& mackey_url_prefix,
     99       bool disable_auto_update);
    100 
    101   // Sets up the update schedule and internal state for making periodic requests
    102   // of the SafeBrowsing service.
    103   virtual void Initialize();
    104 
    105   // URLFetcher::Delegate interface.
    106   virtual void OnURLFetchComplete(const URLFetcher* source,
    107                                   const GURL& url,
    108                                   const net::URLRequestStatus& status,
    109                                   int response_code,
    110                                   const ResponseCookies& cookies,
    111                                   const std::string& data);
    112 
    113   // API used by the SafeBrowsingService for issuing queries. When the results
    114   // are available, SafeBrowsingService::HandleGetHashResults is called.
    115   virtual void GetFullHash(SafeBrowsingService::SafeBrowsingCheck* check,
    116                            const std::vector<SBPrefix>& prefixes);
    117 
    118   // Forces the start of next update after |next_update_msec| in msec.
    119   void ForceScheduleNextUpdate(int next_update_msec);
    120 
    121   // Scheduled update callback.
    122   void GetNextUpdate();
    123 
    124   // Called by the SafeBrowsingService when our request for a list of all chunks
    125   // for each list is done.  If database_error is true, that means the protocol
    126   // manager shouldn't fetch updates since they can't be written to disk.  It
    127   // should try again later to open the database.
    128   void OnGetChunksComplete(const std::vector<SBListChunkRanges>& list,
    129                            bool database_error);
    130 
    131   // Called after the chunks that were parsed were inserted in the database.
    132   void OnChunkInserted();
    133 
    134   // For UMA users we report to Google when a SafeBrowsing interstitial is shown
    135   // to the user.  |threat_type| should be one of the types known by
    136   // SafeBrowsingHitUrl.
    137   void ReportSafeBrowsingHit(const GURL& malicious_url,
    138                              const GURL& page_url,
    139                              const GURL& referrer_url,
    140                              bool is_subresource,
    141                              SafeBrowsingService::UrlCheckResult threat_type,
    142                              const std::string& post_data);
    143 
    144   // Users can opt-in on the SafeBrowsing interstitial to send detailed
    145   // malware reports. |report| is the serialized report.
    146   void ReportMalwareDetails(const std::string& report);
    147 
    148   bool is_initial_request() const { return initial_request_; }
    149 
    150   // The last time we received an update.
    151   base::Time last_update() const { return last_update_; }
    152 
    153   // Setter for additional_query_. To make sure the additional_query_ won't
    154   // be changed in the middle of an update, caller (e.g.: SafeBrowsingService)
    155   // should call this after callbacks triggered in UpdateFinished() or before
    156   // IssueUpdateRequest().
    157   void set_additional_query(const std::string& query) {
    158     additional_query_ = query;
    159   }
    160   const std::string& additional_query() const {
    161     return additional_query_;
    162   }
    163 
    164   // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
    165   // ORDERING OF THESE VALUES.
    166   enum ResultType {
    167     // 200 response code means that the server recognized the hash
    168     // prefix, while 204 is an empty response indicating that the
    169     // server did not recognize it.
    170     GET_HASH_STATUS_200,
    171     GET_HASH_STATUS_204,
    172 
    173     // Subset of successful responses which returned no full hashes.
    174     // This includes the 204 case, and also 200 responses for stale
    175     // prefixes (deleted at the server but not yet deleted on the client).
    176     GET_HASH_FULL_HASH_EMPTY,
    177 
    178     // Subset of successful responses for which one or more of the
    179     // full hashes matched (should lead to an interstitial).
    180     GET_HASH_FULL_HASH_HIT,
    181 
    182     // Subset of successful responses which weren't empty and have no
    183     // matches.  It means that there was a prefix collision which was
    184     // cleared up by the full hashes.
    185     GET_HASH_FULL_HASH_MISS,
    186 
    187     // Memory space for histograms is determined by the max.  ALWAYS
    188     // ADD NEW VALUES BEFORE THIS ONE.
    189     GET_HASH_RESULT_MAX
    190   };
    191 
    192   // Record a GetHash result. |is_download| indicates if the get
    193   // hash is triggered by download related lookup.
    194   static void RecordGetHashResult(bool is_download,
    195                                   ResultType result_type);
    196 
    197  protected:
    198   // Constructs a SafeBrowsingProtocolManager for |sb_service| that issues
    199   // network requests using |request_context_getter|. When |disable_auto_update|
    200   // is true, protocol manager won't schedule next update until
    201   // ForceScheduleNextUpdate is called.
    202   SafeBrowsingProtocolManager(
    203       SafeBrowsingService* sb_service,
    204       const std::string& client_name,
    205       const std::string& client_key,
    206       const std::string& wrapped_key,
    207       net::URLRequestContextGetter* request_context_getter,
    208       const std::string& http_url_prefix,
    209       const std::string& https_url_prefix,
    210       bool disable_auto_update);
    211  private:
    212   friend class SBProtocolManagerFactoryImpl;
    213 
    214   // Internal API for fetching information from the SafeBrowsing servers. The
    215   // GetHash requests are higher priority since they can block user requests
    216   // so are handled separately.
    217   enum SafeBrowsingRequestType {
    218     NO_REQUEST = 0,     // No requests in progress
    219     UPDATE_REQUEST,     // Request for redirect URLs
    220     CHUNK_REQUEST,      // Request for a specific chunk
    221     GETKEY_REQUEST      // Update the client's MAC key
    222   };
    223 
    224   // Composes a URL using |prefix|, |method| (e.g.: gethash, download,
    225   // newkey, report), |client_name| and |version|. When not empty,
    226   // |additional_query| is appended to the URL with an additional "&"
    227   // in the front.
    228   static std::string ComposeUrl(const std::string& prefix,
    229                                 const std::string& method,
    230                                 const std::string& client_name,
    231                                 const std::string& version,
    232                                 const std::string& additional_query);
    233 
    234   // Generates Update URL for querying about the latest set of chunk updates.
    235   // Append "wrkey=xxx" to the URL when |use_mac| is true.
    236   GURL UpdateUrl(bool use_mac) const;
    237   // Generates GetHash request URL for retrieving full hashes.
    238   // Append "wrkey=xxx" to the URL when |use_mac| is true.
    239   GURL GetHashUrl(bool use_mac) const;
    240   // Generates new MAC client key request URL.
    241   GURL MacKeyUrl() const;
    242   // Generates URL for reporting safe browsing hits for UMA users.
    243   GURL SafeBrowsingHitUrl(
    244       const GURL& malicious_url, const GURL& page_url, const GURL& referrer_url,
    245       bool is_subresource,
    246       SafeBrowsingService::UrlCheckResult threat_type) const;
    247   // Generates URL for reporting malware details for users who opt-in.
    248   GURL MalwareDetailsUrl() const;
    249 
    250   // Composes a ChunkUrl based on input string.
    251   GURL NextChunkUrl(const std::string& input) const;
    252 
    253   // Returns the time (in milliseconds) for the next update request. If
    254   // 'back_off' is true, the time returned will increment an error count and
    255   // return the appropriate next time (see ScheduleNextUpdate below).
    256   int GetNextUpdateTime(bool back_off);
    257 
    258   // Worker function for calculating GetHash and Update backoff times (in
    259   // seconds). 'Multiplier' is doubled for each consecutive error between the
    260   // 2nd and 5th, and 'error_count' is incremented with each call.
    261   int GetNextBackOffTime(int* error_count, int* multiplier);
    262 
    263   // Manages our update with the next allowable update time. If 'back_off' is
    264   // true, we must decrease the frequency of requests of the SafeBrowsing
    265   // service according to section 5 of the protocol specification.
    266   // When disable_auto_update_ is set, ScheduleNextUpdate will do nothing.
    267   // ForceScheduleNextUpdate has to be called to trigger the update.
    268   void ScheduleNextUpdate(bool back_off);
    269 
    270   // Sends a request for a list of chunks we should download to the SafeBrowsing
    271   // servers. In order to format this request, we need to send all the chunk
    272   // numbers for each list that we have to the server. Getting the chunk numbers
    273   // requires a database query (run on the database thread), and the request
    274   // is sent upon completion of that query in OnGetChunksComplete.
    275   void IssueUpdateRequest();
    276 
    277   // Sends a request for a chunk to the SafeBrowsing servers.
    278   void IssueChunkRequest();
    279 
    280   // Gets a key from the SafeBrowsing servers for use with MAC. This should only
    281   // be called once per client unless the server directly tells us to update.
    282   void IssueKeyRequest();
    283 
    284   // Formats a string returned from the database into:
    285   //   "list_name;a:<add_chunk_ranges>:s:<sub_chunk_ranges>:mac\n"
    286   static std::string FormatList(const SBListChunkRanges& list, bool use_mac);
    287 
    288   // Runs the protocol parser on received data and update the
    289   // SafeBrowsingService with the new content. Returns 'true' on successful
    290   // parse, 'false' on error.
    291   bool HandleServiceResponse(const GURL& url, const char* data, int length);
    292 
    293   // If the SafeBrowsing service wants us to re-key, we clear our key state and
    294   // issue the request.
    295   void HandleReKey();
    296 
    297   // Updates internal state for each GetHash response error, assuming that the
    298   // current time is |now|.
    299   void HandleGetHashError(const base::Time& now);
    300 
    301   // Helper function for update completion.
    302   void UpdateFinished(bool success);
    303 
    304   // A callback that runs if we timeout waiting for a response to an update
    305   // request. We use this to properly set our update state.
    306   void UpdateResponseTimeout();
    307 
          // Data members follow. (This access specifier repeats the "private:" that
          // is already in effect above.)
    308  private:
    309   // The factory that controls the creation of SafeBrowsingProtocolManager.
    310   // This is used by tests.
    311   static SBProtocolManagerFactory* factory_;
    312 
    313   // Main SafeBrowsing interface object.
    314   SafeBrowsingService* sb_service_;
    315 
    316   // Current active request (in case we need to cancel) for updates or chunks
    317   // from the SafeBrowsing service. We can only have one of these outstanding
    318   // at any given time unlike GetHash requests, which are tracked separately.
    319   scoped_ptr<URLFetcher> request_;
    320 
    321   // The kind of request that is currently in progress.
    322   SafeBrowsingRequestType request_type_;
    323 
    324   // The number of HTTP response errors, used for request backoff timing.
    325   int update_error_count_;
    326   int gethash_error_count_;
    327 
    328   // Multipliers which double (max == 8) for each error after the second.
    329   int update_back_off_mult_;
    330   int gethash_back_off_mult_;
    331 
    332   // Multiplier between 0 and 1 to spread clients over an interval.
    333   float back_off_fuzz_;
    334 
    335   // The list for which we are making a request.
    336   std::string list_name_;
    337 
    338   // For managing the next earliest time to query the SafeBrowsing servers for
    339   // updates.
    340   int next_update_sec_;
    341   base::OneShotTimer<SafeBrowsingProtocolManager> update_timer_;
    342 
    343   // All chunk requests that need to be made, along with their MAC.
    344   std::deque<ChunkUrl> chunk_request_urls_;
    345 
    346   // Map of GetHash requests, keyed by URLFetcher and mapping to a SafeBrowsingCheck.
    347   typedef base::hash_map<const URLFetcher*,
    348                          SafeBrowsingService::SafeBrowsingCheck*> HashRequests;
    349   HashRequests hash_requests_;
    350 
    351   // The next scheduled update has special behavior for the first 2 requests.
    352   enum UpdateRequestState {
    353     FIRST_REQUEST = 0,
    354     SECOND_REQUEST,
    355     NORMAL_REQUEST
    356   };
    357   UpdateRequestState update_state_;
    358 
    359   // We'll attempt to get keys once per browser session if we don't already have
    360   // them. They are not essential to operation, but provide a layer of
    361   // verification.
    362   bool initial_request_;
    363 
    364   // True if the service has been given an add/sub chunk but it hasn't been
    365   // added to the database yet.
    366   bool chunk_pending_to_write_;
    367 
    368   // The keys used for MAC. Empty keys mean we aren't using MAC.
    369   std::string client_key_;
    370   std::string wrapped_key_;
    371 
    372   // The last time we successfully received an update.
    373   base::Time last_update_;
    374 
    375   // While in GetHash backoff, we can't make another GetHash until this time.
    376   base::Time next_gethash_time_;
    377 
    378   // Current product version sent in each request.
    379   std::string version_;
    380 
    381   // Used for measuring chunk request latency.
    382   base::Time chunk_request_start_;
    383 
    384   // Tracks the size of each update (in bytes).
    385   int update_size_;
    386 
    387   // Track outstanding SafeBrowsing report fetchers for clean up.
    388   // We add both "hit" and "detail" fetchers in this set.
    389   std::set<const URLFetcher*> safebrowsing_reports_;
    390 
    391   // The safe browsing client name sent in each request.
    392   std::string client_name_;
    393 
    394   // A string that is appended to the end of URLs for download, gethash,
    395   // newkey, safebrowsing hits and chunk update requests.
    396   std::string additional_query_;
    397 
    398   // The context we use to issue network requests.
    399   scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
    400 
    401   // URL prefix where browser fetches safebrowsing chunk updates, hashes, and
    402   // reports hits to the safebrowsing list for UMA users.
    403   std::string http_url_prefix_;
    404 
    405   // URL prefix where browser fetches MAC client key, and reports detailed
    406   // malware reports for users who opt-in.
    407   std::string https_url_prefix_;
    408 
    409   // When true, protocol manager will not start an update unless
    410   // ForceScheduleNextUpdate() is called. This is set for testing purpose.
    411   bool disable_auto_update_;
    412 
    413   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingProtocolManager);
    414 };
    415 
    416 #endif  // CHROME_BROWSER_SAFE_BROWSING_PROTOCOL_MANAGER_H_
    417