Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // The Safe Browsing service is responsible for downloading anti-phishing and
      6 // anti-malware tables and checking urls against them.
      7 
      8 #ifndef CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
      9 #define CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
     10 
     11 #include <deque>
     12 #include <map>
     13 #include <set>
     14 #include <string>
     15 #include <vector>
     16 
     17 #include "base/callback.h"
     18 #include "base/containers/hash_tables.h"
     19 #include "base/memory/ref_counted.h"
     20 #include "base/memory/scoped_ptr.h"
     21 #include "base/synchronization/lock.h"
     22 #include "base/time/time.h"
     23 #include "chrome/browser/safe_browsing/protocol_manager.h"
     24 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
     25 #include "url/gurl.h"
     26 
     27 class SafeBrowsingService;
     28 class SafeBrowsingDatabase;
     29 
     30 namespace base {
     31 class Thread;
     32 }
     33 
     34 namespace net {
     35 class URLRequestContext;
     36 class URLRequestContextGetter;
     37 }
     38 
     39 namespace safe_browsing {
     40 class ClientSideDetectionService;
     41 class DownloadProtectionService;
     42 }
     43 
     44 // Construction needs to happen on the main thread.
        //
        // Holds the SafeBrowsing database and exposes the URL / hash checks made
        // against it.  Per the method comments below, most entry points are meant to
        // run on the IO thread, while database operations run on
        // |safe_browsing_thread_|.  Also serves as the
        // SafeBrowsingProtocolManagerDelegate (overrides are declared in the private
        // section).
     45 class SafeBrowsingDatabaseManager
     46     : public base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>,
     47       public SafeBrowsingProtocolManagerDelegate {
     48  public:
     49   class Client;
     50 
     51   // Bundle of SafeBrowsing state while performing a URL or hash prefix check.
     52   struct SafeBrowsingCheck {
     53     // |check_type| should correspond to the type of item that is being
     54     // checked, either a URL or a binary hash/URL. We store this for two
     55     // purposes: to know which of Client's methods to call when a result is
     56     // known, and for logging purposes. It *isn't* used to predict the response
     57     // list type, that is information that the server gives us.
     58     SafeBrowsingCheck(const std::vector<GURL>& urls,
     59                       const std::vector<SBFullHash>& full_hashes,
     60                       Client* client,
     61                       safe_browsing_util::ListType check_type,
     62                       const std::vector<SBThreatType>& expected_threats);
     63     ~SafeBrowsingCheck();
     64 
     65     // Either |urls| or |full_hashes| is used to lookup database. |*_results|
     66     // are parallel vectors containing the results. They are initialized to
     67     // contain SB_THREAT_TYPE_SAFE.
     68     std::vector<GURL> urls;
     69     std::vector<SBThreatType> url_results;
     70     std::vector<std::string> url_metadata;
     71     std::vector<SBFullHash> full_hashes;
     72     std::vector<SBThreatType> full_hash_results;
     73 
            // NOTE(review): |client| is a raw pointer; who owns it and how long it must
            // stay alive is not visible in this header -- confirm against callers.
     74     Client* client;
     75     bool need_get_hash;
     76     base::TimeTicks start;  // When check was sent to SB service.
     77     safe_browsing_util::ListType check_type;  // See comment in constructor.
     78     std::vector<SBThreatType> expected_threats;
     79     std::vector<SBPrefix> prefix_hits;
     80     std::vector<SBFullHashResult> cache_hits;
     81 
     82     // Vends weak pointers for TimeoutCallback().  If the response is
     83     // received before the timeout fires, factory is destructed and
     84     // the timeout won't be fired.
     85     // TODO(lzheng): We should consider using this timeout check
     86     // for browsing too (instead of implementing it in
     87     // safe_browsing_resource_handler.cc).
     88     scoped_ptr<base::WeakPtrFactory<
     89         SafeBrowsingDatabaseManager> > timeout_factory_;
     90 
     91    private:
     92     DISALLOW_COPY_AND_ASSIGN(SafeBrowsingCheck);
     93   };
     94 
          // Interface through which check results are reported.  The On*Result()
          // callbacks have empty default bodies, so a subclass only needs to override
          // the ones it cares about.
     95   class Client {
     96    public:
            // Reports the outcome of |check| to this client.  Presumably forwards to
            // one of the On*Result() callbacks below according to |check.check_type|
            // (see the SafeBrowsingCheck constructor comment) -- the implementation is
            // not visible in this header.
     97     void OnSafeBrowsingResult(const SafeBrowsingCheck& check);
     98 
     99    protected:
            // Protected: code outside the hierarchy cannot delete through a Client*.
    100     virtual ~Client() {}
    101 
    102     // Called when the result of checking a browse URL is known.
    103     virtual void OnCheckBrowseUrlResult(const GURL& url,
    104                                         SBThreatType threat_type,
    105                                         const std::string& metadata) {}
    106 
    107     // Called when the result of checking a download URL is known.
    108     virtual void OnCheckDownloadUrlResult(const std::vector<GURL>& url_chain,
    109                                           SBThreatType threat_type) {}
    110 
    111     // Called when the result of checking a set of extensions is known.
    112     virtual void OnCheckExtensionsResult(
    113         const std::set<std::string>& threats) {}
    114   };
    115 
    116   // Creates the database manager.  Need to initialize before using.
    117   explicit SafeBrowsingDatabaseManager(
    118       const scoped_refptr<SafeBrowsingService>& service);
    119 
    120   // Returns true if the url's scheme can be checked.
    121   bool CanCheckUrl(const GURL& url) const;
    122 
    123   // Returns whether download protection is enabled.
    124   bool download_protection_enabled() const {
    125     return enable_download_protection_;
    126   }
    127 
    128   // Called on the IO thread to check if the given url is safe or not.  If we
    129   // can synchronously determine that the url is safe, this returns true.
    130   // Otherwise it returns false, and "client" is called asynchronously with the
    131   // result when it is ready.
    132   virtual bool CheckBrowseUrl(const GURL& url, Client* client);
    133 
    134   // Check if the prefix for |url| is in safebrowsing download add lists.
    135   // Result will be passed to callback in |client|.
    136   virtual bool CheckDownloadUrl(const std::vector<GURL>& url_chain,
    137                                 Client* client);
    138 
    139   // Check which prefixes in |extension_ids| are in the safebrowsing blacklist.
    140   // Returns true if none are, false if further checks need to be made in which
    141   // case the result will be passed to |client|.
    142   virtual bool CheckExtensionIDs(const std::set<std::string>& extension_ids,
    143                                  Client* client);
    144 
    145   // Check if the given url is on the side-effect free whitelist.
    146   // Can be called on any thread. Returns false if the check cannot be performed
    147   // (e.g. because we are disabled or because of an invalid scheme in the URL).
    148   // Otherwise, returns true if the URL is on the whitelist based on matching
    149   // the hash prefix only (so there may be false positives).
    150   virtual bool CheckSideEffectFreeWhitelistUrl(const GURL& url);
    151 
    152   // Check if the |url| matches any of the full-length hashes from the
    153   // client-side phishing detection whitelist.  Returns true if there was a
    154   // match and false otherwise.  To make sure we are conservative we will return
    155   // true if an error occurs. This method is expected to be called on the IO
    156   // thread.
    157   virtual bool MatchCsdWhitelistUrl(const GURL& url);
    158 
    159   // Check if the given IP address (either IPv4 or IPv6) matches the malware
    160   // IP blacklist.
    161   virtual bool MatchMalwareIP(const std::string& ip_address);
    162 
    163   // Check if the |url| matches any of the full-length hashes from the
    164   // download whitelist.  Returns true if there was a match and false otherwise.
    165   // To make sure we are conservative we will return true if an error occurs.
    166   // This method is expected to be called on the IO thread.
    167   virtual bool MatchDownloadWhitelistUrl(const GURL& url);
    168 
    169   // Check if |str| matches any of the full-length hashes from the download
    170   // whitelist.  Returns true if there was a match and false otherwise.
    171   // To make sure we are conservative we will return true if an error occurs.
    172   // This method is expected to be called on the IO thread.
    173   virtual bool MatchDownloadWhitelistString(const std::string& str);
    174 
    175   // Check if the CSD malware IP matching kill switch is turned on.
    176   virtual bool IsMalwareKillSwitchOn();
    177 
    178   // Check if the CSD whitelist kill switch is turned on.
    179   virtual bool IsCsdWhitelistKillSwitchOn();
    180 
    181   // Called on the IO thread to cancel a pending check if the result is no
    182   // longer needed.
    183   void CancelCheck(Client* client);
    184 
    185   // Called on the IO thread when the SafeBrowsingProtocolManager has received
    186   // the full hash results for prefix hits detected in the database.
          // NOTE(review): |cache_lifetime| is not described by the original comment;
          // presumably it bounds how long the results may be cached -- confirm against
          // the implementation.
    187   void HandleGetHashResults(SafeBrowsingCheck* check,
    188                             const std::vector<SBFullHashResult>& full_hashes,
    189                             const base::TimeDelta& cache_lifetime);
    190 
    191   // Log the user perceived delay caused by SafeBrowsing. This delay is the time
    192   // delta starting from when we would have started reading data from the
    193   // network, and ending when the SafeBrowsing check completes indicating that
    194   // the current page is 'safe'.
    195   void LogPauseDelay(base::TimeDelta time);
    196 
    197   // Called to initialize objects that are used on the io_thread.  This may be
    198   // called multiple times during the life of the DatabaseManager. Should be
    199   // called on IO thread.
    200   void StartOnIOThread();
    201 
    202   // Called to stop or shutdown operations on the io_thread. This may be called
    203   // multiple times during the life of the DatabaseManager. Should be called
    204   // on IO thread. If shutdown is true, the manager is disabled permanently.
    205   void StopOnIOThread(bool shutdown);
    206 
    207  protected:
          // Protected destructor; base::RefCountedThreadSafe is befriended in the
          // private section below.
    208   virtual ~SafeBrowsingDatabaseManager();
    209 
    210   // protected for tests.
    211   void NotifyDatabaseUpdateFinished(bool update_succeeded);
    212 
    213  private:
    214   friend class base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>;
    215   friend class SafeBrowsingServerTest;
    216   friend class SafeBrowsingServiceTest;
    217   friend class SafeBrowsingServiceTestHelper;
    218   friend class SafeBrowsingDatabaseManagerTest;
    219   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseManagerTest, GetUrlThreatType);
    220 
          // Container aliases used by |checks_| and |gethash_requests_| below.
          // GetHashRequests maps a prefix to the checks associated with it.
    221   typedef std::set<SafeBrowsingCheck*> CurrentChecks;
    222   typedef std::vector<SafeBrowsingCheck*> GetHashRequestors;
    223   typedef base::hash_map<SBPrefix, GetHashRequestors> GetHashRequests;
    224 
    225   // Clients that we've queued up for checking later once the database is ready.
    226   struct QueuedCheck {
    227     QueuedCheck(const safe_browsing_util::ListType check_type,
    228                 Client* client,
    229                 const GURL& url,
    230                 const std::vector<SBThreatType>& expected_threats,
    231                 const base::TimeTicks& start);
    232     ~QueuedCheck();
    233     safe_browsing_util::ListType check_type;
    234     Client* client;
    235     GURL url;
    236     std::vector<SBThreatType> expected_threats;
    237     base::TimeTicks start;  // When check was queued.
    238   };
    239 
    240   // Return the threat type from the first result in |full_hashes| which matches
    241   // |hash|, or SAFE if none match.
    242   static SBThreatType GetHashThreatType(
    243       const SBFullHash& hash,
    244       const std::vector<SBFullHashResult>& full_hashes);
    245 
    246   // Given a URL, compare all the possible host + path full hashes to the set of
    247   // provided full hashes.  Returns the threat type of the matching result from
    248   // |full_hashes|, or SAFE if none match.
          // NOTE(review): |index| (a non-const pointer) is not described by the
          // original comment; presumably an out-parameter receiving the position of
          // the matching entry in |full_hashes| -- confirm against the implementation.
    249   static SBThreatType GetUrlThreatType(
    250       const GURL& url,
    251       const std::vector<SBFullHashResult>& full_hashes,
    252       size_t* index);
    253 
    254   // Called to stop operations on the io_thread. This may be called multiple
    255   // times during the life of the DatabaseManager. Should be called on IO
    256   // thread.
    257   void DoStopOnIOThread();
    258 
    259   // Returns whether |database_| exists and is accessible.
    260   bool DatabaseAvailable() const;
    261 
    262   // Called on the IO thread.  If the database does not exist, queues up a call
    263   // on the db thread to create it.  Returns whether the database is available.
    264   //
    265   // Note that this is only needed outside the db thread, since functions on the
    266   // db thread can call GetDatabase() directly.
    267   bool MakeDatabaseAvailable();
    268 
    269   // Should only be called on db thread as SafeBrowsingDatabase is not
    270   // threadsafe.
    271   SafeBrowsingDatabase* GetDatabase();
    272 
    273   // Called on the IO thread with the check result.
    274   void OnCheckDone(SafeBrowsingCheck* info);
    275 
    276   // Called on the database thread to retrieve chunks.
    277   void GetAllChunksFromDatabase(GetChunksCallback callback);
    278 
    279   // Called on the IO thread with the results of all chunks.
    280   void OnGetAllChunksFromDatabase(const std::vector<SBListChunkRanges>& lists,
    281                                   bool database_error,
    282                                   GetChunksCallback callback);
    283 
    284   // Called on the IO thread after the database reports that it added a chunk.
    285   void OnAddChunksComplete(AddChunksCallback callback);
    286 
    287   // Notification that the database is done loading its bloom filter.  We may
    288   // have had to queue checks until the database is ready, and if so, this
    289   // checks them.
    290   void DatabaseLoadComplete();
    291 
    292   // Called on the database thread to add/remove chunks and host keys.
    293   void AddDatabaseChunks(const std::string& list,
    294                          scoped_ptr<ScopedVector<SBChunkData> > chunks,
    295                          AddChunksCallback callback);
    296 
    297   void DeleteDatabaseChunks(
    298       scoped_ptr<std::vector<SBChunkDelete> > chunk_deletes);
    299 
    300   void NotifyClientBlockingComplete(Client* client, bool proceed);
    301 
    302   void DatabaseUpdateFinished(bool update_succeeded);
    303 
    304   // Called on the db thread to close the database.  See CloseDatabase().
          // NOTE(review): no CloseDatabase() is declared in this class; the reference
          // above looks stale -- confirm.
    305   void OnCloseDatabase();
    306 
    307   // Runs on the db thread to reset the database. We assume that resetting the
    308   // database is a synchronous operation.
    309   void OnResetDatabase();
    310 
    311   // Internal worker function for processing full hashes.
    312   void OnHandleGetHashResults(SafeBrowsingCheck* check,
    313                               const std::vector<SBFullHashResult>& full_hashes);
    314 
    315   // Run one check against |full_hashes|.  Returns |true| if the check
    316   // finds a match in |full_hashes|.
    317   bool HandleOneCheck(SafeBrowsingCheck* check,
    318                       const std::vector<SBFullHashResult>& full_hashes);
    319 
    320   // Invoked by CheckDownloadUrl. It checks the download URL on
    321   // safe_browsing_thread_.
    322   void CheckDownloadUrlOnSBThread(SafeBrowsingCheck* check);
    323 
    324   // The callback function when a safebrowsing check is timed out. Client will
    325   // be notified that the safebrowsing check is SAFE when this happens.
    326   void TimeoutCallback(SafeBrowsingCheck* check);
    327 
    328   // Calls the Client's callback on IO thread after CheckDownloadUrl finishes.
    329   void CheckDownloadUrlDone(SafeBrowsingCheck* check);
    330 
    331   // Checks all extension ID hashes on safe_browsing_thread_.
    332   void CheckExtensionIDsOnSBThread(SafeBrowsingCheck* check);
    333 
    334   // Helper function that calls safe browsing client and cleans up |checks_|.
    335   void SafeBrowsingCheckDone(SafeBrowsingCheck* check);
    336 
    337   // Helper function to set |check| with default values and start a safe
    338   // browsing check with a timeout (presumably |check_timeout_|). |task| will
    339   // be called on success, otherwise TimeoutCallback will be called.
    340   void StartSafeBrowsingCheck(SafeBrowsingCheck* check,
    341                               const base::Closure& task);
    342 
    343   // SafeBrowsingProtocolManagerDelegate overrides.
    344   virtual void ResetDatabase() OVERRIDE;
    345   virtual void UpdateStarted() OVERRIDE;
    346   virtual void UpdateFinished(bool success) OVERRIDE;
    347   virtual void GetChunks(GetChunksCallback callback) OVERRIDE;
    348   virtual void AddChunks(const std::string& list,
    349                          scoped_ptr<ScopedVector<SBChunkData> > chunks,
    350                          AddChunksCallback callback) OVERRIDE;
    351   virtual void DeleteChunks(
    352       scoped_ptr<std::vector<SBChunkDelete> > chunk_deletes) OVERRIDE;
    353 
    354   scoped_refptr<SafeBrowsingService> sb_service_;
    355 
          // Set of SafeBrowsingCheck pointers.  See SafeBrowsingCheckDone(), which
          // cleans this set up.
    356   CurrentChecks checks_;
    357 
    358   // Used for issuing only one GetHash request for a given prefix.
    359   GetHashRequests gethash_requests_;
    360 
    361   // The persistent database.  We don't use a scoped_ptr because it
    362   // needs to be destroyed on a different thread than this object.
    363   SafeBrowsingDatabase* database_;
    364 
    365   // Lock used to prevent possible data races due to compiler optimizations.
    366   mutable base::Lock database_lock_;
    367 
    368   // Whether the service is running. 'enabled_' is used by the
    369   // SafeBrowsingDatabaseManager on the IO thread during normal operations.
    370   bool enabled_;
    371 
    372   // Indicates if download_protection is enabled by command switch
    373   // so we allow this feature to be exercised.
    374   bool enable_download_protection_;
    375 
    376   // Indicate if client-side phishing detection whitelist should be enabled
    377   // or not.
    378   bool enable_csd_whitelist_;
    379 
    380   // Indicate if the download whitelist should be enabled or not.
    381   bool enable_download_whitelist_;
    382 
    383   // Indicate if the extension blacklist should be enabled.
    384   bool enable_extension_blacklist_;
    385 
    386   // Indicate if the side effect free whitelist should be enabled.
    387   bool enable_side_effect_free_whitelist_;
    388 
    389   // Indicate if the csd malware IP blacklist should be enabled.
    390   bool enable_ip_blacklist_;
    391 
    392   // The SafeBrowsing thread that runs database operations.
    393   //
    394   // Note: Functions that run on this thread should run synchronously and return
    395   // to the IO thread, not post additional tasks back to this thread, lest we
    396   // cause a race condition at shutdown time that leads to a database leak.
    397   scoped_ptr<base::Thread> safe_browsing_thread_;
    398 
    399   // Indicates if we're currently in an update cycle.
    400   bool update_in_progress_;
    401 
    402   // When true, newly fetched chunks may not be in the database yet since the
    403   // database is still updating.
    404   bool database_update_in_progress_;
    405 
    406   // Indicates if we're in the midst of trying to close the database.  If this
    407   // is true, nothing on the IO thread should access the database.
    408   bool closing_database_;
    409 
          // Checks queued until the database is ready (see QueuedCheck and
          // DatabaseLoadComplete()).
    410   std::deque<QueuedCheck> queued_checks_;
    411 
    412   // Timeout to use for safe browsing checks.
    413   base::TimeDelta check_timeout_;
    414 
    415   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseManager);
    416 };
    417 
    418 #endif  // CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
    419