Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // The Safe Browsing service is responsible for downloading anti-phishing and
      6 // anti-malware tables and checking urls against them.
      7 
      8 #ifndef CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
      9 #define CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
     10 
     11 #include <deque>
     12 #include <map>
     13 #include <set>
     14 #include <string>
     15 #include <vector>
     16 
     17 #include "base/callback.h"
     18 #include "base/containers/hash_tables.h"
     19 #include "base/memory/ref_counted.h"
     20 #include "base/memory/scoped_ptr.h"
     21 #include "base/synchronization/lock.h"
     22 #include "base/time/time.h"
     23 #include "chrome/browser/safe_browsing/protocol_manager.h"
     24 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
     25 #include "url/gurl.h"
     26 
     27 class SafeBrowsingService;
     28 class SafeBrowsingDatabase;
     29 
     30 namespace base {
     31 class Thread;
     32 }
     33 
     34 namespace net {
     35 class URLRequestContext;
     36 class URLRequestContextGetter;
     37 }
     38 
     39 namespace safe_browsing {
     40 class ClientSideDetectionService;
     41 class DownloadProtectionService;
     42 }
     43 
     44 // Construction needs to happen on the main thread.
     45 class SafeBrowsingDatabaseManager
     46     : public base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>,
     47       public SafeBrowsingProtocolManagerDelegate {
     48  public:
     49   class Client;
     50 
     51   // Bundle of SafeBrowsing state while performing a URL or hash prefix check.
     52   struct SafeBrowsingCheck {
     53     // |check_type| should correspond to the type of item that is being
     54     // checked, either a URL or a binary hash/URL. We store this for two
     55     // purposes: to know which of Client's methods to call when a result is
     56     // known, and for logging purposes. It *isn't* used to predict the response
     57     // list type; that is information that the server gives us.
     58     SafeBrowsingCheck(const std::vector<GURL>& urls,
     59                       const std::vector<SBFullHash>& full_hashes,
     60                       Client* client,
     61                       safe_browsing_util::ListType check_type,
     62                       const std::vector<SBThreatType>& expected_threats);
     63     ~SafeBrowsingCheck();
     64 
     65     // Either |urls| or |full_hashes| is used to look up the database.
     66     // |*_results| are parallel vectors containing the results. They are
     67     // initialized to contain SB_THREAT_TYPE_SAFE.
     68     std::vector<GURL> urls;
     69     std::vector<SBThreatType> url_results;
     70     std::vector<SBFullHash> full_hashes;
     71     std::vector<SBThreatType> full_hash_results;
     72 
     73     Client* client;
     74     bool need_get_hash;
     75     base::TimeTicks start;  // When check was sent to SB service.
     76     safe_browsing_util::ListType check_type;  // See comment in constructor.
     77     std::vector<SBThreatType> expected_threats;
     78     std::vector<SBPrefix> prefix_hits;
     79     std::vector<SBFullHashResult> cache_hits;
     80 
     81     // Vends weak pointers for TimeoutCallback().  If the response is
     82     // received before the timeout fires, the factory is destroyed and
     83     // the timeout won't be fired.
     84     // TODO(lzheng): We should consider using this timeout check
     85     // for browsing too (instead of implementing it in
     86     // safe_browsing_resource_handler.cc).
            // NOTE(review): base/memory/weak_ptr.h is not among this header's
            // direct includes; WeakPtrFactory presumably arrives transitively --
            // confirm.
     87     scoped_ptr<base::WeakPtrFactory<
     88         SafeBrowsingDatabaseManager> > timeout_factory_;
     89 
     90    private:
     91     DISALLOW_COPY_AND_ASSIGN(SafeBrowsingCheck);
     92   };
     93 
     94   class Client {
     95    public:
            // NOTE(review): undocumented in this header. Presumably hands a
            // finished |check| to one of the OnCheck*Result() hooks below, picked
            // by |check.check_type| (see the SafeBrowsingCheck constructor
            // comment) -- confirm against the definition.
     96     void OnSafeBrowsingResult(const SafeBrowsingCheck& check);
     97 
     98    protected:
            // Protected, so code outside Client and its subclasses cannot delete
            // an object through a Client*.
     99     virtual ~Client() {}
    100 
    101     // Called when the result of checking a browse URL is known.
            // The default implementation does nothing.
    102     virtual void OnCheckBrowseUrlResult(const GURL& url,
    103                                         SBThreatType threat_type) {}
    104 
    105     // Called when the result of checking a download URL is known.
            // The default implementation does nothing.
    106     virtual void OnCheckDownloadUrlResult(const std::vector<GURL>& url_chain,
    107                                           SBThreatType threat_type) {}
    108 
    109     // Called when the result of checking a set of extensions is known.
            // The default implementation does nothing.
    110     virtual void OnCheckExtensionsResult(
    111         const std::set<std::string>& threats) {}
    112   };
    113 
    114   // Creates the database manager.  Needs to be initialized before use.
    115   explicit SafeBrowsingDatabaseManager(
    116       const scoped_refptr<SafeBrowsingService>& service);
    117 
    118   // Returns true if |url|'s scheme can be checked.
    119   bool CanCheckUrl(const GURL& url) const;
    120 
    121   // Returns whether download protection is enabled (set by command switch).
    122   bool download_protection_enabled() const {
    123     return enable_download_protection_;
    124   }
    125 
    126   // Called on the IO thread to check if the given url is safe or not.  If we
    127   // can synchronously determine that the url is safe, CheckBrowseUrl returns
    128   // true. Otherwise it returns false, and |client| is called asynchronously
    129   // with the result when it is ready.
    130   virtual bool CheckBrowseUrl(const GURL& url, Client* client);
    131 
    132   // Check |url_chain| (by prefix) against the safebrowsing download add lists.
    133   // Result will be passed to callback in |client|.
          // NOTE(review): the return value is not documented here; presumably it
          // follows the CheckBrowseUrl() convention (true = known safe without an
          // asynchronous callback) -- confirm.
    134   virtual bool CheckDownloadUrl(const std::vector<GURL>& url_chain,
    135                                 Client* client);
    136 
    137   // Check which prefixes in |extension_ids| are in the safebrowsing blacklist.
    138   // Returns true if none are, false if further checks need to be made, in
    139   // which case the result will be passed to |client|.
    140   virtual bool CheckExtensionIDs(const std::set<std::string>& extension_ids,
    141                                  Client* client);
    142 
    143   // Check if the given url is on the side-effect free whitelist.
    144   // Can be called on any thread. Returns false if the check cannot be performed
    145   // (e.g. because we are disabled or because of an invalid scheme in the URL).
    146   // Otherwise, returns true if the URL is on the whitelist based on matching
    147   // the hash prefix only (so there may be false positives).
    148   virtual bool CheckSideEffectFreeWhitelistUrl(const GURL& url);
    149 
    150   // Check if the |url| matches any of the full-length hashes from the
    151   // client-side phishing detection whitelist.  Returns true if there was a
    152   // match and false otherwise.  To make sure we are conservative we will return
    153   // true if an error occurs. This method is expected to be called on the IO
    154   // thread.
    155   virtual bool MatchCsdWhitelistUrl(const GURL& url);
    156 
    157   // Check if the given IP address (either IPv4 or IPv6) matches the malware
    158   // IP blacklist.
          // NOTE(review): the return value is not documented; presumably true on a
          // match, like the other Match* methods -- confirm.
    159   virtual bool MatchMalwareIP(const std::string& ip_address);
    160 
    161   // Check if the |url| matches any of the full-length hashes from the
    162   // download whitelist.  Returns true if there was a match and false otherwise.
    163   // To make sure we are conservative we will return true if an error occurs.
    164   // This method is expected to be called on the IO thread.
    165   virtual bool MatchDownloadWhitelistUrl(const GURL& url);
    166 
    167   // Check if |str| matches any of the full-length hashes from the download
    168   // whitelist.  Returns true if there was a match and false otherwise.
    169   // To make sure we are conservative we will return true if an error occurs.
    170   // This method is expected to be called on the IO thread.
    171   virtual bool MatchDownloadWhitelistString(const std::string& str);
    172 
    173   // Check if the CSD malware IP matching kill switch is turned on.
    174   virtual bool IsMalwareKillSwitchOn();
    175 
    176   // Check if the CSD whitelist kill switch is turned on.
    177   virtual bool IsCsdWhitelistKillSwitchOn();
    178 
    179   // Called on the IO thread to cancel a pending check if the result is no
    180   // longer needed.
    181   void CancelCheck(Client* client);
    182 
    183   // Called on the IO thread when the SafeBrowsingProtocolManager has received
    184   // the full hash results for prefix hits detected in the database.
    185   void HandleGetHashResults(SafeBrowsingCheck* check,
    186                             const std::vector<SBFullHashResult>& full_hashes,
    187                             const base::TimeDelta& cache_lifetime);
    188 
    189   // Log the user perceived delay caused by SafeBrowsing. This delay is the time
    190   // delta starting from when we would have started reading data from the
    191   // network, and ending when the SafeBrowsing check completes indicating that
    192   // the current page is 'safe'.
    193   void LogPauseDelay(base::TimeDelta time);
    194 
    195   // Called to initialize objects that are used on the IO thread.  This may be
    196   // called multiple times during the life of the DatabaseManager. Should be
    197   // called on the IO thread.
    198   void StartOnIOThread();
    199 
    200   // Called to stop or shut down operations on the IO thread. This may be called
    201   // multiple times during the life of the DatabaseManager. Should be called on
    202   // the IO thread. If |shutdown| is true, the manager is disabled permanently.
    203   void StopOnIOThread(bool shutdown);
    204 
    205  protected:
          // Not public: the class derives from base::RefCountedThreadSafe, which
          // is declared a friend in the private section, so deletion is expected
          // to go through the ref-counting base rather than a direct delete.
    206   virtual ~SafeBrowsingDatabaseManager();
    207 
    208   // Protected (rather than private) so that tests can call it.
    209   void NotifyDatabaseUpdateFinished(bool update_succeeded);
    210 
    211  private:
    212   friend class base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>;
    213   friend class SafeBrowsingServerTest;
    214   friend class SafeBrowsingServiceTest;
    215   friend class SafeBrowsingServiceTestHelper;
    216   friend class SafeBrowsingDatabaseManagerTest;
    217   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseManagerTest, GetUrlThreatType);
    218 
          // Set of check pointers; the type of |checks_|.
    219   typedef std::set<SafeBrowsingCheck*> CurrentChecks;
          // List of check pointers; the mapped type of GetHashRequests.
          // NOTE(review): presumably the checks waiting on one GetHash request --
          // confirm.
    220   typedef std::vector<SafeBrowsingCheck*> GetHashRequestors;
          // Maps a hash prefix to its GetHashRequestors; the type of
          // |gethash_requests_|.
    221   typedef base::hash_map<SBPrefix, GetHashRequestors> GetHashRequests;
    222 
    223   // Clients that we've queued up for checking later once the database is ready.
    224   struct QueuedCheck {
            // NOTE(review): the top-level const on |check_type| has no effect in a
            // declaration; it only matters in the definition.
    225     QueuedCheck(const safe_browsing_util::ListType check_type,
    226                 Client* client,
    227                 const GURL& url,
    228                 const std::vector<SBThreatType>& expected_threats,
    229                 const base::TimeTicks& start);
    230     ~QueuedCheck();
            // The members below have the same names and types as the constructor
            // arguments.
    231     safe_browsing_util::ListType check_type;
    232     Client* client;
    233     GURL url;
    234     std::vector<SBThreatType> expected_threats;
    235     base::TimeTicks start;  // When check was queued.
    236   };
    237 
    238   // Return the threat type from the first result in |full_hashes| which matches
    239   // |hash|, or SB_THREAT_TYPE_SAFE if none match.
    240   static SBThreatType GetHashThreatType(
    241       const SBFullHash& hash,
    242       const std::vector<SBFullHashResult>& full_hashes);
    243 
    244   // Given a URL, compare all the possible host + path full hashes to the set of
    245   // provided full hashes.  Returns the threat type of the matching result from
    246   // |full_hashes|, or SB_THREAT_TYPE_SAFE if none match.
    247   static SBThreatType GetUrlThreatType(
    248       const GURL& url,
    249       const std::vector<SBFullHashResult>& full_hashes);
    250 
    251   // Called to stop operations on the IO thread. This may be called multiple
    252   // times during the life of the DatabaseManager. Should be called on the IO
    253   // thread.
    254   void DoStopOnIOThread();
    255 
    256   // Returns whether |database_| exists and is accessible.
    257   bool DatabaseAvailable() const;
    258 
    259   // Called on the IO thread.  If the database does not exist, queues up a call
    260   // on the db thread to create it.  Returns whether the database is available.
    261   //
    262   // Note that this is only needed outside the db thread, since functions on the
    263   // db thread can call GetDatabase() directly.
    264   bool MakeDatabaseAvailable();
    265 
    266   // Should only be called on the db thread, as SafeBrowsingDatabase is not
    267   // threadsafe.
    268   SafeBrowsingDatabase* GetDatabase();
    269 
    270   // Called on the IO thread with the check result.
    271   void OnCheckDone(SafeBrowsingCheck* info);
    272 
    273   // Called on the database thread to retrieve chunks.
    274   void GetAllChunksFromDatabase(GetChunksCallback callback);
    275 
    276   // Called on the IO thread with the results of all chunks.
    277   void OnGetAllChunksFromDatabase(const std::vector<SBListChunkRanges>& lists,
    278                                   bool database_error,
    279                                   GetChunksCallback callback);
    280 
    281   // Called on the IO thread after the database reports that it added a chunk.
    282   void OnAddChunksComplete(AddChunksCallback callback);
    283 
    284   // Notification that the database is done loading its bloom filter.  We may
    285   // have had to queue checks until the database is ready, and if so, this
    286   // checks them.
    287   void DatabaseLoadComplete();
    288 
    289   // Called on the database thread to add/remove chunks and host keys.
    290   void AddDatabaseChunks(const std::string& list,
    291                          scoped_ptr<ScopedVector<SBChunkData> > chunks,
    292                          AddChunksCallback callback);
    293 
    294   void DeleteDatabaseChunks(
    295       scoped_ptr<std::vector<SBChunkDelete> > chunk_deletes);
    296 
          // TODO(review): undocumented; its purpose is not evident from this header.
    297   void NotifyClientBlockingComplete(Client* client, bool proceed);
    298 
          // TODO(review): undocumented; its relationship to
          // NotifyDatabaseUpdateFinished() is not evident from this header.
    299   void DatabaseUpdateFinished(bool update_succeeded);
    300 
    301   // Called on the db thread to close the database.  See CloseDatabase().
          // NOTE(review): no CloseDatabase() is declared in this header -- the
          // reference above may be stale; confirm.
    302   void OnCloseDatabase();
    303 
    304   // Runs on the db thread to reset the database. We assume that resetting the
    305   // database is a synchronous operation.
    306   void OnResetDatabase();
    307 
    308   // Internal worker function for processing full hashes.
    309   void OnHandleGetHashResults(SafeBrowsingCheck* check,
    310                               const std::vector<SBFullHashResult>& full_hashes);
    311 
    312   // Run one check against |full_hashes|.  Returns |true| if the check
    313   // finds a match in |full_hashes|.
    314   bool HandleOneCheck(SafeBrowsingCheck* check,
    315                       const std::vector<SBFullHashResult>& full_hashes);
    316 
    317   // Invoked by CheckDownloadUrl. It checks the download URL on
    318   // safe_browsing_thread_.
    319   void CheckDownloadUrlOnSBThread(SafeBrowsingCheck* check);
    320 
    321   // The callback function when a safebrowsing check is timed out. Client will
    322   // be notified that the safebrowsing check is SAFE when this happens.
    323   void TimeoutCallback(SafeBrowsingCheck* check);
    324 
    325   // Calls the Client's callback on IO thread after CheckDownloadUrl finishes.
    326   void CheckDownloadUrlDone(SafeBrowsingCheck* check);
    327 
    328   // Checks all extension ID hashes on safe_browsing_thread_.
    329   void CheckExtensionIDsOnSBThread(SafeBrowsingCheck* check);
    330 
    331   // Helper function that calls safe browsing client and cleans up |checks_|.
    332   void SafeBrowsingCheckDone(SafeBrowsingCheck* check);
    333 
    334   // Helper function to set |check| with default values and start a safe
    335   // browsing check with a timeout.  |task| will be called on success,
    336   // otherwise TimeoutCallback will be called.
          // NOTE(review): there is no |timeout| parameter; the timeout used is
          // presumably |check_timeout_| -- confirm.
    337   void StartSafeBrowsingCheck(SafeBrowsingCheck* check,
    338                               const base::Closure& task);
    339 
    340   // SafeBrowsingProtocolManagerDelegate overrides.
    341   virtual void ResetDatabase() OVERRIDE;
    342   virtual void UpdateStarted() OVERRIDE;
    343   virtual void UpdateFinished(bool success) OVERRIDE;
    344   virtual void GetChunks(GetChunksCallback callback) OVERRIDE;
    345   virtual void AddChunks(const std::string& list,
    346                          scoped_ptr<ScopedVector<SBChunkData> > chunks,
    347                          AddChunksCallback callback) OVERRIDE;
    348   virtual void DeleteChunks(
    349       scoped_ptr<std::vector<SBChunkDelete> > chunk_deletes) OVERRIDE;
    350 
    351   scoped_refptr<SafeBrowsingService> sb_service_;
    352 
          // NOTE(review): presumably the checks currently in flight;
          // SafeBrowsingCheckDone() is documented as cleaning this up -- confirm.
    353   CurrentChecks checks_;
    354 
    355   // Used for issuing only one GetHash request for a given prefix.
    356   GetHashRequests gethash_requests_;
    357 
    358   // The persistent database.  We don't use a scoped_ptr because it
    359   // needs to be destroyed on a different thread than this object.
    360   SafeBrowsingDatabase* database_;
    361 
    362   // Lock used to prevent possible data races due to compiler optimizations.
    363   mutable base::Lock database_lock_;
    364 
    365   // Whether the service is running. 'enabled_' is used by the
    366   // SafeBrowsingDatabaseManager on the IO thread during normal operations.
    367   bool enabled_;
    368 
    369   // Indicate if download_protection is enabled by command switch
    370   // so we allow this feature to be exercised.
    371   bool enable_download_protection_;
    372 
    373   // Indicate if client-side phishing detection whitelist should be enabled
    374   // or not.
    375   bool enable_csd_whitelist_;
    376 
    377   // Indicate if the download whitelist should be enabled or not.
    378   bool enable_download_whitelist_;
    379 
    380   // Indicate if the extension blacklist should be enabled.
    381   bool enable_extension_blacklist_;
    382 
    383   // Indicate if the side effect free whitelist should be enabled.
    384   bool enable_side_effect_free_whitelist_;
    385 
    386   // Indicate if the csd malware IP blacklist should be enabled.
    387   bool enable_ip_blacklist_;
    388 
    389   // The SafeBrowsing thread that runs database operations.
    390   //
    391   // Note: Functions that run on this thread should run synchronously and return
    392   // to the IO thread, not post additional tasks back to this thread, lest we
    393   // cause a race condition at shutdown time that leads to a database leak.
    394   scoped_ptr<base::Thread> safe_browsing_thread_;
    395 
    396   // Indicates if we're currently in an update cycle.
    397   bool update_in_progress_;
    398 
    399   // When true, newly fetched chunks may not be in the database yet since the
    400   // database is still updating.
    401   bool database_update_in_progress_;
    402 
    403   // Indicates if we're in the midst of trying to close the database.  If this
    404   // is true, nothing on the IO thread should access the database.
    405   bool closing_database_;
    406 
          // Checks queued up until the database is ready (see QueuedCheck and
          // DatabaseLoadComplete()).
    407   std::deque<QueuedCheck> queued_checks_;
    408 
    409   // Timeout to use for safe browsing checks.
    410   base::TimeDelta check_timeout_;
    411 
    412   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseManager);
    413 };
    414 
    415 #endif  // CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
    416