Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // The Safe Browsing service is responsible for downloading anti-phishing and
      6 // anti-malware tables and checking urls against them.
      7 
      8 #ifndef CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
      9 #define CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
     10 
     11 #include <deque>
     12 #include <map>
     13 #include <set>
     14 #include <string>
     15 #include <vector>
     16 
     17 #include "base/callback.h"
     18 #include "base/containers/hash_tables.h"
     19 #include "base/memory/ref_counted.h"
     20 #include "base/memory/scoped_ptr.h"
     21 #include "base/synchronization/lock.h"
     22 #include "base/time/time.h"
     23 #include "chrome/browser/safe_browsing/protocol_manager.h"
     24 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
     25 #include "url/gurl.h"
     26 
     27 class SafeBrowsingService;
     28 class SafeBrowsingDatabase;
     29 
     30 namespace base {
     31 class Thread;
     32 }
     33 
     34 namespace net {
     35 class URLRequestContext;
     36 class URLRequestContextGetter;
     37 }
     38 
     39 namespace safe_browsing {
     40 class ClientSideDetectionService;
     41 class DownloadProtectionService;
     42 }
     43 
     44 // Manages the SafeBrowsing database (|database_|) and checks URLs, download
        // hashes and extension IDs against it.  Most check entry points are
        // documented as IO-thread only; database operations run on
        // |safe_browsing_thread_|.
        // Construction needs to happen on the main thread.
     45 class SafeBrowsingDatabaseManager
     46     : public base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>,
     47       public SafeBrowsingProtocolManagerDelegate {
     48  public:
     49   class Client;
     50 
     51   // Bundle of SafeBrowsing state while performing a URL or hash prefix check.
     52   struct SafeBrowsingCheck {
     53     // |check_type| should correspond to the type of item that is being
     54     // checked, either a URL or a binary hash/URL. We store this for two
     55     // purposes: to know which of Client's methods to call when a result is
     56     // known, and for logging purposes. It *isn't* used to predict the response
     57     // list type, that is information that the server gives us.
     58     SafeBrowsingCheck(const std::vector<GURL>& urls,
     59                       const std::vector<SBFullHash>& full_hashes,
     60                       Client* client,
     61                       safe_browsing_util::ListType check_type,
     62                       const std::vector<SBThreatType>& expected_threats);
     63     ~SafeBrowsingCheck();
     64 
     65     // Either |urls| or |full_hashes| is used to look up the database.
     66     // |*_results| are parallel vectors containing the results. They are
     67     // initialized to contain SB_THREAT_TYPE_SAFE.
     68     std::vector<GURL> urls;
     69     std::vector<SBThreatType> url_results;
     70     std::vector<SBFullHash> full_hashes;
     71     std::vector<SBThreatType> full_hash_results;
     72 
     73     Client* client;
     74     bool need_get_hash;
     75     base::TimeTicks start;  // When check was sent to SB service.
     76     safe_browsing_util::ListType check_type;  // See comment in constructor.
     77     std::vector<SBThreatType> expected_threats;
     78     std::vector<SBPrefix> prefix_hits;
     79     std::vector<SBFullHashResult> full_hits;
     80 
     81     // Vends weak pointers for TimeoutCallback().  If the response is
     82     // received before the timeout fires, the factory is destructed and
     83     // the timeout won't be fired.
     84     // TODO(lzheng): We should consider using this timeout check
     85     // for browsing too (instead of implementing it in
     86     // safe_browsing_resource_handler.cc).
     87     scoped_ptr<base::WeakPtrFactory<
     88         SafeBrowsingDatabaseManager> > timeout_factory_;
     89 
     90    private:
     91     DISALLOW_COPY_AND_ASSIGN(SafeBrowsingCheck);
     92   };
     93 
          // Receiver of asynchronous check results.  The OnCheck*Result() hooks
          // below default to no-ops; subclasses override the ones they need.
     94   class Client {
     95    public:
            // Entry point taking a finished |check|.  Presumably dispatches to the
            // matching OnCheck*Result() hook based on |check.check_type| (see the
            // SafeBrowsingCheck constructor comment) - confirm in the
            // implementation file.
     96     void OnSafeBrowsingResult(const SafeBrowsingCheck& check);
     97 
     98    protected:
     99     virtual ~Client() {}
    100 
    101     // Called when the result of checking a browse URL is known.
    102     virtual void OnCheckBrowseUrlResult(const GURL& url,
    103                                         SBThreatType threat_type) {}
    104 
    105     // Called when the result of checking a download URL is known.
    106     virtual void OnCheckDownloadUrlResult(const std::vector<GURL>& url_chain,
    107                                           SBThreatType threat_type) {}
    108 
    109     // Called when the result of checking a download binary hash is known.
    110     virtual void OnCheckDownloadHashResult(const std::string& hash,
    111                                            SBThreatType threat_type) {}
    112 
    113     // Called when the result of checking a set of extensions is known.
    114     virtual void OnCheckExtensionsResult(
    115         const std::set<std::string>& threats) {}
    116   };
    117 
    118   // Creates the database manager for |service|.  Need to initialize before
          // using (presumably via StartOnIOThread() - confirm).
    119   explicit SafeBrowsingDatabaseManager(
    120       const scoped_refptr<SafeBrowsingService>& service);
    121 
    122   // Returns true if the url's scheme can be checked.
    123   bool CanCheckUrl(const GURL& url) const;
    124 
    125   // Returns whether download protection is enabled.
    126   bool download_protection_enabled() const {
    127     return enable_download_protection_;
    128   }
    129 
    130   // Called on the IO thread to check if the given url is safe or not.  If we
    131   // can synchronously determine that the url is safe, CheckBrowseUrl returns
    132   // true.  Otherwise it returns false, and |client| is called asynchronously
    133   // with the result when it is ready.
    134   virtual bool CheckBrowseUrl(const GURL& url, Client* client);
    135 
    136   // Check if the prefixes for the URLs in |url_chain| are in safebrowsing
    137   // download add lists.  Result will be passed to callback in |client|.
          // NOTE(review): the bool return value is undocumented; presumably it
          // follows CheckBrowseUrl() (true = known safe synchronously) - confirm.
    138   virtual bool CheckDownloadUrl(const std::vector<GURL>& url_chain,
    139                                 Client* client);
    140 
    141   // Check if the prefix for |full_hash| is in safebrowsing binhash add lists.
    142   // Result will be passed to callback in |client|.
          // NOTE(review): return value undocumented, as for CheckDownloadUrl().
    143   virtual bool CheckDownloadHash(const std::string& full_hash, Client* client);
    144 
    145   // Check which prefixes in |extension_ids| are in the safebrowsing blacklist.
    146   // Returns true if not, false if further checks need to be made in which case
    147   // the result will be passed to |client|.
    148   virtual bool CheckExtensionIDs(const std::set<std::string>& extension_ids,
    149                                  Client* client);
    150 
    151   // Check if the given url is on the side-effect free whitelist.
    152   // Can be called on any thread. Returns false if the check cannot be performed
    153   // (e.g. because we are disabled or because of an invalid scheme in the URL).
    154   // Otherwise, returns true if the URL is on the whitelist based on matching
    155   // the hash prefix only (so there may be false positives).
    156   virtual bool CheckSideEffectFreeWhitelistUrl(const GURL& url);
    157 
    158   // Check if the |url| matches any of the full-length hashes from the
    159   // client-side phishing detection whitelist.  Returns true if there was a
    160   // match and false otherwise.  To make sure we are conservative we will return
    161   // true if an error occurs. This method is expected to be called on the IO
    162   // thread.
    163   virtual bool MatchCsdWhitelistUrl(const GURL& url);
    164 
    165   // Check if the given IP address (either IPv4 or IPv6) matches the malware
    166   // IP blacklist.
    167   virtual bool MatchMalwareIP(const std::string& ip_address);
    168 
    169   // Check if the |url| matches any of the full-length hashes from the
    170   // download whitelist.  Returns true if there was a match and false otherwise.
    171   // To make sure we are conservative we will return true if an error occurs.
    172   // This method is expected to be called on the IO thread.
    173   virtual bool MatchDownloadWhitelistUrl(const GURL& url);
    174 
    175   // Check if |str| matches any of the full-length hashes from the download
    176   // whitelist.  Returns true if there was a match and false otherwise.
    177   // To make sure we are conservative we will return true if an error occurs.
    178   // This method is expected to be called on the IO thread.
    179   virtual bool MatchDownloadWhitelistString(const std::string& str);
    180 
    181   // Check if the CSD malware IP matching kill switch is turned on.
    182   virtual bool IsMalwareKillSwitchOn();
    183 
    184   // Called on the IO thread to cancel a pending check if the result is no
    185   // longer needed.
    186   void CancelCheck(Client* client);
    187 
    188   // Called on the IO thread when the SafeBrowsingProtocolManager has received
    189   // the full hash results for prefix hits detected in the database.
    190   void HandleGetHashResults(
    191       SafeBrowsingCheck* check,
    192       const std::vector<SBFullHashResult>& full_hashes,
    193       bool can_cache);
    194 
    195   // Called on the IO thread to release memory.
    196   void PurgeMemory();
    197 
    198   // Log the user perceived delay caused by SafeBrowsing. This delay is the time
    199   // delta starting from when we would have started reading data from the
    200   // network, and ending when the SafeBrowsing check completes indicating that
    201   // the current page is 'safe'.
    202   void LogPauseDelay(base::TimeDelta time);
    203 
    204   // Called to initialize objects that are used on the io_thread.  This may be
    205   // called multiple times during the life of the DatabaseManager. Should be
    206   // called on IO thread.
    207   void StartOnIOThread();
    208 
    209   // Called to stop or shutdown operations on the io_thread. This may be called
    210   // multiple times during the life of the DatabaseManager. Should be called
    211   // on IO thread. If shutdown is true, the manager is disabled permanently.
    212   void StopOnIOThread(bool shutdown);
    213 
    214  protected:
    215   virtual ~SafeBrowsingDatabaseManager();
    216 
    217   // protected for tests.
    218   void NotifyDatabaseUpdateFinished(bool update_succeeded);
    219 
    220  private:
    221   friend class base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>;
    222   friend class SafeBrowsingServerTest;
    223   friend class SafeBrowsingServiceTest;
    224   friend class SafeBrowsingServiceTestHelper;
    225   friend class SafeBrowsingDatabaseManagerTest;
    226 
    227   typedef std::set<SafeBrowsingCheck*> CurrentChecks;
    228   typedef std::vector<SafeBrowsingCheck*> GetHashRequestors;
    229   typedef base::hash_map<SBPrefix, GetHashRequestors> GetHashRequests;
    230 
    231   // Clients that we've queued up for checking later once the database is ready.
    232   struct QueuedCheck {
    233     QueuedCheck(const safe_browsing_util::ListType check_type,
    234                 Client* client,
    235                 const GURL& url,
    236                 const std::vector<SBThreatType>& expected_threats,
    237                 const base::TimeTicks& start);
    238     ~QueuedCheck();
    239     safe_browsing_util::ListType check_type;
    240     Client* client;
    241     GURL url;
    242     std::vector<SBThreatType> expected_threats;
    243     base::TimeTicks start;  // When check was queued.
    244   };
    245 
    246   // Called to stop operations on the io_thread. This may be called multiple
    247   // times during the life of the DatabaseManager. Should be called on IO
    248   // thread.
    249   void DoStopOnIOThread();
    250 
    251   // Returns whether |database_| exists and is accessible.
    252   bool DatabaseAvailable() const;
    253 
    254   // Called on the IO thread.  If the database does not exist, queues up a call
    255   // on the db thread to create it.  Returns whether the database is available.
    256   //
    257   // Note that this is only needed outside the db thread, since functions on the
    258   // db thread can call GetDatabase() directly.
    259   bool MakeDatabaseAvailable();
    260 
    261   // Called on the IO thread to try to close the database, freeing the memory
    262   // associated with it.  The database will be automatically reopened as needed.
    263   //
    264   // NOTE: Actual database closure is asynchronous, and until it happens, the IO
    265   // thread is not allowed to access it; may not actually trigger a close if one
    266   // is already pending or doing so would cause problems.
    267   void CloseDatabase();
    268 
    269   // Should only be called on db thread as SafeBrowsingDatabase is not
    270   // threadsafe.
    271   SafeBrowsingDatabase* GetDatabase();
    272 
    273   // Called on the IO thread with the check result.
    274   void OnCheckDone(SafeBrowsingCheck* info);
    275 
    276   // Called on the database thread to retrieve chunks.
    277   void GetAllChunksFromDatabase(GetChunksCallback callback);
    278 
    279   // Called on the IO thread with the results of all chunks.
    280   void OnGetAllChunksFromDatabase(const std::vector<SBListChunkRanges>& lists,
    281                                   bool database_error,
    282                                   GetChunksCallback callback);
    283 
    284   // Called on the IO thread after the database reports that it added a chunk.
    285   void OnAddChunksComplete(AddChunksCallback callback);
    286 
    287   // Notification that the database is done loading its bloom filter.  We may
    288   // have had to queue checks until the database is ready, and if so, this
    289   // checks them.
    290   void DatabaseLoadComplete();
    291 
    292   // Called on the database thread to add/remove chunks and host keys.
    293   // Callee will free the data when it's done.
    294   void AddDatabaseChunks(const std::string& list, SBChunkList* chunks,
    295                          AddChunksCallback callback);
    296 
    297   void DeleteDatabaseChunks(std::vector<SBChunkDelete>* chunk_deletes);
    298 
          // Returns the SBThreatType for the list named |list_name|.
    299   static SBThreatType GetThreatTypeFromListname(const std::string& list_name);
    300 
          // NOTE(review): the next two declarations are undocumented in this header;
          // check the implementation file for their threading requirements.
    301   void NotifyClientBlockingComplete(Client* client, bool proceed);
    302 
    303   void DatabaseUpdateFinished(bool update_succeeded);
    304 
    305   // Called on the db thread to close the database.  See CloseDatabase().
    306   void OnCloseDatabase();
    307 
    308   // Runs on the db thread to reset the database. We assume that resetting the
    309   // database is a synchronous operation.
    310   void OnResetDatabase();
    311 
    312   // Store in-memory the GetHash response. Runs on the database thread.
    313   void CacheHashResults(const std::vector<SBPrefix>& prefixes,
    314                         const std::vector<SBFullHashResult>& full_hashes);
    315 
    316   // Internal worker function for processing full hashes.
    317   void OnHandleGetHashResults(SafeBrowsingCheck* check,
    318                               const std::vector<SBFullHashResult>& full_hashes);
    319 
    320   // Run one check against |full_hashes|.  Returns |true| if the check
    321   // finds a match in |full_hashes|.
    322   bool HandleOneCheck(SafeBrowsingCheck* check,
    323                       const std::vector<SBFullHashResult>& full_hashes);
    324 
    325   // Checks the download hash on safe_browsing_thread_.
    326   void CheckDownloadHashOnSBThread(SafeBrowsingCheck* check);
    327 
    328   // Invoked by CheckDownloadUrl. It checks the download URL on
    329   // safe_browsing_thread_.
    330   void CheckDownloadUrlOnSBThread(SafeBrowsingCheck* check);
    331 
    332   // The callback function when a safebrowsing check is timed out. Client will
    333   // be notified that the safebrowsing check is SAFE when this happens.
    334   void TimeoutCallback(SafeBrowsingCheck* check);
    335 
    336   // Calls the Client's callback on IO thread after CheckDownloadUrl finishes.
    337   void CheckDownloadUrlDone(SafeBrowsingCheck* check);
    338 
    339   // Calls the Client's callback on IO thread after CheckDownloadHash finishes.
    340   void CheckDownloadHashDone(SafeBrowsingCheck* check);
    341 
    342   // Checks all extension ID hashes on safe_browsing_thread_.
    343   void CheckExtensionIDsOnSBThread(SafeBrowsingCheck* check);
    344 
    345   // Helper function that calls safe browsing client and cleans up |checks_|.
    346   void SafeBrowsingCheckDone(SafeBrowsingCheck* check);
    347 
    348   // Helper function to set |check| with default values and start a safe
    349   // browsing check with a timeout (there is no timeout parameter; presumably
    350   // |check_timeout_| is used - confirm). |task| will be called on
          // success, otherwise TimeoutCallback will be called.
    351   void StartSafeBrowsingCheck(SafeBrowsingCheck* check,
    352                               const base::Closure& task);
    353 
    354   // SafeBrowsingProtocolManagerDelegate overrides.
    355   virtual void ResetDatabase() OVERRIDE;
    356   virtual void UpdateStarted() OVERRIDE;
    357   virtual void UpdateFinished(bool success) OVERRIDE;
    358   virtual void GetChunks(GetChunksCallback callback) OVERRIDE;
    359   virtual void AddChunks(const std::string& list, SBChunkList* chunks,
    360                          AddChunksCallback callback) OVERRIDE;
    361   virtual void DeleteChunks(
    362       std::vector<SBChunkDelete>* delete_chunks) OVERRIDE;
    363 
          // Reference to the SafeBrowsingService (presumably the |service| passed to
          // the constructor - confirm).
    364   scoped_refptr<SafeBrowsingService> sb_service_;
    365 
          // Checks currently being tracked; SafeBrowsingCheckDone() cleans entries
          // out of this set.
    366   CurrentChecks checks_;
    367 
    368   // Used for issuing only one GetHash request for a given prefix.
    369   GetHashRequests gethash_requests_;
    370 
    371   // The persistent database.  We don't use a scoped_ptr because it
    372   // needs to be destroyed on a different thread than this object.
    373   SafeBrowsingDatabase* database_;
    374 
    375   // Lock used to prevent possible data races due to compiler optimizations.
    376   mutable base::Lock database_lock_;
    377 
    378   // Whether the service is running. 'enabled_' is used by the
    379   // SafeBrowsingDatabaseManager on the IO thread during normal operations.
    380   bool enabled_;
    381 
    382   // Indicate if download_protection is enabled by command switch
    383   // so we allow this feature to be exercised.
    384   bool enable_download_protection_;
    385 
    386   // Indicate if client-side phishing detection whitelist should be enabled
    387   // or not.
    388   bool enable_csd_whitelist_;
    389 
    390   // Indicate if the download whitelist should be enabled or not.
    391   bool enable_download_whitelist_;
    392 
    393   // Indicate if the extension blacklist should be enabled.
    394   bool enable_extension_blacklist_;
    395 
    396   // Indicate if the side effect free whitelist should be enabled.
    397   bool enable_side_effect_free_whitelist_;
    398 
    399   // Indicate if the csd malware IP blacklist should be enabled.
    400   bool enable_ip_blacklist_;
    401 
    402   // The SafeBrowsing thread that runs database operations.
    403   //
    404   // Note: Functions that run on this thread should run synchronously and return
    405   // to the IO thread, not post additional tasks back to this thread, lest we
    406   // cause a race condition at shutdown time that leads to a database leak.
    407   scoped_ptr<base::Thread> safe_browsing_thread_;
    408 
    409   // Indicates if we're currently in an update cycle.
    410   bool update_in_progress_;
    411 
    412   // When true, newly fetched chunks may not be in the database yet since the
    413   // database is still updating.
    414   bool database_update_in_progress_;
    415 
    416   // Indicates if we're in the midst of trying to close the database.  If this
    417   // is true, nothing on the IO thread should access the database.
    418   bool closing_database_;
    419 
          // Checks queued up until the database is ready (see QueuedCheck and
          // DatabaseLoadComplete()).
    420   std::deque<QueuedCheck> queued_checks_;
    421 
    422   // Timeout to use for safe browsing checks.
    423   base::TimeDelta check_timeout_;
    424 
    425   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseManager);
    426 };
    427 
    428 #endif  // CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_
    429