Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // The Safe Browsing service is responsible for downloading anti-phishing and
      6 // anti-malware tables and checking urls against them.
      7 
      8 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H_
      9 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H_
     10 #pragma once
     11 
     12 #include <deque>
     13 #include <set>
     14 #include <string>
     15 #include <vector>
     16 
     17 #include "base/hash_tables.h"
     18 #include "base/memory/ref_counted.h"
     19 #include "base/memory/scoped_ptr.h"
     20 #include "base/synchronization/lock.h"
     21 #include "base/task.h"
     22 #include "base/time.h"
     23 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
     24 #include "googleurl/src/gurl.h"
     25 #include "webkit/glue/resource_type.h"
     26 
     27 class MalwareDetails;
     28 class PrefService;
     29 class SafeBrowsingDatabase;
     30 class SafeBrowsingProtocolManager;
     31 class SafeBrowsingServiceFactory;
     32 
     33 namespace base {
     34 class Thread;
     35 }
     36 
     37 namespace net {
     38 class URLRequestContextGetter;
     39 }
     40 
     41 // Downloads anti-phishing and anti-malware tables and checks urls against
        // them. Instances are reference counted (base::RefCountedThreadSafe).
        // Construction needs to happen on the main thread.
     42 class SafeBrowsingService
     43     : public base::RefCountedThreadSafe<SafeBrowsingService> {
     44  public:
          // Forward declaration; the Client callback interface is defined below.
     45   class Client;
     46   // Outcome of a SafeBrowsing check, as passed to the Client callbacks and
     47   // to DisplayBlockingPage().
     48   enum UrlCheckResult {
     49     SAFE,
     50     URL_PHISHING,
     51     URL_MALWARE,
     52     BINARY_MALWARE_URL,  // Binary url leads to malware.
     53     BINARY_MALWARE_HASH,  // Binary hash indicates this is malware.
     54   };
     55 
     56   // Structure used to pass parameters between the IO and UI thread when
     57   // interacting with the blocking page.
          // NOTE(review): the fields appear to mirror the arguments of
          // DisplayBlockingPage(), with |threat_type| holding the check result --
          // confirm against the .cc file.
     58   struct UnsafeResource {
     59     UnsafeResource();
     60     ~UnsafeResource();
     61 
     62     GURL url;
     63     GURL original_url;
     64     std::vector<GURL> redirect_urls;
     65     ResourceType::Type resource_type;
     66     UrlCheckResult threat_type;
     67     Client* client;
     68     int render_process_host_id;
     69     int render_view_id;
     70   };
     71 
     72   // Bundle of SafeBrowsing state for one URL or hash prefix check.
     73   struct SafeBrowsingCheck {
     74     SafeBrowsingCheck();
     75     ~SafeBrowsingCheck();
     76 
     77     // Either |urls| or |full_hash| is used to look up the database.
     78     std::vector<GURL> urls;
     79     scoped_ptr<SBFullHash> full_hash;
     80 
            // The client to notify with the result of this check.
     81     Client* client;
     82     bool need_get_hash;
     83     base::TimeTicks start;  // When check was sent to SB service.
     84     UrlCheckResult result;
     85     bool is_download;  // Whether this check is for a download url or hash.
     86     std::vector<SBPrefix> prefix_hits;
     87     std::vector<SBFullHashResult> full_hits;
     88 
     89     // Task to make the callback to safebrowsing clients in case
     90     // safebrowsing check takes too long to finish. Not owned by
     91     // this class.
     92     // TODO(lzheng): We should consider using this timeout check
     93     // for browsing too (instead of implementing it in
     94     // safe_browsing_resource_handler.cc).
     95     CancelableTask* timeout_task;
     96 
     97    private:
     98     DISALLOW_COPY_AND_ASSIGN(SafeBrowsingCheck);
     99   };
    100 
          // Users of this service implement this interface to be notified
          // asynchronously of the result.
    101   class Client {
    102    public:
    103     virtual ~Client() {}
    104 
            // Delivers the outcome of |check| to this client.
            // NOTE(review): presumably dispatches to one of the protected
            // On*CheckResult() hooks below -- confirm in the .cc file.
    105     void OnSafeBrowsingResult(const SafeBrowsingCheck& check);
    106 
    107     // Called when the user has made a decision about how to handle the
    108     // SafeBrowsing interstitial page.
    109     virtual void OnBlockingPageComplete(bool proceed) {}
    110 
    111    protected:
    112     // Called when the result of checking a browse URL is known.
    113     virtual void OnBrowseUrlCheckResult(const GURL& url,
    114                                         UrlCheckResult result) {}
    115 
    116     // Called when the result of checking a download URL is known.
    117     virtual void OnDownloadUrlCheckResult(const std::vector<GURL>& url_chain,
    118                                           UrlCheckResult result) {}
    119 
    120     // Called when the result of checking a download binary hash is known.
    121     virtual void OnDownloadHashCheckResult(const std::string& hash,
    122                                            UrlCheckResult result) {}
    123   };
    124 
    125 
    126   // Makes the passed |factory| the factory used to instantiate
    127   // a SafeBrowsingService. Useful for tests.
    128   static void RegisterFactory(SafeBrowsingServiceFactory* factory) {
    129     factory_ = factory;
    130   }
    131 
    132   // Create an instance of the safe browsing service.
    133   static SafeBrowsingService* CreateSafeBrowsingService();
    134 
    135   // Called on the UI thread to initialize the service.
    136   void Initialize();
    137 
    138   // Called on the main thread to let us know that the io_thread is going away.
    139   void ShutDown();
    140 
    141   // Returns true if the url's scheme can be checked.
    142   bool CanCheckUrl(const GURL& url) const;
    143 
    144   // Called on UI thread to decide if safe browsing related stats
    145   // could be reported.
    146   bool CanReportStats() const;
    147 
    148   // Called on UI thread to decide if the download file's sha256 hash
    149   // should be calculated for safebrowsing.
    150   bool DownloadBinHashNeeded() const;
    151 
    152   // Called on the IO thread to check if the given url is safe or not.  If we
    153   // can synchronously determine that the url is safe, CheckBrowseUrl returns
    154   // true.  Otherwise it returns false, and "client" is called asynchronously
    155   // with the result when it is ready.
    156   virtual bool CheckBrowseUrl(const GURL& url, Client* client);
    157 
    158   // Check if the prefix for |url| is in safebrowsing download add lists.
    159   // Result will be passed to callback in |client|.
          // NOTE(review): the meaning of the bool return value is not documented
          // here; presumably it follows CheckBrowseUrl() -- confirm.
    160   bool CheckDownloadUrl(const std::vector<GURL>& url_chain, Client* client);
    161 
    162   // Check if the prefix for |full_hash| is in safebrowsing binhash add lists.
    163   // Result will be passed to callback in |client|.
    164   virtual bool CheckDownloadHash(const std::string& full_hash, Client* client);
    165 
    166   // Check if the |url| matches any of the full-length hashes from the
    167   // client-side phishing detection whitelist.  Returns true if there was a
    168   // match and false otherwise.  To make sure we are conservative we will return
    169   // true if an error occurs. This method is expected to be called on the IO
    170   // thread.
    171   virtual bool MatchCsdWhitelistUrl(const GURL& url);
    172 
    173   // Called on the IO thread to cancel a pending check if the result is no
    174   // longer needed.
    175   void CancelCheck(Client* client);
    176 
    177   // Called on the IO thread to display an interstitial page.
    178   // |url| is the url of the resource that matches a safe browsing list.
    179   // If the request contained a chain of redirects, |url| is the last url
    180   // in the chain, and |original_url| is the first one (the root of the
    181   // chain). Otherwise, |original_url| = |url|.
    182   virtual void DisplayBlockingPage(const GURL& url,
    183                                    const GURL& original_url,
    184                                    const std::vector<GURL>& redirect_urls,
    185                                    ResourceType::Type resource_type,
    186                                    UrlCheckResult result,
    187                                    Client* client,
    188                                    int render_process_host_id,
    189                                    int render_view_id);
    190 
    191   // Called on the IO thread when the SafeBrowsingProtocolManager has received
    192   // the full hash results for prefix hits detected in the database.
    193   void HandleGetHashResults(
    194       SafeBrowsingCheck* check,
    195       const std::vector<SBFullHashResult>& full_hashes,
    196       bool can_cache);
    197 
    198   // Called on the IO thread.
    199   void HandleChunk(const std::string& list, SBChunkList* chunks);
    200   void HandleChunkDelete(std::vector<SBChunkDelete>* chunk_deletes);
    201 
    202   // Update management.  Called on the IO thread.
    203   void UpdateStarted();
    204   void UpdateFinished(bool update_succeeded);
    205   // Whether there is an update in progress. Called on the IO thread.
    206   bool IsUpdateInProgress() const;
    207 
    208   // The blocking page on the UI thread has completed.
    209   void OnBlockingPageDone(const std::vector<UnsafeResource>& resources,
    210                           bool proceed);
    211 
    212   // Called on the UI thread when the SafeBrowsingProtocolManager has received
    213   // updated MAC keys.
    214   void OnNewMacKeys(const std::string& client_key,
    215                     const std::string& wrapped_key);
    216 
    217   // Notification on the UI thread from the advanced options UI.
    218   void OnEnable(bool enabled);
    219 
          // Returns |enabled_|, i.e. whether the service is running.
    220   bool enabled() const { return enabled_; }
    221 
          // True only when the service is enabled and download protection has been
          // turned on as well.
    222   bool download_protection_enabled() const {
    223     return enabled_ && enable_download_protection_;
    224   }
    225 
    226   // Preference handling.
    227   static void RegisterPrefs(PrefService* prefs);
    228 
    229   // Called on the IO thread to try to close the database, freeing the memory
    230   // associated with it.  The database will be automatically reopened as needed.
    231   //
    232   // NOTE: Actual database closure is asynchronous, and until it happens, the IO
    233   // thread is not allowed to access it; may not actually trigger a close if one
    234   // is already pending or doing so would cause problems.
    235   void CloseDatabase();
    236 
    237   // Called on the IO thread to reset the database.
    238   void ResetDatabase();
    239 
    240   // Log the user perceived delay caused by SafeBrowsing. This delay is the time
    241   // delta starting from when we would have started reading data from the
    242   // network, and ending when the SafeBrowsing check completes indicating that
    243   // the current page is 'safe'.
    244   void LogPauseDelay(base::TimeDelta time);
    245 
    246   // Called on the IO thread by the MalwareDetails with the serialized
    247   // protocol buffer, so the service can send it over.
    248   virtual void SendSerializedMalwareDetails(const std::string& serialized);
    249 
    250   // Report hits to the unsafe contents (malware, phishing, unsafe download URL)
    251   // to the server. Can only be called on UI thread.  If |post_data| is
    252   // non-empty, the request will be sent as a POST instead of a GET.
    253   void ReportSafeBrowsingHit(const GURL& malicious_url,
    254                              const GURL& page_url,
    255                              const GURL& referrer_url,
    256                              bool is_subresource,
    257                              UrlCheckResult threat_type,
    258                              const std::string& post_data);
    259 
    260  protected:
    261   // Creates the safe browsing service.  Need to initialize before using.
    262   SafeBrowsingService();
    263 
    264   virtual ~SafeBrowsingService();
    265 
    266  private:
    267   friend class SafeBrowsingServiceFactoryImpl;
    268 
          // Container type of |checks_|.
    269   typedef std::set<SafeBrowsingCheck*> CurrentChecks;
          // The checks waiting on the GetHash request for one prefix, and the map
          // from each prefix to those checks (the type of |gethash_requests_|).
    270   typedef std::vector<SafeBrowsingCheck*> GetHashRequestors;
    271   typedef base::hash_map<SBPrefix, GetHashRequestors> GetHashRequests;
    272 
    273   // Used for whitelisting a render view when the user ignores our warning.
    274   struct WhiteListedEntry;
    275 
    276   // Clients that we've queued up for checking later once the database is ready.
    277   struct QueuedCheck {
    278     Client* client;
    279     GURL url;
    280     base::TimeTicks start;  // When check was queued.
    281   };
    282 
    283   friend class base::RefCountedThreadSafe<SafeBrowsingService>;
    284   friend class SafeBrowsingServiceTest;
    285 
    286   // Called to initialize objects that are used on the io_thread.
    287   void OnIOInitialize(const std::string& client_key,
    288                       const std::string& wrapped_key,
    289                       net::URLRequestContextGetter* request_context_getter);
    290 
    291   // Called to shutdown operations on the io_thread.
    292   void OnIOShutdown();
    293 
    294   // Returns whether |database_| exists and is accessible.
    295   bool DatabaseAvailable() const;
    296 
    297   // Called on the IO thread.  If the database does not exist, queues up a call
    298   // on the db thread to create it.  Returns whether the database is available.
    299   //
    300   // Note that this is only needed outside the db thread, since functions on the
    301   // db thread can call GetDatabase() directly.
    302   bool MakeDatabaseAvailable();
    303 
    304   // Should only be called on db thread as SafeBrowsingDatabase is not
    305   // threadsafe.
    306   SafeBrowsingDatabase* GetDatabase();
    307 
    308   // Called on the IO thread with the check result.
    309   void OnCheckDone(SafeBrowsingCheck* info);
    310 
    311   // Called on the database thread to retrieve chunks.
    312   void GetAllChunksFromDatabase();
    313 
    314   // Called on the IO thread with the results of all chunks.
    315   void OnGetAllChunksFromDatabase(const std::vector<SBListChunkRanges>& lists,
    316                                   bool database_error);
    317 
    318   // Called on the IO thread after the database reports that it added a chunk.
    319   void OnChunkInserted();
    320 
    321   // Notification that the database is done loading its bloom filter.  We may
    322   // have had to queue checks until the database is ready, and if so, this
    323   // checks them.
    324   void DatabaseLoadComplete();
    325 
    326   // Called on the database thread to add/remove chunks and host keys.
    327   // Callee will free the data when it's done.
    328   void HandleChunkForDatabase(const std::string& list,
    329                               SBChunkList* chunks);
    330 
          // NOTE(review): presumably the database-thread counterpart of
          // HandleChunkDelete() -- confirm in the .cc file.
    331   void DeleteChunks(std::vector<SBChunkDelete>* chunk_deletes);
    332 
          // Maps the safebrowsing list named |list_name| to a UrlCheckResult.
    333   static UrlCheckResult GetResultFromListname(const std::string& list_name);
    334 
          // NOTE(review): presumably forwards |proceed| to
          // Client::OnBlockingPageComplete() on |client| -- confirm in the .cc file.
    335   void NotifyClientBlockingComplete(Client* client, bool proceed);
    336 
          // NOTE(review): presumably invoked once the database has finished
          // applying an update (see |database_update_in_progress_|) -- confirm.
    337   void DatabaseUpdateFinished(bool update_succeeded);
    338 
    339   // Start up SafeBrowsing objects. This can be called at browser start, or when
    340   // the user checks the "Enable SafeBrowsing" option in the Advanced options
    341   // UI.
    342   void Start();
    343 
    344   // Called on the db thread to close the database.  See CloseDatabase().
    345   void OnCloseDatabase();
    346 
    347   // Runs on the db thread to reset the database. We assume that resetting the
    348   // database is a synchronous operation.
    349   void OnResetDatabase();
    350 
    351   // Store in-memory the GetHash response. Runs on the database thread.
    352   void CacheHashResults(const std::vector<SBPrefix>& prefixes,
    353                         const std::vector<SBFullHashResult>& full_hashes);
    354 
    355   // Internal worker function for processing full hashes.
    356   void OnHandleGetHashResults(SafeBrowsingCheck* check,
    357                               const std::vector<SBFullHashResult>& full_hashes);
    358 
    359   // Run one check against |full_hashes|.  Returns |true| if the check
    360   // finds a match in |full_hashes|.
    361   bool HandleOneCheck(SafeBrowsingCheck* check,
    362                       const std::vector<SBFullHashResult>& full_hashes);
    363 
    364   // Invoked on the UI thread to show the blocking page.
    365   void DoDisplayBlockingPage(const UnsafeResource& resource);
    366 
    367   // Call protocol manager on IO thread to report hits of unsafe contents.
    368   void ReportSafeBrowsingHitOnIOThread(const GURL& malicious_url,
    369                                        const GURL& page_url,
    370                                        const GURL& referrer_url,
    371                                        bool is_subresource,
    372                                        UrlCheckResult threat_type,
    373                                        const std::string& post_data);
    374 
    375   // Checks the download hash on safe_browsing_thread_.
    376   void CheckDownloadHashOnSBThread(SafeBrowsingCheck* check);
    377 
    378   // Invoked by CheckDownloadUrl. It checks the download URL on
    379   // safe_browsing_thread_.
    380   void CheckDownloadUrlOnSBThread(SafeBrowsingCheck* check);
    381 
    382   // The callback function when a safebrowsing check is timed out. Client will
    383   // be notified that the safebrowsing check is SAFE when this happens.
    384   void TimeoutCallback(SafeBrowsingCheck* check);
    385 
    386   // Calls the Client's callback on IO thread after CheckDownloadUrl finishes.
    387   void CheckDownloadUrlDone(SafeBrowsingCheck* check);
    388 
    389   // Calls the Client's callback on IO thread after CheckDownloadHash finishes.
    390   void CheckDownloadHashDone(SafeBrowsingCheck* check);
    391 
    392   // Helper function that calls safe browsing client and cleans up |checks_|.
    393   void SafeBrowsingCheckDone(SafeBrowsingCheck* check);
    394 
    395   // Helper function to set |check| with default values and start a safe
    396   // browsing check with timeout of |timeout_ms|. |task| will be called upon
    397   // success, otherwise TimeoutCallback will be called.
    398   void StartDownloadCheck(SafeBrowsingCheck* check,
    399                           Client* client,
    400                           CancelableTask* task,
    401                           int64 timeout_ms);
    402 
    403   // The factory used to instantiate a SafeBrowsingService object.
    404   // Useful for tests, so they can provide their own implementation of
    405   // SafeBrowsingService.
    406   static SafeBrowsingServiceFactory* factory_;
    407 
          // Checks tracked by the service; SafeBrowsingCheckDone() cleans up
          // entries from this set.
    408   CurrentChecks checks_;
    409 
    410   // Used for issuing only one GetHash request for a given prefix.
    411   GetHashRequests gethash_requests_;
    412 
    413   // The persistent database.  We don't use a scoped_ptr because it
    414   // needs to be destructed on a different thread than this object.
    415   SafeBrowsingDatabase* database_;
    416 
    417   // Lock used to prevent possible data races due to compiler optimizations.
    418   mutable base::Lock database_lock_;
    419 
    420   // Handles interaction with SafeBrowsing servers.
    421   SafeBrowsingProtocolManager* protocol_manager_;
    422 
          // Entries used for whitelisting a render view when the user ignores our
          // warning (see WhiteListedEntry).
    423   std::vector<WhiteListedEntry> white_listed_entries_;
    424 
    425   // Whether the service is running. 'enabled_' is used by SafeBrowsingService
    426   // on the IO thread during normal operations.
    427   bool enabled_;
    428 
    429   // Indicate if download_protection is enabled by command switch
    430   // so we allow this feature to be exercised.
    431   bool enable_download_protection_;
    432 
    433   // Indicate if client-side phishing detection whitelist should be enabled
    434   // or not.
    435   bool enable_csd_whitelist_;
    436 
    437   // The SafeBrowsing thread that runs database operations.
    438   //
    439   // Note: Functions that run on this thread should run synchronously and return
    440   // to the IO thread, not post additional tasks back to this thread, lest we
    441   // cause a race condition at shutdown time that leads to a database leak.
    442   scoped_ptr<base::Thread> safe_browsing_thread_;
    443 
    444   // Indicates if we're currently in an update cycle.
    445   bool update_in_progress_;
    446 
    447   // When true, newly fetched chunks may not be in the database yet since the
    448   // database is still updating.
    449   bool database_update_in_progress_;
    450 
    451   // Indicates if we're in the midst of trying to close the database.  If this
    452   // is true, nothing on the IO thread should access the database.
    453   bool closing_database_;
    454 
          // Checks queued up for later, once the database is ready (see QueuedCheck).
    455   std::deque<QueuedCheck> queued_checks_;
    456 
    457   // When download url check takes this long, client's callback will be called
    458   // without waiting for the result.
    459   int64 download_urlcheck_timeout_ms_;
    460 
    461   // Similar to |download_urlcheck_timeout_ms_|, but for download hash checks.
    462   int64 download_hashcheck_timeout_ms_;
    463 
    464   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingService);
    465 };
    466 
    467 // Abstract factory that produces SafeBrowsingService objects.  Tests can
        // supply their own implementation and install it through
        // SafeBrowsingService::RegisterFactory().
    468 class SafeBrowsingServiceFactory {
    469  public:
    470   SafeBrowsingServiceFactory() {}
    471   virtual ~SafeBrowsingServiceFactory() {}
          // Implemented by concrete factories to build the service instance.
          // NOTE(review): ownership of the returned pointer is not stated in this
          // header -- confirm with the callers.
    472   virtual SafeBrowsingService* CreateSafeBrowsingService() = 0;
    473  private:
    474   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingServiceFactory);
    475 };
    476 
    477 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_SERVICE_H_
    478