Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
      6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
      7 
      8 #include <set>
      9 #include <string>
     10 #include <vector>
     11 
     12 #include "base/files/file_path.h"
     13 #include "base/gtest_prod_util.h"
     14 #include "base/memory/scoped_ptr.h"
     15 #include "base/memory/weak_ptr.h"
     16 #include "base/synchronization/lock.h"
     17 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
     18 
     19 namespace base {
     20 class MessageLoop;
     21 class Time;
     22 }
     23 
     24 namespace safe_browsing {
     25 class PrefixSet;
     26 }
     27 
     28 class GURL;
     29 class SafeBrowsingDatabase;
     30 
     31 // Factory for creating SafeBrowsingDatabase. Tests implement this factory
     32 // to create fake Databases for testing.
     33 class SafeBrowsingDatabaseFactory {
     34  public:
     35   SafeBrowsingDatabaseFactory() { }
     36   virtual ~SafeBrowsingDatabaseFactory() { }
     37   virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
     38       bool enable_download_protection,
     39       bool enable_client_side_whitelist,
     40       bool enable_download_whitelist,
     41       bool enable_extension_blacklist,
     42       bool enable_side_effect_free_whitelist) = 0;
     43  private:
     44   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory);
     45 };
     46 
     47 
     48 // Encapsulates on-disk databases that for safebrowsing. There are
     49 // four databases: browse, download, download whitelist and
     50 // client-side detection (csd) whitelist databases. The browse database contains
     51 // information about phishing and malware urls. The download database contains
     52 // URLs for bad binaries (e.g: those containing virus) and hash of
     53 // these downloaded contents. The download whitelist contains whitelisted
     54 // download hosting sites as well as whitelisted binary signing certificates
     55 // etc.  The csd whitelist database contains URLs that will never be considered
     56 // as phishing by the client-side phishing detection. These on-disk databases
     57 // are shared among all profiles, as it doesn't contain user-specific data. This
     58 // object is not thread-safe, i.e. all its methods should be used on the same
     59 // thread that it was created on.
     60 class SafeBrowsingDatabase {
     61  public:
     62   // Factory method for obtaining a SafeBrowsingDatabase implementation.
     63   // It is not thread safe.
     64   // |enable_download_protection| is used to control the download database
     65   // feature.
     66   // |enable_client_side_whitelist| is used to control the csd whitelist
     67   // database feature.
     68   // |enable_download_whitelist| is used to control the download whitelist
     69   // database feature.
     70   static SafeBrowsingDatabase* Create(bool enable_download_protection,
     71                                       bool enable_client_side_whitelist,
     72                                       bool enable_download_whitelist,
     73                                       bool enable_extension_blacklist,
     74                                       bool side_effect_free_whitelist);
     75 
     76   // Makes the passed |factory| the factory used to instantiate
     77   // a SafeBrowsingDatabase. This is used for tests.
     78   static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) {
     79     factory_ = factory;
     80   }
     81 
     82   virtual ~SafeBrowsingDatabase();
     83 
     84   // Initializes the database with the given filename.
     85   virtual void Init(const base::FilePath& filename) = 0;
     86 
     87   // Deletes the current database and creates a new one.
     88   virtual bool ResetDatabase() = 0;
     89 
     90   // Returns false if |url| is not in the browse database.  If it
     91   // returns true, then either |matching_list| is the name of the matching
     92   // list, or |prefix_hits| and |full_hits| contains the matching hash
     93   // prefixes.  This function is safe to call from threads other than
     94   // the creation thread.
     95   virtual bool ContainsBrowseUrl(const GURL& url,
     96                                  std::string* matching_list,
     97                                  std::vector<SBPrefix>* prefix_hits,
     98                                  std::vector<SBFullHashResult>* full_hits,
     99                                  base::Time last_update) = 0;
    100 
    101   // Returns false if none of |urls| are in Download database. If it returns
    102   // true, |prefix_hits| should contain the prefixes for the URLs that were in
    103   // the database.  This function could ONLY be accessed from creation thread.
    104   virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
    105                                    std::vector<SBPrefix>* prefix_hits) = 0;
    106 
    107   // Returns false if |prefix| is not in Download database.
    108   // This function could ONLY be accessed from creation thread.
    109   virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) = 0;
    110 
    111   // Returns false if |url| is not on the client-side phishing detection
    112   // whitelist.  Otherwise, this function returns true.  Note: the whitelist
    113   // only contains full-length hashes so we don't return any prefix hit.
    114   // This function should only be called from the IO thread.
    115   virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0;
    116 
    117   // The download whitelist is used for two purposes: a white-domain list of
    118   // sites that are considered to host only harmless binaries as well as a
    119   // whitelist of arbitrary strings such as hashed certificate authorities that
    120   // are considered to be trusted.  The two methods below let you lookup
    121   // the whitelist either for a URL or an arbitrary string.  These methods will
    122   // return false if no match is found and true otherwise.
    123   // This function could ONLY be accessed from the IO thread.
    124   virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) = 0;
    125   virtual bool ContainsDownloadWhitelistedString(const std::string& str) = 0;
    126 
    127   // Populates |prefix_hits| with any prefixes in |prefixes| that have matches
    128   // in the database.
    129   //
    130   // This function can ONLY be accessed from the creation thread.
    131   virtual bool ContainsExtensionPrefixes(
    132       const std::vector<SBPrefix>& prefixes,
    133       std::vector<SBPrefix>* prefix_hits) = 0;
    134 
    135   // Returns false unless the hash of |url| is on the side-effect free
    136   // whitelist.
    137   virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) = 0;
    138 
    139   // A database transaction should look like:
    140   //
    141   // std::vector<SBListChunkRanges> lists;
    142   // if (db.UpdateStarted(&lists)) {
    143   //   // Do something with |lists|.
    144   //
    145   //   // Process add/sub commands.
    146   //   db.InsertChunks(list_name, chunks);
    147   //
    148   //   // Process adddel/subdel commands.
    149   //   db.DeleteChunks(chunks_deletes);
    150   //
    151   //   // If passed true, processes the collected chunk info and
    152   //   // rebuilds the filter.  If passed false, rolls everything
    153   //   // back.
    154   //   db.UpdateFinished(success);
    155   // }
    156   //
    157   // If UpdateStarted() returns true, the caller MUST eventually call
    158   // UpdateFinished().  If it returns false, the caller MUST NOT call
    159   // the other functions.
    160   virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0;
    161   virtual void InsertChunks(const std::string& list_name,
    162                             const SBChunkList& chunks) = 0;
    163   virtual void DeleteChunks(
    164       const std::vector<SBChunkDelete>& chunk_deletes) = 0;
    165   virtual void UpdateFinished(bool update_succeeded) = 0;
    166 
    167   // Store the results of a GetHash response. In the case of empty results, we
    168   // cache the prefixes until the next update so that we don't have to issue
    169   // further GetHash requests we know will be empty.
    170   virtual void CacheHashResults(
    171       const std::vector<SBPrefix>& prefixes,
    172       const std::vector<SBFullHashResult>& full_hits) = 0;
    173 
    174   // Returns true if the malware IP blacklisting killswitch URL is present
    175   // in the csd whitelist.
    176   virtual bool IsMalwareIPMatchKillSwitchOn() = 0;
    177 
    178   // The name of the bloom-filter file for the given database file.
    179   // NOTE(shess): OBSOLETE.  Present for deleting stale files.
    180   static base::FilePath BloomFilterForFilename(
    181       const base::FilePath& db_filename);
    182 
    183   // The name of the prefix set file for the given database file.
    184   static base::FilePath PrefixSetForFilename(const base::FilePath& db_filename);
    185 
    186   // Filename for malware and phishing URL database.
    187   static base::FilePath BrowseDBFilename(
    188       const base::FilePath& db_base_filename);
    189 
    190   // Filename for download URL and download binary hash database.
    191   static base::FilePath DownloadDBFilename(
    192       const base::FilePath& db_base_filename);
    193 
    194   // Filename for client-side phishing detection whitelist databsae.
    195   static base::FilePath CsdWhitelistDBFilename(
    196       const base::FilePath& csd_whitelist_base_filename);
    197 
    198   // Filename for download whitelist databsae.
    199   static base::FilePath DownloadWhitelistDBFilename(
    200       const base::FilePath& download_whitelist_base_filename);
    201 
    202   // Filename for extension blacklist database.
    203   static base::FilePath ExtensionBlacklistDBFilename(
    204       const base::FilePath& extension_blacklist_base_filename);
    205 
    206   // Filename for side-effect free whitelist database.
    207   static base::FilePath SideEffectFreeWhitelistDBFilename(
    208       const base::FilePath& side_effect_free_whitelist_base_filename);
    209 
    210   // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
    211   // ORDERING OF THESE VALUES.
    212   enum FailureType {
    213     FAILURE_DATABASE_CORRUPT,
    214     FAILURE_DATABASE_CORRUPT_HANDLER,
    215     FAILURE_BROWSE_DATABASE_UPDATE_BEGIN,
    216     FAILURE_BROWSE_DATABASE_UPDATE_FINISH,
    217     FAILURE_DATABASE_FILTER_MISSING_OBSOLETE,
    218     FAILURE_DATABASE_FILTER_READ_OBSOLETE,
    219     FAILURE_DATABASE_FILTER_WRITE_OBSOLETE,
    220     FAILURE_DATABASE_FILTER_DELETE,
    221     FAILURE_DATABASE_STORE_MISSING,
    222     FAILURE_DATABASE_STORE_DELETE,
    223     FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN,
    224     FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH,
    225     FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN,
    226     FAILURE_WHITELIST_DATABASE_UPDATE_FINISH,
    227     FAILURE_BROWSE_PREFIX_SET_MISSING,
    228     FAILURE_BROWSE_PREFIX_SET_READ,
    229     FAILURE_BROWSE_PREFIX_SET_WRITE,
    230     FAILURE_BROWSE_PREFIX_SET_DELETE,
    231     FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN,
    232     FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH,
    233     FAILURE_EXTENSION_BLACKLIST_DELETE,
    234     FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN,
    235     FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH,
    236     FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE,
    237     FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ,
    238     FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE,
    239     FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE,
    240 
    241     // Memory space for histograms is determined by the max.  ALWAYS
    242     // ADD NEW VALUES BEFORE THIS ONE.
    243     FAILURE_DATABASE_MAX
    244   };
    245 
    246   static void RecordFailure(FailureType failure_type);
    247 
    248  private:
    249   // The factory used to instantiate a SafeBrowsingDatabase object.
    250   // Useful for tests, so they can provide their own implementation of
    251   // SafeBrowsingDatabase.
    252   static SafeBrowsingDatabaseFactory* factory_;
    253 };
    254 
    255 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase {
    256  public:
    257   // Create a database with a browse, download, download whitelist and
    258   // csd whitelist store objects. Takes ownership of all the store objects.
    259   // When |download_store| is NULL, the database will ignore any operations
    260   // related download (url hashes and binary hashes).  The same is true for
    261   // the |csd_whitelist_store| and |download_whitelist_store|.
    262   SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store,
    263                           SafeBrowsingStore* download_store,
    264                           SafeBrowsingStore* csd_whitelist_store,
    265                           SafeBrowsingStore* download_whitelist_store,
    266                           SafeBrowsingStore* extension_blacklist_store,
    267                           SafeBrowsingStore* side_effect_free_whitelist_store);
    268 
    269   // Create a database with a browse store. This is a legacy interface that
    270   // useds Sqlite.
    271   SafeBrowsingDatabaseNew();
    272 
    273   virtual ~SafeBrowsingDatabaseNew();
    274 
    275   // Implement SafeBrowsingDatabase interface.
    276   virtual void Init(const base::FilePath& filename) OVERRIDE;
    277   virtual bool ResetDatabase() OVERRIDE;
    278   virtual bool ContainsBrowseUrl(const GURL& url,
    279                                  std::string* matching_list,
    280                                  std::vector<SBPrefix>* prefix_hits,
    281                                  std::vector<SBFullHashResult>* full_hits,
    282                                  base::Time last_update) OVERRIDE;
    283   virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
    284                                    std::vector<SBPrefix>* prefix_hits) OVERRIDE;
    285   virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) OVERRIDE;
    286   virtual bool ContainsCsdWhitelistedUrl(const GURL& url) OVERRIDE;
    287   virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) OVERRIDE;
    288   virtual bool ContainsDownloadWhitelistedString(
    289       const std::string& str) OVERRIDE;
    290   virtual bool ContainsExtensionPrefixes(
    291       const std::vector<SBPrefix>& prefixes,
    292       std::vector<SBPrefix>* prefix_hits) OVERRIDE;
    293   virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url)  OVERRIDE;
    294   virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) OVERRIDE;
    295   virtual void InsertChunks(const std::string& list_name,
    296                             const SBChunkList& chunks) OVERRIDE;
    297   virtual void DeleteChunks(
    298       const std::vector<SBChunkDelete>& chunk_deletes) OVERRIDE;
    299   virtual void UpdateFinished(bool update_succeeded) OVERRIDE;
    300   virtual void CacheHashResults(
    301       const std::vector<SBPrefix>& prefixes,
    302       const std::vector<SBFullHashResult>& full_hits) OVERRIDE;
    303 
    304   // Returns the value of malware_kill_switch_;
    305   virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE;
    306 
    307  private:
    308   friend class SafeBrowsingDatabaseTest;
    309   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, HashCaching);
    310 
    311   // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored
    312   // in a sorted vector) as well as a boolean flag indicating whether all
    313   // lookups in the whitelist should be considered matches for safety.
    314   typedef std::pair<std::vector<SBFullHash>, bool> SBWhitelist;
    315 
    316   // Returns true if the whitelist is disabled or if any of the given hashes
    317   // matches the whitelist.
    318   bool ContainsWhitelistedHashes(const SBWhitelist& whitelist,
    319                                  const std::vector<SBFullHash>& hashes);
    320 
    321   // Return the browse_store_, download_store_, download_whitelist_store or
    322   // csd_whitelist_store_ based on list_id.
    323   SafeBrowsingStore* GetStore(int list_id);
    324 
    325   // Deletes the files on disk.
    326   bool Delete();
    327 
    328   // Load the prefix set off disk, if available.
    329   void LoadPrefixSet();
    330 
    331   // Writes the current prefix set to disk.
    332   void WritePrefixSet();
    333 
    334   // Loads the given full-length hashes to the given whitelist.  If the number
    335   // of hashes is too large or if the kill switch URL is on the whitelist
    336   // we will whitelist everything.
    337   void LoadWhitelist(const std::vector<SBAddFullHash>& full_hashes,
    338                      SBWhitelist* whitelist);
    339 
    340   // Call this method if an error occured with the given whitelist.  This will
    341   // result in all lookups to the whitelist to return true.
    342   void WhitelistEverything(SBWhitelist* whitelist);
    343 
    344   // Helpers for handling database corruption.
    345   // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets
    346   // |corruption_detected_|, |HandleCorruptDatabase()| posts
    347   // |OnHandleCorruptDatabase()| to the current thread, to be run
    348   // after the current task completes.
    349   // TODO(shess): Wire things up to entirely abort the update
    350   // transaction when this happens.
    351   void HandleCorruptDatabase();
    352   void OnHandleCorruptDatabase();
    353 
    354   // Helpers for InsertChunks().
    355   void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
    356   void InsertAddChunks(safe_browsing_util::ListType list_id,
    357                        const SBChunkList& chunks);
    358   void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
    359   void InsertSubChunks(safe_browsing_util::ListType list_id,
    360                        const SBChunkList& chunks);
    361 
    362   // Returns the size in bytes of the store after the update.
    363   int64 UpdateHashPrefixStore(const base::FilePath& store_filename,
    364                                SafeBrowsingStore* store,
    365                                FailureType failure_type);
    366   void UpdateBrowseStore();
    367   void UpdateSideEffectFreeWhitelistStore();
    368   void UpdateWhitelistStore(const base::FilePath& store_filename,
    369                             SafeBrowsingStore* store,
    370                             SBWhitelist* whitelist);
    371 
    372   // Used to verify that various calls are made from the thread the
    373   // object was created on.
    374   base::MessageLoop* creation_loop_;
    375 
    376   // Lock for protecting access to variables that may be used on the
    377   // IO thread.  This includes |prefix_set_|, |full_browse_hashes_|,
    378   // |pending_browse_hashes_|, |prefix_miss_cache_|, |csd_whitelist_|.
    379   base::Lock lookup_lock_;
    380 
    381   // Underlying persistent store for chunk data.
    382   // For browsing related (phishing and malware URLs) chunks and prefixes.
    383   base::FilePath browse_filename_;
    384   scoped_ptr<SafeBrowsingStore> browse_store_;
    385 
    386   // For download related (download URL and binary hash) chunks and prefixes.
    387   base::FilePath download_filename_;
    388   scoped_ptr<SafeBrowsingStore> download_store_;
    389 
    390   // For the client-side phishing detection whitelist chunks and full-length
    391   // hashes.  This list only contains 256 bit hashes.
    392   base::FilePath csd_whitelist_filename_;
    393   scoped_ptr<SafeBrowsingStore> csd_whitelist_store_;
    394 
    395   // For the download whitelist chunks and full-length hashes.  This list only
    396   // contains 256 bit hashes.
    397   base::FilePath download_whitelist_filename_;
    398   scoped_ptr<SafeBrowsingStore> download_whitelist_store_;
    399 
    400   // For extension IDs.
    401   base::FilePath extension_blacklist_filename_;
    402   scoped_ptr<SafeBrowsingStore> extension_blacklist_store_;
    403 
    404   // For side-effect free whitelist.
    405   base::FilePath side_effect_free_whitelist_filename_;
    406   scoped_ptr<SafeBrowsingStore> side_effect_free_whitelist_store_;
    407 
    408   SBWhitelist csd_whitelist_;
    409   SBWhitelist download_whitelist_;
    410   SBWhitelist extension_blacklist_;
    411 
    412   // Cached browse store related full-hash items, ordered by prefix for
    413   // efficient scanning.
    414   // |full_browse_hashes_| are items from |browse_store_|,
    415   // |pending_browse_hashes_| are items from |CacheHashResults()|, which
    416   // will be pushed to the store on the next update.
    417   std::vector<SBAddFullHash> full_browse_hashes_;
    418   std::vector<SBAddFullHash> pending_browse_hashes_;
    419 
    420   // Cache of prefixes that returned empty results (no full hash
    421   // match) to |CacheHashResults()|.  Cached to prevent asking for
    422   // them every time.  Cleared on next update.
    423   std::set<SBPrefix> prefix_miss_cache_;
    424 
    425   // Used to schedule resetting the database because of corruption.
    426   base::WeakPtrFactory<SafeBrowsingDatabaseNew> reset_factory_;
    427 
    428   // Set if corruption is detected during the course of an update.
    429   // Causes the update functions to fail with no side effects, until
    430   // the next call to |UpdateStarted()|.
    431   bool corruption_detected_;
    432 
    433   // Set to true if any chunks are added or deleted during an update.
    434   // Used to optimize away database update.
    435   bool change_detected_;
    436 
    437   // Used to check if a prefix was in the browse database.
    438   base::FilePath browse_prefix_set_filename_;
    439   scoped_ptr<safe_browsing::PrefixSet> browse_prefix_set_;
    440 
    441   // Used to check if a prefix was in the browse database.
    442   base::FilePath side_effect_free_whitelist_prefix_set_filename_;
    443   scoped_ptr<safe_browsing::PrefixSet> side_effect_free_whitelist_prefix_set_;
    444 };
    445 
    446 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
    447