Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
      6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
      7 
      8 #include <map>
      9 #include <set>
     10 #include <string>
     11 #include <vector>
     12 
     13 #include "base/containers/hash_tables.h"
     14 #include "base/files/file_path.h"
     15 #include "base/gtest_prod_util.h"
     16 #include "base/memory/scoped_ptr.h"
     17 #include "base/memory/weak_ptr.h"
     18 #include "base/synchronization/lock.h"
     19 #include "chrome/browser/safe_browsing/safe_browsing_store.h"
     20 
     21 namespace base {
     22 class MessageLoop;
     23 class Time;
     24 }
     25 
     26 namespace safe_browsing {
     27 class PrefixSet;
     28 }
     29 
     30 class GURL;
     31 class SafeBrowsingDatabase;
     32 
     33 // Factory for creating SafeBrowsingDatabase. Tests implement this factory
     34 // to create fake Databases for testing.
     35 class SafeBrowsingDatabaseFactory {
     36  public:
     37   SafeBrowsingDatabaseFactory() { }
     38   virtual ~SafeBrowsingDatabaseFactory() { }
          // Creates a SafeBrowsingDatabase instance.  The |enable_*| flags parallel
          // the parameters of SafeBrowsingDatabase::Create(); each presumably turns
          // the correspondingly named sub-database on or off -- TODO confirm against
          // the implementations.
          // NOTE(review): ownership of the returned raw pointer is not stated in
          // this header -- confirm with callers.
     39   virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
     40       bool enable_download_protection,
     41       bool enable_client_side_whitelist,
     42       bool enable_download_whitelist,
     43       bool enable_extension_blacklist,
     44       bool enable_side_effect_free_whitelist,
     45       bool enable_ip_blacklist) = 0;
     46  private:
     47   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory);
     48 };
     49 
     50 
     51 // Encapsulates the on-disk databases used for safebrowsing. There are
     52 // four databases: browse, download, download whitelist and
     53 // client-side detection (csd) whitelist databases. The browse database contains
     54 // information about phishing and malware urls. The download database contains
     55 // URLs for bad binaries (e.g. those containing viruses) and hashes of
     56 // these downloaded contents. The download whitelist contains whitelisted
     57 // download hosting sites as well as whitelisted binary signing certificates
     58 // etc.  The csd whitelist database contains URLs that will never be considered
     59 // as phishing by the client-side phishing detection. These on-disk databases
     60 // are shared among all profiles, as they don't contain user-specific data. This
     61 // object is not thread-safe, i.e. all its methods should be used on the same
     62 // thread that it was created on.
     63 class SafeBrowsingDatabase {
     64  public:
     65   // Factory method for obtaining a SafeBrowsingDatabase implementation.
     66   // It is not thread safe.
     67   // |enable_download_protection| is used to control the download database
     68   // feature.
     69   // |enable_client_side_whitelist| is used to control the csd whitelist
     70   // database feature.
     71   // |enable_download_whitelist| is used to control the download whitelist
     72   // database feature.
          // |enable_extension_blacklist| and |side_effect_free_whitelist| presumably
          // control the extension blacklist and side-effect free whitelist database
          // features, following the pattern above -- TODO confirm in the .cc file.
     73   // |enable_ip_blacklist| is used to control the csd malware IP blacklist
     74   // database feature.
     75   static SafeBrowsingDatabase* Create(bool enable_download_protection,
     76                                       bool enable_client_side_whitelist,
     77                                       bool enable_download_whitelist,
     78                                       bool enable_extension_blacklist,
     79                                       bool side_effect_free_whitelist,
     80                                       bool enable_ip_blacklist);
     81 
     82   // Makes the passed |factory| the factory used to instantiate
     83   // a SafeBrowsingDatabase. This is used for tests.
          // NOTE(review): |factory| is stored as a raw pointer in the static
          // |factory_|; its required lifetime is not stated here -- confirm.
     84   static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) {
     85     factory_ = factory;
     86   }
     87 
     88   virtual ~SafeBrowsingDatabase();
     89 
     90   // Initializes the database with the given filename.
     91   virtual void Init(const base::FilePath& filename) = 0;
     92 
     93   // Deletes the current database and creates a new one.
     94   virtual bool ResetDatabase() = 0;
     95 
     96   // Returns false if |url| is not in the browse database.  If it
     97   // returns true, then either |matching_list| is the name of the matching
     98   // list, or |prefix_hits| and |full_hits| contains the matching hash
     99   // prefixes.  This function is safe to call from threads other than
    100   // the creation thread.
    101   virtual bool ContainsBrowseUrl(const GURL& url,
    102                                  std::string* matching_list,
    103                                  std::vector<SBPrefix>* prefix_hits,
    104                                  std::vector<SBFullHashResult>* full_hits,
    105                                  base::Time last_update) = 0;
    106 
    107   // Returns false if none of |urls| are in Download database. If it returns
    108   // true, |prefix_hits| should contain the prefixes for the URLs that were in
    109   // the database.  This function could ONLY be accessed from creation thread.
    110   virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
    111                                    std::vector<SBPrefix>* prefix_hits) = 0;
    112 
    113   // Returns false if |prefix| is not in Download database.
    114   // This function could ONLY be accessed from creation thread.
    115   virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) = 0;
    116 
    117   // Returns false if |url| is not on the client-side phishing detection
    118   // whitelist.  Otherwise, this function returns true.  Note: the whitelist
    119   // only contains full-length hashes so we don't return any prefix hit.
    120   // This function should only be called from the IO thread.
    121   virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0;
    122 
    123   // The download whitelist is used for two purposes: a white-domain list of
    124   // sites that are considered to host only harmless binaries as well as a
    125   // whitelist of arbitrary strings such as hashed certificate authorities that
    126   // are considered to be trusted.  The two methods below let you lookup
    127   // the whitelist either for a URL or an arbitrary string.  These methods will
    128   // return false if no match is found and true otherwise.
    129   // This function could ONLY be accessed from the IO thread.
    130   virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) = 0;
    131   virtual bool ContainsDownloadWhitelistedString(const std::string& str) = 0;
    132 
    133   // Populates |prefix_hits| with any prefixes in |prefixes| that have matches
    134   // in the database.
    135   //
    136   // This function can ONLY be accessed from the creation thread.
    137   virtual bool ContainsExtensionPrefixes(
    138       const std::vector<SBPrefix>& prefixes,
    139       std::vector<SBPrefix>* prefix_hits) = 0;
    140 
    141   // Returns false unless the hash of |url| is on the side-effect free
    142   // whitelist.
    143   virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) = 0;
    144 
    145   // Returns true iff the given IP is currently on the csd malware IP blacklist.
    146   virtual bool ContainsMalwareIP(const std::string& ip_address) = 0;
    147 
    148   // A database transaction should look like:
    149   //
    150   // std::vector<SBListChunkRanges> lists;
    151   // if (db.UpdateStarted(&lists)) {
    152   //   // Do something with |lists|.
    153   //
    154   //   // Process add/sub commands.
    155   //   db.InsertChunks(list_name, chunks);
    156   //
    157   //   // Process adddel/subdel commands.
    158   //   db.DeleteChunks(chunks_deletes);
    159   //
    160   //   // If passed true, processes the collected chunk info and
    161   //   // rebuilds the filter.  If passed false, rolls everything
    162   //   // back.
    163   //   db.UpdateFinished(success);
    164   // }
    165   //
    166   // If UpdateStarted() returns true, the caller MUST eventually call
    167   // UpdateFinished().  If it returns false, the caller MUST NOT call
    168   // the other functions.
    169   virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0;
    170   virtual void InsertChunks(const std::string& list_name,
    171                             const SBChunkList& chunks) = 0;
    172   virtual void DeleteChunks(
    173       const std::vector<SBChunkDelete>& chunk_deletes) = 0;
    174   virtual void UpdateFinished(bool update_succeeded) = 0;
    175 
    176   // Store the results of a GetHash response. In the case of empty results, we
    177   // cache the prefixes until the next update so that we don't have to issue
    178   // further GetHash requests we know will be empty.
    179   virtual void CacheHashResults(
    180       const std::vector<SBPrefix>& prefixes,
    181       const std::vector<SBFullHashResult>& full_hits) = 0;
    182 
    183   // Returns true if the malware IP blacklisting killswitch URL is present
    184   // in the csd whitelist.
    185   virtual bool IsMalwareIPMatchKillSwitchOn() = 0;
    186 
    187   // The name of the bloom-filter file for the given database file.
    188   // NOTE(shess): OBSOLETE.  Present for deleting stale files.
    189   static base::FilePath BloomFilterForFilename(
    190       const base::FilePath& db_filename);
    191 
    192   // The name of the prefix set file for the given database file.
    193   static base::FilePath PrefixSetForFilename(const base::FilePath& db_filename);
    194 
    195   // Filename for malware and phishing URL database.
    196   static base::FilePath BrowseDBFilename(
    197       const base::FilePath& db_base_filename);
    198 
    199   // Filename for download URL and download binary hash database.
    200   static base::FilePath DownloadDBFilename(
    201       const base::FilePath& db_base_filename);
    202 
    203   // Filename for client-side phishing detection whitelist database.
    204   static base::FilePath CsdWhitelistDBFilename(
    205       const base::FilePath& csd_whitelist_base_filename);
    206 
    207   // Filename for download whitelist database.
    208   static base::FilePath DownloadWhitelistDBFilename(
    209       const base::FilePath& download_whitelist_base_filename);
    210 
    211   // Filename for extension blacklist database.
    212   static base::FilePath ExtensionBlacklistDBFilename(
    213       const base::FilePath& extension_blacklist_base_filename);
    214 
    215   // Filename for side-effect free whitelist database.
    216   static base::FilePath SideEffectFreeWhitelistDBFilename(
    217       const base::FilePath& side_effect_free_whitelist_base_filename);
    218 
    219   // Filename for the csd malware IP blacklist database.
    220   static base::FilePath IpBlacklistDBFilename(
    221       const base::FilePath& ip_blacklist_base_filename);
    222 
    223   // Enumerate failures for histogramming purposes.  DO NOT CHANGE THE
    224   // ORDERING OF THESE VALUES.
    225   enum FailureType {
    226     FAILURE_DATABASE_CORRUPT,
    227     FAILURE_DATABASE_CORRUPT_HANDLER,
    228     FAILURE_BROWSE_DATABASE_UPDATE_BEGIN,
    229     FAILURE_BROWSE_DATABASE_UPDATE_FINISH,
    230     FAILURE_DATABASE_FILTER_MISSING_OBSOLETE,
    231     FAILURE_DATABASE_FILTER_READ_OBSOLETE,
    232     FAILURE_DATABASE_FILTER_WRITE_OBSOLETE,
    233     FAILURE_DATABASE_FILTER_DELETE,
    234     FAILURE_DATABASE_STORE_MISSING,
    235     FAILURE_DATABASE_STORE_DELETE,
    236     FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN,
    237     FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH,
    238     FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN,
    239     FAILURE_WHITELIST_DATABASE_UPDATE_FINISH,
    240     FAILURE_BROWSE_PREFIX_SET_MISSING,
    241     FAILURE_BROWSE_PREFIX_SET_READ,
    242     FAILURE_BROWSE_PREFIX_SET_WRITE,
    243     FAILURE_BROWSE_PREFIX_SET_DELETE,
    244     FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN,
    245     FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH,
    246     FAILURE_EXTENSION_BLACKLIST_DELETE,
    247     FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN,
    248     FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH,
    249     FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE,
    250     FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ,
    251     FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE,
    252     FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE,
    253     FAILURE_IP_BLACKLIST_UPDATE_BEGIN,
    254     FAILURE_IP_BLACKLIST_UPDATE_FINISH,
    255     FAILURE_IP_BLACKLIST_UPDATE_INVALID,
    256     FAILURE_IP_BLACKLIST_DELETE,
    257 
    258     // Memory space for histograms is determined by the max.  ALWAYS
    259     // ADD NEW VALUES BEFORE THIS ONE.
    260     FAILURE_DATABASE_MAX
    261   };
    262 
          // Reports |failure_type|; presumably into the histogram that FailureType
          // is enumerated for -- TODO confirm in the .cc file.
    263   static void RecordFailure(FailureType failure_type);
    264 
    265  private:
    266   // The factory used to instantiate a SafeBrowsingDatabase object.
    267   // Useful for tests, so they can provide their own implementation of
    268   // SafeBrowsingDatabase.
    269   static SafeBrowsingDatabaseFactory* factory_;
    270 };
    271 
    272 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase {
    273  public:
    274   // Create a database with a browse, download, download whitelist and
    275   // csd whitelist store objects. Takes ownership of all the store objects.
    276   // When |download_store| is NULL, the database will ignore any operations
    277   // related download (url hashes and binary hashes).  The same is true for
    278   // the |csd_whitelist_store|, |download_whitelist_store| and
    279   // |ip_blacklist_store|.
    280   SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store,
    281                           SafeBrowsingStore* download_store,
    282                           SafeBrowsingStore* csd_whitelist_store,
    283                           SafeBrowsingStore* download_whitelist_store,
    284                           SafeBrowsingStore* extension_blacklist_store,
    285                           SafeBrowsingStore* side_effect_free_whitelist_store,
    286                           SafeBrowsingStore* ip_blacklist_store);
    287 
    288   // Create a database with a browse store. This is a legacy interface that
    289   // uses Sqlite.
    290   SafeBrowsingDatabaseNew();
    291 
    292   virtual ~SafeBrowsingDatabaseNew();
    293 
    294   // Implement SafeBrowsingDatabase interface.
    295   virtual void Init(const base::FilePath& filename) OVERRIDE;
    296   virtual bool ResetDatabase() OVERRIDE;
    297   virtual bool ContainsBrowseUrl(const GURL& url,
    298                                  std::string* matching_list,
    299                                  std::vector<SBPrefix>* prefix_hits,
    300                                  std::vector<SBFullHashResult>* full_hits,
    301                                  base::Time last_update) OVERRIDE;
    302   virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls,
    303                                    std::vector<SBPrefix>* prefix_hits) OVERRIDE;
    304   virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) OVERRIDE;
    305   virtual bool ContainsCsdWhitelistedUrl(const GURL& url) OVERRIDE;
    306   virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) OVERRIDE;
    307   virtual bool ContainsDownloadWhitelistedString(
    308       const std::string& str) OVERRIDE;
    309   virtual bool ContainsExtensionPrefixes(
    310       const std::vector<SBPrefix>& prefixes,
    311       std::vector<SBPrefix>* prefix_hits) OVERRIDE;
    312   virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url)  OVERRIDE;
    313   virtual bool ContainsMalwareIP(const std::string& ip_address) OVERRIDE;
    314   virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) OVERRIDE;
    315   virtual void InsertChunks(const std::string& list_name,
    316                             const SBChunkList& chunks) OVERRIDE;
    317   virtual void DeleteChunks(
    318       const std::vector<SBChunkDelete>& chunk_deletes) OVERRIDE;
    319   virtual void UpdateFinished(bool update_succeeded) OVERRIDE;
    320   virtual void CacheHashResults(
    321       const std::vector<SBPrefix>& prefixes,
    322       const std::vector<SBFullHashResult>& full_hits) OVERRIDE;
    323 
    324   // Returns the value of malware_kill_switch_;
          // NOTE(review): no |malware_kill_switch_| member is declared in this
          // class; the comment above looks stale -- confirm against the .cc file.
    325   virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE;
    326 
    327  private:
    328   friend class SafeBrowsingDatabaseTest;
    329   FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, HashCaching);
    330 
    331   // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored
    332   // in a sorted vector) as well as a boolean flag indicating whether all
    333   // lookups in the whitelist should be considered matches for safety.
    334   typedef std::pair<std::vector<SBFullHash>, bool> SBWhitelist;
    335 
    336   // This map holds a csd malware IP blacklist which maps a prefix mask
    337   // to a set of hashed blacklisted IP prefixes.  Each IP prefix is a hashed
    338   // IPv6 IP prefix using SHA-1.
    339   typedef std::map<std::string, base::hash_set<std::string> > IPBlacklist;
    340 
    341   // Returns true if the whitelist is disabled or if any of the given hashes
    342   // matches the whitelist.
    343   bool ContainsWhitelistedHashes(const SBWhitelist& whitelist,
    344                                  const std::vector<SBFullHash>& hashes);
    345 
    346   // Return the browse_store_, download_store_, download_whitelist_store_ or
    347   // csd_whitelist_store_ based on list_id.
    348   SafeBrowsingStore* GetStore(int list_id);
    349 
    350   // Deletes the files on disk.
    351   bool Delete();
    352 
    353   // Load the prefix set off disk, if available.
    354   void LoadPrefixSet();
    355 
    356   // Writes the current prefix set to disk.
    357   void WritePrefixSet();
    358 
    359   // Loads the given full-length hashes to the given whitelist.  If the number
    360   // of hashes is too large or if the kill switch URL is on the whitelist
    361   // we will whitelist everything.
    362   void LoadWhitelist(const std::vector<SBAddFullHash>& full_hashes,
    363                      SBWhitelist* whitelist);
    364 
    365   // Call this method if an error occurred with the given whitelist.  This will
    366   // result in all lookups to the whitelist to return true.
    367   void WhitelistEverything(SBWhitelist* whitelist);
    368 
    369   // Parses the IP blacklist from the given full-length hashes.
    370   void LoadIpBlacklist(const std::vector<SBAddFullHash>& full_hashes);
    371 
    372   // Helpers for handling database corruption.
    373   // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets
    374   // |corruption_detected_|, |HandleCorruptDatabase()| posts
    375   // |OnHandleCorruptDatabase()| to the current thread, to be run
    376   // after the current task completes.
    377   // TODO(shess): Wire things up to entirely abort the update
    378   // transaction when this happens.
    379   void HandleCorruptDatabase();
    380   void OnHandleCorruptDatabase();
    381 
    382   // Helpers for InsertChunks().
    383   void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
    384   void InsertAddChunks(safe_browsing_util::ListType list_id,
    385                        const SBChunkList& chunks);
    386   void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id);
    387   void InsertSubChunks(safe_browsing_util::ListType list_id,
    388                        const SBChunkList& chunks);
    389 
    390   // Returns the size in bytes of the store after the update.
    391   int64 UpdateHashPrefixStore(const base::FilePath& store_filename,
    392                                SafeBrowsingStore* store,
    393                                FailureType failure_type);
    394   void UpdateBrowseStore();
    395   void UpdateSideEffectFreeWhitelistStore();
    396   void UpdateWhitelistStore(const base::FilePath& store_filename,
    397                             SafeBrowsingStore* store,
    398                             SBWhitelist* whitelist);
    399   void UpdateIpBlacklistStore();
    400 
    401   // Used to verify that various calls are made from the thread the
    402   // object was created on.
    403   base::MessageLoop* creation_loop_;
    404 
    405   // Lock for protecting access to variables that may be used on the
    406   // IO thread.  This includes |prefix_set_|, |full_browse_hashes_|,
    407   // |pending_browse_hashes_|, |prefix_miss_cache_|, |csd_whitelist_|.
          // NOTE(review): no |prefix_set_| member is declared in this class;
          // presumably |browse_prefix_set_| is meant -- confirm.
    408   base::Lock lookup_lock_;
    409 
    410   // Underlying persistent store for chunk data.
    411   // For browsing related (phishing and malware URLs) chunks and prefixes.
    412   base::FilePath browse_filename_;
    413   scoped_ptr<SafeBrowsingStore> browse_store_;
    414 
    415   // For download related (download URL and binary hash) chunks and prefixes.
    416   base::FilePath download_filename_;
    417   scoped_ptr<SafeBrowsingStore> download_store_;
    418 
    419   // For the client-side phishing detection whitelist chunks and full-length
    420   // hashes.  This list only contains 256 bit hashes.
    421   base::FilePath csd_whitelist_filename_;
    422   scoped_ptr<SafeBrowsingStore> csd_whitelist_store_;
    423 
    424   // For the download whitelist chunks and full-length hashes.  This list only
    425   // contains 256 bit hashes.
    426   base::FilePath download_whitelist_filename_;
    427   scoped_ptr<SafeBrowsingStore> download_whitelist_store_;
    428 
    429   // For extension IDs.
    430   base::FilePath extension_blacklist_filename_;
    431   scoped_ptr<SafeBrowsingStore> extension_blacklist_store_;
    432 
    433   // For side-effect free whitelist.
    434   base::FilePath side_effect_free_whitelist_filename_;
    435   scoped_ptr<SafeBrowsingStore> side_effect_free_whitelist_store_;
    436 
    437   // For IP blacklist.
    438   base::FilePath ip_blacklist_filename_;
    439   scoped_ptr<SafeBrowsingStore> ip_blacklist_store_;
    440 
    441   SBWhitelist csd_whitelist_;
    442   SBWhitelist download_whitelist_;
    443   SBWhitelist extension_blacklist_;
    444 
    445   // The IP blacklist should be small.  At most a couple hundred IPs.
    446   IPBlacklist ip_blacklist_;
    447 
    448   // Cached browse store related full-hash items, ordered by prefix for
    449   // efficient scanning.
    450   // |full_browse_hashes_| are items from |browse_store_|,
    451   // |pending_browse_hashes_| are items from |CacheHashResults()|, which
    452   // will be pushed to the store on the next update.
    453   std::vector<SBAddFullHash> full_browse_hashes_;
    454   std::vector<SBAddFullHash> pending_browse_hashes_;
    455 
    456   // Cache of prefixes that returned empty results (no full hash
    457   // match) to |CacheHashResults()|.  Cached to prevent asking for
    458   // them every time.  Cleared on next update.
    459   std::set<SBPrefix> prefix_miss_cache_;
    460 
    461   // Used to schedule resetting the database because of corruption.
    462   base::WeakPtrFactory<SafeBrowsingDatabaseNew> reset_factory_;
    463 
    464   // Set if corruption is detected during the course of an update.
    465   // Causes the update functions to fail with no side effects, until
    466   // the next call to |UpdateStarted()|.
    467   bool corruption_detected_;
    468 
    469   // Set to true if any chunks are added or deleted during an update.
    470   // Used to optimize away database update.
    471   bool change_detected_;
    472 
    473   // Used to check if a prefix was in the browse database.
    474   base::FilePath browse_prefix_set_filename_;
    475   scoped_ptr<safe_browsing::PrefixSet> browse_prefix_set_;
    476 
    477   // Used to check if a prefix was in the side-effect free whitelist database.
    478   base::FilePath side_effect_free_whitelist_prefix_set_filename_;
    479   scoped_ptr<safe_browsing::PrefixSet> side_effect_free_whitelist_prefix_set_;
    480 };
    481 
    482 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_
    483