1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 7 #pragma once 8 9 #include <set> 10 #include <vector> 11 12 #include "base/file_path.h" 13 #include "base/memory/scoped_ptr.h" 14 #include "base/synchronization/lock.h" 15 #include "base/task.h" 16 #include "chrome/browser/safe_browsing/safe_browsing_store.h" 17 #include "testing/gtest/include/gtest/gtest_prod.h" 18 19 namespace base { 20 class Time; 21 } 22 23 namespace safe_browsing { 24 class PrefixSet; 25 } 26 27 class BloomFilter; 28 class GURL; 29 class MessageLoop; 30 class SafeBrowsingDatabase; 31 32 // Factory for creating SafeBrowsingDatabase. Tests implement this factory 33 // to create fake Databases for testing. 34 class SafeBrowsingDatabaseFactory { 35 public: 36 SafeBrowsingDatabaseFactory() { } 37 virtual ~SafeBrowsingDatabaseFactory() { } 38 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 39 bool enable_download_protection, 40 bool enable_client_side_whitelist) = 0; 41 private: 42 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory); 43 }; 44 45 46 // Encapsulates on-disk databases that for safebrowsing. There are 47 // three databases: browse, download and client-side detection (csd) 48 // whitelist databases. The browse database contains information 49 // about phishing and malware urls. The download database contains 50 // URLs for bad binaries (e.g: those containing virus) and hash of 51 // these downloaded contents. The csd whitelist database contains URLs 52 // that will never be considered as phishing by the client-side 53 // phishing detection. These on-disk databases are shared among all 54 // profiles, as it doesn't contain user-specific data. This object is 55 // not thread-safe, i.e. all its methods should be used on the same 56 // thread that it was created on. 57 class SafeBrowsingDatabase { 58 public: 59 // Factory method for obtaining a SafeBrowsingDatabase implementation. 60 // It is not thread safe. 61 // |enable_download_protection| is used to control the download database 62 // feature. 63 // |enable_client_side_whitelist| is used to control the csd whitelist 64 // database feature. 65 static SafeBrowsingDatabase* Create(bool enable_download_protection, 66 bool enable_client_side_whitelist); 67 68 // Makes the passed |factory| the factory used to instantiate 69 // a SafeBrowsingDatabase. This is used for tests. 70 static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) { 71 factory_ = factory; 72 } 73 74 virtual ~SafeBrowsingDatabase(); 75 76 // Initializes the database with the given filename. 77 virtual void Init(const FilePath& filename) = 0; 78 79 // Deletes the current database and creates a new one. 80 virtual bool ResetDatabase() = 0; 81 82 // Returns false if |url| is not in the browse database. If it 83 // returns true, then either |matching_list| is the name of the matching 84 // list, or |prefix_hits| and |full_hits| contains the matching hash 85 // prefixes. This function is safe to call from threads other than 86 // the creation thread. 87 virtual bool ContainsBrowseUrl(const GURL& url, 88 std::string* matching_list, 89 std::vector<SBPrefix>* prefix_hits, 90 std::vector<SBFullHashResult>* full_hits, 91 base::Time last_update) = 0; 92 93 // Returns false if none of |urls| are in Download database. If it returns 94 // true, |prefix_hits| should contain the prefixes for the URLs that were in 95 // the database. This function could ONLY be accessed from creation thread. 96 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, 97 std::vector<SBPrefix>* prefix_hits) = 0; 98 99 // Returns false if |prefix| is not in Download database. 100 // This function could ONLY be accessed from creation thread. 101 virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) = 0; 102 103 // Returns false if |url| is not on the client-side phishing detection 104 // whitelist. Otherwise, this function returns true. Note: the whitelist 105 // only contains full-length hashes so we don't return any prefix hit. 106 // This function should only be called from the IO thread. 107 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0; 108 109 // A database transaction should look like: 110 // 111 // std::vector<SBListChunkRanges> lists; 112 // if (db.UpdateStarted(&lists)) { 113 // // Do something with |lists|. 114 // 115 // // Process add/sub commands. 116 // db.InsertChunks(list_name, chunks); 117 // 118 // // Process adddel/subdel commands. 119 // db.DeleteChunks(chunks_deletes); 120 // 121 // // If passed true, processes the collected chunk info and 122 // // rebuilds the bloom filter. If passed false, rolls everything 123 // // back. 124 // db.UpdateFinished(success); 125 // } 126 // 127 // If UpdateStarted() returns true, the caller MUST eventually call 128 // UpdateFinished(). If it returns false, the caller MUST NOT call 129 // the other functions. 130 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0; 131 virtual void InsertChunks(const std::string& list_name, 132 const SBChunkList& chunks) = 0; 133 virtual void DeleteChunks( 134 const std::vector<SBChunkDelete>& chunk_deletes) = 0; 135 virtual void UpdateFinished(bool update_succeeded) = 0; 136 137 // Store the results of a GetHash response. In the case of empty results, we 138 // cache the prefixes until the next update so that we don't have to issue 139 // further GetHash requests we know will be empty. 140 virtual void CacheHashResults( 141 const std::vector<SBPrefix>& prefixes, 142 const std::vector<SBFullHashResult>& full_hits) = 0; 143 144 // The name of the bloom-filter file for the given database file. 145 static FilePath BloomFilterForFilename(const FilePath& db_filename); 146 147 // Filename for malware and phishing URL database. 148 static FilePath BrowseDBFilename(const FilePath& db_base_filename); 149 150 // Filename for download URL and download binary hash database. 151 static FilePath DownloadDBFilename(const FilePath& db_base_filename); 152 153 // Filename for client-side phishing detection whitelist databsae. 154 static FilePath CsdWhitelistDBFilename( 155 const FilePath& csd_whitelist_base_filename); 156 157 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE 158 // ORDERING OF THESE VALUES. 159 enum FailureType { 160 FAILURE_DATABASE_CORRUPT, 161 FAILURE_DATABASE_CORRUPT_HANDLER, 162 FAILURE_BROWSE_DATABASE_UPDATE_BEGIN, 163 FAILURE_BROWSE_DATABASE_UPDATE_FINISH, 164 FAILURE_DATABASE_FILTER_MISSING, 165 FAILURE_DATABASE_FILTER_READ, 166 FAILURE_DATABASE_FILTER_WRITE, 167 FAILURE_DATABASE_FILTER_DELETE, 168 FAILURE_DATABASE_STORE_MISSING, 169 FAILURE_DATABASE_STORE_DELETE, 170 FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN, 171 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH, 172 FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN, 173 FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH, 174 175 // Memory space for histograms is determined by the max. ALWAYS 176 // ADD NEW VALUES BEFORE THIS ONE. 177 FAILURE_DATABASE_MAX 178 }; 179 180 static void RecordFailure(FailureType failure_type); 181 182 private: 183 // The factory used to instantiate a SafeBrowsingDatabase object. 184 // Useful for tests, so they can provide their own implementation of 185 // SafeBrowsingDatabase. 186 static SafeBrowsingDatabaseFactory* factory_; 187 }; 188 189 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { 190 public: 191 // Create a database with a browse store, download store and 192 // csd_whitelist_store. Takes ownership of browse_store, download_store and 193 // csd_whitelist_store. When |download_store| is NULL, the database 194 // will ignore any operations related download (url hashes and 195 // binary hashes). Same for the |csd_whitelist_store|. 196 SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store, 197 SafeBrowsingStore* download_store, 198 SafeBrowsingStore* csd_whitelist_store); 199 200 // Create a database with a browse store. This is a legacy interface that 201 // useds Sqlite. 202 SafeBrowsingDatabaseNew(); 203 204 virtual ~SafeBrowsingDatabaseNew(); 205 206 // Implement SafeBrowsingDatabase interface. 207 virtual void Init(const FilePath& filename); 208 virtual bool ResetDatabase(); 209 virtual bool ContainsBrowseUrl(const GURL& url, 210 std::string* matching_list, 211 std::vector<SBPrefix>* prefix_hits, 212 std::vector<SBFullHashResult>* full_hits, 213 base::Time last_update); 214 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, 215 std::vector<SBPrefix>* prefix_hits); 216 virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix); 217 virtual bool ContainsCsdWhitelistedUrl(const GURL& url); 218 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists); 219 virtual void InsertChunks(const std::string& list_name, 220 const SBChunkList& chunks); 221 virtual void DeleteChunks(const std::vector<SBChunkDelete>& chunk_deletes); 222 virtual void UpdateFinished(bool update_succeeded); 223 virtual void CacheHashResults(const std::vector<SBPrefix>& prefixes, 224 const std::vector<SBFullHashResult>& full_hits); 225 226 private: 227 friend class SafeBrowsingDatabaseTest; 228 FRIEND_TEST(SafeBrowsingDatabaseTest, HashCaching); 229 230 // Return the browse_store_, download_store_ or csd_whitelist_store_ 231 // based on list_id. 232 SafeBrowsingStore* GetStore(int list_id); 233 234 // Deletes the files on disk. 235 bool Delete(); 236 237 // Load the bloom filter off disk, or generates one if it doesn't exist. 238 void LoadBloomFilter(); 239 240 // Writes the current bloom filter to disk. 241 void WriteBloomFilter(); 242 243 // Loads the given full-length hashes to the csd whitelist. If the number 244 // of hashes is too large or if the kill switch URL is on the whitelist 245 // we will whitelist all URLs. 246 void LoadCsdWhitelist(const std::vector<SBAddFullHash>& full_hashes); 247 248 // Call this method if an error occured with the csd whitelist. This will 249 // result in all calls to ContainsCsdWhitelistedUrl() to returning true. 250 void CsdWhitelistAllUrls(); 251 252 // Helpers for handling database corruption. 253 // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets 254 // |corruption_detected_|, |HandleCorruptDatabase()| posts 255 // |OnHandleCorruptDatabase()| to the current thread, to be run 256 // after the current task completes. 257 // TODO(shess): Wire things up to entirely abort the update 258 // transaction when this happens. 259 void HandleCorruptDatabase(); 260 void OnHandleCorruptDatabase(); 261 262 // Helpers for InsertChunks(). 263 void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id); 264 void InsertAddChunks(int list_id, const SBChunkList& chunks); 265 void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id); 266 void InsertSubChunks(int list_id, const SBChunkList& chunks); 267 268 void UpdateDownloadStore(); 269 void UpdateBrowseStore(); 270 void UpdateCsdWhitelistStore(); 271 272 // Helper function to compare addprefixes in download_store_ with |prefixes|. 273 // The |list_bit| indicates which list (download url or download hash) 274 // to compare. 275 // Returns true if there is a match, |*prefix_hits| will contain the actual 276 // matching prefixes. 277 bool MatchDownloadAddPrefixes(int list_bit, 278 const std::vector<SBPrefix>& prefixes, 279 std::vector<SBPrefix>* prefix_hits); 280 281 // Used to verify that various calls are made from the thread the 282 // object was created on. 283 MessageLoop* creation_loop_; 284 285 // Lock for protecting access to variables that may be used on the 286 // IO thread. This includes |browse_bloom_filter_|, |full_browse_hashes_|, 287 // |pending_browse_hashes_|, |prefix_miss_cache_|, |csd_whitelist_|, and 288 // |csd_whitelist_all_urls_|. 289 base::Lock lookup_lock_; 290 291 // Underlying persistent store for chunk data. 292 // For browsing related (phishing and malware URLs) chunks and prefixes. 293 FilePath browse_filename_; 294 scoped_ptr<SafeBrowsingStore> browse_store_; 295 296 // For download related (download URL and binary hash) chunks and prefixes. 297 FilePath download_filename_; 298 scoped_ptr<SafeBrowsingStore> download_store_; 299 300 // For the client-side phishing detection whitelist chunks and full-length 301 // hashes. This list only contains 256 bit hashes. 302 FilePath csd_whitelist_filename_; 303 scoped_ptr<SafeBrowsingStore> csd_whitelist_store_; 304 305 // All the client-side phishing detection whitelist entries are loaded in 306 // a sorted vector. 307 std::vector<SBFullHash> csd_whitelist_; 308 309 // If true, ContainsCsdWhitelistedUrl will always return true for all URLs. 310 // This is set to true if the csd whitelist is too large to be stored in 311 // memory, if the kill switch URL is on the csd whitelist or if there was 312 // an error during the most recent update. 313 bool csd_whitelist_all_urls_; 314 315 // Bloom filter generated from the add-prefixes in |browse_store_|. 316 // Only browse_store_ requires the BloomFilter for fast query. 317 FilePath bloom_filter_filename_; 318 scoped_refptr<BloomFilter> browse_bloom_filter_; 319 320 // Cached browse store related full-hash items, ordered by prefix for 321 // efficient scanning. 322 // |full_browse_hashes_| are items from |browse_store_|, 323 // |pending_browse_hashes_| are items from |CacheHashResults()|, which 324 // will be pushed to the store on the next update. 325 std::vector<SBAddFullHash> full_browse_hashes_; 326 std::vector<SBAddFullHash> pending_browse_hashes_; 327 328 // Cache of prefixes that returned empty results (no full hash 329 // match) to |CacheHashResults()|. Cached to prevent asking for 330 // them every time. Cleared on next update. 331 std::set<SBPrefix> prefix_miss_cache_; 332 333 // Used to schedule resetting the database because of corruption. 334 ScopedRunnableMethodFactory<SafeBrowsingDatabaseNew> reset_factory_; 335 336 // Set if corruption is detected during the course of an update. 337 // Causes the update functions to fail with no side effects, until 338 // the next call to |UpdateStarted()|. 339 bool corruption_detected_; 340 341 // Set to true if any chunks are added or deleted during an update. 342 // Used to optimize away database update. 343 bool change_detected_; 344 345 // Used to check if a prefix was in the database. 346 scoped_ptr<safe_browsing::PrefixSet> prefix_set_; 347 }; 348 349 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 350