1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 7 8 #include <set> 9 #include <string> 10 #include <vector> 11 12 #include "base/files/file_path.h" 13 #include "base/gtest_prod_util.h" 14 #include "base/memory/scoped_ptr.h" 15 #include "base/memory/weak_ptr.h" 16 #include "base/synchronization/lock.h" 17 #include "chrome/browser/safe_browsing/safe_browsing_store.h" 18 19 namespace base { 20 class MessageLoop; 21 class Time; 22 } 23 24 namespace safe_browsing { 25 class PrefixSet; 26 } 27 28 class GURL; 29 class SafeBrowsingDatabase; 30 31 // Factory for creating SafeBrowsingDatabase. Tests implement this factory 32 // to create fake Databases for testing. 33 class SafeBrowsingDatabaseFactory { 34 public: 35 SafeBrowsingDatabaseFactory() { } 36 virtual ~SafeBrowsingDatabaseFactory() { } 37 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 38 bool enable_download_protection, 39 bool enable_client_side_whitelist, 40 bool enable_download_whitelist, 41 bool enable_extension_blacklist, 42 bool enable_side_effect_free_whitelist) = 0; 43 private: 44 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory); 45 }; 46 47 48 // Encapsulates on-disk databases that for safebrowsing. There are 49 // four databases: browse, download, download whitelist and 50 // client-side detection (csd) whitelist databases. The browse database contains 51 // information about phishing and malware urls. The download database contains 52 // URLs for bad binaries (e.g: those containing virus) and hash of 53 // these downloaded contents. The download whitelist contains whitelisted 54 // download hosting sites as well as whitelisted binary signing certificates 55 // etc. The csd whitelist database contains URLs that will never be considered 56 // as phishing by the client-side phishing detection. These on-disk databases 57 // are shared among all profiles, as it doesn't contain user-specific data. This 58 // object is not thread-safe, i.e. all its methods should be used on the same 59 // thread that it was created on. 60 class SafeBrowsingDatabase { 61 public: 62 // Factory method for obtaining a SafeBrowsingDatabase implementation. 63 // It is not thread safe. 64 // |enable_download_protection| is used to control the download database 65 // feature. 66 // |enable_client_side_whitelist| is used to control the csd whitelist 67 // database feature. 68 // |enable_download_whitelist| is used to control the download whitelist 69 // database feature. 70 static SafeBrowsingDatabase* Create(bool enable_download_protection, 71 bool enable_client_side_whitelist, 72 bool enable_download_whitelist, 73 bool enable_extension_blacklist, 74 bool side_effect_free_whitelist); 75 76 // Makes the passed |factory| the factory used to instantiate 77 // a SafeBrowsingDatabase. This is used for tests. 78 static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) { 79 factory_ = factory; 80 } 81 82 virtual ~SafeBrowsingDatabase(); 83 84 // Initializes the database with the given filename. 85 virtual void Init(const base::FilePath& filename) = 0; 86 87 // Deletes the current database and creates a new one. 88 virtual bool ResetDatabase() = 0; 89 90 // Returns false if |url| is not in the browse database. If it 91 // returns true, then either |matching_list| is the name of the matching 92 // list, or |prefix_hits| and |full_hits| contains the matching hash 93 // prefixes. This function is safe to call from threads other than 94 // the creation thread. 95 virtual bool ContainsBrowseUrl(const GURL& url, 96 std::string* matching_list, 97 std::vector<SBPrefix>* prefix_hits, 98 std::vector<SBFullHashResult>* full_hits, 99 base::Time last_update) = 0; 100 101 // Returns false if none of |urls| are in Download database. If it returns 102 // true, |prefix_hits| should contain the prefixes for the URLs that were in 103 // the database. This function could ONLY be accessed from creation thread. 104 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, 105 std::vector<SBPrefix>* prefix_hits) = 0; 106 107 // Returns false if |prefix| is not in Download database. 108 // This function could ONLY be accessed from creation thread. 109 virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) = 0; 110 111 // Returns false if |url| is not on the client-side phishing detection 112 // whitelist. Otherwise, this function returns true. Note: the whitelist 113 // only contains full-length hashes so we don't return any prefix hit. 114 // This function should only be called from the IO thread. 115 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0; 116 117 // The download whitelist is used for two purposes: a white-domain list of 118 // sites that are considered to host only harmless binaries as well as a 119 // whitelist of arbitrary strings such as hashed certificate authorities that 120 // are considered to be trusted. The two methods below let you lookup 121 // the whitelist either for a URL or an arbitrary string. These methods will 122 // return false if no match is found and true otherwise. 123 // This function could ONLY be accessed from the IO thread. 124 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) = 0; 125 virtual bool ContainsDownloadWhitelistedString(const std::string& str) = 0; 126 127 // Populates |prefix_hits| with any prefixes in |prefixes| that have matches 128 // in the database. 129 // 130 // This function can ONLY be accessed from the creation thread. 131 virtual bool ContainsExtensionPrefixes( 132 const std::vector<SBPrefix>& prefixes, 133 std::vector<SBPrefix>* prefix_hits) = 0; 134 135 // Returns false unless the hash of |url| is on the side-effect free 136 // whitelist. 137 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) = 0; 138 139 // A database transaction should look like: 140 // 141 // std::vector<SBListChunkRanges> lists; 142 // if (db.UpdateStarted(&lists)) { 143 // // Do something with |lists|. 144 // 145 // // Process add/sub commands. 146 // db.InsertChunks(list_name, chunks); 147 // 148 // // Process adddel/subdel commands. 149 // db.DeleteChunks(chunks_deletes); 150 // 151 // // If passed true, processes the collected chunk info and 152 // // rebuilds the filter. If passed false, rolls everything 153 // // back. 154 // db.UpdateFinished(success); 155 // } 156 // 157 // If UpdateStarted() returns true, the caller MUST eventually call 158 // UpdateFinished(). If it returns false, the caller MUST NOT call 159 // the other functions. 160 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0; 161 virtual void InsertChunks(const std::string& list_name, 162 const SBChunkList& chunks) = 0; 163 virtual void DeleteChunks( 164 const std::vector<SBChunkDelete>& chunk_deletes) = 0; 165 virtual void UpdateFinished(bool update_succeeded) = 0; 166 167 // Store the results of a GetHash response. In the case of empty results, we 168 // cache the prefixes until the next update so that we don't have to issue 169 // further GetHash requests we know will be empty. 170 virtual void CacheHashResults( 171 const std::vector<SBPrefix>& prefixes, 172 const std::vector<SBFullHashResult>& full_hits) = 0; 173 174 // Returns true if the malware IP blacklisting killswitch URL is present 175 // in the csd whitelist. 176 virtual bool IsMalwareIPMatchKillSwitchOn() = 0; 177 178 // The name of the bloom-filter file for the given database file. 179 // NOTE(shess): OBSOLETE. Present for deleting stale files. 180 static base::FilePath BloomFilterForFilename( 181 const base::FilePath& db_filename); 182 183 // The name of the prefix set file for the given database file. 184 static base::FilePath PrefixSetForFilename(const base::FilePath& db_filename); 185 186 // Filename for malware and phishing URL database. 187 static base::FilePath BrowseDBFilename( 188 const base::FilePath& db_base_filename); 189 190 // Filename for download URL and download binary hash database. 191 static base::FilePath DownloadDBFilename( 192 const base::FilePath& db_base_filename); 193 194 // Filename for client-side phishing detection whitelist databsae. 195 static base::FilePath CsdWhitelistDBFilename( 196 const base::FilePath& csd_whitelist_base_filename); 197 198 // Filename for download whitelist databsae. 199 static base::FilePath DownloadWhitelistDBFilename( 200 const base::FilePath& download_whitelist_base_filename); 201 202 // Filename for extension blacklist database. 203 static base::FilePath ExtensionBlacklistDBFilename( 204 const base::FilePath& extension_blacklist_base_filename); 205 206 // Filename for side-effect free whitelist database. 207 static base::FilePath SideEffectFreeWhitelistDBFilename( 208 const base::FilePath& side_effect_free_whitelist_base_filename); 209 210 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE 211 // ORDERING OF THESE VALUES. 212 enum FailureType { 213 FAILURE_DATABASE_CORRUPT, 214 FAILURE_DATABASE_CORRUPT_HANDLER, 215 FAILURE_BROWSE_DATABASE_UPDATE_BEGIN, 216 FAILURE_BROWSE_DATABASE_UPDATE_FINISH, 217 FAILURE_DATABASE_FILTER_MISSING_OBSOLETE, 218 FAILURE_DATABASE_FILTER_READ_OBSOLETE, 219 FAILURE_DATABASE_FILTER_WRITE_OBSOLETE, 220 FAILURE_DATABASE_FILTER_DELETE, 221 FAILURE_DATABASE_STORE_MISSING, 222 FAILURE_DATABASE_STORE_DELETE, 223 FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN, 224 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH, 225 FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN, 226 FAILURE_WHITELIST_DATABASE_UPDATE_FINISH, 227 FAILURE_BROWSE_PREFIX_SET_MISSING, 228 FAILURE_BROWSE_PREFIX_SET_READ, 229 FAILURE_BROWSE_PREFIX_SET_WRITE, 230 FAILURE_BROWSE_PREFIX_SET_DELETE, 231 FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN, 232 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH, 233 FAILURE_EXTENSION_BLACKLIST_DELETE, 234 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN, 235 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH, 236 FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE, 237 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ, 238 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE, 239 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE, 240 241 // Memory space for histograms is determined by the max. ALWAYS 242 // ADD NEW VALUES BEFORE THIS ONE. 243 FAILURE_DATABASE_MAX 244 }; 245 246 static void RecordFailure(FailureType failure_type); 247 248 private: 249 // The factory used to instantiate a SafeBrowsingDatabase object. 250 // Useful for tests, so they can provide their own implementation of 251 // SafeBrowsingDatabase. 252 static SafeBrowsingDatabaseFactory* factory_; 253 }; 254 255 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { 256 public: 257 // Create a database with a browse, download, download whitelist and 258 // csd whitelist store objects. Takes ownership of all the store objects. 259 // When |download_store| is NULL, the database will ignore any operations 260 // related download (url hashes and binary hashes). The same is true for 261 // the |csd_whitelist_store| and |download_whitelist_store|. 262 SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store, 263 SafeBrowsingStore* download_store, 264 SafeBrowsingStore* csd_whitelist_store, 265 SafeBrowsingStore* download_whitelist_store, 266 SafeBrowsingStore* extension_blacklist_store, 267 SafeBrowsingStore* side_effect_free_whitelist_store); 268 269 // Create a database with a browse store. This is a legacy interface that 270 // useds Sqlite. 271 SafeBrowsingDatabaseNew(); 272 273 virtual ~SafeBrowsingDatabaseNew(); 274 275 // Implement SafeBrowsingDatabase interface. 276 virtual void Init(const base::FilePath& filename) OVERRIDE; 277 virtual bool ResetDatabase() OVERRIDE; 278 virtual bool ContainsBrowseUrl(const GURL& url, 279 std::string* matching_list, 280 std::vector<SBPrefix>* prefix_hits, 281 std::vector<SBFullHashResult>* full_hits, 282 base::Time last_update) OVERRIDE; 283 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, 284 std::vector<SBPrefix>* prefix_hits) OVERRIDE; 285 virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) OVERRIDE; 286 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) OVERRIDE; 287 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) OVERRIDE; 288 virtual bool ContainsDownloadWhitelistedString( 289 const std::string& str) OVERRIDE; 290 virtual bool ContainsExtensionPrefixes( 291 const std::vector<SBPrefix>& prefixes, 292 std::vector<SBPrefix>* prefix_hits) OVERRIDE; 293 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) OVERRIDE; 294 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) OVERRIDE; 295 virtual void InsertChunks(const std::string& list_name, 296 const SBChunkList& chunks) OVERRIDE; 297 virtual void DeleteChunks( 298 const std::vector<SBChunkDelete>& chunk_deletes) OVERRIDE; 299 virtual void UpdateFinished(bool update_succeeded) OVERRIDE; 300 virtual void CacheHashResults( 301 const std::vector<SBPrefix>& prefixes, 302 const std::vector<SBFullHashResult>& full_hits) OVERRIDE; 303 304 // Returns the value of malware_kill_switch_; 305 virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE; 306 307 private: 308 friend class SafeBrowsingDatabaseTest; 309 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, HashCaching); 310 311 // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored 312 // in a sorted vector) as well as a boolean flag indicating whether all 313 // lookups in the whitelist should be considered matches for safety. 314 typedef std::pair<std::vector<SBFullHash>, bool> SBWhitelist; 315 316 // Returns true if the whitelist is disabled or if any of the given hashes 317 // matches the whitelist. 318 bool ContainsWhitelistedHashes(const SBWhitelist& whitelist, 319 const std::vector<SBFullHash>& hashes); 320 321 // Return the browse_store_, download_store_, download_whitelist_store or 322 // csd_whitelist_store_ based on list_id. 323 SafeBrowsingStore* GetStore(int list_id); 324 325 // Deletes the files on disk. 326 bool Delete(); 327 328 // Load the prefix set off disk, if available. 329 void LoadPrefixSet(); 330 331 // Writes the current prefix set to disk. 332 void WritePrefixSet(); 333 334 // Loads the given full-length hashes to the given whitelist. If the number 335 // of hashes is too large or if the kill switch URL is on the whitelist 336 // we will whitelist everything. 337 void LoadWhitelist(const std::vector<SBAddFullHash>& full_hashes, 338 SBWhitelist* whitelist); 339 340 // Call this method if an error occured with the given whitelist. This will 341 // result in all lookups to the whitelist to return true. 342 void WhitelistEverything(SBWhitelist* whitelist); 343 344 // Helpers for handling database corruption. 345 // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets 346 // |corruption_detected_|, |HandleCorruptDatabase()| posts 347 // |OnHandleCorruptDatabase()| to the current thread, to be run 348 // after the current task completes. 349 // TODO(shess): Wire things up to entirely abort the update 350 // transaction when this happens. 351 void HandleCorruptDatabase(); 352 void OnHandleCorruptDatabase(); 353 354 // Helpers for InsertChunks(). 355 void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id); 356 void InsertAddChunks(safe_browsing_util::ListType list_id, 357 const SBChunkList& chunks); 358 void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id); 359 void InsertSubChunks(safe_browsing_util::ListType list_id, 360 const SBChunkList& chunks); 361 362 // Returns the size in bytes of the store after the update. 363 int64 UpdateHashPrefixStore(const base::FilePath& store_filename, 364 SafeBrowsingStore* store, 365 FailureType failure_type); 366 void UpdateBrowseStore(); 367 void UpdateSideEffectFreeWhitelistStore(); 368 void UpdateWhitelistStore(const base::FilePath& store_filename, 369 SafeBrowsingStore* store, 370 SBWhitelist* whitelist); 371 372 // Used to verify that various calls are made from the thread the 373 // object was created on. 374 base::MessageLoop* creation_loop_; 375 376 // Lock for protecting access to variables that may be used on the 377 // IO thread. This includes |prefix_set_|, |full_browse_hashes_|, 378 // |pending_browse_hashes_|, |prefix_miss_cache_|, |csd_whitelist_|. 379 base::Lock lookup_lock_; 380 381 // Underlying persistent store for chunk data. 382 // For browsing related (phishing and malware URLs) chunks and prefixes. 383 base::FilePath browse_filename_; 384 scoped_ptr<SafeBrowsingStore> browse_store_; 385 386 // For download related (download URL and binary hash) chunks and prefixes. 387 base::FilePath download_filename_; 388 scoped_ptr<SafeBrowsingStore> download_store_; 389 390 // For the client-side phishing detection whitelist chunks and full-length 391 // hashes. This list only contains 256 bit hashes. 392 base::FilePath csd_whitelist_filename_; 393 scoped_ptr<SafeBrowsingStore> csd_whitelist_store_; 394 395 // For the download whitelist chunks and full-length hashes. This list only 396 // contains 256 bit hashes. 397 base::FilePath download_whitelist_filename_; 398 scoped_ptr<SafeBrowsingStore> download_whitelist_store_; 399 400 // For extension IDs. 401 base::FilePath extension_blacklist_filename_; 402 scoped_ptr<SafeBrowsingStore> extension_blacklist_store_; 403 404 // For side-effect free whitelist. 405 base::FilePath side_effect_free_whitelist_filename_; 406 scoped_ptr<SafeBrowsingStore> side_effect_free_whitelist_store_; 407 408 SBWhitelist csd_whitelist_; 409 SBWhitelist download_whitelist_; 410 SBWhitelist extension_blacklist_; 411 412 // Cached browse store related full-hash items, ordered by prefix for 413 // efficient scanning. 414 // |full_browse_hashes_| are items from |browse_store_|, 415 // |pending_browse_hashes_| are items from |CacheHashResults()|, which 416 // will be pushed to the store on the next update. 417 std::vector<SBAddFullHash> full_browse_hashes_; 418 std::vector<SBAddFullHash> pending_browse_hashes_; 419 420 // Cache of prefixes that returned empty results (no full hash 421 // match) to |CacheHashResults()|. Cached to prevent asking for 422 // them every time. Cleared on next update. 423 std::set<SBPrefix> prefix_miss_cache_; 424 425 // Used to schedule resetting the database because of corruption. 426 base::WeakPtrFactory<SafeBrowsingDatabaseNew> reset_factory_; 427 428 // Set if corruption is detected during the course of an update. 429 // Causes the update functions to fail with no side effects, until 430 // the next call to |UpdateStarted()|. 431 bool corruption_detected_; 432 433 // Set to true if any chunks are added or deleted during an update. 434 // Used to optimize away database update. 435 bool change_detected_; 436 437 // Used to check if a prefix was in the browse database. 438 base::FilePath browse_prefix_set_filename_; 439 scoped_ptr<safe_browsing::PrefixSet> browse_prefix_set_; 440 441 // Used to check if a prefix was in the browse database. 442 base::FilePath side_effect_free_whitelist_prefix_set_filename_; 443 scoped_ptr<safe_browsing::PrefixSet> side_effect_free_whitelist_prefix_set_; 444 }; 445 446 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 447