1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 7 8 #include <map> 9 #include <set> 10 #include <string> 11 #include <vector> 12 13 #include "base/containers/hash_tables.h" 14 #include "base/files/file_path.h" 15 #include "base/gtest_prod_util.h" 16 #include "base/memory/scoped_ptr.h" 17 #include "base/memory/weak_ptr.h" 18 #include "base/synchronization/lock.h" 19 #include "base/time/time.h" 20 #include "chrome/browser/safe_browsing/safe_browsing_store.h" 21 22 namespace base { 23 class MessageLoop; 24 } 25 26 namespace safe_browsing { 27 class PrefixSet; 28 } 29 30 class GURL; 31 class SafeBrowsingDatabase; 32 33 // Factory for creating SafeBrowsingDatabase. Tests implement this factory 34 // to create fake Databases for testing. 35 class SafeBrowsingDatabaseFactory { 36 public: 37 SafeBrowsingDatabaseFactory() { } 38 virtual ~SafeBrowsingDatabaseFactory() { } 39 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 40 bool enable_download_protection, 41 bool enable_client_side_whitelist, 42 bool enable_download_whitelist, 43 bool enable_extension_blacklist, 44 bool enable_side_effect_free_whitelist, 45 bool enable_ip_blacklist) = 0; 46 private: 47 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory); 48 }; 49 50 // Contains full_hash elements which are cached in memory. Differs from 51 // SBAddFullHash in deriving |list_id| from |chunk_id|. Differs from 52 // SBFullHashResult in adding |received| for later expiration. 53 // TODO(shess): Remove/refactor this as part of converting to v2.3 caching 54 // semantics. 55 struct SBFullHashCached { 56 SBFullHash hash; 57 int list_id; // TODO(shess): Use safe_browsing_util::ListType. 58 base::Time expire_after; 59 }; 60 61 // Encapsulates on-disk databases that for safebrowsing. There are 62 // four databases: browse, download, download whitelist and 63 // client-side detection (csd) whitelist databases. The browse database contains 64 // information about phishing and malware urls. The download database contains 65 // URLs for bad binaries (e.g: those containing virus) and hash of 66 // these downloaded contents. The download whitelist contains whitelisted 67 // download hosting sites as well as whitelisted binary signing certificates 68 // etc. The csd whitelist database contains URLs that will never be considered 69 // as phishing by the client-side phishing detection. These on-disk databases 70 // are shared among all profiles, as it doesn't contain user-specific data. This 71 // object is not thread-safe, i.e. all its methods should be used on the same 72 // thread that it was created on. 73 class SafeBrowsingDatabase { 74 public: 75 // Factory method for obtaining a SafeBrowsingDatabase implementation. 76 // It is not thread safe. 77 // |enable_download_protection| is used to control the download database 78 // feature. 79 // |enable_client_side_whitelist| is used to control the csd whitelist 80 // database feature. 81 // |enable_download_whitelist| is used to control the download whitelist 82 // database feature. 83 // |enable_ip_blacklist| is used to control the csd malware IP blacklist 84 // database feature. 85 static SafeBrowsingDatabase* Create(bool enable_download_protection, 86 bool enable_client_side_whitelist, 87 bool enable_download_whitelist, 88 bool enable_extension_blacklist, 89 bool side_effect_free_whitelist, 90 bool enable_ip_blacklist); 91 92 // Makes the passed |factory| the factory used to instantiate 93 // a SafeBrowsingDatabase. This is used for tests. 94 static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) { 95 factory_ = factory; 96 } 97 98 virtual ~SafeBrowsingDatabase(); 99 100 // Initializes the database with the given filename. 101 virtual void Init(const base::FilePath& filename) = 0; 102 103 // Deletes the current database and creates a new one. 104 virtual bool ResetDatabase() = 0; 105 106 // Returns false if |url| is not in the browse database. If it returns true, 107 // then |prefix_hits| contains the list of prefix matches, and |cache_hits| 108 // contains the cached gethash results for those prefixes (if any). This 109 // function is safe to call from threads other than the creation thread. 110 virtual bool ContainsBrowseUrl( 111 const GURL& url, 112 std::vector<SBPrefix>* prefix_hits, 113 std::vector<SBFullHashResult>* cache_hits) = 0; 114 115 // Returns false if none of |urls| are in Download database. If it returns 116 // true, |prefix_hits| should contain the prefixes for the URLs that were in 117 // the database. This function could ONLY be accessed from creation thread. 118 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, 119 std::vector<SBPrefix>* prefix_hits) = 0; 120 121 // Returns false if |url| is not on the client-side phishing detection 122 // whitelist. Otherwise, this function returns true. Note: the whitelist 123 // only contains full-length hashes so we don't return any prefix hit. 124 // This function should only be called from the IO thread. 125 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0; 126 127 // The download whitelist is used for two purposes: a white-domain list of 128 // sites that are considered to host only harmless binaries as well as a 129 // whitelist of arbitrary strings such as hashed certificate authorities that 130 // are considered to be trusted. The two methods below let you lookup 131 // the whitelist either for a URL or an arbitrary string. These methods will 132 // return false if no match is found and true otherwise. 133 // This function could ONLY be accessed from the IO thread. 134 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) = 0; 135 virtual bool ContainsDownloadWhitelistedString(const std::string& str) = 0; 136 137 // Populates |prefix_hits| with any prefixes in |prefixes| that have matches 138 // in the database. 139 // 140 // This function can ONLY be accessed from the creation thread. 141 virtual bool ContainsExtensionPrefixes( 142 const std::vector<SBPrefix>& prefixes, 143 std::vector<SBPrefix>* prefix_hits) = 0; 144 145 // Returns false unless the hash of |url| is on the side-effect free 146 // whitelist. 147 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) = 0; 148 149 // Returns true iff the given IP is currently on the csd malware IP blacklist. 150 virtual bool ContainsMalwareIP(const std::string& ip_address) = 0; 151 152 // A database transaction should look like: 153 // 154 // std::vector<SBListChunkRanges> lists; 155 // if (db.UpdateStarted(&lists)) { 156 // // Do something with |lists|. 157 // 158 // // Process add/sub commands. 159 // db.InsertChunks(list_name, chunks); 160 // 161 // // Process adddel/subdel commands. 162 // db.DeleteChunks(chunks_deletes); 163 // 164 // // If passed true, processes the collected chunk info and 165 // // rebuilds the filter. If passed false, rolls everything 166 // // back. 167 // db.UpdateFinished(success); 168 // } 169 // 170 // If UpdateStarted() returns true, the caller MUST eventually call 171 // UpdateFinished(). If it returns false, the caller MUST NOT call 172 // the other functions. 173 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0; 174 virtual void InsertChunks(const std::string& list_name, 175 const std::vector<SBChunkData*>& chunks) = 0; 176 virtual void DeleteChunks( 177 const std::vector<SBChunkDelete>& chunk_deletes) = 0; 178 virtual void UpdateFinished(bool update_succeeded) = 0; 179 180 // Store the results of a GetHash response. In the case of empty results, we 181 // cache the prefixes until the next update so that we don't have to issue 182 // further GetHash requests we know will be empty. 183 virtual void CacheHashResults( 184 const std::vector<SBPrefix>& prefixes, 185 const std::vector<SBFullHashResult>& full_hits, 186 const base::TimeDelta& cache_lifetime) = 0; 187 188 // Returns true if the malware IP blacklisting killswitch URL is present 189 // in the csd whitelist. 190 virtual bool IsMalwareIPMatchKillSwitchOn() = 0; 191 192 // Returns true if the whitelist killswitch URL is present in the csd 193 // whitelist. 194 virtual bool IsCsdWhitelistKillSwitchOn() = 0; 195 196 // The name of the bloom-filter file for the given database file. 197 // NOTE(shess): OBSOLETE. Present for deleting stale files. 198 static base::FilePath BloomFilterForFilename( 199 const base::FilePath& db_filename); 200 201 // The name of the prefix set file for the given database file. 202 static base::FilePath PrefixSetForFilename(const base::FilePath& db_filename); 203 204 // Filename for malware and phishing URL database. 205 static base::FilePath BrowseDBFilename( 206 const base::FilePath& db_base_filename); 207 208 // Filename for download URL and download binary hash database. 209 static base::FilePath DownloadDBFilename( 210 const base::FilePath& db_base_filename); 211 212 // Filename for client-side phishing detection whitelist databsae. 213 static base::FilePath CsdWhitelistDBFilename( 214 const base::FilePath& csd_whitelist_base_filename); 215 216 // Filename for download whitelist databsae. 217 static base::FilePath DownloadWhitelistDBFilename( 218 const base::FilePath& download_whitelist_base_filename); 219 220 // Filename for extension blacklist database. 221 static base::FilePath ExtensionBlacklistDBFilename( 222 const base::FilePath& extension_blacklist_base_filename); 223 224 // Filename for side-effect free whitelist database. 225 static base::FilePath SideEffectFreeWhitelistDBFilename( 226 const base::FilePath& side_effect_free_whitelist_base_filename); 227 228 // Filename for the csd malware IP blacklist database. 229 static base::FilePath IpBlacklistDBFilename( 230 const base::FilePath& ip_blacklist_base_filename); 231 232 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE 233 // ORDERING OF THESE VALUES. 234 enum FailureType { 235 FAILURE_DATABASE_CORRUPT, 236 FAILURE_DATABASE_CORRUPT_HANDLER, 237 FAILURE_BROWSE_DATABASE_UPDATE_BEGIN, 238 FAILURE_BROWSE_DATABASE_UPDATE_FINISH, 239 FAILURE_DATABASE_FILTER_MISSING_OBSOLETE, 240 FAILURE_DATABASE_FILTER_READ_OBSOLETE, 241 FAILURE_DATABASE_FILTER_WRITE_OBSOLETE, 242 FAILURE_DATABASE_FILTER_DELETE, 243 FAILURE_DATABASE_STORE_MISSING, 244 FAILURE_DATABASE_STORE_DELETE, 245 FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN, 246 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH, 247 FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN, 248 FAILURE_WHITELIST_DATABASE_UPDATE_FINISH, 249 FAILURE_BROWSE_PREFIX_SET_MISSING, 250 FAILURE_BROWSE_PREFIX_SET_READ, 251 FAILURE_BROWSE_PREFIX_SET_WRITE, 252 FAILURE_BROWSE_PREFIX_SET_DELETE, 253 FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN, 254 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH, 255 FAILURE_EXTENSION_BLACKLIST_DELETE, 256 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN, 257 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH, 258 FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE, 259 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ, 260 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE, 261 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE, 262 FAILURE_IP_BLACKLIST_UPDATE_BEGIN, 263 FAILURE_IP_BLACKLIST_UPDATE_FINISH, 264 FAILURE_IP_BLACKLIST_UPDATE_INVALID, 265 FAILURE_IP_BLACKLIST_DELETE, 266 267 // Memory space for histograms is determined by the max. ALWAYS 268 // ADD NEW VALUES BEFORE THIS ONE. 269 FAILURE_DATABASE_MAX 270 }; 271 272 static void RecordFailure(FailureType failure_type); 273 274 private: 275 // The factory used to instantiate a SafeBrowsingDatabase object. 276 // Useful for tests, so they can provide their own implementation of 277 // SafeBrowsingDatabase. 278 static SafeBrowsingDatabaseFactory* factory_; 279 }; 280 281 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { 282 public: 283 // Create a database with a browse, download, download whitelist and 284 // csd whitelist store objects. Takes ownership of all the store objects. 285 // When |download_store| is NULL, the database will ignore any operations 286 // related download (url hashes and binary hashes). The same is true for 287 // the |csd_whitelist_store|, |download_whitelist_store| and 288 // |ip_blacklist_store|. 289 SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store, 290 SafeBrowsingStore* download_store, 291 SafeBrowsingStore* csd_whitelist_store, 292 SafeBrowsingStore* download_whitelist_store, 293 SafeBrowsingStore* extension_blacklist_store, 294 SafeBrowsingStore* side_effect_free_whitelist_store, 295 SafeBrowsingStore* ip_blacklist_store); 296 297 // Create a database with a browse store. This is a legacy interface that 298 // useds Sqlite. 299 SafeBrowsingDatabaseNew(); 300 301 virtual ~SafeBrowsingDatabaseNew(); 302 303 // Implement SafeBrowsingDatabase interface. 304 virtual void Init(const base::FilePath& filename) OVERRIDE; 305 virtual bool ResetDatabase() OVERRIDE; 306 virtual bool ContainsBrowseUrl( 307 const GURL& url, 308 std::vector<SBPrefix>* prefix_hits, 309 std::vector<SBFullHashResult>* cache_hits) OVERRIDE; 310 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, 311 std::vector<SBPrefix>* prefix_hits) OVERRIDE; 312 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) OVERRIDE; 313 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) OVERRIDE; 314 virtual bool ContainsDownloadWhitelistedString( 315 const std::string& str) OVERRIDE; 316 virtual bool ContainsExtensionPrefixes( 317 const std::vector<SBPrefix>& prefixes, 318 std::vector<SBPrefix>* prefix_hits) OVERRIDE; 319 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) OVERRIDE; 320 virtual bool ContainsMalwareIP(const std::string& ip_address) OVERRIDE; 321 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) OVERRIDE; 322 virtual void InsertChunks(const std::string& list_name, 323 const std::vector<SBChunkData*>& chunks) OVERRIDE; 324 virtual void DeleteChunks( 325 const std::vector<SBChunkDelete>& chunk_deletes) OVERRIDE; 326 virtual void UpdateFinished(bool update_succeeded) OVERRIDE; 327 virtual void CacheHashResults( 328 const std::vector<SBPrefix>& prefixes, 329 const std::vector<SBFullHashResult>& full_hits, 330 const base::TimeDelta& cache_lifetime) OVERRIDE; 331 332 // Returns the value of malware_kill_switch_; 333 virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE; 334 335 // Returns true if the CSD whitelist has everything whitelisted. 336 virtual bool IsCsdWhitelistKillSwitchOn() OVERRIDE; 337 338 private: 339 friend class SafeBrowsingDatabaseTest; 340 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, HashCaching); 341 342 // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored 343 // in a sorted vector) as well as a boolean flag indicating whether all 344 // lookups in the whitelist should be considered matches for safety. 345 typedef std::pair<std::vector<SBFullHash>, bool> SBWhitelist; 346 347 // This map holds a csd malware IP blacklist which maps a prefix mask 348 // to a set of hashed blacklisted IP prefixes. Each IP prefix is a hashed 349 // IPv6 IP prefix using SHA-1. 350 typedef std::map<std::string, base::hash_set<std::string> > IPBlacklist; 351 352 // Returns true if the whitelist is disabled or if any of the given hashes 353 // matches the whitelist. 354 bool ContainsWhitelistedHashes(const SBWhitelist& whitelist, 355 const std::vector<SBFullHash>& hashes); 356 357 // Return the browse_store_, download_store_, download_whitelist_store or 358 // csd_whitelist_store_ based on list_id. 359 SafeBrowsingStore* GetStore(int list_id); 360 361 // Deletes the files on disk. 362 bool Delete(); 363 364 // Load the prefix set off disk, if available. 365 void LoadPrefixSet(); 366 367 // Writes the current prefix set to disk. 368 void WritePrefixSet(); 369 370 // Loads the given full-length hashes to the given whitelist. If the number 371 // of hashes is too large or if the kill switch URL is on the whitelist 372 // we will whitelist everything. 373 void LoadWhitelist(const std::vector<SBAddFullHash>& full_hashes, 374 SBWhitelist* whitelist); 375 376 // Call this method if an error occured with the given whitelist. This will 377 // result in all lookups to the whitelist to return true. 378 void WhitelistEverything(SBWhitelist* whitelist); 379 380 // Parses the IP blacklist from the given full-length hashes. 381 void LoadIpBlacklist(const std::vector<SBAddFullHash>& full_hashes); 382 383 // Helpers for handling database corruption. 384 // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets 385 // |corruption_detected_|, |HandleCorruptDatabase()| posts 386 // |OnHandleCorruptDatabase()| to the current thread, to be run 387 // after the current task completes. 388 // TODO(shess): Wire things up to entirely abort the update 389 // transaction when this happens. 390 void HandleCorruptDatabase(); 391 void OnHandleCorruptDatabase(); 392 393 // Helpers for InsertChunks(). 394 void InsertAddChunk(SafeBrowsingStore* store, 395 safe_browsing_util::ListType list_id, 396 const SBChunkData& chunk); 397 void InsertSubChunk(SafeBrowsingStore* store, 398 safe_browsing_util::ListType list_id, 399 const SBChunkData& chunk); 400 401 // Returns the size in bytes of the store after the update. 402 int64 UpdateHashPrefixStore(const base::FilePath& store_filename, 403 SafeBrowsingStore* store, 404 FailureType failure_type); 405 void UpdateBrowseStore(); 406 void UpdateSideEffectFreeWhitelistStore(); 407 void UpdateWhitelistStore(const base::FilePath& store_filename, 408 SafeBrowsingStore* store, 409 SBWhitelist* whitelist); 410 void UpdateIpBlacklistStore(); 411 412 // Used to verify that various calls are made from the thread the 413 // object was created on. 414 base::MessageLoop* creation_loop_; 415 416 // Lock for protecting access to variables that may be used on the 417 // IO thread. This includes |prefix_set_|, |cached_browse_hashes_|, 418 // |prefix_miss_cache_|, |csd_whitelist_|. 419 base::Lock lookup_lock_; 420 421 // The base filename passed to Init(), used to generate the store and prefix 422 // set filenames used to store data on disk. 423 base::FilePath filename_base_; 424 425 // Underlying persistent store for chunk data. 426 // For browsing related (phishing and malware URLs) chunks and prefixes. 427 scoped_ptr<SafeBrowsingStore> browse_store_; 428 429 // For download related (download URL and binary hash) chunks and prefixes. 430 scoped_ptr<SafeBrowsingStore> download_store_; 431 432 // For the client-side phishing detection whitelist chunks and full-length 433 // hashes. This list only contains 256 bit hashes. 434 scoped_ptr<SafeBrowsingStore> csd_whitelist_store_; 435 436 // For the download whitelist chunks and full-length hashes. This list only 437 // contains 256 bit hashes. 438 scoped_ptr<SafeBrowsingStore> download_whitelist_store_; 439 440 // For extension IDs. 441 scoped_ptr<SafeBrowsingStore> extension_blacklist_store_; 442 443 // For side-effect free whitelist. 444 scoped_ptr<SafeBrowsingStore> side_effect_free_whitelist_store_; 445 446 // For IP blacklist. 447 scoped_ptr<SafeBrowsingStore> ip_blacklist_store_; 448 449 SBWhitelist csd_whitelist_; 450 SBWhitelist download_whitelist_; 451 SBWhitelist extension_blacklist_; 452 453 // The IP blacklist should be small. At most a couple hundred IPs. 454 IPBlacklist ip_blacklist_; 455 456 // Store items from CacheHashResults(), ordered by hash for efficient 457 // scanning. Discarded on next update. 458 std::vector<SBFullHashCached> cached_browse_hashes_; 459 460 // Cache of prefixes that returned empty results (no full hash 461 // match) to |CacheHashResults()|. Cached to prevent asking for 462 // them every time. Cleared on next update. 463 std::set<SBPrefix> prefix_miss_cache_; 464 465 // Used to schedule resetting the database because of corruption. 466 base::WeakPtrFactory<SafeBrowsingDatabaseNew> reset_factory_; 467 468 // Set if corruption is detected during the course of an update. 469 // Causes the update functions to fail with no side effects, until 470 // the next call to |UpdateStarted()|. 471 bool corruption_detected_; 472 473 // Set to true if any chunks are added or deleted during an update. 474 // Used to optimize away database update. 475 bool change_detected_; 476 477 // Used to check if a prefix was in the browse database. 478 scoped_ptr<safe_browsing::PrefixSet> browse_prefix_set_; 479 480 // Used to check if a prefix was in the browse database. 481 scoped_ptr<safe_browsing::PrefixSet> side_effect_free_whitelist_prefix_set_; 482 }; 483 484 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 485