1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 7 8 #include <map> 9 #include <set> 10 #include <string> 11 #include <vector> 12 13 #include "base/containers/hash_tables.h" 14 #include "base/files/file_path.h" 15 #include "base/gtest_prod_util.h" 16 #include "base/memory/scoped_ptr.h" 17 #include "base/memory/weak_ptr.h" 18 #include "base/synchronization/lock.h" 19 #include "base/time/time.h" 20 #include "chrome/browser/safe_browsing/safe_browsing_store.h" 21 22 namespace base { 23 class MessageLoop; 24 } 25 26 namespace safe_browsing { 27 class PrefixSet; 28 } 29 30 class GURL; 31 class SafeBrowsingDatabase; 32 33 // Factory for creating SafeBrowsingDatabase. Tests implement this factory 34 // to create fake Databases for testing. 35 class SafeBrowsingDatabaseFactory { 36 public: 37 SafeBrowsingDatabaseFactory() { } 38 virtual ~SafeBrowsingDatabaseFactory() { } 39 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 40 bool enable_download_protection, 41 bool enable_client_side_whitelist, 42 bool enable_download_whitelist, 43 bool enable_extension_blacklist, 44 bool enable_side_effect_free_whitelist, 45 bool enable_ip_blacklist) = 0; 46 private: 47 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory); 48 }; 49 50 // Encapsulates on-disk databases that for safebrowsing. There are 51 // four databases: browse, download, download whitelist and 52 // client-side detection (csd) whitelist databases. The browse database contains 53 // information about phishing and malware urls. The download database contains 54 // URLs for bad binaries (e.g: those containing virus) and hash of 55 // these downloaded contents. The download whitelist contains whitelisted 56 // download hosting sites as well as whitelisted binary signing certificates 57 // etc. The csd whitelist database contains URLs that will never be considered 58 // as phishing by the client-side phishing detection. These on-disk databases 59 // are shared among all profiles, as it doesn't contain user-specific data. This 60 // object is not thread-safe, i.e. all its methods should be used on the same 61 // thread that it was created on. 62 class SafeBrowsingDatabase { 63 public: 64 // Factory method for obtaining a SafeBrowsingDatabase implementation. 65 // It is not thread safe. 66 // |enable_download_protection| is used to control the download database 67 // feature. 68 // |enable_client_side_whitelist| is used to control the csd whitelist 69 // database feature. 70 // |enable_download_whitelist| is used to control the download whitelist 71 // database feature. 72 // |enable_ip_blacklist| is used to control the csd malware IP blacklist 73 // database feature. 74 static SafeBrowsingDatabase* Create(bool enable_download_protection, 75 bool enable_client_side_whitelist, 76 bool enable_download_whitelist, 77 bool enable_extension_blacklist, 78 bool side_effect_free_whitelist, 79 bool enable_ip_blacklist); 80 81 // Makes the passed |factory| the factory used to instantiate 82 // a SafeBrowsingDatabase. This is used for tests. 83 static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) { 84 factory_ = factory; 85 } 86 87 virtual ~SafeBrowsingDatabase(); 88 89 // Initializes the database with the given filename. 90 virtual void Init(const base::FilePath& filename) = 0; 91 92 // Deletes the current database and creates a new one. 93 virtual bool ResetDatabase() = 0; 94 95 // Returns false if |url| is not in the browse database or already was cached 96 // as a miss. If it returns true, |prefix_hits| contains matching hash 97 // prefixes which had no cached results and |cache_hits| contains any matching 98 // cached gethash results. This function is safe to call from any thread. 99 virtual bool ContainsBrowseUrl( 100 const GURL& url, 101 std::vector<SBPrefix>* prefix_hits, 102 std::vector<SBFullHashResult>* cache_hits) = 0; 103 104 // Returns false if none of |urls| are in Download database. If it returns 105 // true, |prefix_hits| should contain the prefixes for the URLs that were in 106 // the database. This function could ONLY be accessed from creation thread. 107 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, 108 std::vector<SBPrefix>* prefix_hits) = 0; 109 110 // Returns false if |url| is not on the client-side phishing detection 111 // whitelist. Otherwise, this function returns true. Note: the whitelist 112 // only contains full-length hashes so we don't return any prefix hit. 113 // This function should only be called from the IO thread. 114 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0; 115 116 // The download whitelist is used for two purposes: a white-domain list of 117 // sites that are considered to host only harmless binaries as well as a 118 // whitelist of arbitrary strings such as hashed certificate authorities that 119 // are considered to be trusted. The two methods below let you lookup 120 // the whitelist either for a URL or an arbitrary string. These methods will 121 // return false if no match is found and true otherwise. 122 // This function could ONLY be accessed from the IO thread. 123 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) = 0; 124 virtual bool ContainsDownloadWhitelistedString(const std::string& str) = 0; 125 126 // Populates |prefix_hits| with any prefixes in |prefixes| that have matches 127 // in the database. 128 // 129 // This function can ONLY be accessed from the creation thread. 130 virtual bool ContainsExtensionPrefixes( 131 const std::vector<SBPrefix>& prefixes, 132 std::vector<SBPrefix>* prefix_hits) = 0; 133 134 // Returns false unless the hash of |url| is on the side-effect free 135 // whitelist. 136 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) = 0; 137 138 // Returns true iff the given IP is currently on the csd malware IP blacklist. 139 virtual bool ContainsMalwareIP(const std::string& ip_address) = 0; 140 141 // A database transaction should look like: 142 // 143 // std::vector<SBListChunkRanges> lists; 144 // if (db.UpdateStarted(&lists)) { 145 // // Do something with |lists|. 146 // 147 // // Process add/sub commands. 148 // db.InsertChunks(list_name, chunks); 149 // 150 // // Process adddel/subdel commands. 151 // db.DeleteChunks(chunks_deletes); 152 // 153 // // If passed true, processes the collected chunk info and 154 // // rebuilds the filter. If passed false, rolls everything 155 // // back. 156 // db.UpdateFinished(success); 157 // } 158 // 159 // If UpdateStarted() returns true, the caller MUST eventually call 160 // UpdateFinished(). If it returns false, the caller MUST NOT call 161 // the other functions. 162 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0; 163 virtual void InsertChunks(const std::string& list_name, 164 const std::vector<SBChunkData*>& chunks) = 0; 165 virtual void DeleteChunks( 166 const std::vector<SBChunkDelete>& chunk_deletes) = 0; 167 virtual void UpdateFinished(bool update_succeeded) = 0; 168 169 // Store the results of a GetHash response. In the case of empty results, we 170 // cache the prefixes until the next update so that we don't have to issue 171 // further GetHash requests we know will be empty. 172 virtual void CacheHashResults( 173 const std::vector<SBPrefix>& prefixes, 174 const std::vector<SBFullHashResult>& full_hits, 175 const base::TimeDelta& cache_lifetime) = 0; 176 177 // Returns true if the malware IP blacklisting killswitch URL is present 178 // in the csd whitelist. 179 virtual bool IsMalwareIPMatchKillSwitchOn() = 0; 180 181 // Returns true if the whitelist killswitch URL is present in the csd 182 // whitelist. 183 virtual bool IsCsdWhitelistKillSwitchOn() = 0; 184 185 // The name of the bloom-filter file for the given database file. 186 // NOTE(shess): OBSOLETE. Present for deleting stale files. 187 static base::FilePath BloomFilterForFilename( 188 const base::FilePath& db_filename); 189 190 // The name of the prefix set file for the given database file. 191 static base::FilePath PrefixSetForFilename(const base::FilePath& db_filename); 192 193 // Filename for malware and phishing URL database. 194 static base::FilePath BrowseDBFilename( 195 const base::FilePath& db_base_filename); 196 197 // Filename for download URL and download binary hash database. 198 static base::FilePath DownloadDBFilename( 199 const base::FilePath& db_base_filename); 200 201 // Filename for client-side phishing detection whitelist databsae. 202 static base::FilePath CsdWhitelistDBFilename( 203 const base::FilePath& csd_whitelist_base_filename); 204 205 // Filename for download whitelist databsae. 206 static base::FilePath DownloadWhitelistDBFilename( 207 const base::FilePath& download_whitelist_base_filename); 208 209 // Filename for extension blacklist database. 210 static base::FilePath ExtensionBlacklistDBFilename( 211 const base::FilePath& extension_blacklist_base_filename); 212 213 // Filename for side-effect free whitelist database. 214 static base::FilePath SideEffectFreeWhitelistDBFilename( 215 const base::FilePath& side_effect_free_whitelist_base_filename); 216 217 // Filename for the csd malware IP blacklist database. 218 static base::FilePath IpBlacklistDBFilename( 219 const base::FilePath& ip_blacklist_base_filename); 220 221 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE 222 // ORDERING OF THESE VALUES. 223 enum FailureType { 224 FAILURE_DATABASE_CORRUPT, 225 FAILURE_DATABASE_CORRUPT_HANDLER, 226 FAILURE_BROWSE_DATABASE_UPDATE_BEGIN, 227 FAILURE_BROWSE_DATABASE_UPDATE_FINISH, 228 FAILURE_DATABASE_FILTER_MISSING_OBSOLETE, 229 FAILURE_DATABASE_FILTER_READ_OBSOLETE, 230 FAILURE_DATABASE_FILTER_WRITE_OBSOLETE, 231 FAILURE_DATABASE_FILTER_DELETE, 232 FAILURE_DATABASE_STORE_MISSING, 233 FAILURE_DATABASE_STORE_DELETE, 234 FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN, 235 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH, 236 FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN, 237 FAILURE_WHITELIST_DATABASE_UPDATE_FINISH, 238 FAILURE_BROWSE_PREFIX_SET_MISSING, 239 FAILURE_BROWSE_PREFIX_SET_READ, 240 FAILURE_BROWSE_PREFIX_SET_WRITE, 241 FAILURE_BROWSE_PREFIX_SET_DELETE, 242 FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN, 243 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH, 244 FAILURE_EXTENSION_BLACKLIST_DELETE, 245 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN, 246 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH, 247 FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE, 248 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ, 249 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE, 250 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE, 251 FAILURE_IP_BLACKLIST_UPDATE_BEGIN, 252 FAILURE_IP_BLACKLIST_UPDATE_FINISH, 253 FAILURE_IP_BLACKLIST_UPDATE_INVALID, 254 FAILURE_IP_BLACKLIST_DELETE, 255 256 // Memory space for histograms is determined by the max. ALWAYS 257 // ADD NEW VALUES BEFORE THIS ONE. 258 FAILURE_DATABASE_MAX 259 }; 260 261 static void RecordFailure(FailureType failure_type); 262 263 private: 264 // The factory used to instantiate a SafeBrowsingDatabase object. 265 // Useful for tests, so they can provide their own implementation of 266 // SafeBrowsingDatabase. 267 static SafeBrowsingDatabaseFactory* factory_; 268 }; 269 270 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { 271 public: 272 // Create a database with a browse, download, download whitelist and 273 // csd whitelist store objects. Takes ownership of all the store objects. 274 // When |download_store| is NULL, the database will ignore any operations 275 // related download (url hashes and binary hashes). The same is true for 276 // the |csd_whitelist_store|, |download_whitelist_store| and 277 // |ip_blacklist_store|. 278 SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store, 279 SafeBrowsingStore* download_store, 280 SafeBrowsingStore* csd_whitelist_store, 281 SafeBrowsingStore* download_whitelist_store, 282 SafeBrowsingStore* extension_blacklist_store, 283 SafeBrowsingStore* side_effect_free_whitelist_store, 284 SafeBrowsingStore* ip_blacklist_store); 285 286 // Create a database with a browse store. This is a legacy interface that 287 // useds Sqlite. 288 SafeBrowsingDatabaseNew(); 289 290 virtual ~SafeBrowsingDatabaseNew(); 291 292 // Implement SafeBrowsingDatabase interface. 293 virtual void Init(const base::FilePath& filename) OVERRIDE; 294 virtual bool ResetDatabase() OVERRIDE; 295 virtual bool ContainsBrowseUrl( 296 const GURL& url, 297 std::vector<SBPrefix>* prefix_hits, 298 std::vector<SBFullHashResult>* cache_hits) OVERRIDE; 299 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, 300 std::vector<SBPrefix>* prefix_hits) OVERRIDE; 301 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) OVERRIDE; 302 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) OVERRIDE; 303 virtual bool ContainsDownloadWhitelistedString( 304 const std::string& str) OVERRIDE; 305 virtual bool ContainsExtensionPrefixes( 306 const std::vector<SBPrefix>& prefixes, 307 std::vector<SBPrefix>* prefix_hits) OVERRIDE; 308 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) OVERRIDE; 309 virtual bool ContainsMalwareIP(const std::string& ip_address) OVERRIDE; 310 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) OVERRIDE; 311 virtual void InsertChunks(const std::string& list_name, 312 const std::vector<SBChunkData*>& chunks) OVERRIDE; 313 virtual void DeleteChunks( 314 const std::vector<SBChunkDelete>& chunk_deletes) OVERRIDE; 315 virtual void UpdateFinished(bool update_succeeded) OVERRIDE; 316 virtual void CacheHashResults( 317 const std::vector<SBPrefix>& prefixes, 318 const std::vector<SBFullHashResult>& full_hits, 319 const base::TimeDelta& cache_lifetime) OVERRIDE; 320 321 // Returns the value of malware_kill_switch_; 322 virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE; 323 324 // Returns true if the CSD whitelist has everything whitelisted. 325 virtual bool IsCsdWhitelistKillSwitchOn() OVERRIDE; 326 327 private: 328 friend class SafeBrowsingDatabaseTest; 329 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, HashCaching); 330 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, CachedFullMiss); 331 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, CachedPrefixHitFullMiss); 332 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, BrowseFullHashMatching); 333 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, 334 BrowseFullHashAndPrefixMatching); 335 336 // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored 337 // in a sorted vector) as well as a boolean flag indicating whether all 338 // lookups in the whitelist should be considered matches for safety. 339 typedef std::pair<std::vector<SBFullHash>, bool> SBWhitelist; 340 341 // This map holds a csd malware IP blacklist which maps a prefix mask 342 // to a set of hashed blacklisted IP prefixes. Each IP prefix is a hashed 343 // IPv6 IP prefix using SHA-1. 344 typedef std::map<std::string, base::hash_set<std::string> > IPBlacklist; 345 346 // Helper for ContainsBrowseUrl, exposed for testing. 347 bool ContainsBrowseUrlHashes(const std::vector<SBFullHash>& full_hashes, 348 std::vector<SBPrefix>* prefix_hits, 349 std::vector<SBFullHashResult>* cache_hits); 350 351 // Returns true if the whitelist is disabled or if any of the given hashes 352 // matches the whitelist. 353 bool ContainsWhitelistedHashes(const SBWhitelist& whitelist, 354 const std::vector<SBFullHash>& hashes); 355 356 // Return the browse_store_, download_store_, download_whitelist_store or 357 // csd_whitelist_store_ based on list_id. 358 SafeBrowsingStore* GetStore(int list_id); 359 360 // Deletes the files on disk. 361 bool Delete(); 362 363 // Load the prefix set off disk, if available. 364 void LoadPrefixSet(); 365 366 // Writes the current prefix set to disk. 367 void WritePrefixSet(); 368 369 // Loads the given full-length hashes to the given whitelist. If the number 370 // of hashes is too large or if the kill switch URL is on the whitelist 371 // we will whitelist everything. 372 void LoadWhitelist(const std::vector<SBAddFullHash>& full_hashes, 373 SBWhitelist* whitelist); 374 375 // Call this method if an error occured with the given whitelist. This will 376 // result in all lookups to the whitelist to return true. 377 void WhitelistEverything(SBWhitelist* whitelist); 378 379 // Parses the IP blacklist from the given full-length hashes. 380 void LoadIpBlacklist(const std::vector<SBAddFullHash>& full_hashes); 381 382 // Helpers for handling database corruption. 383 // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets 384 // |corruption_detected_|, |HandleCorruptDatabase()| posts 385 // |OnHandleCorruptDatabase()| to the current thread, to be run 386 // after the current task completes. 387 // TODO(shess): Wire things up to entirely abort the update 388 // transaction when this happens. 389 void HandleCorruptDatabase(); 390 void OnHandleCorruptDatabase(); 391 392 // Helpers for InsertChunks(). 393 void InsertAddChunk(SafeBrowsingStore* store, 394 safe_browsing_util::ListType list_id, 395 const SBChunkData& chunk); 396 void InsertSubChunk(SafeBrowsingStore* store, 397 safe_browsing_util::ListType list_id, 398 const SBChunkData& chunk); 399 400 // Returns the size in bytes of the store after the update. 401 int64 UpdateHashPrefixStore(const base::FilePath& store_filename, 402 SafeBrowsingStore* store, 403 FailureType failure_type); 404 void UpdateBrowseStore(); 405 void UpdateSideEffectFreeWhitelistStore(); 406 void UpdateWhitelistStore(const base::FilePath& store_filename, 407 SafeBrowsingStore* store, 408 SBWhitelist* whitelist); 409 void UpdateIpBlacklistStore(); 410 411 // Used to verify that various calls are made from the thread the 412 // object was created on. 413 base::MessageLoop* creation_loop_; 414 415 // Lock for protecting access to variables that may be used on the IO thread. 416 // This includes |prefix_set_|, |browse_gethash_cache_|, |csd_whitelist_|. 417 base::Lock lookup_lock_; 418 419 // The base filename passed to Init(), used to generate the store and prefix 420 // set filenames used to store data on disk. 421 base::FilePath filename_base_; 422 423 // Underlying persistent store for chunk data. 424 // For browsing related (phishing and malware URLs) chunks and prefixes. 425 scoped_ptr<SafeBrowsingStore> browse_store_; 426 427 // For download related (download URL and binary hash) chunks and prefixes. 428 scoped_ptr<SafeBrowsingStore> download_store_; 429 430 // For the client-side phishing detection whitelist chunks and full-length 431 // hashes. This list only contains 256 bit hashes. 432 scoped_ptr<SafeBrowsingStore> csd_whitelist_store_; 433 434 // For the download whitelist chunks and full-length hashes. This list only 435 // contains 256 bit hashes. 436 scoped_ptr<SafeBrowsingStore> download_whitelist_store_; 437 438 // For extension IDs. 439 scoped_ptr<SafeBrowsingStore> extension_blacklist_store_; 440 441 // For side-effect free whitelist. 442 scoped_ptr<SafeBrowsingStore> side_effect_free_whitelist_store_; 443 444 // For IP blacklist. 445 scoped_ptr<SafeBrowsingStore> ip_blacklist_store_; 446 447 SBWhitelist csd_whitelist_; 448 SBWhitelist download_whitelist_; 449 SBWhitelist extension_blacklist_; 450 451 // The IP blacklist should be small. At most a couple hundred IPs. 452 IPBlacklist ip_blacklist_; 453 454 // Cache of gethash results for browse store. Entries should not be used if 455 // they are older than their expire_after field. Cached misses will have 456 // empty full_hashes field. Cleared on each update. 457 std::map<SBPrefix, SBCachedFullHashResult> browse_gethash_cache_; 458 459 // Set if corruption is detected during the course of an update. 460 // Causes the update functions to fail with no side effects, until 461 // the next call to |UpdateStarted()|. 462 bool corruption_detected_; 463 464 // Set to true if any chunks are added or deleted during an update. 465 // Used to optimize away database update. 466 bool change_detected_; 467 468 // Used to check if a prefix was in the browse database. 469 scoped_ptr<safe_browsing::PrefixSet> browse_prefix_set_; 470 471 // Used to check if a prefix was in the browse database. 472 scoped_ptr<safe_browsing::PrefixSet> side_effect_free_whitelist_prefix_set_; 473 474 // Used to schedule resetting the database because of corruption. 475 base::WeakPtrFactory<SafeBrowsingDatabaseNew> reset_factory_; 476 }; 477 478 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 479