1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 7 8 #include <map> 9 #include <set> 10 #include <string> 11 #include <vector> 12 13 #include "base/containers/hash_tables.h" 14 #include "base/files/file_path.h" 15 #include "base/gtest_prod_util.h" 16 #include "base/memory/scoped_ptr.h" 17 #include "base/memory/weak_ptr.h" 18 #include "base/synchronization/lock.h" 19 #include "chrome/browser/safe_browsing/safe_browsing_store.h" 20 21 namespace base { 22 class MessageLoop; 23 class Time; 24 } 25 26 namespace safe_browsing { 27 class PrefixSet; 28 } 29 30 class GURL; 31 class SafeBrowsingDatabase; 32 33 // Factory for creating SafeBrowsingDatabase. Tests implement this factory 34 // to create fake Databases for testing. 35 class SafeBrowsingDatabaseFactory { 36 public: 37 SafeBrowsingDatabaseFactory() { } 38 virtual ~SafeBrowsingDatabaseFactory() { } 39 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 40 bool enable_download_protection, 41 bool enable_client_side_whitelist, 42 bool enable_download_whitelist, 43 bool enable_extension_blacklist, 44 bool enable_side_effect_free_whitelist, 45 bool enable_ip_blacklist) = 0; 46 private: 47 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory); 48 }; 49 50 51 // Encapsulates on-disk databases that for safebrowsing. There are 52 // four databases: browse, download, download whitelist and 53 // client-side detection (csd) whitelist databases. The browse database contains 54 // information about phishing and malware urls. The download database contains 55 // URLs for bad binaries (e.g: those containing virus) and hash of 56 // these downloaded contents. The download whitelist contains whitelisted 57 // download hosting sites as well as whitelisted binary signing certificates 58 // etc. The csd whitelist database contains URLs that will never be considered 59 // as phishing by the client-side phishing detection. These on-disk databases 60 // are shared among all profiles, as it doesn't contain user-specific data. This 61 // object is not thread-safe, i.e. all its methods should be used on the same 62 // thread that it was created on. 63 class SafeBrowsingDatabase { 64 public: 65 // Factory method for obtaining a SafeBrowsingDatabase implementation. 66 // It is not thread safe. 67 // |enable_download_protection| is used to control the download database 68 // feature. 69 // |enable_client_side_whitelist| is used to control the csd whitelist 70 // database feature. 71 // |enable_download_whitelist| is used to control the download whitelist 72 // database feature. 73 // |enable_ip_blacklist| is used to control the csd malware IP blacklist 74 // database feature. 75 static SafeBrowsingDatabase* Create(bool enable_download_protection, 76 bool enable_client_side_whitelist, 77 bool enable_download_whitelist, 78 bool enable_extension_blacklist, 79 bool side_effect_free_whitelist, 80 bool enable_ip_blacklist); 81 82 // Makes the passed |factory| the factory used to instantiate 83 // a SafeBrowsingDatabase. This is used for tests. 84 static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) { 85 factory_ = factory; 86 } 87 88 virtual ~SafeBrowsingDatabase(); 89 90 // Initializes the database with the given filename. 91 virtual void Init(const base::FilePath& filename) = 0; 92 93 // Deletes the current database and creates a new one. 94 virtual bool ResetDatabase() = 0; 95 96 // Returns false if |url| is not in the browse database. If it 97 // returns true, then either |matching_list| is the name of the matching 98 // list, or |prefix_hits| and |full_hits| contains the matching hash 99 // prefixes. This function is safe to call from threads other than 100 // the creation thread. 101 virtual bool ContainsBrowseUrl(const GURL& url, 102 std::string* matching_list, 103 std::vector<SBPrefix>* prefix_hits, 104 std::vector<SBFullHashResult>* full_hits, 105 base::Time last_update) = 0; 106 107 // Returns false if none of |urls| are in Download database. If it returns 108 // true, |prefix_hits| should contain the prefixes for the URLs that were in 109 // the database. This function could ONLY be accessed from creation thread. 110 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, 111 std::vector<SBPrefix>* prefix_hits) = 0; 112 113 // Returns false if |prefix| is not in Download database. 114 // This function could ONLY be accessed from creation thread. 115 virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) = 0; 116 117 // Returns false if |url| is not on the client-side phishing detection 118 // whitelist. Otherwise, this function returns true. Note: the whitelist 119 // only contains full-length hashes so we don't return any prefix hit. 120 // This function should only be called from the IO thread. 121 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0; 122 123 // The download whitelist is used for two purposes: a white-domain list of 124 // sites that are considered to host only harmless binaries as well as a 125 // whitelist of arbitrary strings such as hashed certificate authorities that 126 // are considered to be trusted. The two methods below let you lookup 127 // the whitelist either for a URL or an arbitrary string. These methods will 128 // return false if no match is found and true otherwise. 129 // This function could ONLY be accessed from the IO thread. 130 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) = 0; 131 virtual bool ContainsDownloadWhitelistedString(const std::string& str) = 0; 132 133 // Populates |prefix_hits| with any prefixes in |prefixes| that have matches 134 // in the database. 135 // 136 // This function can ONLY be accessed from the creation thread. 137 virtual bool ContainsExtensionPrefixes( 138 const std::vector<SBPrefix>& prefixes, 139 std::vector<SBPrefix>* prefix_hits) = 0; 140 141 // Returns false unless the hash of |url| is on the side-effect free 142 // whitelist. 143 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) = 0; 144 145 // Returns true iff the given IP is currently on the csd malware IP blacklist. 146 virtual bool ContainsMalwareIP(const std::string& ip_address) = 0; 147 148 // A database transaction should look like: 149 // 150 // std::vector<SBListChunkRanges> lists; 151 // if (db.UpdateStarted(&lists)) { 152 // // Do something with |lists|. 153 // 154 // // Process add/sub commands. 155 // db.InsertChunks(list_name, chunks); 156 // 157 // // Process adddel/subdel commands. 158 // db.DeleteChunks(chunks_deletes); 159 // 160 // // If passed true, processes the collected chunk info and 161 // // rebuilds the filter. If passed false, rolls everything 162 // // back. 163 // db.UpdateFinished(success); 164 // } 165 // 166 // If UpdateStarted() returns true, the caller MUST eventually call 167 // UpdateFinished(). If it returns false, the caller MUST NOT call 168 // the other functions. 169 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0; 170 virtual void InsertChunks(const std::string& list_name, 171 const SBChunkList& chunks) = 0; 172 virtual void DeleteChunks( 173 const std::vector<SBChunkDelete>& chunk_deletes) = 0; 174 virtual void UpdateFinished(bool update_succeeded) = 0; 175 176 // Store the results of a GetHash response. In the case of empty results, we 177 // cache the prefixes until the next update so that we don't have to issue 178 // further GetHash requests we know will be empty. 179 virtual void CacheHashResults( 180 const std::vector<SBPrefix>& prefixes, 181 const std::vector<SBFullHashResult>& full_hits) = 0; 182 183 // Returns true if the malware IP blacklisting killswitch URL is present 184 // in the csd whitelist. 185 virtual bool IsMalwareIPMatchKillSwitchOn() = 0; 186 187 // The name of the bloom-filter file for the given database file. 188 // NOTE(shess): OBSOLETE. Present for deleting stale files. 189 static base::FilePath BloomFilterForFilename( 190 const base::FilePath& db_filename); 191 192 // The name of the prefix set file for the given database file. 193 static base::FilePath PrefixSetForFilename(const base::FilePath& db_filename); 194 195 // Filename for malware and phishing URL database. 196 static base::FilePath BrowseDBFilename( 197 const base::FilePath& db_base_filename); 198 199 // Filename for download URL and download binary hash database. 200 static base::FilePath DownloadDBFilename( 201 const base::FilePath& db_base_filename); 202 203 // Filename for client-side phishing detection whitelist databsae. 204 static base::FilePath CsdWhitelistDBFilename( 205 const base::FilePath& csd_whitelist_base_filename); 206 207 // Filename for download whitelist databsae. 208 static base::FilePath DownloadWhitelistDBFilename( 209 const base::FilePath& download_whitelist_base_filename); 210 211 // Filename for extension blacklist database. 212 static base::FilePath ExtensionBlacklistDBFilename( 213 const base::FilePath& extension_blacklist_base_filename); 214 215 // Filename for side-effect free whitelist database. 216 static base::FilePath SideEffectFreeWhitelistDBFilename( 217 const base::FilePath& side_effect_free_whitelist_base_filename); 218 219 // Filename for the csd malware IP blacklist database. 220 static base::FilePath IpBlacklistDBFilename( 221 const base::FilePath& ip_blacklist_base_filename); 222 223 // Enumerate failures for histogramming purposes. DO NOT CHANGE THE 224 // ORDERING OF THESE VALUES. 225 enum FailureType { 226 FAILURE_DATABASE_CORRUPT, 227 FAILURE_DATABASE_CORRUPT_HANDLER, 228 FAILURE_BROWSE_DATABASE_UPDATE_BEGIN, 229 FAILURE_BROWSE_DATABASE_UPDATE_FINISH, 230 FAILURE_DATABASE_FILTER_MISSING_OBSOLETE, 231 FAILURE_DATABASE_FILTER_READ_OBSOLETE, 232 FAILURE_DATABASE_FILTER_WRITE_OBSOLETE, 233 FAILURE_DATABASE_FILTER_DELETE, 234 FAILURE_DATABASE_STORE_MISSING, 235 FAILURE_DATABASE_STORE_DELETE, 236 FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN, 237 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH, 238 FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN, 239 FAILURE_WHITELIST_DATABASE_UPDATE_FINISH, 240 FAILURE_BROWSE_PREFIX_SET_MISSING, 241 FAILURE_BROWSE_PREFIX_SET_READ, 242 FAILURE_BROWSE_PREFIX_SET_WRITE, 243 FAILURE_BROWSE_PREFIX_SET_DELETE, 244 FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN, 245 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH, 246 FAILURE_EXTENSION_BLACKLIST_DELETE, 247 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN, 248 FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH, 249 FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE, 250 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ, 251 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE, 252 FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE, 253 FAILURE_IP_BLACKLIST_UPDATE_BEGIN, 254 FAILURE_IP_BLACKLIST_UPDATE_FINISH, 255 FAILURE_IP_BLACKLIST_UPDATE_INVALID, 256 FAILURE_IP_BLACKLIST_DELETE, 257 258 // Memory space for histograms is determined by the max. ALWAYS 259 // ADD NEW VALUES BEFORE THIS ONE. 260 FAILURE_DATABASE_MAX 261 }; 262 263 static void RecordFailure(FailureType failure_type); 264 265 private: 266 // The factory used to instantiate a SafeBrowsingDatabase object. 267 // Useful for tests, so they can provide their own implementation of 268 // SafeBrowsingDatabase. 269 static SafeBrowsingDatabaseFactory* factory_; 270 }; 271 272 class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { 273 public: 274 // Create a database with a browse, download, download whitelist and 275 // csd whitelist store objects. Takes ownership of all the store objects. 276 // When |download_store| is NULL, the database will ignore any operations 277 // related download (url hashes and binary hashes). The same is true for 278 // the |csd_whitelist_store|, |download_whitelist_store| and 279 // |ip_blacklist_store|. 280 SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store, 281 SafeBrowsingStore* download_store, 282 SafeBrowsingStore* csd_whitelist_store, 283 SafeBrowsingStore* download_whitelist_store, 284 SafeBrowsingStore* extension_blacklist_store, 285 SafeBrowsingStore* side_effect_free_whitelist_store, 286 SafeBrowsingStore* ip_blacklist_store); 287 288 // Create a database with a browse store. This is a legacy interface that 289 // useds Sqlite. 290 SafeBrowsingDatabaseNew(); 291 292 virtual ~SafeBrowsingDatabaseNew(); 293 294 // Implement SafeBrowsingDatabase interface. 295 virtual void Init(const base::FilePath& filename) OVERRIDE; 296 virtual bool ResetDatabase() OVERRIDE; 297 virtual bool ContainsBrowseUrl(const GURL& url, 298 std::string* matching_list, 299 std::vector<SBPrefix>* prefix_hits, 300 std::vector<SBFullHashResult>* full_hits, 301 base::Time last_update) OVERRIDE; 302 virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, 303 std::vector<SBPrefix>* prefix_hits) OVERRIDE; 304 virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) OVERRIDE; 305 virtual bool ContainsCsdWhitelistedUrl(const GURL& url) OVERRIDE; 306 virtual bool ContainsDownloadWhitelistedUrl(const GURL& url) OVERRIDE; 307 virtual bool ContainsDownloadWhitelistedString( 308 const std::string& str) OVERRIDE; 309 virtual bool ContainsExtensionPrefixes( 310 const std::vector<SBPrefix>& prefixes, 311 std::vector<SBPrefix>* prefix_hits) OVERRIDE; 312 virtual bool ContainsSideEffectFreeWhitelistUrl(const GURL& url) OVERRIDE; 313 virtual bool ContainsMalwareIP(const std::string& ip_address) OVERRIDE; 314 virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) OVERRIDE; 315 virtual void InsertChunks(const std::string& list_name, 316 const SBChunkList& chunks) OVERRIDE; 317 virtual void DeleteChunks( 318 const std::vector<SBChunkDelete>& chunk_deletes) OVERRIDE; 319 virtual void UpdateFinished(bool update_succeeded) OVERRIDE; 320 virtual void CacheHashResults( 321 const std::vector<SBPrefix>& prefixes, 322 const std::vector<SBFullHashResult>& full_hits) OVERRIDE; 323 324 // Returns the value of malware_kill_switch_; 325 virtual bool IsMalwareIPMatchKillSwitchOn() OVERRIDE; 326 327 private: 328 friend class SafeBrowsingDatabaseTest; 329 FRIEND_TEST_ALL_PREFIXES(SafeBrowsingDatabaseTest, HashCaching); 330 331 // A SafeBrowsing whitelist contains a list of whitelisted full-hashes (stored 332 // in a sorted vector) as well as a boolean flag indicating whether all 333 // lookups in the whitelist should be considered matches for safety. 334 typedef std::pair<std::vector<SBFullHash>, bool> SBWhitelist; 335 336 // This map holds a csd malware IP blacklist which maps a prefix mask 337 // to a set of hashed blacklisted IP prefixes. Each IP prefix is a hashed 338 // IPv6 IP prefix using SHA-1. 339 typedef std::map<std::string, base::hash_set<std::string> > IPBlacklist; 340 341 // Returns true if the whitelist is disabled or if any of the given hashes 342 // matches the whitelist. 343 bool ContainsWhitelistedHashes(const SBWhitelist& whitelist, 344 const std::vector<SBFullHash>& hashes); 345 346 // Return the browse_store_, download_store_, download_whitelist_store or 347 // csd_whitelist_store_ based on list_id. 348 SafeBrowsingStore* GetStore(int list_id); 349 350 // Deletes the files on disk. 351 bool Delete(); 352 353 // Load the prefix set off disk, if available. 354 void LoadPrefixSet(); 355 356 // Writes the current prefix set to disk. 357 void WritePrefixSet(); 358 359 // Loads the given full-length hashes to the given whitelist. If the number 360 // of hashes is too large or if the kill switch URL is on the whitelist 361 // we will whitelist everything. 362 void LoadWhitelist(const std::vector<SBAddFullHash>& full_hashes, 363 SBWhitelist* whitelist); 364 365 // Call this method if an error occured with the given whitelist. This will 366 // result in all lookups to the whitelist to return true. 367 void WhitelistEverything(SBWhitelist* whitelist); 368 369 // Parses the IP blacklist from the given full-length hashes. 370 void LoadIpBlacklist(const std::vector<SBAddFullHash>& full_hashes); 371 372 // Helpers for handling database corruption. 373 // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets 374 // |corruption_detected_|, |HandleCorruptDatabase()| posts 375 // |OnHandleCorruptDatabase()| to the current thread, to be run 376 // after the current task completes. 377 // TODO(shess): Wire things up to entirely abort the update 378 // transaction when this happens. 379 void HandleCorruptDatabase(); 380 void OnHandleCorruptDatabase(); 381 382 // Helpers for InsertChunks(). 383 void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id); 384 void InsertAddChunks(safe_browsing_util::ListType list_id, 385 const SBChunkList& chunks); 386 void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id); 387 void InsertSubChunks(safe_browsing_util::ListType list_id, 388 const SBChunkList& chunks); 389 390 // Returns the size in bytes of the store after the update. 391 int64 UpdateHashPrefixStore(const base::FilePath& store_filename, 392 SafeBrowsingStore* store, 393 FailureType failure_type); 394 void UpdateBrowseStore(); 395 void UpdateSideEffectFreeWhitelistStore(); 396 void UpdateWhitelistStore(const base::FilePath& store_filename, 397 SafeBrowsingStore* store, 398 SBWhitelist* whitelist); 399 void UpdateIpBlacklistStore(); 400 401 // Used to verify that various calls are made from the thread the 402 // object was created on. 403 base::MessageLoop* creation_loop_; 404 405 // Lock for protecting access to variables that may be used on the 406 // IO thread. This includes |prefix_set_|, |full_browse_hashes_|, 407 // |pending_browse_hashes_|, |prefix_miss_cache_|, |csd_whitelist_|. 408 base::Lock lookup_lock_; 409 410 // Underlying persistent store for chunk data. 411 // For browsing related (phishing and malware URLs) chunks and prefixes. 412 base::FilePath browse_filename_; 413 scoped_ptr<SafeBrowsingStore> browse_store_; 414 415 // For download related (download URL and binary hash) chunks and prefixes. 416 base::FilePath download_filename_; 417 scoped_ptr<SafeBrowsingStore> download_store_; 418 419 // For the client-side phishing detection whitelist chunks and full-length 420 // hashes. This list only contains 256 bit hashes. 421 base::FilePath csd_whitelist_filename_; 422 scoped_ptr<SafeBrowsingStore> csd_whitelist_store_; 423 424 // For the download whitelist chunks and full-length hashes. This list only 425 // contains 256 bit hashes. 426 base::FilePath download_whitelist_filename_; 427 scoped_ptr<SafeBrowsingStore> download_whitelist_store_; 428 429 // For extension IDs. 430 base::FilePath extension_blacklist_filename_; 431 scoped_ptr<SafeBrowsingStore> extension_blacklist_store_; 432 433 // For side-effect free whitelist. 434 base::FilePath side_effect_free_whitelist_filename_; 435 scoped_ptr<SafeBrowsingStore> side_effect_free_whitelist_store_; 436 437 // For IP blacklist. 438 base::FilePath ip_blacklist_filename_; 439 scoped_ptr<SafeBrowsingStore> ip_blacklist_store_; 440 441 SBWhitelist csd_whitelist_; 442 SBWhitelist download_whitelist_; 443 SBWhitelist extension_blacklist_; 444 445 // The IP blacklist should be small. At most a couple hundred IPs. 446 IPBlacklist ip_blacklist_; 447 448 // Cached browse store related full-hash items, ordered by prefix for 449 // efficient scanning. 450 // |full_browse_hashes_| are items from |browse_store_|, 451 // |pending_browse_hashes_| are items from |CacheHashResults()|, which 452 // will be pushed to the store on the next update. 453 std::vector<SBAddFullHash> full_browse_hashes_; 454 std::vector<SBAddFullHash> pending_browse_hashes_; 455 456 // Cache of prefixes that returned empty results (no full hash 457 // match) to |CacheHashResults()|. Cached to prevent asking for 458 // them every time. Cleared on next update. 459 std::set<SBPrefix> prefix_miss_cache_; 460 461 // Used to schedule resetting the database because of corruption. 462 base::WeakPtrFactory<SafeBrowsingDatabaseNew> reset_factory_; 463 464 // Set if corruption is detected during the course of an update. 465 // Causes the update functions to fail with no side effects, until 466 // the next call to |UpdateStarted()|. 467 bool corruption_detected_; 468 469 // Set to true if any chunks are added or deleted during an update. 470 // Used to optimize away database update. 471 bool change_detected_; 472 473 // Used to check if a prefix was in the browse database. 474 base::FilePath browse_prefix_set_filename_; 475 scoped_ptr<safe_browsing::PrefixSet> browse_prefix_set_; 476 477 // Used to check if a prefix was in the browse database. 478 base::FilePath side_effect_free_whitelist_prefix_set_filename_; 479 scoped_ptr<safe_browsing::PrefixSet> side_effect_free_whitelist_prefix_set_; 480 }; 481 482 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ 483