// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/safe_browsing_database.h"

#include <algorithm>
#include <iterator>

#include "base/bind.h"
#include "base/files/file_util.h"
#include "base/message_loop/message_loop.h"
#include "base/metrics/histogram.h"
#include "base/metrics/stats_counters.h"
#include "base/process/process.h"
#include "base/process/process_metrics.h"
#include "base/sha1.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/stringprintf.h"
#include "base/time/time.h"
#include "chrome/browser/safe_browsing/prefix_set.h"
#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
#include "content/public/browser/browser_thread.h"
#include "crypto/sha2.h"
#include "net/base/net_util.h"
#include "url/gurl.h"

#if defined(OS_MACOSX)
#include "base/mac/mac_util.h"
#endif

using content::BrowserThread;

namespace {

// The constants below are filename suffixes.  Each one is appended to a base
// filename by the matching SafeBrowsingDatabase::*Filename() helper in this
// file; the leading space is part of the suffix.

// Filename suffix for the bloom filter.
const base::FilePath::CharType kBloomFilterFile[] =
    FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for the prefix set.
const base::FilePath::CharType kPrefixSetFile[] =
    FILE_PATH_LITERAL(" Prefix Set");
// Filename suffix for download store.
const base::FilePath::CharType kDownloadDBFile[] =
    FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
const base::FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for the download whitelist store.
const base::FilePath::CharType kDownloadWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for the extension blacklist store.
const base::FilePath::CharType kExtensionBlacklistDBFile[] =
    FILE_PATH_LITERAL(" Extension Blacklist");
// Filename suffix for the side-effect free whitelist store.
const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
// Filename suffix for the csd malware IP blacklist store.
const base::FilePath::CharType kIPBlacklistDBFile[] =
    FILE_PATH_LITERAL(" IP Blacklist");

// Filename suffix for browse store.
// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
// Unfortunately, to change the name implies lots of transition code
// for little benefit.  If/when file formats change (say to put all
// the data in one file), that would be a convenient point to rectify
// this.
// TODO(shess): This shouldn't be OS-driven <http://crbug.com/394379>
#if defined(OS_ANDROID)
// NOTE(shess): This difference is also reflected in the list name in
// safe_browsing_util.cc.
// TODO(shess): Spin up an alternate list id which can be persisted in the
// store.  Then if a mistake is made, it won't cause confusion between
// incompatible lists.
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Mobile");
#else
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
#endif

// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries than this, all lookups to
// the whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// If the hash of this exact expression is on a whitelist then the
// malware IP blacklisting feature will be disabled in csd.
// Don't change this!
92 const char kMalwareIPKillSwitchUrl[] = 93 "sb-ssl.google.com/safebrowsing/csd/killswitch_malware"; 94 95 const size_t kMaxIpPrefixSize = 128; 96 const size_t kMinIpPrefixSize = 1; 97 98 // To save space, the incoming |chunk_id| and |list_id| are combined 99 // into an |encoded_chunk_id| for storage by shifting the |list_id| 100 // into the low-order bits. These functions decode that information. 101 // TODO(lzheng): It was reasonable when database is saved in sqlite, but 102 // there should be better ways to save chunk_id and list_id after we use 103 // SafeBrowsingStoreFile. 104 int GetListIdBit(const int encoded_chunk_id) { 105 return encoded_chunk_id & 1; 106 } 107 int DecodeChunkId(int encoded_chunk_id) { 108 return encoded_chunk_id >> 1; 109 } 110 int EncodeChunkId(const int chunk, const int list_id) { 111 DCHECK_NE(list_id, safe_browsing_util::INVALID); 112 return chunk << 1 | list_id % 2; 113 } 114 115 // Generate the set of full hashes to check for |url|. If 116 // |include_whitelist_hashes| is true we will generate additional path-prefixes 117 // to match against the csd whitelist. E.g., if the path-prefix /foo is on the 118 // whitelist it should also match /foo/bar which is not the case for all the 119 // other lists. We'll also always add a pattern for the empty path. 120 // TODO(shess): This function is almost the same as 121 // |CompareFullHashes()| in safe_browsing_util.cc, except that code 122 // does an early exit on match. Since match should be the infrequent 123 // case (phishing or malware found), consider combining this function 124 // with that one. 
125 void BrowseFullHashesToCheck(const GURL& url, 126 bool include_whitelist_hashes, 127 std::vector<SBFullHash>* full_hashes) { 128 std::vector<std::string> hosts; 129 if (url.HostIsIPAddress()) { 130 hosts.push_back(url.host()); 131 } else { 132 safe_browsing_util::GenerateHostsToCheck(url, &hosts); 133 } 134 135 std::vector<std::string> paths; 136 safe_browsing_util::GeneratePathsToCheck(url, &paths); 137 138 for (size_t i = 0; i < hosts.size(); ++i) { 139 for (size_t j = 0; j < paths.size(); ++j) { 140 const std::string& path = paths[j]; 141 full_hashes->push_back(SBFullHashForString(hosts[i] + path)); 142 143 // We may have /foo as path-prefix in the whitelist which should 144 // also match with /foo/bar and /foo?bar. Hence, for every path 145 // that ends in '/' we also add the path without the slash. 146 if (include_whitelist_hashes && 147 path.size() > 1 && 148 path[path.size() - 1] == '/') { 149 full_hashes->push_back( 150 SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1))); 151 } 152 } 153 } 154 } 155 156 // Get the prefixes matching the download |urls|. 157 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls, 158 std::vector<SBPrefix>* prefixes) { 159 std::vector<SBFullHash> full_hashes; 160 for (size_t i = 0; i < urls.size(); ++i) 161 BrowseFullHashesToCheck(urls[i], false, &full_hashes); 162 163 for (size_t i = 0; i < full_hashes.size(); ++i) 164 prefixes->push_back(full_hashes[i].prefix); 165 } 166 167 // Helper function to compare addprefixes in |store| with |prefixes|. 168 // The |list_bit| indicates which list (url or hash) to compare. 169 // 170 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain 171 // the actual matching prefixes. 
172 bool MatchAddPrefixes(SafeBrowsingStore* store, 173 int list_bit, 174 const std::vector<SBPrefix>& prefixes, 175 std::vector<SBPrefix>* prefix_hits) { 176 prefix_hits->clear(); 177 bool found_match = false; 178 179 SBAddPrefixes add_prefixes; 180 store->GetAddPrefixes(&add_prefixes); 181 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); 182 iter != add_prefixes.end(); ++iter) { 183 for (size_t j = 0; j < prefixes.size(); ++j) { 184 const SBPrefix& prefix = prefixes[j]; 185 if (prefix == iter->prefix && 186 GetListIdBit(iter->chunk_id) == list_bit) { 187 prefix_hits->push_back(prefix); 188 found_match = true; 189 } 190 } 191 } 192 return found_match; 193 } 194 195 // This function generates a chunk range string for |chunks|. It 196 // outputs one chunk range string per list and writes it to the 197 // |list_ranges| vector. We expect |list_ranges| to already be of the 198 // right size. E.g., if |chunks| contains chunks with two different 199 // list ids then |list_ranges| must contain two elements. 200 void GetChunkRanges(const std::vector<int>& chunks, 201 std::vector<std::string>* list_ranges) { 202 // Since there are 2 possible list ids, there must be exactly two 203 // list ranges. Even if the chunk data should only contain one 204 // line, this code has to somehow handle corruption. 205 DCHECK_EQ(2U, list_ranges->size()); 206 207 std::vector<std::vector<int> > decoded_chunks(list_ranges->size()); 208 for (std::vector<int>::const_iterator iter = chunks.begin(); 209 iter != chunks.end(); ++iter) { 210 int mod_list_id = GetListIdBit(*iter); 211 DCHECK_GE(mod_list_id, 0); 212 DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size()); 213 decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter)); 214 } 215 for (size_t i = 0; i < decoded_chunks.size(); ++i) { 216 ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i])); 217 } 218 } 219 220 // Helper function to create chunk range lists for Browse related 221 // lists. 
222 void UpdateChunkRanges(SafeBrowsingStore* store, 223 const std::vector<std::string>& listnames, 224 std::vector<SBListChunkRanges>* lists) { 225 if (!store) 226 return; 227 228 DCHECK_GT(listnames.size(), 0U); 229 DCHECK_LE(listnames.size(), 2U); 230 std::vector<int> add_chunks; 231 std::vector<int> sub_chunks; 232 store->GetAddChunks(&add_chunks); 233 store->GetSubChunks(&sub_chunks); 234 235 // Always decode 2 ranges, even if only the first one is expected. 236 // The loop below will only load as many into |lists| as |listnames| 237 // indicates. 238 std::vector<std::string> adds(2); 239 std::vector<std::string> subs(2); 240 GetChunkRanges(add_chunks, &adds); 241 GetChunkRanges(sub_chunks, &subs); 242 243 for (size_t i = 0; i < listnames.size(); ++i) { 244 const std::string& listname = listnames[i]; 245 DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2, 246 static_cast<int>(i % 2)); 247 DCHECK_NE(safe_browsing_util::GetListId(listname), 248 safe_browsing_util::INVALID); 249 lists->push_back(SBListChunkRanges(listname)); 250 lists->back().adds.swap(adds[i]); 251 lists->back().subs.swap(subs[i]); 252 } 253 } 254 255 void UpdateChunkRangesForLists(SafeBrowsingStore* store, 256 const std::string& listname0, 257 const std::string& listname1, 258 std::vector<SBListChunkRanges>* lists) { 259 std::vector<std::string> listnames; 260 listnames.push_back(listname0); 261 listnames.push_back(listname1); 262 UpdateChunkRanges(store, listnames, lists); 263 } 264 265 void UpdateChunkRangesForList(SafeBrowsingStore* store, 266 const std::string& listname, 267 std::vector<SBListChunkRanges>* lists) { 268 UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists); 269 } 270 271 // This code always checks for non-zero file size. This helper makes 272 // that less verbose. 
273 int64 GetFileSizeOrZero(const base::FilePath& file_path) { 274 int64 size_64; 275 if (!base::GetFileSize(file_path, &size_64)) 276 return 0; 277 return size_64; 278 } 279 280 // Helper for ContainsBrowseUrlHashes(). Returns true if an un-expired match 281 // for |full_hash| is found in |cache|, with any matches appended to |results| 282 // (true can be returned with zero matches). |expire_base| is used to check the 283 // cache lifetime of matches, expired matches will be discarded from |cache|. 284 bool GetCachedFullHash(std::map<SBPrefix, SBCachedFullHashResult>* cache, 285 const SBFullHash& full_hash, 286 const base::Time& expire_base, 287 std::vector<SBFullHashResult>* results) { 288 // First check if there is a valid cached result for this prefix. 289 std::map<SBPrefix, SBCachedFullHashResult>::iterator 290 citer = cache->find(full_hash.prefix); 291 if (citer == cache->end()) 292 return false; 293 294 // Remove expired entries. 295 SBCachedFullHashResult& cached_result = citer->second; 296 if (cached_result.expire_after <= expire_base) { 297 cache->erase(citer); 298 return false; 299 } 300 301 // Find full-hash matches. 302 std::vector<SBFullHashResult>& cached_hashes = cached_result.full_hashes; 303 for (size_t i = 0; i < cached_hashes.size(); ++i) { 304 if (SBFullHashEqual(full_hash, cached_hashes[i].hash)) 305 results->push_back(cached_hashes[i]); 306 } 307 308 return true; 309 } 310 311 } // namespace 312 313 // The default SafeBrowsingDatabaseFactory. 314 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 315 public: 316 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 317 bool enable_download_protection, 318 bool enable_client_side_whitelist, 319 bool enable_download_whitelist, 320 bool enable_extension_blacklist, 321 bool enable_side_effect_free_whitelist, 322 bool enable_ip_blacklist) OVERRIDE { 323 return new SafeBrowsingDatabaseNew( 324 new SafeBrowsingStoreFile, 325 enable_download_protection ? 
new SafeBrowsingStoreFile : NULL, 326 enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL, 327 enable_download_whitelist ? new SafeBrowsingStoreFile : NULL, 328 enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL, 329 enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL, 330 enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL); 331 } 332 333 SafeBrowsingDatabaseFactoryImpl() { } 334 335 private: 336 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl); 337 }; 338 339 // static 340 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; 341 342 // Factory method, non-thread safe. Caller has to make sure this s called 343 // on SafeBrowsing Thread. 344 // TODO(shess): There's no need for a factory any longer. Convert 345 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create() 346 // callers just construct things directly. 347 SafeBrowsingDatabase* SafeBrowsingDatabase::Create( 348 bool enable_download_protection, 349 bool enable_client_side_whitelist, 350 bool enable_download_whitelist, 351 bool enable_extension_blacklist, 352 bool enable_side_effect_free_whitelist, 353 bool enable_ip_blacklist) { 354 if (!factory_) 355 factory_ = new SafeBrowsingDatabaseFactoryImpl(); 356 return factory_->CreateSafeBrowsingDatabase( 357 enable_download_protection, 358 enable_client_side_whitelist, 359 enable_download_whitelist, 360 enable_extension_blacklist, 361 enable_side_effect_free_whitelist, 362 enable_ip_blacklist); 363 } 364 365 SafeBrowsingDatabase::~SafeBrowsingDatabase() { 366 } 367 368 // static 369 base::FilePath SafeBrowsingDatabase::BrowseDBFilename( 370 const base::FilePath& db_base_filename) { 371 return base::FilePath(db_base_filename.value() + kBrowseDBFile); 372 } 373 374 // static 375 base::FilePath SafeBrowsingDatabase::DownloadDBFilename( 376 const base::FilePath& db_base_filename) { 377 return base::FilePath(db_base_filename.value() + kDownloadDBFile); 378 } 379 380 // static 381 
base::FilePath SafeBrowsingDatabase::BloomFilterForFilename( 382 const base::FilePath& db_filename) { 383 return base::FilePath(db_filename.value() + kBloomFilterFile); 384 } 385 386 // static 387 base::FilePath SafeBrowsingDatabase::PrefixSetForFilename( 388 const base::FilePath& db_filename) { 389 return base::FilePath(db_filename.value() + kPrefixSetFile); 390 } 391 392 // static 393 base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( 394 const base::FilePath& db_filename) { 395 return base::FilePath(db_filename.value() + kCsdWhitelistDBFile); 396 } 397 398 // static 399 base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename( 400 const base::FilePath& db_filename) { 401 return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile); 402 } 403 404 // static 405 base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename( 406 const base::FilePath& db_filename) { 407 return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile); 408 } 409 410 // static 411 base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename( 412 const base::FilePath& db_filename) { 413 return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile); 414 } 415 416 // static 417 base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename( 418 const base::FilePath& db_filename) { 419 return base::FilePath(db_filename.value() + kIPBlacklistDBFile); 420 } 421 422 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { 423 if (list_id == safe_browsing_util::PHISH || 424 list_id == safe_browsing_util::MALWARE) { 425 return browse_store_.get(); 426 } else if (list_id == safe_browsing_util::BINURL) { 427 return download_store_.get(); 428 } else if (list_id == safe_browsing_util::CSDWHITELIST) { 429 return csd_whitelist_store_.get(); 430 } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) { 431 return download_whitelist_store_.get(); 432 } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) { 433 return 
extension_blacklist_store_.get(); 434 } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) { 435 return side_effect_free_whitelist_store_.get(); 436 } else if (list_id == safe_browsing_util::IPBLACKLIST) { 437 return ip_blacklist_store_.get(); 438 } 439 return NULL; 440 } 441 442 // static 443 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) { 444 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type, 445 FAILURE_DATABASE_MAX); 446 } 447 448 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() 449 : creation_loop_(base::MessageLoop::current()), 450 browse_store_(new SafeBrowsingStoreFile), 451 corruption_detected_(false), 452 change_detected_(false), 453 reset_factory_(this) { 454 DCHECK(browse_store_.get()); 455 DCHECK(!download_store_.get()); 456 DCHECK(!csd_whitelist_store_.get()); 457 DCHECK(!download_whitelist_store_.get()); 458 DCHECK(!extension_blacklist_store_.get()); 459 DCHECK(!side_effect_free_whitelist_store_.get()); 460 DCHECK(!ip_blacklist_store_.get()); 461 } 462 463 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( 464 SafeBrowsingStore* browse_store, 465 SafeBrowsingStore* download_store, 466 SafeBrowsingStore* csd_whitelist_store, 467 SafeBrowsingStore* download_whitelist_store, 468 SafeBrowsingStore* extension_blacklist_store, 469 SafeBrowsingStore* side_effect_free_whitelist_store, 470 SafeBrowsingStore* ip_blacklist_store) 471 : creation_loop_(base::MessageLoop::current()), 472 browse_store_(browse_store), 473 download_store_(download_store), 474 csd_whitelist_store_(csd_whitelist_store), 475 download_whitelist_store_(download_whitelist_store), 476 extension_blacklist_store_(extension_blacklist_store), 477 side_effect_free_whitelist_store_(side_effect_free_whitelist_store), 478 ip_blacklist_store_(ip_blacklist_store), 479 corruption_detected_(false), 480 reset_factory_(this) { 481 DCHECK(browse_store_.get()); 482 } 483 484 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { 485 // The DCHECK is disabled 
due to crbug.com/338486 . 486 // DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 487 } 488 489 void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) { 490 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 491 492 // This should not be run multiple times. 493 DCHECK(filename_base_.empty()); 494 495 filename_base_ = filename_base; 496 497 // TODO(shess): The various stores are really only necessary while doing 498 // updates, or when querying a store directly (see |ContainsDownloadUrl()|). 499 // The store variables are also tested to see if a list is enabled. Perhaps 500 // the stores could be refactored into an update object so that they are only 501 // live in memory while being actively used. The sense of enabled probably 502 // belongs in protocol_manager or database_manager. 503 504 browse_store_->Init( 505 BrowseDBFilename(filename_base_), 506 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 507 base::Unretained(this))); 508 509 { 510 // NOTE: There is no need to grab the lock in this function, since 511 // until it returns, there are no pointers to this class on other 512 // threads. Then again, that means there is no possibility of 513 // contention on the lock... 
514 base::AutoLock locked(lookup_lock_); 515 browse_gethash_cache_.clear(); 516 LoadPrefixSet(); 517 } 518 519 if (download_store_.get()) { 520 download_store_->Init( 521 DownloadDBFilename(filename_base_), 522 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 523 base::Unretained(this))); 524 } 525 526 if (csd_whitelist_store_.get()) { 527 csd_whitelist_store_->Init( 528 CsdWhitelistDBFilename(filename_base_), 529 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 530 base::Unretained(this))); 531 532 std::vector<SBAddFullHash> full_hashes; 533 if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) { 534 LoadWhitelist(full_hashes, &csd_whitelist_); 535 } else { 536 WhitelistEverything(&csd_whitelist_); 537 } 538 } else { 539 WhitelistEverything(&csd_whitelist_); // Just to be safe. 540 } 541 542 if (download_whitelist_store_.get()) { 543 download_whitelist_store_->Init( 544 DownloadWhitelistDBFilename(filename_base_), 545 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 546 base::Unretained(this))); 547 548 std::vector<SBAddFullHash> full_hashes; 549 if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) { 550 LoadWhitelist(full_hashes, &download_whitelist_); 551 } else { 552 WhitelistEverything(&download_whitelist_); 553 } 554 } else { 555 WhitelistEverything(&download_whitelist_); // Just to be safe. 
556 } 557 558 if (extension_blacklist_store_.get()) { 559 extension_blacklist_store_->Init( 560 ExtensionBlacklistDBFilename(filename_base_), 561 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 562 base::Unretained(this))); 563 } 564 565 if (side_effect_free_whitelist_store_.get()) { 566 const base::FilePath side_effect_free_whitelist_filename = 567 SideEffectFreeWhitelistDBFilename(filename_base_); 568 const base::FilePath side_effect_free_whitelist_prefix_set_filename = 569 PrefixSetForFilename(side_effect_free_whitelist_filename); 570 side_effect_free_whitelist_store_->Init( 571 side_effect_free_whitelist_filename, 572 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 573 base::Unretained(this))); 574 575 // Only use the prefix set if database is present and non-empty. 576 if (GetFileSizeOrZero(side_effect_free_whitelist_filename)) { 577 const base::TimeTicks before = base::TimeTicks::Now(); 578 side_effect_free_whitelist_prefix_set_ = 579 safe_browsing::PrefixSet::LoadFile( 580 side_effect_free_whitelist_prefix_set_filename); 581 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad", 582 base::TimeTicks::Now() - before); 583 if (!side_effect_free_whitelist_prefix_set_.get()) 584 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ); 585 } 586 } else { 587 // Delete any files of the side-effect free sidelist that may be around 588 // from when it was previously enabled. 
589 SafeBrowsingStoreFile::DeleteStore( 590 SideEffectFreeWhitelistDBFilename(filename_base_)); 591 base::DeleteFile( 592 PrefixSetForFilename(SideEffectFreeWhitelistDBFilename(filename_base_)), 593 false); 594 } 595 596 if (ip_blacklist_store_.get()) { 597 ip_blacklist_store_->Init( 598 IpBlacklistDBFilename(filename_base_), 599 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 600 base::Unretained(this))); 601 602 std::vector<SBAddFullHash> full_hashes; 603 if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) { 604 LoadIpBlacklist(full_hashes); 605 } else { 606 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. 607 } 608 } 609 } 610 611 bool SafeBrowsingDatabaseNew::ResetDatabase() { 612 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 613 614 // Delete files on disk. 615 // TODO(shess): Hard to see where one might want to delete without a 616 // reset. Perhaps inline |Delete()|? 617 if (!Delete()) 618 return false; 619 620 // Reset objects in memory. 621 { 622 base::AutoLock locked(lookup_lock_); 623 browse_gethash_cache_.clear(); 624 browse_prefix_set_.reset(); 625 side_effect_free_whitelist_prefix_set_.reset(); 626 ip_blacklist_.clear(); 627 } 628 // Wants to acquire the lock itself. 629 WhitelistEverything(&csd_whitelist_); 630 WhitelistEverything(&download_whitelist_); 631 return true; 632 } 633 634 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 635 const GURL& url, 636 std::vector<SBPrefix>* prefix_hits, 637 std::vector<SBFullHashResult>* cache_hits) { 638 // Clear the results first. 
639 prefix_hits->clear(); 640 cache_hits->clear(); 641 642 std::vector<SBFullHash> full_hashes; 643 BrowseFullHashesToCheck(url, false, &full_hashes); 644 if (full_hashes.empty()) 645 return false; 646 647 return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits); 648 } 649 650 bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes( 651 const std::vector<SBFullHash>& full_hashes, 652 std::vector<SBPrefix>* prefix_hits, 653 std::vector<SBFullHashResult>* cache_hits) { 654 // Used to determine cache expiration. 655 const base::Time now = base::Time::Now(); 656 657 // This function is called on the I/O thread, prevent changes to 658 // filter and caches. 659 base::AutoLock locked(lookup_lock_); 660 661 // |browse_prefix_set_| is empty until it is either read from disk, or the 662 // first update populates it. Bail out without a hit if not yet 663 // available. 664 if (!browse_prefix_set_.get()) 665 return false; 666 667 for (size_t i = 0; i < full_hashes.size(); ++i) { 668 if (!GetCachedFullHash(&browse_gethash_cache_, 669 full_hashes[i], 670 now, 671 cache_hits)) { 672 // No valid cached result, check the database. 673 if (browse_prefix_set_->Exists(full_hashes[i])) 674 prefix_hits->push_back(full_hashes[i].prefix); 675 } 676 } 677 678 // Multiple full hashes could share prefix, remove duplicates. 679 std::sort(prefix_hits->begin(), prefix_hits->end()); 680 prefix_hits->erase(std::unique(prefix_hits->begin(), prefix_hits->end()), 681 prefix_hits->end()); 682 683 return !prefix_hits->empty() || !cache_hits->empty(); 684 } 685 686 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( 687 const std::vector<GURL>& urls, 688 std::vector<SBPrefix>* prefix_hits) { 689 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 690 691 // Ignore this check when download checking is not enabled. 
692 if (!download_store_.get()) 693 return false; 694 695 std::vector<SBPrefix> prefixes; 696 GetDownloadUrlPrefixes(urls, &prefixes); 697 return MatchAddPrefixes(download_store_.get(), 698 safe_browsing_util::BINURL % 2, 699 prefixes, 700 prefix_hits); 701 } 702 703 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) { 704 // This method is theoretically thread-safe but we expect all calls to 705 // originate from the IO thread. 706 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 707 std::vector<SBFullHash> full_hashes; 708 BrowseFullHashesToCheck(url, true, &full_hashes); 709 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 710 } 711 712 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) { 713 std::vector<SBFullHash> full_hashes; 714 BrowseFullHashesToCheck(url, true, &full_hashes); 715 return ContainsWhitelistedHashes(download_whitelist_, full_hashes); 716 } 717 718 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes( 719 const std::vector<SBPrefix>& prefixes, 720 std::vector<SBPrefix>* prefix_hits) { 721 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 722 if (!extension_blacklist_store_) 723 return false; 724 725 return MatchAddPrefixes(extension_blacklist_store_.get(), 726 safe_browsing_util::EXTENSIONBLACKLIST % 2, 727 prefixes, 728 prefix_hits); 729 } 730 731 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl( 732 const GURL& url) { 733 std::string host; 734 std::string path; 735 std::string query; 736 safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query); 737 std::string url_to_check = host + path; 738 if (!query.empty()) 739 url_to_check += "?" 
+ query; 740 SBFullHash full_hash = SBFullHashForString(url_to_check); 741 742 // This function can be called on any thread, so lock against any changes 743 base::AutoLock locked(lookup_lock_); 744 745 // |side_effect_free_whitelist_prefix_set_| is empty until it is either read 746 // from disk, or the first update populates it. Bail out without a hit if 747 // not yet available. 748 if (!side_effect_free_whitelist_prefix_set_.get()) 749 return false; 750 751 return side_effect_free_whitelist_prefix_set_->Exists(full_hash); 752 } 753 754 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) { 755 net::IPAddressNumber ip_number; 756 if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) 757 return false; 758 if (ip_number.size() == net::kIPv4AddressSize) 759 ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number); 760 if (ip_number.size() != net::kIPv6AddressSize) 761 return false; // better safe than sorry. 762 763 // This function can be called from any thread. 
764 base::AutoLock locked(lookup_lock_); 765 for (IPBlacklist::const_iterator it = ip_blacklist_.begin(); 766 it != ip_blacklist_.end(); 767 ++it) { 768 const std::string& mask = it->first; 769 DCHECK_EQ(mask.size(), ip_number.size()); 770 std::string subnet(net::kIPv6AddressSize, '\0'); 771 for (size_t i = 0; i < net::kIPv6AddressSize; ++i) { 772 subnet[i] = ip_number[i] & mask[i]; 773 } 774 const std::string hash = base::SHA1HashString(subnet); 775 DVLOG(2) << "Lookup Malware IP: " 776 << " ip:" << ip_address 777 << " mask:" << base::HexEncode(mask.data(), mask.size()) 778 << " subnet:" << base::HexEncode(subnet.data(), subnet.size()) 779 << " hash:" << base::HexEncode(hash.data(), hash.size()); 780 if (it->second.count(hash) > 0) { 781 return true; 782 } 783 } 784 return false; 785 } 786 787 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString( 788 const std::string& str) { 789 std::vector<SBFullHash> hashes; 790 hashes.push_back(SBFullHashForString(str)); 791 return ContainsWhitelistedHashes(download_whitelist_, hashes); 792 } 793 794 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes( 795 const SBWhitelist& whitelist, 796 const std::vector<SBFullHash>& hashes) { 797 base::AutoLock l(lookup_lock_); 798 if (whitelist.second) 799 return true; 800 for (std::vector<SBFullHash>::const_iterator it = hashes.begin(); 801 it != hashes.end(); ++it) { 802 if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), 803 *it, SBFullHashLess)) { 804 return true; 805 } 806 } 807 return false; 808 } 809 810 // Helper to insert add-chunk entries. 811 void SafeBrowsingDatabaseNew::InsertAddChunk( 812 SafeBrowsingStore* store, 813 const safe_browsing_util::ListType list_id, 814 const SBChunkData& chunk_data) { 815 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 816 DCHECK(store); 817 818 // The server can give us a chunk that we already have because 819 // it's part of a range. Don't add it again. 
820 const int chunk_id = chunk_data.ChunkNumber(); 821 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 822 if (store->CheckAddChunk(encoded_chunk_id)) 823 return; 824 825 store->SetAddChunk(encoded_chunk_id); 826 if (chunk_data.IsPrefix()) { 827 const size_t c = chunk_data.PrefixCount(); 828 for (size_t i = 0; i < c; ++i) { 829 STATS_COUNTER("SB.PrefixAdd", 1); 830 store->WriteAddPrefix(encoded_chunk_id, chunk_data.PrefixAt(i)); 831 } 832 } else { 833 const size_t c = chunk_data.FullHashCount(); 834 for (size_t i = 0; i < c; ++i) { 835 STATS_COUNTER("SB.PrefixAddFull", 1); 836 store->WriteAddHash(encoded_chunk_id, chunk_data.FullHashAt(i)); 837 } 838 } 839 } 840 841 // Helper to insert sub-chunk entries. 842 void SafeBrowsingDatabaseNew::InsertSubChunk( 843 SafeBrowsingStore* store, 844 const safe_browsing_util::ListType list_id, 845 const SBChunkData& chunk_data) { 846 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 847 DCHECK(store); 848 849 // The server can give us a chunk that we already have because 850 // it's part of a range. Don't add it again. 
851 const int chunk_id = chunk_data.ChunkNumber(); 852 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 853 if (store->CheckSubChunk(encoded_chunk_id)) 854 return; 855 856 store->SetSubChunk(encoded_chunk_id); 857 if (chunk_data.IsPrefix()) { 858 const size_t c = chunk_data.PrefixCount(); 859 for (size_t i = 0; i < c; ++i) { 860 STATS_COUNTER("SB.PrefixSub", 1); 861 const int add_chunk_id = chunk_data.AddChunkNumberAt(i); 862 const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id); 863 store->WriteSubPrefix(encoded_chunk_id, encoded_add_chunk_id, 864 chunk_data.PrefixAt(i)); 865 } 866 } else { 867 const size_t c = chunk_data.FullHashCount(); 868 for (size_t i = 0; i < c; ++i) { 869 STATS_COUNTER("SB.PrefixSubFull", 1); 870 const int add_chunk_id = chunk_data.AddChunkNumberAt(i); 871 const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id); 872 store->WriteSubHash(encoded_chunk_id, encoded_add_chunk_id, 873 chunk_data.FullHashAt(i)); 874 } 875 } 876 } 877 878 void SafeBrowsingDatabaseNew::InsertChunks( 879 const std::string& list_name, 880 const std::vector<SBChunkData*>& chunks) { 881 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 882 883 if (corruption_detected_ || chunks.empty()) 884 return; 885 886 const base::TimeTicks before = base::TimeTicks::Now(); 887 888 // TODO(shess): The caller should just pass list_id. 889 const safe_browsing_util::ListType list_id = 890 safe_browsing_util::GetListId(list_name); 891 892 SafeBrowsingStore* store = GetStore(list_id); 893 if (!store) return; 894 895 change_detected_ = true; 896 897 // TODO(shess): I believe that the list is always add or sub. Can this use 898 // that productively? 
899 store->BeginChunk(); 900 for (size_t i = 0; i < chunks.size(); ++i) { 901 if (chunks[i]->IsAdd()) { 902 InsertAddChunk(store, list_id, *chunks[i]); 903 } else if (chunks[i]->IsSub()) { 904 InsertSubChunk(store, list_id, *chunks[i]); 905 } else { 906 NOTREACHED(); 907 } 908 } 909 store->FinishChunk(); 910 911 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before); 912 } 913 914 void SafeBrowsingDatabaseNew::DeleteChunks( 915 const std::vector<SBChunkDelete>& chunk_deletes) { 916 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 917 918 if (corruption_detected_ || chunk_deletes.empty()) 919 return; 920 921 const std::string& list_name = chunk_deletes.front().list_name; 922 const safe_browsing_util::ListType list_id = 923 safe_browsing_util::GetListId(list_name); 924 925 SafeBrowsingStore* store = GetStore(list_id); 926 if (!store) return; 927 928 change_detected_ = true; 929 930 for (size_t i = 0; i < chunk_deletes.size(); ++i) { 931 std::vector<int> chunk_numbers; 932 RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers); 933 for (size_t j = 0; j < chunk_numbers.size(); ++j) { 934 const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id); 935 if (chunk_deletes[i].is_sub_del) 936 store->DeleteSubChunk(encoded_chunk_id); 937 else 938 store->DeleteAddChunk(encoded_chunk_id); 939 } 940 } 941 } 942 943 void SafeBrowsingDatabaseNew::CacheHashResults( 944 const std::vector<SBPrefix>& prefixes, 945 const std::vector<SBFullHashResult>& full_hits, 946 const base::TimeDelta& cache_lifetime) { 947 const base::Time expire_after = base::Time::Now() + cache_lifetime; 948 949 // This is called on the I/O thread, lock against updates. 950 base::AutoLock locked(lookup_lock_); 951 952 // Create or reset all cached results for these prefixes. 953 for (size_t i = 0; i < prefixes.size(); ++i) { 954 browse_gethash_cache_[prefixes[i]] = SBCachedFullHashResult(expire_after); 955 } 956 957 // Insert any fullhash hits. 
Note that there may be one, multiple, or no 958 // fullhashes for any given entry in |prefixes|. 959 for (size_t i = 0; i < full_hits.size(); ++i) { 960 const SBPrefix prefix = full_hits[i].hash.prefix; 961 browse_gethash_cache_[prefix].full_hashes.push_back(full_hits[i]); 962 } 963 } 964 965 bool SafeBrowsingDatabaseNew::UpdateStarted( 966 std::vector<SBListChunkRanges>* lists) { 967 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 968 DCHECK(lists); 969 970 // If |BeginUpdate()| fails, reset the database. 971 if (!browse_store_->BeginUpdate()) { 972 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); 973 HandleCorruptDatabase(); 974 return false; 975 } 976 977 if (download_store_.get() && !download_store_->BeginUpdate()) { 978 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN); 979 HandleCorruptDatabase(); 980 return false; 981 } 982 983 if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) { 984 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); 985 HandleCorruptDatabase(); 986 return false; 987 } 988 989 if (download_whitelist_store_.get() && 990 !download_whitelist_store_->BeginUpdate()) { 991 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); 992 HandleCorruptDatabase(); 993 return false; 994 } 995 996 if (extension_blacklist_store_ && 997 !extension_blacklist_store_->BeginUpdate()) { 998 RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN); 999 HandleCorruptDatabase(); 1000 return false; 1001 } 1002 1003 if (side_effect_free_whitelist_store_ && 1004 !side_effect_free_whitelist_store_->BeginUpdate()) { 1005 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN); 1006 HandleCorruptDatabase(); 1007 return false; 1008 } 1009 1010 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) { 1011 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN); 1012 HandleCorruptDatabase(); 1013 return false; 1014 } 1015 1016 { 1017 base::AutoLock locked(lookup_lock_); 1018 // Cached fullhash results must be cleared on every 
database update (whether 1019 // successful or not.) 1020 browse_gethash_cache_.clear(); 1021 } 1022 1023 UpdateChunkRangesForLists(browse_store_.get(), 1024 safe_browsing_util::kMalwareList, 1025 safe_browsing_util::kPhishingList, 1026 lists); 1027 1028 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been 1029 // deprecated. Code to delete the list from the store shows ~15k hits/day as 1030 // of Feb 2014, so it has been removed. Everything _should_ be resilient to 1031 // extra data of that sort. 1032 UpdateChunkRangesForList(download_store_.get(), 1033 safe_browsing_util::kBinUrlList, lists); 1034 1035 UpdateChunkRangesForList(csd_whitelist_store_.get(), 1036 safe_browsing_util::kCsdWhiteList, lists); 1037 1038 UpdateChunkRangesForList(download_whitelist_store_.get(), 1039 safe_browsing_util::kDownloadWhiteList, lists); 1040 1041 UpdateChunkRangesForList(extension_blacklist_store_.get(), 1042 safe_browsing_util::kExtensionBlacklist, lists); 1043 1044 UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(), 1045 safe_browsing_util::kSideEffectFreeWhitelist, lists); 1046 1047 UpdateChunkRangesForList(ip_blacklist_store_.get(), 1048 safe_browsing_util::kIPBlacklist, lists); 1049 1050 corruption_detected_ = false; 1051 change_detected_ = false; 1052 return true; 1053 } 1054 1055 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { 1056 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1057 1058 // The update may have failed due to corrupt storage (for instance, 1059 // an excessive number of invalid add_chunks and sub_chunks). 1060 // Double-check that the databases are valid. 1061 // TODO(shess): Providing a checksum for the add_chunk and sub_chunk 1062 // sections would allow throwing a corruption error in 1063 // UpdateStarted(). 
1064 if (!update_succeeded) { 1065 if (!browse_store_->CheckValidity()) 1066 DLOG(ERROR) << "Safe-browsing browse database corrupt."; 1067 1068 if (download_store_.get() && !download_store_->CheckValidity()) 1069 DLOG(ERROR) << "Safe-browsing download database corrupt."; 1070 1071 if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity()) 1072 DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt."; 1073 1074 if (download_whitelist_store_.get() && 1075 !download_whitelist_store_->CheckValidity()) { 1076 DLOG(ERROR) << "Safe-browsing download whitelist database corrupt."; 1077 } 1078 1079 if (extension_blacklist_store_ && 1080 !extension_blacklist_store_->CheckValidity()) { 1081 DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt."; 1082 } 1083 1084 if (side_effect_free_whitelist_store_ && 1085 !side_effect_free_whitelist_store_->CheckValidity()) { 1086 DLOG(ERROR) << "Safe-browsing side-effect free whitelist database " 1087 << "corrupt."; 1088 } 1089 1090 if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) { 1091 DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt."; 1092 } 1093 } 1094 1095 if (corruption_detected_) 1096 return; 1097 1098 // Unroll the transaction if there was a protocol error or if the 1099 // transaction was empty. This will leave the prefix set, the 1100 // pending hashes, and the prefix miss cache in place. 1101 if (!update_succeeded || !change_detected_) { 1102 // Track empty updates to answer questions at http://crbug.com/72216 . 
1103 if (update_succeeded && !change_detected_) 1104 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0); 1105 browse_store_->CancelUpdate(); 1106 if (download_store_.get()) 1107 download_store_->CancelUpdate(); 1108 if (csd_whitelist_store_.get()) 1109 csd_whitelist_store_->CancelUpdate(); 1110 if (download_whitelist_store_.get()) 1111 download_whitelist_store_->CancelUpdate(); 1112 if (extension_blacklist_store_) 1113 extension_blacklist_store_->CancelUpdate(); 1114 if (side_effect_free_whitelist_store_) 1115 side_effect_free_whitelist_store_->CancelUpdate(); 1116 if (ip_blacklist_store_) 1117 ip_blacklist_store_->CancelUpdate(); 1118 return; 1119 } 1120 1121 if (download_store_) { 1122 int64 size_bytes = UpdateHashPrefixStore( 1123 DownloadDBFilename(filename_base_), 1124 download_store_.get(), 1125 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH); 1126 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", 1127 static_cast<int>(size_bytes / 1024)); 1128 } 1129 1130 UpdateBrowseStore(); 1131 UpdateWhitelistStore(CsdWhitelistDBFilename(filename_base_), 1132 csd_whitelist_store_.get(), 1133 &csd_whitelist_); 1134 UpdateWhitelistStore(DownloadWhitelistDBFilename(filename_base_), 1135 download_whitelist_store_.get(), 1136 &download_whitelist_); 1137 1138 if (extension_blacklist_store_) { 1139 int64 size_bytes = UpdateHashPrefixStore( 1140 ExtensionBlacklistDBFilename(filename_base_), 1141 extension_blacklist_store_.get(), 1142 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH); 1143 UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes", 1144 static_cast<int>(size_bytes / 1024)); 1145 } 1146 1147 if (side_effect_free_whitelist_store_) 1148 UpdateSideEffectFreeWhitelistStore(); 1149 1150 if (ip_blacklist_store_) 1151 UpdateIpBlacklistStore(); 1152 } 1153 1154 void SafeBrowsingDatabaseNew::UpdateWhitelistStore( 1155 const base::FilePath& store_filename, 1156 SafeBrowsingStore* store, 1157 SBWhitelist* whitelist) { 1158 if (!store) 1159 return; 1160 1161 // Note: |builder| will not 
be empty. The current data store implementation 1162 // stores all full-length hashes as both full and prefix hashes. 1163 safe_browsing::PrefixSetBuilder builder; 1164 std::vector<SBAddFullHash> full_hashes; 1165 if (!store->FinishUpdate(&builder, &full_hashes)) { 1166 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); 1167 WhitelistEverything(whitelist); 1168 return; 1169 } 1170 1171 #if defined(OS_MACOSX) 1172 base::mac::SetFileBackupExclusion(store_filename); 1173 #endif 1174 1175 LoadWhitelist(full_hashes, whitelist); 1176 } 1177 1178 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( 1179 const base::FilePath& store_filename, 1180 SafeBrowsingStore* store, 1181 FailureType failure_type) { 1182 // These results are not used after this call. Simply ignore the 1183 // returned value after FinishUpdate(...). 1184 safe_browsing::PrefixSetBuilder builder; 1185 std::vector<SBAddFullHash> add_full_hashes_result; 1186 1187 if (!store->FinishUpdate(&builder, &add_full_hashes_result)) 1188 RecordFailure(failure_type); 1189 1190 #if defined(OS_MACOSX) 1191 base::mac::SetFileBackupExclusion(store_filename); 1192 #endif 1193 1194 return GetFileSizeOrZero(store_filename); 1195 } 1196 1197 void SafeBrowsingDatabaseNew::UpdateBrowseStore() { 1198 // Measure the amount of IO during the filter build. 1199 base::IoCounters io_before, io_after; 1200 base::ProcessHandle handle = base::Process::Current().handle(); 1201 scoped_ptr<base::ProcessMetrics> metric( 1202 #if !defined(OS_MACOSX) 1203 base::ProcessMetrics::CreateProcessMetrics(handle) 1204 #else 1205 // Getting stats only for the current process is enough, so NULL is fine. 1206 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) 1207 #endif 1208 ); 1209 1210 // IoCounters are currently not supported on Mac, and may not be 1211 // available for Linux, so we check the result and only show IO 1212 // stats if they are available. 
1213 const bool got_counters = metric->GetIOCounters(&io_before); 1214 1215 const base::TimeTicks before = base::TimeTicks::Now(); 1216 1217 // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the 1218 // fly? Other clients use the SBAddFullHash vector, but AFAICT they only use 1219 // the SBFullHash portion. It would need an accessor on PrefixSet. 1220 safe_browsing::PrefixSetBuilder builder; 1221 std::vector<SBAddFullHash> add_full_hashes; 1222 if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) { 1223 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); 1224 return; 1225 } 1226 1227 std::vector<SBFullHash> full_hash_results; 1228 for (size_t i = 0; i < add_full_hashes.size(); ++i) { 1229 full_hash_results.push_back(add_full_hashes[i].full_hash); 1230 } 1231 1232 scoped_ptr<safe_browsing::PrefixSet> 1233 prefix_set(builder.GetPrefixSet(full_hash_results)); 1234 1235 // Swap in the newly built filter. 1236 { 1237 base::AutoLock locked(lookup_lock_); 1238 browse_prefix_set_.swap(prefix_set); 1239 } 1240 1241 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); 1242 1243 // Persist the prefix set to disk. Since only this thread changes 1244 // |browse_prefix_set_|, there is no need to lock. 1245 WritePrefixSet(); 1246 1247 // Gather statistics. 
1248 if (got_counters && metric->GetIOCounters(&io_after)) { 1249 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes", 1250 static_cast<int>(io_after.ReadTransferCount - 1251 io_before.ReadTransferCount) / 1024); 1252 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes", 1253 static_cast<int>(io_after.WriteTransferCount - 1254 io_before.WriteTransferCount) / 1024); 1255 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", 1256 static_cast<int>(io_after.ReadOperationCount - 1257 io_before.ReadOperationCount)); 1258 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", 1259 static_cast<int>(io_after.WriteOperationCount - 1260 io_before.WriteOperationCount)); 1261 } 1262 1263 const base::FilePath browse_filename = BrowseDBFilename(filename_base_); 1264 const int64 file_size = GetFileSizeOrZero(browse_filename); 1265 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", 1266 static_cast<int>(file_size / 1024)); 1267 1268 #if defined(OS_MACOSX) 1269 base::mac::SetFileBackupExclusion(browse_filename); 1270 #endif 1271 } 1272 1273 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { 1274 safe_browsing::PrefixSetBuilder builder; 1275 std::vector<SBAddFullHash> add_full_hashes_result; 1276 1277 if (!side_effect_free_whitelist_store_->FinishUpdate( 1278 &builder, &add_full_hashes_result)) { 1279 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); 1280 return; 1281 } 1282 scoped_ptr<safe_browsing::PrefixSet> 1283 prefix_set(builder.GetPrefixSetNoHashes()); 1284 1285 // Swap in the newly built prefix set. 
1286 { 1287 base::AutoLock locked(lookup_lock_); 1288 side_effect_free_whitelist_prefix_set_.swap(prefix_set); 1289 } 1290 1291 const base::FilePath side_effect_free_whitelist_filename = 1292 SideEffectFreeWhitelistDBFilename(filename_base_); 1293 const base::FilePath side_effect_free_whitelist_prefix_set_filename = 1294 PrefixSetForFilename(side_effect_free_whitelist_filename); 1295 const base::TimeTicks before = base::TimeTicks::Now(); 1296 const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile( 1297 side_effect_free_whitelist_prefix_set_filename); 1298 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite", 1299 base::TimeTicks::Now() - before); 1300 1301 if (!write_ok) 1302 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE); 1303 1304 // Gather statistics. 1305 int64 file_size = GetFileSizeOrZero( 1306 side_effect_free_whitelist_prefix_set_filename); 1307 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes", 1308 static_cast<int>(file_size / 1024)); 1309 file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename); 1310 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes", 1311 static_cast<int>(file_size / 1024)); 1312 1313 #if defined(OS_MACOSX) 1314 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename); 1315 base::mac::SetFileBackupExclusion( 1316 side_effect_free_whitelist_prefix_set_filename); 1317 #endif 1318 } 1319 1320 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() { 1321 // Note: prefixes will not be empty. The current data store implementation 1322 // stores all full-length hashes as both full and prefix hashes. 1323 safe_browsing::PrefixSetBuilder builder; 1324 std::vector<SBAddFullHash> full_hashes; 1325 if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) { 1326 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH); 1327 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. 
1328 return; 1329 } 1330 1331 #if defined(OS_MACOSX) 1332 base::mac::SetFileBackupExclusion(IpBlacklistDBFilename(filename_base_)); 1333 #endif 1334 1335 LoadIpBlacklist(full_hashes); 1336 } 1337 1338 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() { 1339 // Reset the database after the current task has unwound (but only 1340 // reset once within the scope of a given task). 1341 if (!reset_factory_.HasWeakPtrs()) { 1342 RecordFailure(FAILURE_DATABASE_CORRUPT); 1343 base::MessageLoop::current()->PostTask(FROM_HERE, 1344 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase, 1345 reset_factory_.GetWeakPtr())); 1346 } 1347 } 1348 1349 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() { 1350 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER); 1351 corruption_detected_ = true; // Stop updating the database. 1352 ResetDatabase(); 1353 1354 // NOTE(shess): ResetDatabase() should remove the corruption, so this should 1355 // only happen once. If you are here because you are hitting this after a 1356 // restart, then I would be very interested in working with you to figure out 1357 // what is happening, since it may affect real users. 1358 DLOG(FATAL) << "SafeBrowsing database was corrupt and reset"; 1359 } 1360 1361 // TODO(shess): I'm not clear why this code doesn't have any 1362 // real error-handling. 1363 void SafeBrowsingDatabaseNew::LoadPrefixSet() { 1364 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1365 DCHECK(!filename_base_.empty()); 1366 1367 const base::FilePath browse_filename = BrowseDBFilename(filename_base_); 1368 const base::FilePath browse_prefix_set_filename = 1369 PrefixSetForFilename(browse_filename); 1370 1371 // Only use the prefix set if database is present and non-empty. 1372 if (!GetFileSizeOrZero(browse_filename)) 1373 return; 1374 1375 // Cleanup any stale bloom filter (no longer used). 1376 // TODO(shess): Track existence to drive removal of this code? 
1377 const base::FilePath bloom_filter_filename = 1378 BloomFilterForFilename(browse_filename); 1379 base::DeleteFile(bloom_filter_filename, false); 1380 1381 const base::TimeTicks before = base::TimeTicks::Now(); 1382 browse_prefix_set_ = safe_browsing::PrefixSet::LoadFile( 1383 browse_prefix_set_filename); 1384 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before); 1385 1386 if (!browse_prefix_set_.get()) 1387 RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ); 1388 } 1389 1390 bool SafeBrowsingDatabaseNew::Delete() { 1391 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1392 DCHECK(!filename_base_.empty()); 1393 1394 // TODO(shess): This is a mess. SafeBrowsingFileStore::Delete() closes the 1395 // store before calling DeleteStore(). DeleteStore() deletes transient files 1396 // in addition to the main file. Probably all of these should be converted to 1397 // a helper which calls Delete() if the store exists, else DeleteStore() on 1398 // the generated filename. 1399 1400 // TODO(shess): Determine if the histograms are useful in any way. I cannot 1401 // recall any action taken as a result of their values, in which case it might 1402 // make more sense to histogram an overall thumbs-up/-down and just dig deeper 1403 // if something looks wrong. 1404 1405 const bool r1 = browse_store_->Delete(); 1406 if (!r1) 1407 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1408 1409 const bool r2 = download_store_.get() ? download_store_->Delete() : true; 1410 if (!r2) 1411 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1412 1413 const bool r3 = csd_whitelist_store_.get() ? 1414 csd_whitelist_store_->Delete() : true; 1415 if (!r3) 1416 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1417 1418 const bool r4 = download_whitelist_store_.get() ? 
1419 download_whitelist_store_->Delete() : true; 1420 if (!r4) 1421 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1422 1423 const base::FilePath browse_filename = BrowseDBFilename(filename_base_); 1424 const base::FilePath bloom_filter_filename = 1425 BloomFilterForFilename(browse_filename); 1426 const bool r5 = base::DeleteFile(bloom_filter_filename, false); 1427 if (!r5) 1428 RecordFailure(FAILURE_DATABASE_FILTER_DELETE); 1429 1430 const base::FilePath browse_prefix_set_filename = 1431 PrefixSetForFilename(browse_filename); 1432 const bool r6 = base::DeleteFile(browse_prefix_set_filename, false); 1433 if (!r6) 1434 RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE); 1435 1436 const base::FilePath extension_blacklist_filename = 1437 ExtensionBlacklistDBFilename(filename_base_); 1438 const bool r7 = base::DeleteFile(extension_blacklist_filename, false); 1439 if (!r7) 1440 RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE); 1441 1442 const base::FilePath side_effect_free_whitelist_filename = 1443 SideEffectFreeWhitelistDBFilename(filename_base_); 1444 const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename, 1445 false); 1446 if (!r8) 1447 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE); 1448 1449 const base::FilePath side_effect_free_whitelist_prefix_set_filename = 1450 PrefixSetForFilename(side_effect_free_whitelist_filename); 1451 const bool r9 = base::DeleteFile( 1452 side_effect_free_whitelist_prefix_set_filename, 1453 false); 1454 if (!r9) 1455 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE); 1456 1457 const bool r10 = base::DeleteFile(IpBlacklistDBFilename(filename_base_), 1458 false); 1459 if (!r10) 1460 RecordFailure(FAILURE_IP_BLACKLIST_DELETE); 1461 1462 return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10; 1463 } 1464 1465 void SafeBrowsingDatabaseNew::WritePrefixSet() { 1466 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1467 1468 if (!browse_prefix_set_.get()) 1469 return; 1470 1471 const 
base::FilePath browse_filename = BrowseDBFilename(filename_base_); 1472 const base::FilePath browse_prefix_set_filename = 1473 PrefixSetForFilename(browse_filename); 1474 1475 const base::TimeTicks before = base::TimeTicks::Now(); 1476 const bool write_ok = browse_prefix_set_->WriteFile( 1477 browse_prefix_set_filename); 1478 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before); 1479 1480 const int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename); 1481 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes", 1482 static_cast<int>(file_size / 1024)); 1483 1484 if (!write_ok) 1485 RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE); 1486 1487 #if defined(OS_MACOSX) 1488 base::mac::SetFileBackupExclusion(browse_prefix_set_filename); 1489 #endif 1490 } 1491 1492 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) { 1493 base::AutoLock locked(lookup_lock_); 1494 whitelist->second = true; 1495 whitelist->first.clear(); 1496 } 1497 1498 void SafeBrowsingDatabaseNew::LoadWhitelist( 1499 const std::vector<SBAddFullHash>& full_hashes, 1500 SBWhitelist* whitelist) { 1501 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1502 if (full_hashes.size() > kMaxWhitelistSize) { 1503 WhitelistEverything(whitelist); 1504 return; 1505 } 1506 1507 std::vector<SBFullHash> new_whitelist; 1508 new_whitelist.reserve(full_hashes.size()); 1509 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); 1510 it != full_hashes.end(); ++it) { 1511 new_whitelist.push_back(it->full_hash); 1512 } 1513 std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess); 1514 1515 SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl); 1516 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(), 1517 kill_switch, SBFullHashLess)) { 1518 // The kill switch is whitelisted hence we whitelist all URLs. 
1519 WhitelistEverything(whitelist); 1520 } else { 1521 base::AutoLock locked(lookup_lock_); 1522 whitelist->second = false; 1523 whitelist->first.swap(new_whitelist); 1524 } 1525 } 1526 1527 void SafeBrowsingDatabaseNew::LoadIpBlacklist( 1528 const std::vector<SBAddFullHash>& full_hashes) { 1529 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1530 IPBlacklist new_blacklist; 1531 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); 1532 it != full_hashes.end(); 1533 ++it) { 1534 const char* full_hash = it->full_hash.full_hash; 1535 DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash)); 1536 // The format of the IP blacklist is: 1537 // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes. 1538 std::string hashed_ip_prefix(full_hash, base::kSHA1Length); 1539 size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]); 1540 if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) { 1541 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID); 1542 new_blacklist.clear(); // Load empty blacklist. 1543 break; 1544 } 1545 1546 // We precompute the mask for the given subnet size to speed up lookups. 1547 // Basically we need to create a 16B long string which has the highest 1548 // |size| bits sets to one. 
1549 std::string mask(net::kIPv6AddressSize, '\0'); 1550 mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF'); 1551 if ((prefix_size % 8) != 0) { 1552 mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8)); 1553 } 1554 DVLOG(2) << "Inserting malicious IP: " 1555 << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length) 1556 << " mask:" << base::HexEncode(mask.data(), mask.size()) 1557 << " prefix_size:" << prefix_size 1558 << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(), 1559 hashed_ip_prefix.size()); 1560 new_blacklist[mask].insert(hashed_ip_prefix); 1561 } 1562 1563 base::AutoLock locked(lookup_lock_); 1564 ip_blacklist_.swap(new_blacklist); 1565 } 1566 1567 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() { 1568 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl); 1569 std::vector<SBFullHash> full_hashes; 1570 full_hashes.push_back(malware_kill_switch); 1571 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 1572 } 1573 1574 bool SafeBrowsingDatabaseNew::IsCsdWhitelistKillSwitchOn() { 1575 return csd_whitelist_.second; 1576 } 1577