// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/safe_browsing_database.h"

#include <algorithm>
#include <iterator>

#include "base/bind.h"
#include "base/file_util.h"
#include "base/message_loop/message_loop.h"
#include "base/metrics/histogram.h"
#include "base/metrics/stats_counters.h"
#include "base/process/process.h"
#include "base/process/process_metrics.h"
#include "base/sha1.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/stringprintf.h"
#include "base/time/time.h"
#include "chrome/browser/safe_browsing/prefix_set.h"
#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
#include "content/public/browser/browser_thread.h"
#include "crypto/sha2.h"
#include "net/base/net_util.h"
#include "url/gurl.h"

#if defined(OS_MACOSX)
#include "base/mac/mac_util.h"
#endif

using content::BrowserThread;

namespace {

// Each suffix below is appended to a base filename to form the on-disk name
// of one store (see the *Filename() helpers on SafeBrowsingDatabase).

// Filename suffix for the bloom filter.
const base::FilePath::CharType kBloomFilterFile[] =
    FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for the prefix set.
const base::FilePath::CharType kPrefixSetFile[] =
    FILE_PATH_LITERAL(" Prefix Set");
// Filename suffix for download store.
const base::FilePath::CharType kDownloadDBFile[] =
    FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
const base::FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for the download whitelist store.
const base::FilePath::CharType kDownloadWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for the extension blacklist store.
const base::FilePath::CharType kExtensionBlacklistDBFile[] =
    FILE_PATH_LITERAL(" Extension Blacklist");
// Filename suffix for the side-effect free whitelist store.
const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
// Filename suffix for the csd malware IP blacklist store.
const base::FilePath::CharType kIPBlacklistDBFile[] =
    FILE_PATH_LITERAL(" IP Blacklist");

// Filename suffix for browse store.
// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
// Unfortunately, to change the name implies lots of transition code
// for little benefit. If/when file formats change (say to put all
// the data in one file), that would be a convenient point to rectify
// this.
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");

// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries than this, all lookups to
// the whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// If the hash of this exact expression is on a whitelist then the
// malware IP blacklisting feature will be disabled in csd.
// Don't change this!
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// NOTE(review): presumably the inclusive bounds, in bits, on the prefix
// length accepted for IP blacklist entries -- confirm against the code that
// loads the IP blacklist.
const size_t kMaxIpPrefixSize = 128;
const size_t kMinIpPrefixSize = 1;

// To save space, the incoming |chunk_id| and |list_id| are combined
// into an |encoded_chunk_id| for storage by shifting the |list_id|
// into the low-order bits. These functions decode that information.
91 // TODO(lzheng): It was reasonable when database is saved in sqlite, but 92 // there should be better ways to save chunk_id and list_id after we use 93 // SafeBrowsingStoreFile. 94 int GetListIdBit(const int encoded_chunk_id) { 95 return encoded_chunk_id & 1; 96 } 97 int DecodeChunkId(int encoded_chunk_id) { 98 return encoded_chunk_id >> 1; 99 } 100 int EncodeChunkId(const int chunk, const int list_id) { 101 DCHECK_NE(list_id, safe_browsing_util::INVALID); 102 return chunk << 1 | list_id % 2; 103 } 104 105 // Generate the set of full hashes to check for |url|. If 106 // |include_whitelist_hashes| is true we will generate additional path-prefixes 107 // to match against the csd whitelist. E.g., if the path-prefix /foo is on the 108 // whitelist it should also match /foo/bar which is not the case for all the 109 // other lists. We'll also always add a pattern for the empty path. 110 // TODO(shess): This function is almost the same as 111 // |CompareFullHashes()| in safe_browsing_util.cc, except that code 112 // does an early exit on match. Since match should be the infrequent 113 // case (phishing or malware found), consider combining this function 114 // with that one. 115 void BrowseFullHashesToCheck(const GURL& url, 116 bool include_whitelist_hashes, 117 std::vector<SBFullHash>* full_hashes) { 118 std::vector<std::string> hosts; 119 if (url.HostIsIPAddress()) { 120 hosts.push_back(url.host()); 121 } else { 122 safe_browsing_util::GenerateHostsToCheck(url, &hosts); 123 } 124 125 std::vector<std::string> paths; 126 safe_browsing_util::GeneratePathsToCheck(url, &paths); 127 128 for (size_t i = 0; i < hosts.size(); ++i) { 129 for (size_t j = 0; j < paths.size(); ++j) { 130 const std::string& path = paths[j]; 131 full_hashes->push_back(SBFullHashForString(hosts[i] + path)); 132 133 // We may have /foo as path-prefix in the whitelist which should 134 // also match with /foo/bar and /foo?bar. 
Hence, for every path 135 // that ends in '/' we also add the path without the slash. 136 if (include_whitelist_hashes && 137 path.size() > 1 && 138 path[path.size() - 1] == '/') { 139 full_hashes->push_back( 140 SBFullHashForString(hosts[i] + path.substr(0, path.size() - 1))); 141 } 142 } 143 } 144 } 145 146 // Get the prefixes matching the download |urls|. 147 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls, 148 std::vector<SBPrefix>* prefixes) { 149 std::vector<SBFullHash> full_hashes; 150 for (size_t i = 0; i < urls.size(); ++i) 151 BrowseFullHashesToCheck(urls[i], false, &full_hashes); 152 153 for (size_t i = 0; i < full_hashes.size(); ++i) 154 prefixes->push_back(full_hashes[i].prefix); 155 } 156 157 // Helper function to compare addprefixes in |store| with |prefixes|. 158 // The |list_bit| indicates which list (url or hash) to compare. 159 // 160 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain 161 // the actual matching prefixes. 162 bool MatchAddPrefixes(SafeBrowsingStore* store, 163 int list_bit, 164 const std::vector<SBPrefix>& prefixes, 165 std::vector<SBPrefix>* prefix_hits) { 166 prefix_hits->clear(); 167 bool found_match = false; 168 169 SBAddPrefixes add_prefixes; 170 store->GetAddPrefixes(&add_prefixes); 171 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); 172 iter != add_prefixes.end(); ++iter) { 173 for (size_t j = 0; j < prefixes.size(); ++j) { 174 const SBPrefix& prefix = prefixes[j]; 175 if (prefix == iter->prefix && 176 GetListIdBit(iter->chunk_id) == list_bit) { 177 prefix_hits->push_back(prefix); 178 found_match = true; 179 } 180 } 181 } 182 return found_match; 183 } 184 185 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and 186 // add them to |full_hits| if not expired. "Not expired" is when 187 // either |last_update| was recent enough, or the item has been 188 // received recently enough. 
Expired items are not deleted because a 189 // future update may make them acceptable again. 190 // 191 // For efficiency reasons the code walks |prefix_hits| and 192 // |full_hashes| in parallel, so they must be sorted by prefix. 193 void GetCachedFullHashesForBrowse( 194 const std::vector<SBPrefix>& prefix_hits, 195 const std::vector<SBFullHashCached>& full_hashes, 196 std::vector<SBFullHashResult>* full_hits) { 197 const base::Time now = base::Time::Now(); 198 199 std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin(); 200 std::vector<SBFullHashCached>::const_iterator hiter = full_hashes.begin(); 201 202 while (piter != prefix_hits.end() && hiter != full_hashes.end()) { 203 if (*piter < hiter->hash.prefix) { 204 ++piter; 205 } else if (hiter->hash.prefix < *piter) { 206 ++hiter; 207 } else { 208 if (now <= hiter->expire_after) { 209 SBFullHashResult result; 210 result.list_id = hiter->list_id; 211 result.hash = hiter->hash; 212 full_hits->push_back(result); 213 } 214 215 // Only increment |hiter|, |piter| might have multiple hits. 216 ++hiter; 217 } 218 } 219 } 220 221 // This function generates a chunk range string for |chunks|. It 222 // outputs one chunk range string per list and writes it to the 223 // |list_ranges| vector. We expect |list_ranges| to already be of the 224 // right size. E.g., if |chunks| contains chunks with two different 225 // list ids then |list_ranges| must contain two elements. 226 void GetChunkRanges(const std::vector<int>& chunks, 227 std::vector<std::string>* list_ranges) { 228 // Since there are 2 possible list ids, there must be exactly two 229 // list ranges. Even if the chunk data should only contain one 230 // line, this code has to somehow handle corruption. 
231 DCHECK_EQ(2U, list_ranges->size()); 232 233 std::vector<std::vector<int> > decoded_chunks(list_ranges->size()); 234 for (std::vector<int>::const_iterator iter = chunks.begin(); 235 iter != chunks.end(); ++iter) { 236 int mod_list_id = GetListIdBit(*iter); 237 DCHECK_GE(mod_list_id, 0); 238 DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size()); 239 decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter)); 240 } 241 for (size_t i = 0; i < decoded_chunks.size(); ++i) { 242 ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i])); 243 } 244 } 245 246 // Helper function to create chunk range lists for Browse related 247 // lists. 248 void UpdateChunkRanges(SafeBrowsingStore* store, 249 const std::vector<std::string>& listnames, 250 std::vector<SBListChunkRanges>* lists) { 251 if (!store) 252 return; 253 254 DCHECK_GT(listnames.size(), 0U); 255 DCHECK_LE(listnames.size(), 2U); 256 std::vector<int> add_chunks; 257 std::vector<int> sub_chunks; 258 store->GetAddChunks(&add_chunks); 259 store->GetSubChunks(&sub_chunks); 260 261 // Always decode 2 ranges, even if only the first one is expected. 262 // The loop below will only load as many into |lists| as |listnames| 263 // indicates. 
264 std::vector<std::string> adds(2); 265 std::vector<std::string> subs(2); 266 GetChunkRanges(add_chunks, &adds); 267 GetChunkRanges(sub_chunks, &subs); 268 269 for (size_t i = 0; i < listnames.size(); ++i) { 270 const std::string& listname = listnames[i]; 271 DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2, 272 static_cast<int>(i % 2)); 273 DCHECK_NE(safe_browsing_util::GetListId(listname), 274 safe_browsing_util::INVALID); 275 lists->push_back(SBListChunkRanges(listname)); 276 lists->back().adds.swap(adds[i]); 277 lists->back().subs.swap(subs[i]); 278 } 279 } 280 281 void UpdateChunkRangesForLists(SafeBrowsingStore* store, 282 const std::string& listname0, 283 const std::string& listname1, 284 std::vector<SBListChunkRanges>* lists) { 285 std::vector<std::string> listnames; 286 listnames.push_back(listname0); 287 listnames.push_back(listname1); 288 UpdateChunkRanges(store, listnames, lists); 289 } 290 291 void UpdateChunkRangesForList(SafeBrowsingStore* store, 292 const std::string& listname, 293 std::vector<SBListChunkRanges>* lists) { 294 UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists); 295 } 296 297 // Order |SBFullHashCached| items on the prefix part. 298 bool SBFullHashCachedPrefixLess(const SBFullHashCached& a, 299 const SBFullHashCached& b) { 300 return a.hash.prefix < b.hash.prefix; 301 } 302 303 // This code always checks for non-zero file size. This helper makes 304 // that less verbose. 305 int64 GetFileSizeOrZero(const base::FilePath& file_path) { 306 int64 size_64; 307 if (!base::GetFileSize(file_path, &size_64)) 308 return 0; 309 return size_64; 310 } 311 312 } // namespace 313 314 // The default SafeBrowsingDatabaseFactory. 
315 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 316 public: 317 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 318 bool enable_download_protection, 319 bool enable_client_side_whitelist, 320 bool enable_download_whitelist, 321 bool enable_extension_blacklist, 322 bool enable_side_effect_free_whitelist, 323 bool enable_ip_blacklist) OVERRIDE { 324 return new SafeBrowsingDatabaseNew( 325 new SafeBrowsingStoreFile, 326 enable_download_protection ? new SafeBrowsingStoreFile : NULL, 327 enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL, 328 enable_download_whitelist ? new SafeBrowsingStoreFile : NULL, 329 enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL, 330 enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL, 331 enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL); 332 } 333 334 SafeBrowsingDatabaseFactoryImpl() { } 335 336 private: 337 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl); 338 }; 339 340 // static 341 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; 342 343 // Factory method, non-thread safe. Caller has to make sure this s called 344 // on SafeBrowsing Thread. 345 // TODO(shess): There's no need for a factory any longer. Convert 346 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create() 347 // callers just construct things directly. 
348 SafeBrowsingDatabase* SafeBrowsingDatabase::Create( 349 bool enable_download_protection, 350 bool enable_client_side_whitelist, 351 bool enable_download_whitelist, 352 bool enable_extension_blacklist, 353 bool enable_side_effect_free_whitelist, 354 bool enable_ip_blacklist) { 355 if (!factory_) 356 factory_ = new SafeBrowsingDatabaseFactoryImpl(); 357 return factory_->CreateSafeBrowsingDatabase( 358 enable_download_protection, 359 enable_client_side_whitelist, 360 enable_download_whitelist, 361 enable_extension_blacklist, 362 enable_side_effect_free_whitelist, 363 enable_ip_blacklist); 364 } 365 366 SafeBrowsingDatabase::~SafeBrowsingDatabase() { 367 } 368 369 // static 370 base::FilePath SafeBrowsingDatabase::BrowseDBFilename( 371 const base::FilePath& db_base_filename) { 372 return base::FilePath(db_base_filename.value() + kBrowseDBFile); 373 } 374 375 // static 376 base::FilePath SafeBrowsingDatabase::DownloadDBFilename( 377 const base::FilePath& db_base_filename) { 378 return base::FilePath(db_base_filename.value() + kDownloadDBFile); 379 } 380 381 // static 382 base::FilePath SafeBrowsingDatabase::BloomFilterForFilename( 383 const base::FilePath& db_filename) { 384 return base::FilePath(db_filename.value() + kBloomFilterFile); 385 } 386 387 // static 388 base::FilePath SafeBrowsingDatabase::PrefixSetForFilename( 389 const base::FilePath& db_filename) { 390 return base::FilePath(db_filename.value() + kPrefixSetFile); 391 } 392 393 // static 394 base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( 395 const base::FilePath& db_filename) { 396 return base::FilePath(db_filename.value() + kCsdWhitelistDBFile); 397 } 398 399 // static 400 base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename( 401 const base::FilePath& db_filename) { 402 return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile); 403 } 404 405 // static 406 base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename( 407 const base::FilePath& db_filename) { 408 
return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile); 409 } 410 411 // static 412 base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename( 413 const base::FilePath& db_filename) { 414 return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile); 415 } 416 417 // static 418 base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename( 419 const base::FilePath& db_filename) { 420 return base::FilePath(db_filename.value() + kIPBlacklistDBFile); 421 } 422 423 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { 424 if (list_id == safe_browsing_util::PHISH || 425 list_id == safe_browsing_util::MALWARE) { 426 return browse_store_.get(); 427 } else if (list_id == safe_browsing_util::BINURL) { 428 return download_store_.get(); 429 } else if (list_id == safe_browsing_util::CSDWHITELIST) { 430 return csd_whitelist_store_.get(); 431 } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) { 432 return download_whitelist_store_.get(); 433 } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) { 434 return extension_blacklist_store_.get(); 435 } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) { 436 return side_effect_free_whitelist_store_.get(); 437 } else if (list_id == safe_browsing_util::IPBLACKLIST) { 438 return ip_blacklist_store_.get(); 439 } 440 return NULL; 441 } 442 443 // static 444 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) { 445 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type, 446 FAILURE_DATABASE_MAX); 447 } 448 449 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() 450 : creation_loop_(base::MessageLoop::current()), 451 browse_store_(new SafeBrowsingStoreFile), 452 reset_factory_(this), 453 corruption_detected_(false), 454 change_detected_(false) { 455 DCHECK(browse_store_.get()); 456 DCHECK(!download_store_.get()); 457 DCHECK(!csd_whitelist_store_.get()); 458 DCHECK(!download_whitelist_store_.get()); 459 
DCHECK(!extension_blacklist_store_.get()); 460 DCHECK(!side_effect_free_whitelist_store_.get()); 461 DCHECK(!ip_blacklist_store_.get()); 462 } 463 464 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( 465 SafeBrowsingStore* browse_store, 466 SafeBrowsingStore* download_store, 467 SafeBrowsingStore* csd_whitelist_store, 468 SafeBrowsingStore* download_whitelist_store, 469 SafeBrowsingStore* extension_blacklist_store, 470 SafeBrowsingStore* side_effect_free_whitelist_store, 471 SafeBrowsingStore* ip_blacklist_store) 472 : creation_loop_(base::MessageLoop::current()), 473 browse_store_(browse_store), 474 download_store_(download_store), 475 csd_whitelist_store_(csd_whitelist_store), 476 download_whitelist_store_(download_whitelist_store), 477 extension_blacklist_store_(extension_blacklist_store), 478 side_effect_free_whitelist_store_(side_effect_free_whitelist_store), 479 ip_blacklist_store_(ip_blacklist_store), 480 reset_factory_(this), 481 corruption_detected_(false) { 482 DCHECK(browse_store_.get()); 483 } 484 485 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { 486 // The DCHECK is disabled due to crbug.com/338486 . 487 // DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 488 } 489 490 void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) { 491 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 492 493 // This should not be run multiple times. 494 DCHECK(filename_base_.empty()); 495 496 filename_base_ = filename_base; 497 498 // TODO(shess): The various stores are really only necessary while doing 499 // updates, or when querying a store directly (see |ContainsDownloadUrl()|). 500 // The store variables are also tested to see if a list is enabled. Perhaps 501 // the stores could be refactored into an update object so that they are only 502 // live in memory while being actively used. The sense of enabled probably 503 // belongs in protocol_manager or database_manager. 
504 505 browse_store_->Init( 506 BrowseDBFilename(filename_base_), 507 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 508 base::Unretained(this))); 509 510 { 511 // NOTE: There is no need to grab the lock in this function, since 512 // until it returns, there are no pointers to this class on other 513 // threads. Then again, that means there is no possibility of 514 // contention on the lock... 515 base::AutoLock locked(lookup_lock_); 516 cached_browse_hashes_.clear(); 517 LoadPrefixSet(); 518 } 519 520 if (download_store_.get()) { 521 download_store_->Init( 522 DownloadDBFilename(filename_base_), 523 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 524 base::Unretained(this))); 525 } 526 527 if (csd_whitelist_store_.get()) { 528 csd_whitelist_store_->Init( 529 CsdWhitelistDBFilename(filename_base_), 530 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 531 base::Unretained(this))); 532 533 std::vector<SBAddFullHash> full_hashes; 534 if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) { 535 LoadWhitelist(full_hashes, &csd_whitelist_); 536 } else { 537 WhitelistEverything(&csd_whitelist_); 538 } 539 } else { 540 WhitelistEverything(&csd_whitelist_); // Just to be safe. 541 } 542 543 if (download_whitelist_store_.get()) { 544 download_whitelist_store_->Init( 545 DownloadWhitelistDBFilename(filename_base_), 546 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 547 base::Unretained(this))); 548 549 std::vector<SBAddFullHash> full_hashes; 550 if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) { 551 LoadWhitelist(full_hashes, &download_whitelist_); 552 } else { 553 WhitelistEverything(&download_whitelist_); 554 } 555 } else { 556 WhitelistEverything(&download_whitelist_); // Just to be safe. 
557 } 558 559 if (extension_blacklist_store_.get()) { 560 extension_blacklist_store_->Init( 561 ExtensionBlacklistDBFilename(filename_base_), 562 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 563 base::Unretained(this))); 564 } 565 566 if (side_effect_free_whitelist_store_.get()) { 567 const base::FilePath side_effect_free_whitelist_filename = 568 SideEffectFreeWhitelistDBFilename(filename_base_); 569 const base::FilePath side_effect_free_whitelist_prefix_set_filename = 570 PrefixSetForFilename(side_effect_free_whitelist_filename); 571 side_effect_free_whitelist_store_->Init( 572 side_effect_free_whitelist_filename, 573 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 574 base::Unretained(this))); 575 576 // Only use the prefix set if database is present and non-empty. 577 if (GetFileSizeOrZero(side_effect_free_whitelist_filename)) { 578 const base::TimeTicks before = base::TimeTicks::Now(); 579 side_effect_free_whitelist_prefix_set_ = 580 safe_browsing::PrefixSet::LoadFile( 581 side_effect_free_whitelist_prefix_set_filename); 582 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad", 583 base::TimeTicks::Now() - before); 584 if (!side_effect_free_whitelist_prefix_set_.get()) 585 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ); 586 } 587 } else { 588 // Delete any files of the side-effect free sidelist that may be around 589 // from when it was previously enabled. 
590 SafeBrowsingStoreFile::DeleteStore( 591 SideEffectFreeWhitelistDBFilename(filename_base_)); 592 base::DeleteFile( 593 PrefixSetForFilename(SideEffectFreeWhitelistDBFilename(filename_base_)), 594 false); 595 } 596 597 if (ip_blacklist_store_.get()) { 598 ip_blacklist_store_->Init( 599 IpBlacklistDBFilename(filename_base_), 600 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 601 base::Unretained(this))); 602 603 std::vector<SBAddFullHash> full_hashes; 604 if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) { 605 LoadIpBlacklist(full_hashes); 606 } else { 607 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. 608 } 609 } 610 } 611 612 bool SafeBrowsingDatabaseNew::ResetDatabase() { 613 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 614 615 // Delete files on disk. 616 // TODO(shess): Hard to see where one might want to delete without a 617 // reset. Perhaps inline |Delete()|? 618 if (!Delete()) 619 return false; 620 621 // Reset objects in memory. 622 { 623 base::AutoLock locked(lookup_lock_); 624 cached_browse_hashes_.clear(); 625 prefix_miss_cache_.clear(); 626 browse_prefix_set_.reset(); 627 side_effect_free_whitelist_prefix_set_.reset(); 628 ip_blacklist_.clear(); 629 } 630 // Wants to acquire the lock itself. 631 WhitelistEverything(&csd_whitelist_); 632 WhitelistEverything(&download_whitelist_); 633 return true; 634 } 635 636 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 637 const GURL& url, 638 std::vector<SBPrefix>* prefix_hits, 639 std::vector<SBFullHashResult>* cache_hits) { 640 // Clear the results first. 641 prefix_hits->clear(); 642 cache_hits->clear(); 643 644 std::vector<SBFullHash> full_hashes; 645 BrowseFullHashesToCheck(url, false, &full_hashes); 646 if (full_hashes.empty()) 647 return false; 648 649 // This function is called on the I/O thread, prevent changes to 650 // filter and caches. 
651 base::AutoLock locked(lookup_lock_); 652 653 // |browse_prefix_set_| is empty until it is either read from disk, or the 654 // first update populates it. Bail out without a hit if not yet 655 // available. 656 if (!browse_prefix_set_.get()) 657 return false; 658 659 size_t miss_count = 0; 660 for (size_t i = 0; i < full_hashes.size(); ++i) { 661 if (browse_prefix_set_->Exists(full_hashes[i])) { 662 const SBPrefix prefix = full_hashes[i].prefix; 663 prefix_hits->push_back(prefix); 664 if (prefix_miss_cache_.count(prefix) > 0) 665 ++miss_count; 666 } 667 } 668 669 // If all the prefixes are cached as 'misses', don't issue a GetHash. 670 if (miss_count == prefix_hits->size()) 671 return false; 672 673 // Find matching cached gethash responses. 674 std::sort(prefix_hits->begin(), prefix_hits->end()); 675 GetCachedFullHashesForBrowse(*prefix_hits, cached_browse_hashes_, cache_hits); 676 677 return true; 678 } 679 680 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( 681 const std::vector<GURL>& urls, 682 std::vector<SBPrefix>* prefix_hits) { 683 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 684 685 // Ignore this check when download checking is not enabled. 686 if (!download_store_.get()) 687 return false; 688 689 std::vector<SBPrefix> prefixes; 690 GetDownloadUrlPrefixes(urls, &prefixes); 691 return MatchAddPrefixes(download_store_.get(), 692 safe_browsing_util::BINURL % 2, 693 prefixes, 694 prefix_hits); 695 } 696 697 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) { 698 // This method is theoretically thread-safe but we expect all calls to 699 // originate from the IO thread. 
700 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 701 std::vector<SBFullHash> full_hashes; 702 BrowseFullHashesToCheck(url, true, &full_hashes); 703 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 704 } 705 706 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) { 707 std::vector<SBFullHash> full_hashes; 708 BrowseFullHashesToCheck(url, true, &full_hashes); 709 return ContainsWhitelistedHashes(download_whitelist_, full_hashes); 710 } 711 712 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes( 713 const std::vector<SBPrefix>& prefixes, 714 std::vector<SBPrefix>* prefix_hits) { 715 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 716 if (!extension_blacklist_store_) 717 return false; 718 719 return MatchAddPrefixes(extension_blacklist_store_.get(), 720 safe_browsing_util::EXTENSIONBLACKLIST % 2, 721 prefixes, 722 prefix_hits); 723 } 724 725 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl( 726 const GURL& url) { 727 std::string host; 728 std::string path; 729 std::string query; 730 safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query); 731 std::string url_to_check = host + path; 732 if (!query.empty()) 733 url_to_check += "?" + query; 734 SBFullHash full_hash = SBFullHashForString(url_to_check); 735 736 // This function can be called on any thread, so lock against any changes 737 base::AutoLock locked(lookup_lock_); 738 739 // |side_effect_free_whitelist_prefix_set_| is empty until it is either read 740 // from disk, or the first update populates it. Bail out without a hit if 741 // not yet available. 
742 if (!side_effect_free_whitelist_prefix_set_.get()) 743 return false; 744 745 return side_effect_free_whitelist_prefix_set_->Exists(full_hash); 746 } 747 748 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) { 749 net::IPAddressNumber ip_number; 750 if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) 751 return false; 752 if (ip_number.size() == net::kIPv4AddressSize) 753 ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number); 754 if (ip_number.size() != net::kIPv6AddressSize) 755 return false; // better safe than sorry. 756 757 // This function can be called from any thread. 758 base::AutoLock locked(lookup_lock_); 759 for (IPBlacklist::const_iterator it = ip_blacklist_.begin(); 760 it != ip_blacklist_.end(); 761 ++it) { 762 const std::string& mask = it->first; 763 DCHECK_EQ(mask.size(), ip_number.size()); 764 std::string subnet(net::kIPv6AddressSize, '\0'); 765 for (size_t i = 0; i < net::kIPv6AddressSize; ++i) { 766 subnet[i] = ip_number[i] & mask[i]; 767 } 768 const std::string hash = base::SHA1HashString(subnet); 769 DVLOG(2) << "Lookup Malware IP: " 770 << " ip:" << ip_address 771 << " mask:" << base::HexEncode(mask.data(), mask.size()) 772 << " subnet:" << base::HexEncode(subnet.data(), subnet.size()) 773 << " hash:" << base::HexEncode(hash.data(), hash.size()); 774 if (it->second.count(hash) > 0) { 775 return true; 776 } 777 } 778 return false; 779 } 780 781 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString( 782 const std::string& str) { 783 std::vector<SBFullHash> hashes; 784 hashes.push_back(SBFullHashForString(str)); 785 return ContainsWhitelistedHashes(download_whitelist_, hashes); 786 } 787 788 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes( 789 const SBWhitelist& whitelist, 790 const std::vector<SBFullHash>& hashes) { 791 base::AutoLock l(lookup_lock_); 792 if (whitelist.second) 793 return true; 794 for (std::vector<SBFullHash>::const_iterator it = hashes.begin(); 795 it != hashes.end(); 
++it) { 796 if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), 797 *it, SBFullHashLess)) { 798 return true; 799 } 800 } 801 return false; 802 } 803 804 // Helper to insert add-chunk entries. 805 void SafeBrowsingDatabaseNew::InsertAddChunk( 806 SafeBrowsingStore* store, 807 const safe_browsing_util::ListType list_id, 808 const SBChunkData& chunk_data) { 809 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 810 DCHECK(store); 811 812 // The server can give us a chunk that we already have because 813 // it's part of a range. Don't add it again. 814 const int chunk_id = chunk_data.ChunkNumber(); 815 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 816 if (store->CheckAddChunk(encoded_chunk_id)) 817 return; 818 819 store->SetAddChunk(encoded_chunk_id); 820 if (chunk_data.IsPrefix()) { 821 const size_t c = chunk_data.PrefixCount(); 822 for (size_t i = 0; i < c; ++i) { 823 STATS_COUNTER("SB.PrefixAdd", 1); 824 store->WriteAddPrefix(encoded_chunk_id, chunk_data.PrefixAt(i)); 825 } 826 } else { 827 const size_t c = chunk_data.FullHashCount(); 828 for (size_t i = 0; i < c; ++i) { 829 STATS_COUNTER("SB.PrefixAddFull", 1); 830 store->WriteAddHash(encoded_chunk_id, chunk_data.FullHashAt(i)); 831 } 832 } 833 } 834 835 // Helper to insert sub-chunk entries. 836 void SafeBrowsingDatabaseNew::InsertSubChunk( 837 SafeBrowsingStore* store, 838 const safe_browsing_util::ListType list_id, 839 const SBChunkData& chunk_data) { 840 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 841 DCHECK(store); 842 843 // The server can give us a chunk that we already have because 844 // it's part of a range. Don't add it again. 
845 const int chunk_id = chunk_data.ChunkNumber(); 846 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 847 if (store->CheckSubChunk(encoded_chunk_id)) 848 return; 849 850 store->SetSubChunk(encoded_chunk_id); 851 if (chunk_data.IsPrefix()) { 852 const size_t c = chunk_data.PrefixCount(); 853 for (size_t i = 0; i < c; ++i) { 854 STATS_COUNTER("SB.PrefixSub", 1); 855 const int add_chunk_id = chunk_data.AddChunkNumberAt(i); 856 const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id); 857 store->WriteSubPrefix(encoded_chunk_id, encoded_add_chunk_id, 858 chunk_data.PrefixAt(i)); 859 } 860 } else { 861 const size_t c = chunk_data.FullHashCount(); 862 for (size_t i = 0; i < c; ++i) { 863 STATS_COUNTER("SB.PrefixSubFull", 1); 864 const int add_chunk_id = chunk_data.AddChunkNumberAt(i); 865 const int encoded_add_chunk_id = EncodeChunkId(add_chunk_id, list_id); 866 store->WriteSubHash(encoded_chunk_id, encoded_add_chunk_id, 867 chunk_data.FullHashAt(i)); 868 } 869 } 870 } 871 872 void SafeBrowsingDatabaseNew::InsertChunks( 873 const std::string& list_name, 874 const std::vector<SBChunkData*>& chunks) { 875 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 876 877 if (corruption_detected_ || chunks.empty()) 878 return; 879 880 const base::TimeTicks before = base::TimeTicks::Now(); 881 882 // TODO(shess): The caller should just pass list_id. 883 const safe_browsing_util::ListType list_id = 884 safe_browsing_util::GetListId(list_name); 885 886 SafeBrowsingStore* store = GetStore(list_id); 887 if (!store) return; 888 889 change_detected_ = true; 890 891 // TODO(shess): I believe that the list is always add or sub. Can this use 892 // that productively? 
893 store->BeginChunk(); 894 for (size_t i = 0; i < chunks.size(); ++i) { 895 if (chunks[i]->IsAdd()) { 896 InsertAddChunk(store, list_id, *chunks[i]); 897 } else if (chunks[i]->IsSub()) { 898 InsertSubChunk(store, list_id, *chunks[i]); 899 } else { 900 NOTREACHED(); 901 } 902 } 903 store->FinishChunk(); 904 905 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before); 906 } 907 908 void SafeBrowsingDatabaseNew::DeleteChunks( 909 const std::vector<SBChunkDelete>& chunk_deletes) { 910 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 911 912 if (corruption_detected_ || chunk_deletes.empty()) 913 return; 914 915 const std::string& list_name = chunk_deletes.front().list_name; 916 const safe_browsing_util::ListType list_id = 917 safe_browsing_util::GetListId(list_name); 918 919 SafeBrowsingStore* store = GetStore(list_id); 920 if (!store) return; 921 922 change_detected_ = true; 923 924 for (size_t i = 0; i < chunk_deletes.size(); ++i) { 925 std::vector<int> chunk_numbers; 926 RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers); 927 for (size_t j = 0; j < chunk_numbers.size(); ++j) { 928 const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id); 929 if (chunk_deletes[i].is_sub_del) 930 store->DeleteSubChunk(encoded_chunk_id); 931 else 932 store->DeleteAddChunk(encoded_chunk_id); 933 } 934 } 935 } 936 937 void SafeBrowsingDatabaseNew::CacheHashResults( 938 const std::vector<SBPrefix>& prefixes, 939 const std::vector<SBFullHashResult>& full_hits, 940 const base::TimeDelta& cache_lifetime) { 941 const base::Time expire_after = base::Time::Now() + cache_lifetime; 942 943 // This is called on the I/O thread, lock against updates. 
944 base::AutoLock locked(lookup_lock_); 945 946 if (full_hits.empty()) { 947 prefix_miss_cache_.insert(prefixes.begin(), prefixes.end()); 948 return; 949 } 950 951 const size_t orig_size = cached_browse_hashes_.size(); 952 for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin(); 953 iter != full_hits.end(); ++iter) { 954 if (iter->list_id == safe_browsing_util::MALWARE || 955 iter->list_id == safe_browsing_util::PHISH) { 956 SBFullHashCached cached_hash; 957 cached_hash.hash = iter->hash; 958 cached_hash.list_id = iter->list_id; 959 cached_hash.expire_after = expire_after; 960 cached_browse_hashes_.push_back(cached_hash); 961 } 962 } 963 964 // Sort new entries then merge with the previously-sorted entries. 965 std::vector<SBFullHashCached>::iterator 966 orig_end = cached_browse_hashes_.begin() + orig_size; 967 std::sort(orig_end, cached_browse_hashes_.end(), SBFullHashCachedPrefixLess); 968 std::inplace_merge(cached_browse_hashes_.begin(), 969 orig_end, cached_browse_hashes_.end(), 970 SBFullHashCachedPrefixLess); 971 } 972 973 bool SafeBrowsingDatabaseNew::UpdateStarted( 974 std::vector<SBListChunkRanges>* lists) { 975 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 976 DCHECK(lists); 977 978 // If |BeginUpdate()| fails, reset the database. 
979 if (!browse_store_->BeginUpdate()) { 980 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); 981 HandleCorruptDatabase(); 982 return false; 983 } 984 985 if (download_store_.get() && !download_store_->BeginUpdate()) { 986 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN); 987 HandleCorruptDatabase(); 988 return false; 989 } 990 991 if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) { 992 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); 993 HandleCorruptDatabase(); 994 return false; 995 } 996 997 if (download_whitelist_store_.get() && 998 !download_whitelist_store_->BeginUpdate()) { 999 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); 1000 HandleCorruptDatabase(); 1001 return false; 1002 } 1003 1004 if (extension_blacklist_store_ && 1005 !extension_blacklist_store_->BeginUpdate()) { 1006 RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN); 1007 HandleCorruptDatabase(); 1008 return false; 1009 } 1010 1011 if (side_effect_free_whitelist_store_ && 1012 !side_effect_free_whitelist_store_->BeginUpdate()) { 1013 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN); 1014 HandleCorruptDatabase(); 1015 return false; 1016 } 1017 1018 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) { 1019 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN); 1020 HandleCorruptDatabase(); 1021 return false; 1022 } 1023 1024 UpdateChunkRangesForLists(browse_store_.get(), 1025 safe_browsing_util::kMalwareList, 1026 safe_browsing_util::kPhishingList, 1027 lists); 1028 1029 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been 1030 // deprecated. Code to delete the list from the store shows ~15k hits/day as 1031 // of Feb 2014, so it has been removed. Everything _should_ be resilient to 1032 // extra data of that sort. 
1033 UpdateChunkRangesForList(download_store_.get(), 1034 safe_browsing_util::kBinUrlList, lists); 1035 1036 UpdateChunkRangesForList(csd_whitelist_store_.get(), 1037 safe_browsing_util::kCsdWhiteList, lists); 1038 1039 UpdateChunkRangesForList(download_whitelist_store_.get(), 1040 safe_browsing_util::kDownloadWhiteList, lists); 1041 1042 UpdateChunkRangesForList(extension_blacklist_store_.get(), 1043 safe_browsing_util::kExtensionBlacklist, lists); 1044 1045 UpdateChunkRangesForList(side_effect_free_whitelist_store_.get(), 1046 safe_browsing_util::kSideEffectFreeWhitelist, lists); 1047 1048 UpdateChunkRangesForList(ip_blacklist_store_.get(), 1049 safe_browsing_util::kIPBlacklist, lists); 1050 1051 corruption_detected_ = false; 1052 change_detected_ = false; 1053 return true; 1054 } 1055 1056 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { 1057 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1058 1059 // The update may have failed due to corrupt storage (for instance, 1060 // an excessive number of invalid add_chunks and sub_chunks). 1061 // Double-check that the databases are valid. 1062 // TODO(shess): Providing a checksum for the add_chunk and sub_chunk 1063 // sections would allow throwing a corruption error in 1064 // UpdateStarted(). 
1065 if (!update_succeeded) { 1066 if (!browse_store_->CheckValidity()) 1067 DLOG(ERROR) << "Safe-browsing browse database corrupt."; 1068 1069 if (download_store_.get() && !download_store_->CheckValidity()) 1070 DLOG(ERROR) << "Safe-browsing download database corrupt."; 1071 1072 if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity()) 1073 DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt."; 1074 1075 if (download_whitelist_store_.get() && 1076 !download_whitelist_store_->CheckValidity()) { 1077 DLOG(ERROR) << "Safe-browsing download whitelist database corrupt."; 1078 } 1079 1080 if (extension_blacklist_store_ && 1081 !extension_blacklist_store_->CheckValidity()) { 1082 DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt."; 1083 } 1084 1085 if (side_effect_free_whitelist_store_ && 1086 !side_effect_free_whitelist_store_->CheckValidity()) { 1087 DLOG(ERROR) << "Safe-browsing side-effect free whitelist database " 1088 << "corrupt."; 1089 } 1090 1091 if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) { 1092 DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt."; 1093 } 1094 } 1095 1096 if (corruption_detected_) 1097 return; 1098 1099 // Unroll the transaction if there was a protocol error or if the 1100 // transaction was empty. This will leave the prefix set, the 1101 // pending hashes, and the prefix miss cache in place. 1102 if (!update_succeeded || !change_detected_) { 1103 // Track empty updates to answer questions at http://crbug.com/72216 . 
1104 if (update_succeeded && !change_detected_) 1105 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0); 1106 browse_store_->CancelUpdate(); 1107 if (download_store_.get()) 1108 download_store_->CancelUpdate(); 1109 if (csd_whitelist_store_.get()) 1110 csd_whitelist_store_->CancelUpdate(); 1111 if (download_whitelist_store_.get()) 1112 download_whitelist_store_->CancelUpdate(); 1113 if (extension_blacklist_store_) 1114 extension_blacklist_store_->CancelUpdate(); 1115 if (side_effect_free_whitelist_store_) 1116 side_effect_free_whitelist_store_->CancelUpdate(); 1117 if (ip_blacklist_store_) 1118 ip_blacklist_store_->CancelUpdate(); 1119 return; 1120 } 1121 1122 if (download_store_) { 1123 int64 size_bytes = UpdateHashPrefixStore( 1124 DownloadDBFilename(filename_base_), 1125 download_store_.get(), 1126 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH); 1127 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", 1128 static_cast<int>(size_bytes / 1024)); 1129 } 1130 1131 UpdateBrowseStore(); 1132 UpdateWhitelistStore(CsdWhitelistDBFilename(filename_base_), 1133 csd_whitelist_store_.get(), 1134 &csd_whitelist_); 1135 UpdateWhitelistStore(DownloadWhitelistDBFilename(filename_base_), 1136 download_whitelist_store_.get(), 1137 &download_whitelist_); 1138 1139 if (extension_blacklist_store_) { 1140 int64 size_bytes = UpdateHashPrefixStore( 1141 ExtensionBlacklistDBFilename(filename_base_), 1142 extension_blacklist_store_.get(), 1143 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH); 1144 UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes", 1145 static_cast<int>(size_bytes / 1024)); 1146 } 1147 1148 if (side_effect_free_whitelist_store_) 1149 UpdateSideEffectFreeWhitelistStore(); 1150 1151 if (ip_blacklist_store_) 1152 UpdateIpBlacklistStore(); 1153 } 1154 1155 void SafeBrowsingDatabaseNew::UpdateWhitelistStore( 1156 const base::FilePath& store_filename, 1157 SafeBrowsingStore* store, 1158 SBWhitelist* whitelist) { 1159 if (!store) 1160 return; 1161 1162 // Note: |builder| will not 
be empty. The current data store implementation 1163 // stores all full-length hashes as both full and prefix hashes. 1164 safe_browsing::PrefixSetBuilder builder; 1165 std::vector<SBAddFullHash> full_hashes; 1166 if (!store->FinishUpdate(&builder, &full_hashes)) { 1167 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); 1168 WhitelistEverything(whitelist); 1169 return; 1170 } 1171 1172 #if defined(OS_MACOSX) 1173 base::mac::SetFileBackupExclusion(store_filename); 1174 #endif 1175 1176 LoadWhitelist(full_hashes, whitelist); 1177 } 1178 1179 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( 1180 const base::FilePath& store_filename, 1181 SafeBrowsingStore* store, 1182 FailureType failure_type) { 1183 // These results are not used after this call. Simply ignore the 1184 // returned value after FinishUpdate(...). 1185 safe_browsing::PrefixSetBuilder builder; 1186 std::vector<SBAddFullHash> add_full_hashes_result; 1187 1188 if (!store->FinishUpdate(&builder, &add_full_hashes_result)) 1189 RecordFailure(failure_type); 1190 1191 #if defined(OS_MACOSX) 1192 base::mac::SetFileBackupExclusion(store_filename); 1193 #endif 1194 1195 return GetFileSizeOrZero(store_filename); 1196 } 1197 1198 void SafeBrowsingDatabaseNew::UpdateBrowseStore() { 1199 // Measure the amount of IO during the filter build. 1200 base::IoCounters io_before, io_after; 1201 base::ProcessHandle handle = base::Process::Current().handle(); 1202 scoped_ptr<base::ProcessMetrics> metric( 1203 #if !defined(OS_MACOSX) 1204 base::ProcessMetrics::CreateProcessMetrics(handle) 1205 #else 1206 // Getting stats only for the current process is enough, so NULL is fine. 1207 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) 1208 #endif 1209 ); 1210 1211 // IoCounters are currently not supported on Mac, and may not be 1212 // available for Linux, so we check the result and only show IO 1213 // stats if they are available. 
1214 const bool got_counters = metric->GetIOCounters(&io_before); 1215 1216 const base::TimeTicks before = base::TimeTicks::Now(); 1217 1218 // TODO(shess): Perhaps refactor to let builder accumulate full hashes on the 1219 // fly? Other clients use the SBAddFullHash vector, but AFAICT they only use 1220 // the SBFullHash portion. It would need an accessor on PrefixSet. 1221 safe_browsing::PrefixSetBuilder builder; 1222 std::vector<SBAddFullHash> add_full_hashes; 1223 if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) { 1224 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); 1225 return; 1226 } 1227 1228 std::vector<SBFullHash> full_hash_results; 1229 for (size_t i = 0; i < add_full_hashes.size(); ++i) { 1230 full_hash_results.push_back(add_full_hashes[i].full_hash); 1231 } 1232 1233 scoped_ptr<safe_browsing::PrefixSet> 1234 prefix_set(builder.GetPrefixSet(full_hash_results)); 1235 1236 // Swap in the newly built filter and cache. 1237 { 1238 base::AutoLock locked(lookup_lock_); 1239 1240 // TODO(shess): If |CacheHashResults()| is posted between the 1241 // earlier lock and this clear, those pending hashes will be lost. 1242 // It could be fixed by only removing hashes which were collected 1243 // at the earlier point. I believe that is fail-safe as-is (the 1244 // hash will be fetched again). 1245 cached_browse_hashes_.clear(); 1246 prefix_miss_cache_.clear(); 1247 browse_prefix_set_.swap(prefix_set); 1248 } 1249 1250 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); 1251 1252 // Persist the prefix set to disk. Since only this thread changes 1253 // |browse_prefix_set_|, there is no need to lock. 1254 WritePrefixSet(); 1255 1256 // Gather statistics. 
1257 if (got_counters && metric->GetIOCounters(&io_after)) { 1258 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes", 1259 static_cast<int>(io_after.ReadTransferCount - 1260 io_before.ReadTransferCount) / 1024); 1261 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes", 1262 static_cast<int>(io_after.WriteTransferCount - 1263 io_before.WriteTransferCount) / 1024); 1264 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", 1265 static_cast<int>(io_after.ReadOperationCount - 1266 io_before.ReadOperationCount)); 1267 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", 1268 static_cast<int>(io_after.WriteOperationCount - 1269 io_before.WriteOperationCount)); 1270 } 1271 1272 const base::FilePath browse_filename = BrowseDBFilename(filename_base_); 1273 const int64 file_size = GetFileSizeOrZero(browse_filename); 1274 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", 1275 static_cast<int>(file_size / 1024)); 1276 1277 #if defined(OS_MACOSX) 1278 base::mac::SetFileBackupExclusion(browse_filename); 1279 #endif 1280 } 1281 1282 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { 1283 safe_browsing::PrefixSetBuilder builder; 1284 std::vector<SBAddFullHash> add_full_hashes_result; 1285 1286 if (!side_effect_free_whitelist_store_->FinishUpdate( 1287 &builder, &add_full_hashes_result)) { 1288 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); 1289 return; 1290 } 1291 scoped_ptr<safe_browsing::PrefixSet> 1292 prefix_set(builder.GetPrefixSetNoHashes()); 1293 1294 // Swap in the newly built prefix set. 
1295 { 1296 base::AutoLock locked(lookup_lock_); 1297 side_effect_free_whitelist_prefix_set_.swap(prefix_set); 1298 } 1299 1300 const base::FilePath side_effect_free_whitelist_filename = 1301 SideEffectFreeWhitelistDBFilename(filename_base_); 1302 const base::FilePath side_effect_free_whitelist_prefix_set_filename = 1303 PrefixSetForFilename(side_effect_free_whitelist_filename); 1304 const base::TimeTicks before = base::TimeTicks::Now(); 1305 const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile( 1306 side_effect_free_whitelist_prefix_set_filename); 1307 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite", 1308 base::TimeTicks::Now() - before); 1309 1310 if (!write_ok) 1311 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE); 1312 1313 // Gather statistics. 1314 int64 file_size = GetFileSizeOrZero( 1315 side_effect_free_whitelist_prefix_set_filename); 1316 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes", 1317 static_cast<int>(file_size / 1024)); 1318 file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename); 1319 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes", 1320 static_cast<int>(file_size / 1024)); 1321 1322 #if defined(OS_MACOSX) 1323 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename); 1324 base::mac::SetFileBackupExclusion( 1325 side_effect_free_whitelist_prefix_set_filename); 1326 #endif 1327 } 1328 1329 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() { 1330 // Note: prefixes will not be empty. The current data store implementation 1331 // stores all full-length hashes as both full and prefix hashes. 1332 safe_browsing::PrefixSetBuilder builder; 1333 std::vector<SBAddFullHash> full_hashes; 1334 if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) { 1335 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH); 1336 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. 
1337 return; 1338 } 1339 1340 #if defined(OS_MACOSX) 1341 base::mac::SetFileBackupExclusion(IpBlacklistDBFilename(filename_base_)); 1342 #endif 1343 1344 LoadIpBlacklist(full_hashes); 1345 } 1346 1347 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() { 1348 // Reset the database after the current task has unwound (but only 1349 // reset once within the scope of a given task). 1350 if (!reset_factory_.HasWeakPtrs()) { 1351 RecordFailure(FAILURE_DATABASE_CORRUPT); 1352 base::MessageLoop::current()->PostTask(FROM_HERE, 1353 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase, 1354 reset_factory_.GetWeakPtr())); 1355 } 1356 } 1357 1358 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() { 1359 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER); 1360 corruption_detected_ = true; // Stop updating the database. 1361 ResetDatabase(); 1362 1363 // NOTE(shess): ResetDatabase() should remove the corruption, so this should 1364 // only happen once. If you are here because you are hitting this after a 1365 // restart, then I would be very interested in working with you to figure out 1366 // what is happening, since it may affect real users. 1367 DLOG(FATAL) << "SafeBrowsing database was corrupt and reset"; 1368 } 1369 1370 // TODO(shess): I'm not clear why this code doesn't have any 1371 // real error-handling. 1372 void SafeBrowsingDatabaseNew::LoadPrefixSet() { 1373 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1374 DCHECK(!filename_base_.empty()); 1375 1376 const base::FilePath browse_filename = BrowseDBFilename(filename_base_); 1377 const base::FilePath browse_prefix_set_filename = 1378 PrefixSetForFilename(browse_filename); 1379 1380 // Only use the prefix set if database is present and non-empty. 1381 if (!GetFileSizeOrZero(browse_filename)) 1382 return; 1383 1384 // Cleanup any stale bloom filter (no longer used). 1385 // TODO(shess): Track existence to drive removal of this code? 
1386 const base::FilePath bloom_filter_filename = 1387 BloomFilterForFilename(browse_filename); 1388 base::DeleteFile(bloom_filter_filename, false); 1389 1390 const base::TimeTicks before = base::TimeTicks::Now(); 1391 browse_prefix_set_ = safe_browsing::PrefixSet::LoadFile( 1392 browse_prefix_set_filename); 1393 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before); 1394 1395 if (!browse_prefix_set_.get()) 1396 RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ); 1397 } 1398 1399 bool SafeBrowsingDatabaseNew::Delete() { 1400 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1401 DCHECK(!filename_base_.empty()); 1402 1403 // TODO(shess): This is a mess. SafeBrowsingFileStore::Delete() closes the 1404 // store before calling DeleteStore(). DeleteStore() deletes transient files 1405 // in addition to the main file. Probably all of these should be converted to 1406 // a helper which calls Delete() if the store exists, else DeleteStore() on 1407 // the generated filename. 1408 1409 // TODO(shess): Determine if the histograms are useful in any way. I cannot 1410 // recall any action taken as a result of their values, in which case it might 1411 // make more sense to histogram an overall thumbs-up/-down and just dig deeper 1412 // if something looks wrong. 1413 1414 const bool r1 = browse_store_->Delete(); 1415 if (!r1) 1416 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1417 1418 const bool r2 = download_store_.get() ? download_store_->Delete() : true; 1419 if (!r2) 1420 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1421 1422 const bool r3 = csd_whitelist_store_.get() ? 1423 csd_whitelist_store_->Delete() : true; 1424 if (!r3) 1425 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1426 1427 const bool r4 = download_whitelist_store_.get() ? 
1428 download_whitelist_store_->Delete() : true; 1429 if (!r4) 1430 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1431 1432 const base::FilePath browse_filename = BrowseDBFilename(filename_base_); 1433 const base::FilePath bloom_filter_filename = 1434 BloomFilterForFilename(browse_filename); 1435 const bool r5 = base::DeleteFile(bloom_filter_filename, false); 1436 if (!r5) 1437 RecordFailure(FAILURE_DATABASE_FILTER_DELETE); 1438 1439 const base::FilePath browse_prefix_set_filename = 1440 PrefixSetForFilename(browse_filename); 1441 const bool r6 = base::DeleteFile(browse_prefix_set_filename, false); 1442 if (!r6) 1443 RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE); 1444 1445 const base::FilePath extension_blacklist_filename = 1446 ExtensionBlacklistDBFilename(filename_base_); 1447 const bool r7 = base::DeleteFile(extension_blacklist_filename, false); 1448 if (!r7) 1449 RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE); 1450 1451 const base::FilePath side_effect_free_whitelist_filename = 1452 SideEffectFreeWhitelistDBFilename(filename_base_); 1453 const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename, 1454 false); 1455 if (!r8) 1456 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE); 1457 1458 const base::FilePath side_effect_free_whitelist_prefix_set_filename = 1459 PrefixSetForFilename(side_effect_free_whitelist_filename); 1460 const bool r9 = base::DeleteFile( 1461 side_effect_free_whitelist_prefix_set_filename, 1462 false); 1463 if (!r9) 1464 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE); 1465 1466 const bool r10 = base::DeleteFile(IpBlacklistDBFilename(filename_base_), 1467 false); 1468 if (!r10) 1469 RecordFailure(FAILURE_IP_BLACKLIST_DELETE); 1470 1471 return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10; 1472 } 1473 1474 void SafeBrowsingDatabaseNew::WritePrefixSet() { 1475 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1476 1477 if (!browse_prefix_set_.get()) 1478 return; 1479 1480 const 
base::FilePath browse_filename = BrowseDBFilename(filename_base_); 1481 const base::FilePath browse_prefix_set_filename = 1482 PrefixSetForFilename(browse_filename); 1483 1484 const base::TimeTicks before = base::TimeTicks::Now(); 1485 const bool write_ok = browse_prefix_set_->WriteFile( 1486 browse_prefix_set_filename); 1487 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before); 1488 1489 const int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename); 1490 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes", 1491 static_cast<int>(file_size / 1024)); 1492 1493 if (!write_ok) 1494 RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE); 1495 1496 #if defined(OS_MACOSX) 1497 base::mac::SetFileBackupExclusion(browse_prefix_set_filename); 1498 #endif 1499 } 1500 1501 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) { 1502 base::AutoLock locked(lookup_lock_); 1503 whitelist->second = true; 1504 whitelist->first.clear(); 1505 } 1506 1507 void SafeBrowsingDatabaseNew::LoadWhitelist( 1508 const std::vector<SBAddFullHash>& full_hashes, 1509 SBWhitelist* whitelist) { 1510 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1511 if (full_hashes.size() > kMaxWhitelistSize) { 1512 WhitelistEverything(whitelist); 1513 return; 1514 } 1515 1516 std::vector<SBFullHash> new_whitelist; 1517 new_whitelist.reserve(full_hashes.size()); 1518 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); 1519 it != full_hashes.end(); ++it) { 1520 new_whitelist.push_back(it->full_hash); 1521 } 1522 std::sort(new_whitelist.begin(), new_whitelist.end(), SBFullHashLess); 1523 1524 SBFullHash kill_switch = SBFullHashForString(kWhitelistKillSwitchUrl); 1525 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(), 1526 kill_switch, SBFullHashLess)) { 1527 // The kill switch is whitelisted hence we whitelist all URLs. 
1528 WhitelistEverything(whitelist); 1529 } else { 1530 base::AutoLock locked(lookup_lock_); 1531 whitelist->second = false; 1532 whitelist->first.swap(new_whitelist); 1533 } 1534 } 1535 1536 void SafeBrowsingDatabaseNew::LoadIpBlacklist( 1537 const std::vector<SBAddFullHash>& full_hashes) { 1538 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1539 IPBlacklist new_blacklist; 1540 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); 1541 it != full_hashes.end(); 1542 ++it) { 1543 const char* full_hash = it->full_hash.full_hash; 1544 DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash)); 1545 // The format of the IP blacklist is: 1546 // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes. 1547 std::string hashed_ip_prefix(full_hash, base::kSHA1Length); 1548 size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]); 1549 if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) { 1550 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID); 1551 new_blacklist.clear(); // Load empty blacklist. 1552 break; 1553 } 1554 1555 // We precompute the mask for the given subnet size to speed up lookups. 1556 // Basically we need to create a 16B long string which has the highest 1557 // |size| bits sets to one. 
1558 std::string mask(net::kIPv6AddressSize, '\0'); 1559 mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF'); 1560 if ((prefix_size % 8) != 0) { 1561 mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8)); 1562 } 1563 DVLOG(2) << "Inserting malicious IP: " 1564 << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length) 1565 << " mask:" << base::HexEncode(mask.data(), mask.size()) 1566 << " prefix_size:" << prefix_size 1567 << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(), 1568 hashed_ip_prefix.size()); 1569 new_blacklist[mask].insert(hashed_ip_prefix); 1570 } 1571 1572 base::AutoLock locked(lookup_lock_); 1573 ip_blacklist_.swap(new_blacklist); 1574 } 1575 1576 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() { 1577 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl); 1578 std::vector<SBFullHash> full_hashes; 1579 full_hashes.push_back(malware_kill_switch); 1580 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 1581 } 1582 1583 bool SafeBrowsingDatabaseNew::IsCsdWhitelistKillSwitchOn() { 1584 return csd_whitelist_.second; 1585 } 1586