1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" 6 7 #include <algorithm> 8 #include <iterator> 9 10 #include "base/file_util.h" 11 #include "base/metrics/histogram.h" 12 #include "base/metrics/stats_counters.h" 13 #include "base/time.h" 14 #include "base/message_loop.h" 15 #include "base/process_util.h" 16 #include "crypto/sha2.h" 17 #include "chrome/browser/safe_browsing/bloom_filter.h" 18 #include "chrome/browser/safe_browsing/prefix_set.h" 19 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" 20 #include "content/browser/browser_thread.h" 21 #include "googleurl/src/gurl.h" 22 23 namespace { 24 25 // Filename suffix for the bloom filter. 26 const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); 27 // Filename suffix for download store. 28 const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download"); 29 // Filename suffix for client-side phishing detection whitelist store. 30 const FilePath::CharType kCsdWhitelistDBFile[] = 31 FILE_PATH_LITERAL(" Csd Whitelist"); 32 // Filename suffix for browse store. 33 // TODO(lzheng): change to a better name when we change the file format. 34 const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); 35 36 // The maximum staleness for a cached entry. 37 const int kMaxStalenessMinutes = 45; 38 39 // Maximum number of entries we allow in the client-side phishing detection 40 // whitelist. If the whitelist on disk contains more entries then 41 // ContainsCsdWhitelistedUrl will always return true. 42 const size_t kMaxCsdWhitelistSize = 5000; 43 44 // If the hash of this exact expression is on the csd whitelist then 45 // ContainsCsdWhitelistedUrl will always return true. 46 const char kCsdKillSwitchUrl[] = 47 "sb-ssl.google.com/safebrowsing/csd/killswitch"; 48 49 // To save space, the incoming |chunk_id| and |list_id| are combined 50 // into an |encoded_chunk_id| for storage by shifting the |list_id| 51 // into the low-order bits. These functions decode that information. 52 // TODO(lzheng): It was reasonable when database is saved in sqlite, but 53 // there should be better ways to save chunk_id and list_id after we use 54 // SafeBrowsingStoreFile. 55 int GetListIdBit(const int encoded_chunk_id) { 56 return encoded_chunk_id & 1; 57 } 58 int DecodeChunkId(int encoded_chunk_id) { 59 return encoded_chunk_id >> 1; 60 } 61 int EncodeChunkId(const int chunk, const int list_id) { 62 DCHECK_NE(list_id, safe_browsing_util::INVALID); 63 return chunk << 1 | list_id % 2; 64 } 65 66 // Generate the set of full hashes to check for |url|. If 67 // |include_whitelist_hashes| is true we will generate additional path-prefixes 68 // to match against the csd whitelist. E.g., if the path-prefix /foo is on the 69 // whitelist it should also match /foo/bar which is not the case for all the 70 // other lists. 71 // TODO(shess): This function is almost the same as 72 // |CompareFullHashes()| in safe_browsing_util.cc, except that code 73 // does an early exit on match. Since match should be the infrequent 74 // case (phishing or malware found), consider combining this function 75 // with that one. 76 void BrowseFullHashesToCheck(const GURL& url, 77 bool include_whitelist_hashes, 78 std::vector<SBFullHash>* full_hashes) { 79 std::vector<std::string> hosts; 80 if (url.HostIsIPAddress()) { 81 hosts.push_back(url.host()); 82 } else { 83 safe_browsing_util::GenerateHostsToCheck(url, &hosts); 84 } 85 86 std::vector<std::string> paths; 87 safe_browsing_util::GeneratePathsToCheck(url, &paths); 88 89 for (size_t i = 0; i < hosts.size(); ++i) { 90 for (size_t j = 0; j < paths.size(); ++j) { 91 const std::string& path = paths[j]; 92 SBFullHash full_hash; 93 crypto::SHA256HashString(hosts[i] + path, &full_hash, 94 sizeof(full_hash)); 95 full_hashes->push_back(full_hash); 96 97 // We may have /foo as path-prefix in the whitelist which should 98 // also match with /foo/bar and /foo?bar. Hence, for every path 99 // that ends in '/' we also add the path without the slash. 100 if (include_whitelist_hashes && 101 path.size() > 1 && 102 path[path.size() - 1] == '/') { 103 crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1), 104 &full_hash, sizeof(full_hash)); 105 full_hashes->push_back(full_hash); 106 } 107 } 108 } 109 } 110 111 // Get the prefixes matching the download |urls|. 112 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls, 113 std::vector<SBPrefix>* prefixes) { 114 std::vector<SBFullHash> full_hashes; 115 for (size_t i = 0; i < urls.size(); ++i) 116 BrowseFullHashesToCheck(urls[i], false, &full_hashes); 117 118 for (size_t i = 0; i < full_hashes.size(); ++i) 119 prefixes->push_back(full_hashes[i].prefix); 120 } 121 122 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and 123 // add them to |full_hits| if not expired. "Not expired" is when 124 // either |last_update| was recent enough, or the item has been 125 // received recently enough. Expired items are not deleted because a 126 // future update may make them acceptable again. 127 // 128 // For efficiency reasons the code walks |prefix_hits| and 129 // |full_hashes| in parallel, so they must be sorted by prefix. 130 void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits, 131 const std::vector<SBAddFullHash>& full_hashes, 132 std::vector<SBFullHashResult>* full_hits, 133 base::Time last_update) { 134 const base::Time expire_time = 135 base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes); 136 137 std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin(); 138 std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin(); 139 140 while (piter != prefix_hits.end() && hiter != full_hashes.end()) { 141 if (*piter < hiter->full_hash.prefix) { 142 ++piter; 143 } else if (hiter->full_hash.prefix < *piter) { 144 ++hiter; 145 } else { 146 if (expire_time < last_update || 147 expire_time.ToTimeT() < hiter->received) { 148 SBFullHashResult result; 149 const int list_bit = GetListIdBit(hiter->chunk_id); 150 DCHECK(list_bit == safe_browsing_util::MALWARE || 151 list_bit == safe_browsing_util::PHISH); 152 if (!safe_browsing_util::GetListName(list_bit, &result.list_name)) 153 continue; 154 result.add_chunk_id = DecodeChunkId(hiter->chunk_id); 155 result.hash = hiter->full_hash; 156 full_hits->push_back(result); 157 } 158 159 // Only increment |hiter|, |piter| might have multiple hits. 160 ++hiter; 161 } 162 } 163 } 164 165 // This function generates a chunk range string for |chunks|. It 166 // outputs one chunk range string per list and writes it to the 167 // |list_ranges| vector. We expect |list_ranges| to already be of the 168 // right size. E.g., if |chunks| contains chunks with two different 169 // list ids then |list_ranges| must contain two elements. 170 void GetChunkRanges(const std::vector<int>& chunks, 171 std::vector<std::string>* list_ranges) { 172 DCHECK_GT(list_ranges->size(), 0U); 173 DCHECK_LE(list_ranges->size(), 2U); 174 std::vector<std::vector<int> > decoded_chunks(list_ranges->size()); 175 for (std::vector<int>::const_iterator iter = chunks.begin(); 176 iter != chunks.end(); ++iter) { 177 int mod_list_id = GetListIdBit(*iter); 178 DCHECK_GE(mod_list_id, 0); 179 DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size()); 180 decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter)); 181 } 182 for (size_t i = 0; i < decoded_chunks.size(); ++i) { 183 ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i])); 184 } 185 } 186 187 // Helper function to create chunk range lists for Browse related 188 // lists. 189 void UpdateChunkRanges(SafeBrowsingStore* store, 190 const std::vector<std::string>& listnames, 191 std::vector<SBListChunkRanges>* lists) { 192 DCHECK_GT(listnames.size(), 0U); 193 DCHECK_LE(listnames.size(), 2U); 194 std::vector<int> add_chunks; 195 std::vector<int> sub_chunks; 196 store->GetAddChunks(&add_chunks); 197 store->GetSubChunks(&sub_chunks); 198 199 std::vector<std::string> adds(listnames.size()); 200 std::vector<std::string> subs(listnames.size()); 201 GetChunkRanges(add_chunks, &adds); 202 GetChunkRanges(sub_chunks, &subs); 203 204 for (size_t i = 0; i < listnames.size(); ++i) { 205 const std::string& listname = listnames[i]; 206 DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2, 207 static_cast<int>(i % 2)); 208 DCHECK_NE(safe_browsing_util::GetListId(listname), 209 safe_browsing_util::INVALID); 210 lists->push_back(SBListChunkRanges(listname)); 211 lists->back().adds.swap(adds[i]); 212 lists->back().subs.swap(subs[i]); 213 } 214 } 215 216 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from 217 // safe_browsing_store.h orders on both chunk-id and prefix. 218 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { 219 return a.full_hash.prefix < b.full_hash.prefix; 220 } 221 222 // As compared to the bloom filter, PrefixSet should have these 223 // properties: 224 // - Any bloom filter miss should be a prefix set miss. 225 // - Any prefix set hit should be a bloom filter hit. 226 // - Bloom filter false positives are prefix set misses. 227 // The following is to log actual performance to verify this. 228 enum PrefixSetEvent { 229 PREFIX_SET_EVENT_HIT, 230 PREFIX_SET_EVENT_BLOOM_HIT, 231 PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT, 232 PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID, 233 PREFIX_SET_GETPREFIXES_BROKEN, 234 PREFIX_SET_GETPREFIXES_BROKEN_SIZE, 235 PREFIX_SET_GETPREFIXES_FIRST_BROKEN, 236 PREFIX_SET_SBPREFIX_WAS_BROKEN, 237 PREFIX_SET_GETPREFIXES_BROKEN_SORTING, 238 PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION, 239 PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA, 240 PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX, 241 PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH, 242 243 // Memory space for histograms is determined by the max. ALWAYS ADD 244 // NEW VALUES BEFORE THIS ONE. 245 PREFIX_SET_EVENT_MAX 246 }; 247 248 void RecordPrefixSetInfo(PrefixSetEvent event_type) { 249 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type, 250 PREFIX_SET_EVENT_MAX); 251 } 252 253 // Generate a |PrefixSet| instance from the contents of 254 // |add_prefixes|. Additionally performs various checks to make sure 255 // that the resulting prefix set is valid, so that the 256 // PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in 257 // ContainsBrowseUrl() can be trustworthy. 258 safe_browsing::PrefixSet* PrefixSetFromAddPrefixes( 259 const std::vector<SBAddPrefix>& add_prefixes) { 260 // TODO(shess): If |add_prefixes| were sorted by the prefix, it 261 // could be passed directly to |PrefixSet()|, removing the need for 262 // |prefixes|. For now, |prefixes| is useful while debugging 263 // things. 264 std::vector<SBPrefix> prefixes; 265 for (size_t i = 0; i < add_prefixes.size(); ++i) { 266 prefixes.push_back(add_prefixes[i].prefix); 267 } 268 269 std::sort(prefixes.begin(), prefixes.end()); 270 prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), 271 prefixes.end()); 272 273 scoped_ptr<safe_browsing::PrefixSet> 274 prefix_set(new safe_browsing::PrefixSet(prefixes)); 275 276 std::vector<SBPrefix> restored; 277 prefix_set->GetPrefixes(&restored); 278 279 // Expect them to be equal. 280 if (restored.size() == prefixes.size() && 281 std::equal(prefixes.begin(), prefixes.end(), restored.begin())) 282 return prefix_set.release(); 283 284 // Log BROKEN for continuity with previous release, and SIZE to 285 // distinguish which test failed. 286 NOTREACHED(); 287 RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN); 288 if (restored.size() != prefixes.size()) 289 RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE); 290 291 // Try to distinguish between updates from one broken user and a 292 // distributed problem. 293 static bool logged_broken = false; 294 if (!logged_broken) { 295 RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN); 296 logged_broken = true; 297 } 298 299 // This seems so very very unlikely. But if it ever were true, then 300 // it could explain why GetPrefixes() seemed broken. 301 if (sizeof(int) != sizeof(int32)) 302 RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN); 303 304 // Check if memory was corrupted during construction. 305 if (!prefix_set->CheckChecksum()) 306 RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH); 307 308 // Check whether |restored| is unsorted, or has duplication. 309 if (restored.size()) { 310 size_t unsorted_count = 0; 311 bool duplicates = false; 312 SBPrefix prev = restored[0]; 313 for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) { 314 if (prev > restored[i]) { 315 unsorted_count++; 316 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference", 317 prev - restored[i]); 318 319 // When unsorted, how big is the set, and how far are we into 320 // it. If the set is very small or large, that might inform 321 // pursuit of a degenerate case. If the percentage is close 322 // to 0%, 100%, or 50%, then there might be an interesting 323 // degenerate case to explore. 324 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size()); 325 UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent", 326 i * 100 / restored.size()); 327 328 if (prefix_set->IsDeltaAt(i)) { 329 RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA); 330 331 // Histograms require memory on the order of the number of 332 // buckets, making high-precision logging expensive. For 333 // now aim for a sense of the range of the problem. 334 UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta", 335 prefix_set->DeltaAt(i), 1, 0xFFFF, 50); 336 } else { 337 RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX); 338 } 339 } 340 if (prev == restored[i]) 341 duplicates = true; 342 } 343 344 // Record findings. 345 if (unsorted_count) { 346 RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING); 347 UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count); 348 } 349 if (duplicates) 350 RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION); 351 352 // Fix the problems noted. If |restored| was unsorted, then 353 // |duplicates| may give a false negative. 354 if (unsorted_count) 355 std::sort(restored.begin(), restored.end()); 356 if (unsorted_count || duplicates) 357 restored.erase(std::unique(restored.begin(), restored.end()), 358 restored.end()); 359 } 360 361 // NOTE(shess): The following could be done using a single 362 // uber-loop, but it's complicated by needing multiple parallel 363 // iterators. Didn't seem worthwhile for something that will only 364 // live for a short period and only fires for one in a million 365 // updates. 366 367 // Find elements in |restored| which are not in |prefixes|. 368 std::vector<SBPrefix> difference; 369 std::set_difference(restored.begin(), restored.end(), 370 prefixes.begin(), prefixes.end(), 371 std::back_inserter(difference)); 372 if (difference.size()) 373 UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size()); 374 375 // Find elements in |prefixes| which are not in |restored|. 376 difference.clear(); 377 std::set_difference(prefixes.begin(), prefixes.end(), 378 restored.begin(), restored.end(), 379 std::back_inserter(difference)); 380 if (difference.size()) 381 UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall", 382 difference.size()); 383 384 return prefix_set.release(); 385 } 386 387 } // namespace 388 389 // The default SafeBrowsingDatabaseFactory. 390 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 391 public: 392 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 393 bool enable_download_protection, 394 bool enable_client_side_whitelist) { 395 return new SafeBrowsingDatabaseNew( 396 new SafeBrowsingStoreFile, 397 enable_download_protection ? new SafeBrowsingStoreFile : NULL, 398 enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL); 399 } 400 401 SafeBrowsingDatabaseFactoryImpl() { } 402 403 private: 404 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl); 405 }; 406 407 // static 408 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; 409 410 // Factory method, non-thread safe. Caller has to make sure this s called 411 // on SafeBrowsing Thread. 412 // TODO(shess): There's no need for a factory any longer. Convert 413 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create() 414 // callers just construct things directly. 415 SafeBrowsingDatabase* SafeBrowsingDatabase::Create( 416 bool enable_download_protection, 417 bool enable_client_side_whitelist) { 418 if (!factory_) 419 factory_ = new SafeBrowsingDatabaseFactoryImpl(); 420 return factory_->CreateSafeBrowsingDatabase(enable_download_protection, 421 enable_client_side_whitelist); 422 } 423 424 SafeBrowsingDatabase::~SafeBrowsingDatabase() { 425 } 426 427 // static 428 FilePath SafeBrowsingDatabase::BrowseDBFilename( 429 const FilePath& db_base_filename) { 430 return FilePath(db_base_filename.value() + kBrowseDBFile); 431 } 432 433 // static 434 FilePath SafeBrowsingDatabase::DownloadDBFilename( 435 const FilePath& db_base_filename) { 436 return FilePath(db_base_filename.value() + kDownloadDBFile); 437 } 438 439 // static 440 FilePath SafeBrowsingDatabase::BloomFilterForFilename( 441 const FilePath& db_filename) { 442 return FilePath(db_filename.value() + kBloomFilterFile); 443 } 444 445 // static 446 FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( 447 const FilePath& db_filename) { 448 return FilePath(db_filename.value() + kCsdWhitelistDBFile); 449 } 450 451 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { 452 DVLOG(3) << "Get store for list: " << list_id; 453 if (list_id == safe_browsing_util::PHISH || 454 list_id == safe_browsing_util::MALWARE) { 455 return browse_store_.get(); 456 } else if (list_id == safe_browsing_util::BINURL || 457 list_id == safe_browsing_util::BINHASH) { 458 return download_store_.get(); 459 } else if (list_id == safe_browsing_util::CSDWHITELIST) { 460 return csd_whitelist_store_.get(); 461 } 462 return NULL; 463 } 464 465 // static 466 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) { 467 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type, 468 FAILURE_DATABASE_MAX); 469 } 470 471 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() 472 : creation_loop_(MessageLoop::current()), 473 browse_store_(new SafeBrowsingStoreFile), 474 download_store_(NULL), 475 csd_whitelist_store_(NULL), 476 ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) { 477 DCHECK(browse_store_.get()); 478 DCHECK(!download_store_.get()); 479 DCHECK(!csd_whitelist_store_.get()); 480 } 481 482 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( 483 SafeBrowsingStore* browse_store, 484 SafeBrowsingStore* download_store, 485 SafeBrowsingStore* csd_whitelist_store) 486 : creation_loop_(MessageLoop::current()), 487 browse_store_(browse_store), 488 download_store_(download_store), 489 csd_whitelist_store_(csd_whitelist_store), 490 ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)), 491 corruption_detected_(false) { 492 DCHECK(browse_store_.get()); 493 } 494 495 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { 496 DCHECK_EQ(creation_loop_, MessageLoop::current()); 497 } 498 499 void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) { 500 DCHECK_EQ(creation_loop_, MessageLoop::current()); 501 // Ensure we haven't been run before. 502 DCHECK(browse_filename_.empty()); 503 DCHECK(download_filename_.empty()); 504 DCHECK(csd_whitelist_filename_.empty()); 505 506 browse_filename_ = BrowseDBFilename(filename_base); 507 bloom_filter_filename_ = BloomFilterForFilename(browse_filename_); 508 509 browse_store_->Init( 510 browse_filename_, 511 NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); 512 DVLOG(1) << "Init browse store: " << browse_filename_.value(); 513 514 { 515 // NOTE: There is no need to grab the lock in this function, since 516 // until it returns, there are no pointers to this class on other 517 // threads. Then again, that means there is no possibility of 518 // contention on the lock... 519 base::AutoLock locked(lookup_lock_); 520 full_browse_hashes_.clear(); 521 pending_browse_hashes_.clear(); 522 LoadBloomFilter(); 523 } 524 525 if (download_store_.get()) { 526 download_filename_ = DownloadDBFilename(filename_base); 527 download_store_->Init( 528 download_filename_, 529 NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); 530 DVLOG(1) << "Init download store: " << download_filename_.value(); 531 } 532 533 if (csd_whitelist_store_.get()) { 534 csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base); 535 csd_whitelist_store_->Init( 536 csd_whitelist_filename_, 537 NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase)); 538 DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value(); 539 std::vector<SBAddFullHash> full_hashes; 540 if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) { 541 LoadCsdWhitelist(full_hashes); 542 } else { 543 CsdWhitelistAllUrls(); 544 } 545 } else { 546 CsdWhitelistAllUrls(); // Just to be safe. 547 } 548 } 549 550 bool SafeBrowsingDatabaseNew::ResetDatabase() { 551 DCHECK_EQ(creation_loop_, MessageLoop::current()); 552 553 // Delete files on disk. 554 // TODO(shess): Hard to see where one might want to delete without a 555 // reset. Perhaps inline |Delete()|? 556 if (!Delete()) 557 return false; 558 559 // Reset objects in memory. 560 { 561 base::AutoLock locked(lookup_lock_); 562 full_browse_hashes_.clear(); 563 pending_browse_hashes_.clear(); 564 prefix_miss_cache_.clear(); 565 // TODO(shess): This could probably be |bloom_filter_.reset()|. 566 browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize * 567 BloomFilter::kBloomFilterSizeRatio); 568 // TODO(shess): It is simpler for the code to assume that presence 569 // of a bloom filter always implies presence of a prefix set. 570 prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>())); 571 } 572 // Wants to acquire the lock itself. 573 CsdWhitelistAllUrls(); 574 575 return true; 576 } 577 578 // TODO(lzheng): Remove matching_list, it is not used anywhere. 579 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 580 const GURL& url, 581 std::string* matching_list, 582 std::vector<SBPrefix>* prefix_hits, 583 std::vector<SBFullHashResult>* full_hits, 584 base::Time last_update) { 585 // Clear the results first. 586 matching_list->clear(); 587 prefix_hits->clear(); 588 full_hits->clear(); 589 590 std::vector<SBFullHash> full_hashes; 591 BrowseFullHashesToCheck(url, false, &full_hashes); 592 if (full_hashes.empty()) 593 return false; 594 595 // This function is called on the I/O thread, prevent changes to 596 // bloom filter and caches. 597 base::AutoLock locked(lookup_lock_); 598 599 if (!browse_bloom_filter_.get()) 600 return false; 601 DCHECK(prefix_set_.get()); 602 603 // Used to double-check in case of a hit mis-match. 604 std::vector<SBPrefix> restored; 605 606 size_t miss_count = 0; 607 for (size_t i = 0; i < full_hashes.size(); ++i) { 608 bool found = prefix_set_->Exists(full_hashes[i].prefix); 609 610 if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) { 611 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT); 612 if (found) 613 RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT); 614 prefix_hits->push_back(full_hashes[i].prefix); 615 if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0) 616 ++miss_count; 617 } else { 618 // Bloom filter misses should never be in prefix set. Re-create 619 // the original prefixes and manually search for it, to check if 620 // there's a bug with how |Exists()| is implemented. 621 // |UpdateBrowseStore()| previously verified that 622 // |GetPrefixes()| returns the same prefixes as were passed to 623 // the constructor. 624 DCHECK(!found); 625 if (found) { 626 if (restored.empty()) 627 prefix_set_->GetPrefixes(&restored); 628 629 // If the item is not in the re-created list, then there is an 630 // error in |PrefixSet::Exists()|. If the item is in the 631 // re-created list, then the bloom filter was wrong. 632 if (std::binary_search(restored.begin(), restored.end(), 633 full_hashes[i].prefix)) { 634 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT); 635 } else { 636 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID); 637 } 638 } 639 } 640 } 641 642 // If all the prefixes are cached as 'misses', don't issue a GetHash. 643 if (miss_count == prefix_hits->size()) 644 return false; 645 646 // Find the matching full-hash results. |full_browse_hashes_| are from the 647 // database, |pending_browse_hashes_| are from GetHash requests between 648 // updates. 649 std::sort(prefix_hits->begin(), prefix_hits->end()); 650 651 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_, 652 full_hits, last_update); 653 GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_, 654 full_hits, last_update); 655 return true; 656 } 657 658 bool SafeBrowsingDatabaseNew::MatchDownloadAddPrefixes( 659 int list_bit, 660 const std::vector<SBPrefix>& prefixes, 661 std::vector<SBPrefix>* prefix_hits) { 662 prefix_hits->clear(); 663 664 std::vector<SBAddPrefix> add_prefixes; 665 download_store_->GetAddPrefixes(&add_prefixes); 666 for (size_t i = 0; i < add_prefixes.size(); ++i) { 667 for (size_t j = 0; j < prefixes.size(); ++j) { 668 const SBPrefix& prefix = prefixes[j]; 669 if (prefix == add_prefixes[i].prefix && 670 GetListIdBit(add_prefixes[i].chunk_id) == list_bit) { 671 prefix_hits->push_back(prefix); 672 } 673 } 674 } 675 return !prefix_hits->empty(); 676 } 677 678 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( 679 const std::vector<GURL>& urls, 680 std::vector<SBPrefix>* prefix_hits) { 681 DCHECK_EQ(creation_loop_, MessageLoop::current()); 682 683 // Ignore this check when download checking is not enabled. 684 if (!download_store_.get()) 685 return false; 686 687 std::vector<SBPrefix> prefixes; 688 GetDownloadUrlPrefixes(urls, &prefixes); 689 return MatchDownloadAddPrefixes(safe_browsing_util::BINURL % 2, 690 prefixes, 691 prefix_hits); 692 } 693 694 bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix( 695 const SBPrefix& prefix) { 696 DCHECK_EQ(creation_loop_, MessageLoop::current()); 697 698 // Ignore this check when download store is not available. 699 if (!download_store_.get()) 700 return false; 701 702 std::vector<SBPrefix> prefixes(1, prefix); 703 std::vector<SBPrefix> prefix_hits; 704 return MatchDownloadAddPrefixes(safe_browsing_util::BINHASH % 2, 705 prefixes, 706 &prefix_hits); 707 } 708 709 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) { 710 // This method is theoretically thread-safe but we expect all calls to 711 // originate from the IO thread. 712 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 713 base::AutoLock l(lookup_lock_); 714 if (csd_whitelist_all_urls_) 715 return true; 716 717 std::vector<SBFullHash> full_hashes; 718 BrowseFullHashesToCheck(url, true, &full_hashes); 719 for (std::vector<SBFullHash>::const_iterator it = full_hashes.begin(); 720 it != full_hashes.end(); ++it) { 721 if (std::binary_search(csd_whitelist_.begin(), csd_whitelist_.end(), *it)) 722 return true; 723 } 724 return false; 725 } 726 727 // Helper to insert entries for all of the prefixes or full hashes in 728 // |entry| into the store. 729 void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host, 730 const SBEntry* entry, int list_id) { 731 DCHECK_EQ(creation_loop_, MessageLoop::current()); 732 733 SafeBrowsingStore* store = GetStore(list_id); 734 if (!store) return; 735 736 STATS_COUNTER("SB.HostInsert", 1); 737 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 738 const int count = entry->prefix_count(); 739 740 DCHECK(!entry->IsSub()); 741 if (!count) { 742 // No prefixes, use host instead. 743 STATS_COUNTER("SB.PrefixAdd", 1); 744 store->WriteAddPrefix(encoded_chunk_id, host); 745 } else if (entry->IsPrefix()) { 746 // Prefixes only. 747 for (int i = 0; i < count; i++) { 748 const SBPrefix prefix = entry->PrefixAt(i); 749 STATS_COUNTER("SB.PrefixAdd", 1); 750 store->WriteAddPrefix(encoded_chunk_id, prefix); 751 } 752 } else { 753 // Prefixes and hashes. 754 const base::Time receive_time = base::Time::Now(); 755 for (int i = 0; i < count; ++i) { 756 const SBFullHash full_hash = entry->FullHashAt(i); 757 const SBPrefix prefix = full_hash.prefix; 758 759 STATS_COUNTER("SB.PrefixAdd", 1); 760 store->WriteAddPrefix(encoded_chunk_id, prefix); 761 762 STATS_COUNTER("SB.PrefixAddFull", 1); 763 store->WriteAddHash(encoded_chunk_id, receive_time, full_hash); 764 } 765 } 766 } 767 768 // Helper to iterate over all the entries in the hosts in |chunks| and 769 // add them to the store. 770 void SafeBrowsingDatabaseNew::InsertAddChunks(const int list_id, 771 const SBChunkList& chunks) { 772 DCHECK_EQ(creation_loop_, MessageLoop::current()); 773 774 SafeBrowsingStore* store = GetStore(list_id); 775 if (!store) return; 776 777 for (SBChunkList::const_iterator citer = chunks.begin(); 778 citer != chunks.end(); ++citer) { 779 const int chunk_id = citer->chunk_number; 780 781 // The server can give us a chunk that we already have because 782 // it's part of a range. Don't add it again. 783 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 784 if (store->CheckAddChunk(encoded_chunk_id)) 785 continue; 786 787 store->SetAddChunk(encoded_chunk_id); 788 for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin(); 789 hiter != citer->hosts.end(); ++hiter) { 790 // NOTE: Could pass |encoded_chunk_id|, but then inserting add 791 // chunks would look different from inserting sub chunks. 792 InsertAdd(chunk_id, hiter->host, hiter->entry, list_id); 793 } 794 } 795 } 796 797 // Helper to insert entries for all of the prefixes or full hashes in 798 // |entry| into the store. 799 void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host, 800 const SBEntry* entry, int list_id) { 801 DCHECK_EQ(creation_loop_, MessageLoop::current()); 802 803 SafeBrowsingStore* store = GetStore(list_id); 804 if (!store) return; 805 806 STATS_COUNTER("SB.HostDelete", 1); 807 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 808 const int count = entry->prefix_count(); 809 810 DCHECK(entry->IsSub()); 811 if (!count) { 812 // No prefixes, use host instead. 813 STATS_COUNTER("SB.PrefixSub", 1); 814 const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id); 815 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host); 816 } else if (entry->IsPrefix()) { 817 // Prefixes only. 818 for (int i = 0; i < count; i++) { 819 const SBPrefix prefix = entry->PrefixAt(i); 820 const int add_chunk_id = 821 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); 822 823 STATS_COUNTER("SB.PrefixSub", 1); 824 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix); 825 } 826 } else { 827 // Prefixes and hashes. 828 for (int i = 0; i < count; ++i) { 829 const SBFullHash full_hash = entry->FullHashAt(i); 830 const int add_chunk_id = 831 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); 832 833 STATS_COUNTER("SB.PrefixSub", 1); 834 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix); 835 836 STATS_COUNTER("SB.PrefixSubFull", 1); 837 store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash); 838 } 839 } 840 } 841 842 // Helper to iterate over all the entries in the hosts in |chunks| and 843 // add them to the store. 844 void SafeBrowsingDatabaseNew::InsertSubChunks(int list_id, 845 const SBChunkList& chunks) { 846 DCHECK_EQ(creation_loop_, MessageLoop::current()); 847 848 SafeBrowsingStore* store = GetStore(list_id); 849 if (!store) return; 850 851 for (SBChunkList::const_iterator citer = chunks.begin(); 852 citer != chunks.end(); ++citer) { 853 const int chunk_id = citer->chunk_number; 854 855 // The server can give us a chunk that we already have because 856 // it's part of a range. Don't add it again. 857 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 858 if (store->CheckSubChunk(encoded_chunk_id)) 859 continue; 860 861 store->SetSubChunk(encoded_chunk_id); 862 for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin(); 863 hiter != citer->hosts.end(); ++hiter) { 864 InsertSub(chunk_id, hiter->host, hiter->entry, list_id); 865 } 866 } 867 } 868 869 void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name, 870 const SBChunkList& chunks) { 871 DCHECK_EQ(creation_loop_, MessageLoop::current()); 872 873 if (corruption_detected_ || chunks.empty()) 874 return; 875 876 const base::Time insert_start = base::Time::Now(); 877 878 const int list_id = safe_browsing_util::GetListId(list_name); 879 DVLOG(2) << list_name << ": " << list_id; 880 881 SafeBrowsingStore* store = GetStore(list_id); 882 if (!store) return; 883 884 change_detected_ = true; 885 886 store->BeginChunk(); 887 if (chunks.front().is_add) { 888 InsertAddChunks(list_id, chunks); 889 } else { 890 InsertSubChunks(list_id, chunks); 891 } 892 store->FinishChunk(); 893 894 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start); 895 } 896 897 void SafeBrowsingDatabaseNew::DeleteChunks( 898 const std::vector<SBChunkDelete>& chunk_deletes) { 899 DCHECK_EQ(creation_loop_, MessageLoop::current()); 900 901 if (corruption_detected_ || chunk_deletes.empty()) 902 return; 903 904 const std::string& list_name = chunk_deletes.front().list_name; 905 const int list_id = safe_browsing_util::GetListId(list_name); 906 907 SafeBrowsingStore* store = GetStore(list_id); 908 if (!store) return; 909 910 change_detected_ = true; 911 912 for (size_t i = 0; i < chunk_deletes.size(); ++i) { 913 std::vector<int> chunk_numbers; 914 RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers); 915 for (size_t j = 0; j < chunk_numbers.size(); ++j) { 916 const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id); 917 if (chunk_deletes[i].is_sub_del) 918 store->DeleteSubChunk(encoded_chunk_id); 919 else 920 store->DeleteAddChunk(encoded_chunk_id); 921 } 922 } 923 } 924 925 void SafeBrowsingDatabaseNew::CacheHashResults( 926 const std::vector<SBPrefix>& prefixes, 927 const std::vector<SBFullHashResult>& full_hits) { 928 // This is called on the I/O thread, lock against updates. 929 base::AutoLock locked(lookup_lock_); 930 931 if (full_hits.empty()) { 932 prefix_miss_cache_.insert(prefixes.begin(), prefixes.end()); 933 return; 934 } 935 936 // TODO(shess): SBFullHashResult and SBAddFullHash are very similar. 937 // Refactor to make them identical. 938 const base::Time now = base::Time::Now(); 939 const size_t orig_size = pending_browse_hashes_.size(); 940 for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin(); 941 iter != full_hits.end(); ++iter) { 942 const int list_id = safe_browsing_util::GetListId(iter->list_name); 943 if (list_id == safe_browsing_util::MALWARE || 944 list_id == safe_browsing_util::PHISH) { 945 int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id); 946 SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash); 947 pending_browse_hashes_.push_back(add_full_hash); 948 } 949 } 950 951 // Sort new entries then merge with the previously-sorted entries. 952 std::vector<SBAddFullHash>::iterator 953 orig_end = pending_browse_hashes_.begin() + orig_size; 954 std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess); 955 std::inplace_merge(pending_browse_hashes_.begin(), 956 orig_end, pending_browse_hashes_.end(), 957 SBAddFullHashPrefixLess); 958 } 959 960 bool SafeBrowsingDatabaseNew::UpdateStarted( 961 std::vector<SBListChunkRanges>* lists) { 962 DCHECK_EQ(creation_loop_, MessageLoop::current()); 963 DCHECK(lists); 964 965 // If |BeginUpdate()| fails, reset the database. 966 if (!browse_store_->BeginUpdate()) { 967 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); 968 HandleCorruptDatabase(); 969 return false; 970 } 971 972 if (download_store_.get() && !download_store_->BeginUpdate()) { 973 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN); 974 HandleCorruptDatabase(); 975 return false; 976 } 977 978 if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) { 979 RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN); 980 HandleCorruptDatabase(); 981 return false; 982 } 983 984 std::vector<std::string> browse_listnames; 985 browse_listnames.push_back(safe_browsing_util::kMalwareList); 986 browse_listnames.push_back(safe_browsing_util::kPhishingList); 987 UpdateChunkRanges(browse_store_.get(), browse_listnames, lists); 988 989 if (download_store_.get()) { 990 std::vector<std::string> download_listnames; 991 download_listnames.push_back(safe_browsing_util::kBinUrlList); 992 download_listnames.push_back(safe_browsing_util::kBinHashList); 993 UpdateChunkRanges(download_store_.get(), download_listnames, lists); 994 } 995 996 if (csd_whitelist_store_.get()) { 997 std::vector<std::string> csd_whitelist_listnames; 998 csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList); 999 UpdateChunkRanges(csd_whitelist_store_.get(), 1000 csd_whitelist_listnames, lists); 1001 } 1002 1003 corruption_detected_ = false; 1004 change_detected_ = false; 1005 return true; 1006 } 1007 1008 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { 1009 DCHECK_EQ(creation_loop_, MessageLoop::current()); 1010 if (corruption_detected_) 1011 return; 1012 1013 // Unroll the transaction if there was a protocol error or if the 1014 // transaction was empty. This will leave the bloom filter, the 1015 // pending hashes, and the prefix miss cache in place. 1016 if (!update_succeeded || !change_detected_) { 1017 // Track empty updates to answer questions at http://crbug.com/72216 . 1018 if (update_succeeded && !change_detected_) 1019 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0); 1020 browse_store_->CancelUpdate(); 1021 if (download_store_.get()) 1022 download_store_->CancelUpdate(); 1023 if (csd_whitelist_store_.get()) 1024 csd_whitelist_store_->CancelUpdate(); 1025 return; 1026 } 1027 1028 // for download 1029 UpdateDownloadStore(); 1030 // for browsing 1031 UpdateBrowseStore(); 1032 // for csd whitelist 1033 UpdateCsdWhitelistStore(); 1034 } 1035 1036 void SafeBrowsingDatabaseNew::UpdateCsdWhitelistStore() { 1037 if (!csd_whitelist_store_.get()) 1038 return; 1039 1040 // For the csd whitelist, we don't cache and save full hashes since all 1041 // hashes are already full. 1042 std::vector<SBAddFullHash> empty_add_hashes; 1043 1044 // Not needed for the csd whitelist. 1045 std::set<SBPrefix> empty_miss_cache; 1046 1047 // Note: prefixes will not be empty. The current data store implementation 1048 // stores all full-length hashes as both full and prefix hashes. 1049 std::vector<SBAddPrefix> prefixes; 1050 std::vector<SBAddFullHash> full_hashes; 1051 if (!csd_whitelist_store_->FinishUpdate(empty_add_hashes, 1052 empty_miss_cache, 1053 &prefixes, 1054 &full_hashes)) { 1055 RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH); 1056 CsdWhitelistAllUrls(); 1057 return; 1058 } 1059 LoadCsdWhitelist(full_hashes); 1060 } 1061 1062 void SafeBrowsingDatabaseNew::UpdateDownloadStore() { 1063 if (!download_store_.get()) 1064 return; 1065 1066 // For download, we don't cache and save full hashes. 1067 std::vector<SBAddFullHash> empty_add_hashes; 1068 1069 // For download, backend lookup happens only if a prefix is in add list. 1070 // No need to pass in miss cache when call FinishUpdate to caculate 1071 // bloomfilter false positives. 1072 std::set<SBPrefix> empty_miss_cache; 1073 1074 // These results are not used after this call. Simply ignore the 1075 // returned value after FinishUpdate(...). 1076 std::vector<SBAddPrefix> add_prefixes_result; 1077 std::vector<SBAddFullHash> add_full_hashes_result; 1078 1079 if (!download_store_->FinishUpdate(empty_add_hashes, 1080 empty_miss_cache, 1081 &add_prefixes_result, 1082 &add_full_hashes_result)) 1083 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH); 1084 return; 1085 } 1086 1087 void SafeBrowsingDatabaseNew::UpdateBrowseStore() { 1088 // Copy out the pending add hashes. Copy rather than swapping in 1089 // case |ContainsBrowseURL()| is called before the new filter is complete. 1090 std::vector<SBAddFullHash> pending_add_hashes; 1091 { 1092 base::AutoLock locked(lookup_lock_); 1093 pending_add_hashes.insert(pending_add_hashes.end(), 1094 pending_browse_hashes_.begin(), 1095 pending_browse_hashes_.end()); 1096 } 1097 1098 // Measure the amount of IO during the bloom filter build. 1099 base::IoCounters io_before, io_after; 1100 base::ProcessHandle handle = base::Process::Current().handle(); 1101 scoped_ptr<base::ProcessMetrics> metric( 1102 #if !defined(OS_MACOSX) 1103 base::ProcessMetrics::CreateProcessMetrics(handle) 1104 #else 1105 // Getting stats only for the current process is enough, so NULL is fine. 1106 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) 1107 #endif 1108 ); 1109 1110 // IoCounters are currently not supported on Mac, and may not be 1111 // available for Linux, so we check the result and only show IO 1112 // stats if they are available. 1113 const bool got_counters = metric->GetIOCounters(&io_before); 1114 1115 const base::Time before = base::Time::Now(); 1116 1117 std::vector<SBAddPrefix> add_prefixes; 1118 std::vector<SBAddFullHash> add_full_hashes; 1119 if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_, 1120 &add_prefixes, &add_full_hashes)) { 1121 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); 1122 return; 1123 } 1124 1125 // Create and populate |filter| from |add_prefixes|. 1126 // TODO(shess): The bloom filter doesn't need to be a 1127 // scoped_refptr<> for this code. Refactor that away. 1128 const int filter_size = 1129 BloomFilter::FilterSizeForKeyCount(add_prefixes.size()); 1130 scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size)); 1131 for (size_t i = 0; i < add_prefixes.size(); ++i) { 1132 filter->Insert(add_prefixes[i].prefix); 1133 } 1134 1135 scoped_ptr<safe_browsing::PrefixSet> 1136 prefix_set(PrefixSetFromAddPrefixes(add_prefixes)); 1137 1138 // This needs to be in sorted order by prefix for efficient access. 1139 std::sort(add_full_hashes.begin(), add_full_hashes.end(), 1140 SBAddFullHashPrefixLess); 1141 1142 // Swap in the newly built filter and cache. 1143 { 1144 base::AutoLock locked(lookup_lock_); 1145 full_browse_hashes_.swap(add_full_hashes); 1146 1147 // TODO(shess): If |CacheHashResults()| is posted between the 1148 // earlier lock and this clear, those pending hashes will be lost. 1149 // It could be fixed by only removing hashes which were collected 1150 // at the earlier point. I believe that is fail-safe as-is (the 1151 // hash will be fetched again). 1152 pending_browse_hashes_.clear(); 1153 prefix_miss_cache_.clear(); 1154 browse_bloom_filter_.swap(filter); 1155 prefix_set_.swap(prefix_set); 1156 } 1157 1158 const base::TimeDelta bloom_gen = base::Time::Now() - before; 1159 1160 // Persist the bloom filter to disk. Since only this thread changes 1161 // |browse_bloom_filter_|, there is no need to lock. 1162 WriteBloomFilter(); 1163 1164 // Gather statistics. 1165 if (got_counters && metric->GetIOCounters(&io_after)) { 1166 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes", 1167 static_cast<int>(io_after.ReadTransferCount - 1168 io_before.ReadTransferCount) / 1024); 1169 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes", 1170 static_cast<int>(io_after.WriteTransferCount - 1171 io_before.WriteTransferCount) / 1024); 1172 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", 1173 static_cast<int>(io_after.ReadOperationCount - 1174 io_before.ReadOperationCount)); 1175 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", 1176 static_cast<int>(io_after.WriteOperationCount - 1177 io_before.WriteOperationCount)); 1178 } 1179 DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in " 1180 << bloom_gen.InMilliseconds() << " ms total. prefix count: " 1181 << add_prefixes.size(); 1182 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen); 1183 UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes", 1184 browse_bloom_filter_->size() / 1024); 1185 int64 size_64; 1186 if (file_util::GetFileSize(browse_filename_, &size_64)) 1187 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", 1188 static_cast<int>(size_64 / 1024)); 1189 if (file_util::GetFileSize(download_filename_, &size_64)) 1190 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", 1191 static_cast<int>(size_64 / 1024)); 1192 } 1193 1194 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() { 1195 // Reset the database after the current task has unwound (but only 1196 // reset once within the scope of a given task). 1197 if (reset_factory_.empty()) { 1198 RecordFailure(FAILURE_DATABASE_CORRUPT); 1199 MessageLoop::current()->PostTask(FROM_HERE, 1200 reset_factory_.NewRunnableMethod( 1201 &SafeBrowsingDatabaseNew::OnHandleCorruptDatabase)); 1202 } 1203 } 1204 1205 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() { 1206 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER); 1207 corruption_detected_ = true; // Stop updating the database. 1208 ResetDatabase(); 1209 DCHECK(false) << "SafeBrowsing database was corrupt and reset"; 1210 } 1211 1212 // TODO(shess): I'm not clear why this code doesn't have any 1213 // real error-handling. 1214 void SafeBrowsingDatabaseNew::LoadBloomFilter() { 1215 DCHECK_EQ(creation_loop_, MessageLoop::current()); 1216 DCHECK(!bloom_filter_filename_.empty()); 1217 1218 // If we're missing either of the database or filter files, we wait until the 1219 // next update to generate a new filter. 1220 // TODO(paulg): Investigate how often the filter file is missing and how 1221 // expensive it would be to regenerate it. 1222 int64 size_64; 1223 if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0) 1224 return; 1225 1226 if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) || 1227 size_64 == 0) { 1228 RecordFailure(FAILURE_DATABASE_FILTER_MISSING); 1229 return; 1230 } 1231 1232 const base::TimeTicks before = base::TimeTicks::Now(); 1233 browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_); 1234 DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in " 1235 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 1236 1237 if (!browse_bloom_filter_.get()) 1238 RecordFailure(FAILURE_DATABASE_FILTER_READ); 1239 1240 // Manually re-generate the prefix set from the main database. 1241 // TODO(shess): Write/read for prefix set. 1242 std::vector<SBAddPrefix> add_prefixes; 1243 browse_store_->GetAddPrefixes(&add_prefixes); 1244 prefix_set_.reset(PrefixSetFromAddPrefixes(add_prefixes)); 1245 } 1246 1247 bool SafeBrowsingDatabaseNew::Delete() { 1248 DCHECK_EQ(creation_loop_, MessageLoop::current()); 1249 1250 const bool r1 = browse_store_->Delete(); 1251 if (!r1) 1252 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1253 1254 const bool r2 = download_store_.get() ? download_store_->Delete() : true; 1255 if (!r2) 1256 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1257 1258 const bool r3 = csd_whitelist_store_.get() ? 1259 csd_whitelist_store_->Delete() : true; 1260 if (!r3) 1261 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1262 1263 const bool r4 = file_util::Delete(bloom_filter_filename_, false); 1264 if (!r4) 1265 RecordFailure(FAILURE_DATABASE_FILTER_DELETE); 1266 return r1 && r2 && r3 && r4; 1267 } 1268 1269 void SafeBrowsingDatabaseNew::WriteBloomFilter() { 1270 DCHECK_EQ(creation_loop_, MessageLoop::current()); 1271 1272 if (!browse_bloom_filter_.get()) 1273 return; 1274 1275 const base::TimeTicks before = base::TimeTicks::Now(); 1276 const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_); 1277 DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in " 1278 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 1279 1280 if (!write_ok) 1281 RecordFailure(FAILURE_DATABASE_FILTER_WRITE); 1282 } 1283 1284 void SafeBrowsingDatabaseNew::CsdWhitelistAllUrls() { 1285 base::AutoLock locked(lookup_lock_); 1286 csd_whitelist_all_urls_ = true; 1287 csd_whitelist_.clear(); 1288 } 1289 1290 void SafeBrowsingDatabaseNew::LoadCsdWhitelist( 1291 const std::vector<SBAddFullHash>& full_hashes) { 1292 DCHECK_EQ(creation_loop_, MessageLoop::current()); 1293 if (full_hashes.size() > kMaxCsdWhitelistSize) { 1294 CsdWhitelistAllUrls(); 1295 return; 1296 } 1297 1298 std::vector<SBFullHash> new_csd_whitelist; 1299 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); 1300 it != full_hashes.end(); ++it) { 1301 new_csd_whitelist.push_back(it->full_hash); 1302 } 1303 std::sort(new_csd_whitelist.begin(), new_csd_whitelist.end()); 1304 1305 SBFullHash kill_switch; 1306 crypto::SHA256HashString(kCsdKillSwitchUrl, &kill_switch, 1307 sizeof(kill_switch)); 1308 if (std::binary_search(new_csd_whitelist.begin(), new_csd_whitelist.end(), 1309 kill_switch)) { 1310 // The kill switch is whitelisted hence we whitelist all URLs. 1311 CsdWhitelistAllUrls(); 1312 } else { 1313 base::AutoLock locked(lookup_lock_); 1314 csd_whitelist_all_urls_ = false; 1315 csd_whitelist_.swap(new_csd_whitelist); 1316 } 1317 } 1318