// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/safe_browsing_database.h"

#include <algorithm>
#include <iterator>

#include "base/bind.h"
#include "base/file_util.h"
#include "base/message_loop/message_loop.h"
#include "base/metrics/histogram.h"
#include "base/metrics/stats_counters.h"
#include "base/process/process.h"
#include "base/process/process_metrics.h"
#include "base/time/time.h"
#include "chrome/browser/safe_browsing/prefix_set.h"
#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
#include "content/public/browser/browser_thread.h"
#include "crypto/sha2.h"
#include "url/gurl.h"

#if defined(OS_MACOSX)
#include "base/mac/mac_util.h"
#endif

using content::BrowserThread;

namespace {

// The suffix constants below are appended to a base filename by the
// SafeBrowsingDatabase::*Filename() helpers in this file.

// Filename suffix for the bloom filter.
const base::FilePath::CharType kBloomFilterFile[] =
    FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for the prefix set.
const base::FilePath::CharType kPrefixSetFile[] =
    FILE_PATH_LITERAL(" Prefix Set");
// Filename suffix for download store.
const base::FilePath::CharType kDownloadDBFile[] =
    FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
const base::FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for the download whitelist store.
const base::FilePath::CharType kDownloadWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Download Whitelist");
// Filename suffix for the extension blacklist store.
const base::FilePath::CharType kExtensionBlacklistDBFile[] =
    FILE_PATH_LITERAL(" Extension Blacklist");
// Filename suffix for the side-effect free whitelist store.
const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
// Filename suffix for browse store.
// TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
// Unfortunately, to change the name implies lots of transition code
// for little benefit.  If/when file formats change (say to put all
// the data in one file), that would be a convenient point to rectify
// this.
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");

// The maximum staleness for a cached entry.  Used by
// GetCachedFullHashesForBrowse() to compute the expiry cut-off relative
// to the current time.
const int kMaxStalenessMinutes = 45;

// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries than this, all lookups to
// the whitelist will be considered a match.
const size_t kMaxWhitelistSize = 5000;

// If the hash of this exact expression is on a whitelist then all
// lookups to this whitelist will be considered a match.
const char kWhitelistKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!

// If the hash of this exact expression is on a whitelist then the
// malware IP blacklisting feature will be disabled in csd.
// Don't change this!
const char kMalwareIPKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";

// To save space, the incoming |chunk_id| and |list_id| are combined
// into an |encoded_chunk_id| for storage by shifting the |list_id|
// into the low-order bits.  These functions decode that information.
// TODO(lzheng): It was reasonable when database is saved in sqlite, but
// there should be better ways to save chunk_id and list_id after we use
// SafeBrowsingStoreFile.
86 int GetListIdBit(const int encoded_chunk_id) { 87 return encoded_chunk_id & 1; 88 } 89 int DecodeChunkId(int encoded_chunk_id) { 90 return encoded_chunk_id >> 1; 91 } 92 int EncodeChunkId(const int chunk, const int list_id) { 93 DCHECK_NE(list_id, safe_browsing_util::INVALID); 94 return chunk << 1 | list_id % 2; 95 } 96 97 // Generate the set of full hashes to check for |url|. If 98 // |include_whitelist_hashes| is true we will generate additional path-prefixes 99 // to match against the csd whitelist. E.g., if the path-prefix /foo is on the 100 // whitelist it should also match /foo/bar which is not the case for all the 101 // other lists. We'll also always add a pattern for the empty path. 102 // TODO(shess): This function is almost the same as 103 // |CompareFullHashes()| in safe_browsing_util.cc, except that code 104 // does an early exit on match. Since match should be the infrequent 105 // case (phishing or malware found), consider combining this function 106 // with that one. 107 void BrowseFullHashesToCheck(const GURL& url, 108 bool include_whitelist_hashes, 109 std::vector<SBFullHash>* full_hashes) { 110 std::vector<std::string> hosts; 111 if (url.HostIsIPAddress()) { 112 hosts.push_back(url.host()); 113 } else { 114 safe_browsing_util::GenerateHostsToCheck(url, &hosts); 115 } 116 117 std::vector<std::string> paths; 118 safe_browsing_util::GeneratePathsToCheck(url, &paths); 119 120 for (size_t i = 0; i < hosts.size(); ++i) { 121 for (size_t j = 0; j < paths.size(); ++j) { 122 const std::string& path = paths[j]; 123 SBFullHash full_hash; 124 crypto::SHA256HashString(hosts[i] + path, &full_hash, 125 sizeof(full_hash)); 126 full_hashes->push_back(full_hash); 127 128 // We may have /foo as path-prefix in the whitelist which should 129 // also match with /foo/bar and /foo?bar. Hence, for every path 130 // that ends in '/' we also add the path without the slash. 
131 if (include_whitelist_hashes && 132 path.size() > 1 && 133 path[path.size() - 1] == '/') { 134 crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1), 135 &full_hash, sizeof(full_hash)); 136 full_hashes->push_back(full_hash); 137 } 138 } 139 } 140 } 141 142 // Get the prefixes matching the download |urls|. 143 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls, 144 std::vector<SBPrefix>* prefixes) { 145 std::vector<SBFullHash> full_hashes; 146 for (size_t i = 0; i < urls.size(); ++i) 147 BrowseFullHashesToCheck(urls[i], false, &full_hashes); 148 149 for (size_t i = 0; i < full_hashes.size(); ++i) 150 prefixes->push_back(full_hashes[i].prefix); 151 } 152 153 // Helper function to compare addprefixes in |store| with |prefixes|. 154 // The |list_bit| indicates which list (url or hash) to compare. 155 // 156 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain 157 // the actual matching prefixes. 158 bool MatchAddPrefixes(SafeBrowsingStore* store, 159 int list_bit, 160 const std::vector<SBPrefix>& prefixes, 161 std::vector<SBPrefix>* prefix_hits) { 162 prefix_hits->clear(); 163 bool found_match = false; 164 165 SBAddPrefixes add_prefixes; 166 store->GetAddPrefixes(&add_prefixes); 167 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); 168 iter != add_prefixes.end(); ++iter) { 169 for (size_t j = 0; j < prefixes.size(); ++j) { 170 const SBPrefix& prefix = prefixes[j]; 171 if (prefix == iter->prefix && 172 GetListIdBit(iter->chunk_id) == list_bit) { 173 prefix_hits->push_back(prefix); 174 found_match = true; 175 } 176 } 177 } 178 return found_match; 179 } 180 181 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and 182 // add them to |full_hits| if not expired. "Not expired" is when 183 // either |last_update| was recent enough, or the item has been 184 // received recently enough. Expired items are not deleted because a 185 // future update may make them acceptable again. 
186 // 187 // For efficiency reasons the code walks |prefix_hits| and 188 // |full_hashes| in parallel, so they must be sorted by prefix. 189 void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits, 190 const std::vector<SBAddFullHash>& full_hashes, 191 std::vector<SBFullHashResult>* full_hits, 192 base::Time last_update) { 193 const base::Time expire_time = 194 base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes); 195 196 std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin(); 197 std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin(); 198 199 while (piter != prefix_hits.end() && hiter != full_hashes.end()) { 200 if (*piter < hiter->full_hash.prefix) { 201 ++piter; 202 } else if (hiter->full_hash.prefix < *piter) { 203 ++hiter; 204 } else { 205 if (expire_time < last_update || 206 expire_time.ToTimeT() < hiter->received) { 207 SBFullHashResult result; 208 const int list_bit = GetListIdBit(hiter->chunk_id); 209 DCHECK(list_bit == safe_browsing_util::MALWARE || 210 list_bit == safe_browsing_util::PHISH); 211 const safe_browsing_util::ListType list_id = 212 static_cast<safe_browsing_util::ListType>(list_bit); 213 if (!safe_browsing_util::GetListName(list_id, &result.list_name)) 214 continue; 215 result.add_chunk_id = DecodeChunkId(hiter->chunk_id); 216 result.hash = hiter->full_hash; 217 full_hits->push_back(result); 218 } 219 220 // Only increment |hiter|, |piter| might have multiple hits. 221 ++hiter; 222 } 223 } 224 } 225 226 // This function generates a chunk range string for |chunks|. It 227 // outputs one chunk range string per list and writes it to the 228 // |list_ranges| vector. We expect |list_ranges| to already be of the 229 // right size. E.g., if |chunks| contains chunks with two different 230 // list ids then |list_ranges| must contain two elements. 
231 void GetChunkRanges(const std::vector<int>& chunks, 232 std::vector<std::string>* list_ranges) { 233 DCHECK_GT(list_ranges->size(), 0U); 234 DCHECK_LE(list_ranges->size(), 2U); 235 std::vector<std::vector<int> > decoded_chunks(list_ranges->size()); 236 for (std::vector<int>::const_iterator iter = chunks.begin(); 237 iter != chunks.end(); ++iter) { 238 int mod_list_id = GetListIdBit(*iter); 239 DCHECK_GE(mod_list_id, 0); 240 DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size()); 241 decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter)); 242 } 243 for (size_t i = 0; i < decoded_chunks.size(); ++i) { 244 ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i])); 245 } 246 } 247 248 // Helper function to create chunk range lists for Browse related 249 // lists. 250 void UpdateChunkRanges(SafeBrowsingStore* store, 251 const std::vector<std::string>& listnames, 252 std::vector<SBListChunkRanges>* lists) { 253 DCHECK_GT(listnames.size(), 0U); 254 DCHECK_LE(listnames.size(), 2U); 255 std::vector<int> add_chunks; 256 std::vector<int> sub_chunks; 257 store->GetAddChunks(&add_chunks); 258 store->GetSubChunks(&sub_chunks); 259 260 std::vector<std::string> adds(listnames.size()); 261 std::vector<std::string> subs(listnames.size()); 262 GetChunkRanges(add_chunks, &adds); 263 GetChunkRanges(sub_chunks, &subs); 264 265 for (size_t i = 0; i < listnames.size(); ++i) { 266 const std::string& listname = listnames[i]; 267 DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2, 268 static_cast<int>(i % 2)); 269 DCHECK_NE(safe_browsing_util::GetListId(listname), 270 safe_browsing_util::INVALID); 271 lists->push_back(SBListChunkRanges(listname)); 272 lists->back().adds.swap(adds[i]); 273 lists->back().subs.swap(subs[i]); 274 } 275 } 276 277 // Helper for deleting chunks left over from obsolete lists. 
278 void DeleteChunksFromStore(SafeBrowsingStore* store, int listid){ 279 std::vector<int> add_chunks; 280 size_t adds_deleted = 0; 281 store->GetAddChunks(&add_chunks); 282 for (std::vector<int>::const_iterator iter = add_chunks.begin(); 283 iter != add_chunks.end(); ++iter) { 284 if (GetListIdBit(*iter) == GetListIdBit(listid)) { 285 adds_deleted++; 286 store->DeleteAddChunk(*iter); 287 } 288 } 289 if (adds_deleted > 0) 290 UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashAddsDeleted", adds_deleted); 291 292 std::vector<int> sub_chunks; 293 size_t subs_deleted = 0; 294 store->GetSubChunks(&sub_chunks); 295 for (std::vector<int>::const_iterator iter = sub_chunks.begin(); 296 iter != sub_chunks.end(); ++iter) { 297 if (GetListIdBit(*iter) == GetListIdBit(listid)) { 298 subs_deleted++; 299 store->DeleteSubChunk(*iter); 300 } 301 } 302 if (subs_deleted > 0) 303 UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted); 304 } 305 306 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from 307 // safe_browsing_store.h orders on both chunk-id and prefix. 308 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { 309 return a.full_hash.prefix < b.full_hash.prefix; 310 } 311 312 // This code always checks for non-zero file size. This helper makes 313 // that less verbose. 314 int64 GetFileSizeOrZero(const base::FilePath& file_path) { 315 int64 size_64; 316 if (!file_util::GetFileSize(file_path, &size_64)) 317 return 0; 318 return size_64; 319 } 320 321 } // namespace 322 323 // The default SafeBrowsingDatabaseFactory. 
324 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 325 public: 326 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 327 bool enable_download_protection, 328 bool enable_client_side_whitelist, 329 bool enable_download_whitelist, 330 bool enable_extension_blacklist, 331 bool enable_side_effect_free_whitelist) OVERRIDE { 332 return new SafeBrowsingDatabaseNew( 333 new SafeBrowsingStoreFile, 334 enable_download_protection ? new SafeBrowsingStoreFile : NULL, 335 enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL, 336 enable_download_whitelist ? new SafeBrowsingStoreFile : NULL, 337 enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL, 338 enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL); 339 } 340 341 SafeBrowsingDatabaseFactoryImpl() { } 342 343 private: 344 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl); 345 }; 346 347 // static 348 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL; 349 350 // Factory method, non-thread safe. Caller has to make sure this s called 351 // on SafeBrowsing Thread. 352 // TODO(shess): There's no need for a factory any longer. Convert 353 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create() 354 // callers just construct things directly. 
355 SafeBrowsingDatabase* SafeBrowsingDatabase::Create( 356 bool enable_download_protection, 357 bool enable_client_side_whitelist, 358 bool enable_download_whitelist, 359 bool enable_extension_blacklist, 360 bool enable_side_effect_free_whitelist) { 361 if (!factory_) 362 factory_ = new SafeBrowsingDatabaseFactoryImpl(); 363 return factory_->CreateSafeBrowsingDatabase( 364 enable_download_protection, 365 enable_client_side_whitelist, 366 enable_download_whitelist, 367 enable_extension_blacklist, 368 enable_side_effect_free_whitelist); 369 } 370 371 SafeBrowsingDatabase::~SafeBrowsingDatabase() { 372 } 373 374 // static 375 base::FilePath SafeBrowsingDatabase::BrowseDBFilename( 376 const base::FilePath& db_base_filename) { 377 return base::FilePath(db_base_filename.value() + kBrowseDBFile); 378 } 379 380 // static 381 base::FilePath SafeBrowsingDatabase::DownloadDBFilename( 382 const base::FilePath& db_base_filename) { 383 return base::FilePath(db_base_filename.value() + kDownloadDBFile); 384 } 385 386 // static 387 base::FilePath SafeBrowsingDatabase::BloomFilterForFilename( 388 const base::FilePath& db_filename) { 389 return base::FilePath(db_filename.value() + kBloomFilterFile); 390 } 391 392 // static 393 base::FilePath SafeBrowsingDatabase::PrefixSetForFilename( 394 const base::FilePath& db_filename) { 395 return base::FilePath(db_filename.value() + kPrefixSetFile); 396 } 397 398 // static 399 base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename( 400 const base::FilePath& db_filename) { 401 return base::FilePath(db_filename.value() + kCsdWhitelistDBFile); 402 } 403 404 // static 405 base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename( 406 const base::FilePath& db_filename) { 407 return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile); 408 } 409 410 // static 411 base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename( 412 const base::FilePath& db_filename) { 413 return base::FilePath(db_filename.value() + 
kExtensionBlacklistDBFile); 414 } 415 416 // static 417 base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename( 418 const base::FilePath& db_filename) { 419 return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile); 420 } 421 422 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) { 423 if (list_id == safe_browsing_util::PHISH || 424 list_id == safe_browsing_util::MALWARE) { 425 return browse_store_.get(); 426 } else if (list_id == safe_browsing_util::BINURL || 427 list_id == safe_browsing_util::BINHASH) { 428 return download_store_.get(); 429 } else if (list_id == safe_browsing_util::CSDWHITELIST) { 430 return csd_whitelist_store_.get(); 431 } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) { 432 return download_whitelist_store_.get(); 433 } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) { 434 return extension_blacklist_store_.get(); 435 } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) { 436 return side_effect_free_whitelist_store_.get(); 437 } 438 return NULL; 439 } 440 441 // static 442 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) { 443 UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type, 444 FAILURE_DATABASE_MAX); 445 } 446 447 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew() 448 : creation_loop_(base::MessageLoop::current()), 449 browse_store_(new SafeBrowsingStoreFile), 450 reset_factory_(this), 451 corruption_detected_(false), 452 change_detected_(false) { 453 DCHECK(browse_store_.get()); 454 DCHECK(!download_store_.get()); 455 DCHECK(!csd_whitelist_store_.get()); 456 DCHECK(!download_whitelist_store_.get()); 457 DCHECK(!extension_blacklist_store_.get()); 458 DCHECK(!side_effect_free_whitelist_store_.get()); 459 } 460 461 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew( 462 SafeBrowsingStore* browse_store, 463 SafeBrowsingStore* download_store, 464 SafeBrowsingStore* csd_whitelist_store, 465 SafeBrowsingStore* 
download_whitelist_store, 466 SafeBrowsingStore* extension_blacklist_store, 467 SafeBrowsingStore* side_effect_free_whitelist_store) 468 : creation_loop_(base::MessageLoop::current()), 469 browse_store_(browse_store), 470 download_store_(download_store), 471 csd_whitelist_store_(csd_whitelist_store), 472 download_whitelist_store_(download_whitelist_store), 473 extension_blacklist_store_(extension_blacklist_store), 474 side_effect_free_whitelist_store_(side_effect_free_whitelist_store), 475 reset_factory_(this), 476 corruption_detected_(false) { 477 DCHECK(browse_store_.get()); 478 } 479 480 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() { 481 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 482 } 483 484 void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) { 485 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 486 // Ensure we haven't been run before. 487 DCHECK(browse_filename_.empty()); 488 DCHECK(download_filename_.empty()); 489 DCHECK(csd_whitelist_filename_.empty()); 490 DCHECK(download_whitelist_filename_.empty()); 491 DCHECK(extension_blacklist_filename_.empty()); 492 DCHECK(side_effect_free_whitelist_filename_.empty()); 493 494 browse_filename_ = BrowseDBFilename(filename_base); 495 browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_); 496 497 browse_store_->Init( 498 browse_filename_, 499 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 500 base::Unretained(this))); 501 DVLOG(1) << "Init browse store: " << browse_filename_.value(); 502 503 { 504 // NOTE: There is no need to grab the lock in this function, since 505 // until it returns, there are no pointers to this class on other 506 // threads. Then again, that means there is no possibility of 507 // contention on the lock... 
508 base::AutoLock locked(lookup_lock_); 509 full_browse_hashes_.clear(); 510 pending_browse_hashes_.clear(); 511 LoadPrefixSet(); 512 } 513 514 if (download_store_.get()) { 515 download_filename_ = DownloadDBFilename(filename_base); 516 download_store_->Init( 517 download_filename_, 518 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 519 base::Unretained(this))); 520 DVLOG(1) << "Init download store: " << download_filename_.value(); 521 } 522 523 if (csd_whitelist_store_.get()) { 524 csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base); 525 csd_whitelist_store_->Init( 526 csd_whitelist_filename_, 527 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 528 base::Unretained(this))); 529 DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value(); 530 std::vector<SBAddFullHash> full_hashes; 531 if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) { 532 LoadWhitelist(full_hashes, &csd_whitelist_); 533 } else { 534 WhitelistEverything(&csd_whitelist_); 535 } 536 } else { 537 WhitelistEverything(&csd_whitelist_); // Just to be safe. 538 } 539 540 if (download_whitelist_store_.get()) { 541 download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base); 542 download_whitelist_store_->Init( 543 download_whitelist_filename_, 544 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 545 base::Unretained(this))); 546 DVLOG(1) << "Init download whitelist store: " 547 << download_whitelist_filename_.value(); 548 std::vector<SBAddFullHash> full_hashes; 549 if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) { 550 LoadWhitelist(full_hashes, &download_whitelist_); 551 } else { 552 WhitelistEverything(&download_whitelist_); 553 } 554 } else { 555 WhitelistEverything(&download_whitelist_); // Just to be safe. 
556 } 557 558 if (extension_blacklist_store_.get()) { 559 extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base); 560 extension_blacklist_store_->Init( 561 extension_blacklist_filename_, 562 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 563 base::Unretained(this))); 564 DVLOG(1) << "Init extension blacklist store: " 565 << extension_blacklist_filename_.value(); 566 } 567 568 if (side_effect_free_whitelist_store_.get()) { 569 side_effect_free_whitelist_filename_ = 570 SideEffectFreeWhitelistDBFilename(filename_base); 571 side_effect_free_whitelist_prefix_set_filename_ = 572 PrefixSetForFilename(side_effect_free_whitelist_filename_); 573 side_effect_free_whitelist_store_->Init( 574 side_effect_free_whitelist_filename_, 575 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 576 base::Unretained(this))); 577 DVLOG(1) << "Init side-effect free whitelist store: " 578 << side_effect_free_whitelist_filename_.value(); 579 580 // If there is no database, the filter cannot be used. 581 base::PlatformFileInfo db_info; 582 if (file_util::GetFileInfo(side_effect_free_whitelist_filename_, &db_info) 583 && db_info.size != 0) { 584 const base::TimeTicks before = base::TimeTicks::Now(); 585 side_effect_free_whitelist_prefix_set_.reset( 586 safe_browsing::PrefixSet::LoadFile( 587 side_effect_free_whitelist_prefix_set_filename_)); 588 DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist " 589 << "prefix set in " 590 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 591 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad", 592 base::TimeTicks::Now() - before); 593 if (!side_effect_free_whitelist_prefix_set_.get()) 594 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ); 595 } 596 } else { 597 // Delete any files of the side-effect free sidelist that may be around 598 // from when it was previously enabled. 
599 SafeBrowsingStoreFile::DeleteStore( 600 SideEffectFreeWhitelistDBFilename(filename_base)); 601 } 602 } 603 604 bool SafeBrowsingDatabaseNew::ResetDatabase() { 605 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 606 607 // Delete files on disk. 608 // TODO(shess): Hard to see where one might want to delete without a 609 // reset. Perhaps inline |Delete()|? 610 if (!Delete()) 611 return false; 612 613 // Reset objects in memory. 614 { 615 base::AutoLock locked(lookup_lock_); 616 full_browse_hashes_.clear(); 617 pending_browse_hashes_.clear(); 618 prefix_miss_cache_.clear(); 619 browse_prefix_set_.reset(); 620 side_effect_free_whitelist_prefix_set_.reset(); 621 } 622 // Wants to acquire the lock itself. 623 WhitelistEverything(&csd_whitelist_); 624 WhitelistEverything(&download_whitelist_); 625 626 return true; 627 } 628 629 // TODO(lzheng): Remove matching_list, it is not used anywhere. 630 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 631 const GURL& url, 632 std::string* matching_list, 633 std::vector<SBPrefix>* prefix_hits, 634 std::vector<SBFullHashResult>* full_hits, 635 base::Time last_update) { 636 // Clear the results first. 637 matching_list->clear(); 638 prefix_hits->clear(); 639 full_hits->clear(); 640 641 std::vector<SBFullHash> full_hashes; 642 BrowseFullHashesToCheck(url, false, &full_hashes); 643 if (full_hashes.empty()) 644 return false; 645 646 // This function is called on the I/O thread, prevent changes to 647 // filter and caches. 648 base::AutoLock locked(lookup_lock_); 649 650 // |browse_prefix_set_| is empty until it is either read from disk, or the 651 // first update populates it. Bail out without a hit if not yet 652 // available. 
653 if (!browse_prefix_set_.get()) 654 return false; 655 656 size_t miss_count = 0; 657 for (size_t i = 0; i < full_hashes.size(); ++i) { 658 const SBPrefix prefix = full_hashes[i].prefix; 659 if (browse_prefix_set_->Exists(prefix)) { 660 prefix_hits->push_back(prefix); 661 if (prefix_miss_cache_.count(prefix) > 0) 662 ++miss_count; 663 } 664 } 665 666 // If all the prefixes are cached as 'misses', don't issue a GetHash. 667 if (miss_count == prefix_hits->size()) 668 return false; 669 670 // Find the matching full-hash results. |full_browse_hashes_| are from the 671 // database, |pending_browse_hashes_| are from GetHash requests between 672 // updates. 673 std::sort(prefix_hits->begin(), prefix_hits->end()); 674 675 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_, 676 full_hits, last_update); 677 GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_, 678 full_hits, last_update); 679 return true; 680 } 681 682 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( 683 const std::vector<GURL>& urls, 684 std::vector<SBPrefix>* prefix_hits) { 685 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 686 687 // Ignore this check when download checking is not enabled. 688 if (!download_store_.get()) 689 return false; 690 691 std::vector<SBPrefix> prefixes; 692 GetDownloadUrlPrefixes(urls, &prefixes); 693 return MatchAddPrefixes(download_store_.get(), 694 safe_browsing_util::BINURL % 2, 695 prefixes, 696 prefix_hits); 697 } 698 699 bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix( 700 const SBPrefix& prefix) { 701 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 702 703 // Ignore this check when download store is not available. 
704 if (!download_store_.get()) 705 return false; 706 707 std::vector<SBPrefix> prefix_hits; 708 return MatchAddPrefixes(download_store_.get(), 709 safe_browsing_util::BINHASH % 2, 710 std::vector<SBPrefix>(1, prefix), 711 &prefix_hits); 712 } 713 714 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) { 715 // This method is theoretically thread-safe but we expect all calls to 716 // originate from the IO thread. 717 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 718 std::vector<SBFullHash> full_hashes; 719 BrowseFullHashesToCheck(url, true, &full_hashes); 720 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 721 } 722 723 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) { 724 std::vector<SBFullHash> full_hashes; 725 BrowseFullHashesToCheck(url, true, &full_hashes); 726 return ContainsWhitelistedHashes(download_whitelist_, full_hashes); 727 } 728 729 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes( 730 const std::vector<SBPrefix>& prefixes, 731 std::vector<SBPrefix>* prefix_hits) { 732 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 733 if (!extension_blacklist_store_) 734 return false; 735 736 return MatchAddPrefixes(extension_blacklist_store_.get(), 737 safe_browsing_util::EXTENSIONBLACKLIST % 2, 738 prefixes, 739 prefix_hits); 740 } 741 742 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl( 743 const GURL& url) { 744 SBFullHash full_hash; 745 std::string host; 746 std::string path; 747 std::string query; 748 safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query); 749 std::string url_to_check = host + path; 750 if (!query.empty()) 751 url_to_check += "?" 
+ query; 752 crypto::SHA256HashString(url_to_check, &full_hash, sizeof(full_hash)); 753 754 // This function can be called on any thread, so lock against any changes 755 base::AutoLock locked(lookup_lock_); 756 757 // |side_effect_free_whitelist_prefix_set_| is empty until it is either read 758 // from disk, or the first update populates it. Bail out without a hit if 759 // not yet available. 760 if (!side_effect_free_whitelist_prefix_set_.get()) 761 return false; 762 763 return side_effect_free_whitelist_prefix_set_->Exists(full_hash.prefix); 764 } 765 766 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString( 767 const std::string& str) { 768 SBFullHash hash; 769 crypto::SHA256HashString(str, &hash, sizeof(hash)); 770 std::vector<SBFullHash> hashes; 771 hashes.push_back(hash); 772 return ContainsWhitelistedHashes(download_whitelist_, hashes); 773 } 774 775 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes( 776 const SBWhitelist& whitelist, 777 const std::vector<SBFullHash>& hashes) { 778 base::AutoLock l(lookup_lock_); 779 if (whitelist.second) 780 return true; 781 for (std::vector<SBFullHash>::const_iterator it = hashes.begin(); 782 it != hashes.end(); ++it) { 783 if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), *it)) 784 return true; 785 } 786 return false; 787 } 788 789 // Helper to insert entries for all of the prefixes or full hashes in 790 // |entry| into the store. 791 void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host, 792 const SBEntry* entry, int list_id) { 793 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 794 795 SafeBrowsingStore* store = GetStore(list_id); 796 if (!store) return; 797 798 STATS_COUNTER("SB.HostInsert", 1); 799 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 800 const int count = entry->prefix_count(); 801 802 DCHECK(!entry->IsSub()); 803 if (!count) { 804 // No prefixes, use host instead. 
805 STATS_COUNTER("SB.PrefixAdd", 1); 806 store->WriteAddPrefix(encoded_chunk_id, host); 807 } else if (entry->IsPrefix()) { 808 // Prefixes only. 809 for (int i = 0; i < count; i++) { 810 const SBPrefix prefix = entry->PrefixAt(i); 811 STATS_COUNTER("SB.PrefixAdd", 1); 812 store->WriteAddPrefix(encoded_chunk_id, prefix); 813 } 814 } else { 815 // Prefixes and hashes. 816 const base::Time receive_time = base::Time::Now(); 817 for (int i = 0; i < count; ++i) { 818 const SBFullHash full_hash = entry->FullHashAt(i); 819 const SBPrefix prefix = full_hash.prefix; 820 821 STATS_COUNTER("SB.PrefixAdd", 1); 822 store->WriteAddPrefix(encoded_chunk_id, prefix); 823 824 STATS_COUNTER("SB.PrefixAddFull", 1); 825 store->WriteAddHash(encoded_chunk_id, receive_time, full_hash); 826 } 827 } 828 } 829 830 // Helper to iterate over all the entries in the hosts in |chunks| and 831 // add them to the store. 832 void SafeBrowsingDatabaseNew::InsertAddChunks( 833 const safe_browsing_util::ListType list_id, 834 const SBChunkList& chunks) { 835 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 836 837 SafeBrowsingStore* store = GetStore(list_id); 838 if (!store) return; 839 840 for (SBChunkList::const_iterator citer = chunks.begin(); 841 citer != chunks.end(); ++citer) { 842 const int chunk_id = citer->chunk_number; 843 844 // The server can give us a chunk that we already have because 845 // it's part of a range. Don't add it again. 846 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 847 if (store->CheckAddChunk(encoded_chunk_id)) 848 continue; 849 850 store->SetAddChunk(encoded_chunk_id); 851 for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin(); 852 hiter != citer->hosts.end(); ++hiter) { 853 // NOTE: Could pass |encoded_chunk_id|, but then inserting add 854 // chunks would look different from inserting sub chunks. 
855 InsertAdd(chunk_id, hiter->host, hiter->entry, list_id); 856 } 857 } 858 } 859 860 // Helper to insert entries for all of the prefixes or full hashes in 861 // |entry| into the store. 862 void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host, 863 const SBEntry* entry, int list_id) { 864 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 865 866 SafeBrowsingStore* store = GetStore(list_id); 867 if (!store) return; 868 869 STATS_COUNTER("SB.HostDelete", 1); 870 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 871 const int count = entry->prefix_count(); 872 873 DCHECK(entry->IsSub()); 874 if (!count) { 875 // No prefixes, use host instead. 876 STATS_COUNTER("SB.PrefixSub", 1); 877 const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id); 878 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host); 879 } else if (entry->IsPrefix()) { 880 // Prefixes only. 881 for (int i = 0; i < count; i++) { 882 const SBPrefix prefix = entry->PrefixAt(i); 883 const int add_chunk_id = 884 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); 885 886 STATS_COUNTER("SB.PrefixSub", 1); 887 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix); 888 } 889 } else { 890 // Prefixes and hashes. 891 for (int i = 0; i < count; ++i) { 892 const SBFullHash full_hash = entry->FullHashAt(i); 893 const int add_chunk_id = 894 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); 895 896 STATS_COUNTER("SB.PrefixSub", 1); 897 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix); 898 899 STATS_COUNTER("SB.PrefixSubFull", 1); 900 store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash); 901 } 902 } 903 } 904 905 // Helper to iterate over all the entries in the hosts in |chunks| and 906 // add them to the store. 
907 void SafeBrowsingDatabaseNew::InsertSubChunks( 908 safe_browsing_util::ListType list_id, 909 const SBChunkList& chunks) { 910 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 911 912 SafeBrowsingStore* store = GetStore(list_id); 913 if (!store) return; 914 915 for (SBChunkList::const_iterator citer = chunks.begin(); 916 citer != chunks.end(); ++citer) { 917 const int chunk_id = citer->chunk_number; 918 919 // The server can give us a chunk that we already have because 920 // it's part of a range. Don't add it again. 921 const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id); 922 if (store->CheckSubChunk(encoded_chunk_id)) 923 continue; 924 925 store->SetSubChunk(encoded_chunk_id); 926 for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin(); 927 hiter != citer->hosts.end(); ++hiter) { 928 InsertSub(chunk_id, hiter->host, hiter->entry, list_id); 929 } 930 } 931 } 932 933 void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name, 934 const SBChunkList& chunks) { 935 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 936 937 if (corruption_detected_ || chunks.empty()) 938 return; 939 940 const base::TimeTicks before = base::TimeTicks::Now(); 941 942 const safe_browsing_util::ListType list_id = 943 safe_browsing_util::GetListId(list_name); 944 DVLOG(2) << list_name << ": " << list_id; 945 946 SafeBrowsingStore* store = GetStore(list_id); 947 if (!store) return; 948 949 change_detected_ = true; 950 951 store->BeginChunk(); 952 if (chunks.front().is_add) { 953 InsertAddChunks(list_id, chunks); 954 } else { 955 InsertSubChunks(list_id, chunks); 956 } 957 store->FinishChunk(); 958 959 UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before); 960 } 961 962 void SafeBrowsingDatabaseNew::DeleteChunks( 963 const std::vector<SBChunkDelete>& chunk_deletes) { 964 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 965 966 if (corruption_detected_ || chunk_deletes.empty()) 967 return; 968 969 const 
std::string& list_name = chunk_deletes.front().list_name; 970 const safe_browsing_util::ListType list_id = 971 safe_browsing_util::GetListId(list_name); 972 973 SafeBrowsingStore* store = GetStore(list_id); 974 if (!store) return; 975 976 change_detected_ = true; 977 978 for (size_t i = 0; i < chunk_deletes.size(); ++i) { 979 std::vector<int> chunk_numbers; 980 RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers); 981 for (size_t j = 0; j < chunk_numbers.size(); ++j) { 982 const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id); 983 if (chunk_deletes[i].is_sub_del) 984 store->DeleteSubChunk(encoded_chunk_id); 985 else 986 store->DeleteAddChunk(encoded_chunk_id); 987 } 988 } 989 } 990 991 void SafeBrowsingDatabaseNew::CacheHashResults( 992 const std::vector<SBPrefix>& prefixes, 993 const std::vector<SBFullHashResult>& full_hits) { 994 // This is called on the I/O thread, lock against updates. 995 base::AutoLock locked(lookup_lock_); 996 997 if (full_hits.empty()) { 998 prefix_miss_cache_.insert(prefixes.begin(), prefixes.end()); 999 return; 1000 } 1001 1002 // TODO(shess): SBFullHashResult and SBAddFullHash are very similar. 1003 // Refactor to make them identical. 1004 const base::Time now = base::Time::Now(); 1005 const size_t orig_size = pending_browse_hashes_.size(); 1006 for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin(); 1007 iter != full_hits.end(); ++iter) { 1008 const int list_id = safe_browsing_util::GetListId(iter->list_name); 1009 if (list_id == safe_browsing_util::MALWARE || 1010 list_id == safe_browsing_util::PHISH) { 1011 int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id); 1012 SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash); 1013 pending_browse_hashes_.push_back(add_full_hash); 1014 } 1015 } 1016 1017 // Sort new entries then merge with the previously-sorted entries. 
1018 std::vector<SBAddFullHash>::iterator 1019 orig_end = pending_browse_hashes_.begin() + orig_size; 1020 std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess); 1021 std::inplace_merge(pending_browse_hashes_.begin(), 1022 orig_end, pending_browse_hashes_.end(), 1023 SBAddFullHashPrefixLess); 1024 } 1025 1026 bool SafeBrowsingDatabaseNew::UpdateStarted( 1027 std::vector<SBListChunkRanges>* lists) { 1028 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1029 DCHECK(lists); 1030 1031 // If |BeginUpdate()| fails, reset the database. 1032 if (!browse_store_->BeginUpdate()) { 1033 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); 1034 HandleCorruptDatabase(); 1035 return false; 1036 } 1037 1038 if (download_store_.get() && !download_store_->BeginUpdate()) { 1039 RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN); 1040 HandleCorruptDatabase(); 1041 return false; 1042 } 1043 1044 if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) { 1045 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); 1046 HandleCorruptDatabase(); 1047 return false; 1048 } 1049 1050 if (download_whitelist_store_.get() && 1051 !download_whitelist_store_->BeginUpdate()) { 1052 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN); 1053 HandleCorruptDatabase(); 1054 return false; 1055 } 1056 1057 if (extension_blacklist_store_ && 1058 !extension_blacklist_store_->BeginUpdate()) { 1059 RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN); 1060 HandleCorruptDatabase(); 1061 return false; 1062 } 1063 1064 if (side_effect_free_whitelist_store_ && 1065 !side_effect_free_whitelist_store_->BeginUpdate()) { 1066 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN); 1067 HandleCorruptDatabase(); 1068 return false; 1069 } 1070 1071 std::vector<std::string> browse_listnames; 1072 browse_listnames.push_back(safe_browsing_util::kMalwareList); 1073 browse_listnames.push_back(safe_browsing_util::kPhishingList); 1074 
UpdateChunkRanges(browse_store_.get(), browse_listnames, lists); 1075 1076 if (download_store_.get()) { 1077 // This store used to contain kBinHashList in addition to 1078 // kBinUrlList. Strip the stale data before generating the chunk 1079 // ranges to request. UpdateChunkRanges() will traverse the chunk 1080 // list, so this is very cheap if there are no kBinHashList chunks. 1081 const int listid = 1082 safe_browsing_util::GetListId(safe_browsing_util::kBinHashList); 1083 DeleteChunksFromStore(download_store_.get(), listid); 1084 1085 // The above marks the chunks for deletion, but they are not 1086 // actually deleted until the database is rewritten. The 1087 // following code removes the kBinHashList part of the request 1088 // before continuing so that UpdateChunkRanges() doesn't break. 1089 std::vector<std::string> download_listnames; 1090 download_listnames.push_back(safe_browsing_util::kBinUrlList); 1091 download_listnames.push_back(safe_browsing_util::kBinHashList); 1092 UpdateChunkRanges(download_store_.get(), download_listnames, lists); 1093 DCHECK_EQ(lists->back().name, 1094 std::string(safe_browsing_util::kBinHashList)); 1095 lists->pop_back(); 1096 1097 // TODO(shess): This problem could also be handled in 1098 // BeginUpdate() by detecting the chunks to delete and rewriting 1099 // the database before it's used. When I implemented that, it 1100 // felt brittle, it might be easier to just wait for some future 1101 // format change. 
1102 } 1103 1104 if (csd_whitelist_store_.get()) { 1105 std::vector<std::string> csd_whitelist_listnames; 1106 csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList); 1107 UpdateChunkRanges(csd_whitelist_store_.get(), 1108 csd_whitelist_listnames, lists); 1109 } 1110 1111 if (download_whitelist_store_.get()) { 1112 std::vector<std::string> download_whitelist_listnames; 1113 download_whitelist_listnames.push_back( 1114 safe_browsing_util::kDownloadWhiteList); 1115 UpdateChunkRanges(download_whitelist_store_.get(), 1116 download_whitelist_listnames, lists); 1117 } 1118 1119 if (extension_blacklist_store_) { 1120 UpdateChunkRanges( 1121 extension_blacklist_store_.get(), 1122 std::vector<std::string>(1, safe_browsing_util::kExtensionBlacklist), 1123 lists); 1124 } 1125 1126 if (side_effect_free_whitelist_store_) { 1127 UpdateChunkRanges( 1128 side_effect_free_whitelist_store_.get(), 1129 std::vector<std::string>( 1130 1, safe_browsing_util::kSideEffectFreeWhitelist), 1131 lists); 1132 } 1133 1134 corruption_detected_ = false; 1135 change_detected_ = false; 1136 return true; 1137 } 1138 1139 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) { 1140 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1141 1142 // The update may have failed due to corrupt storage (for instance, 1143 // an excessive number of invalid add_chunks and sub_chunks). 1144 // Double-check that the databases are valid. 1145 // TODO(shess): Providing a checksum for the add_chunk and sub_chunk 1146 // sections would allow throwing a corruption error in 1147 // UpdateStarted(). 
1148 if (!update_succeeded) { 1149 if (!browse_store_->CheckValidity()) 1150 DLOG(ERROR) << "Safe-browsing browse database corrupt."; 1151 1152 if (download_store_.get() && !download_store_->CheckValidity()) 1153 DLOG(ERROR) << "Safe-browsing download database corrupt."; 1154 1155 if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity()) 1156 DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt."; 1157 1158 if (download_whitelist_store_.get() && 1159 !download_whitelist_store_->CheckValidity()) { 1160 DLOG(ERROR) << "Safe-browsing download whitelist database corrupt."; 1161 } 1162 1163 if (extension_blacklist_store_ && 1164 !extension_blacklist_store_->CheckValidity()) { 1165 DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt."; 1166 } 1167 1168 if (side_effect_free_whitelist_store_ && 1169 !side_effect_free_whitelist_store_->CheckValidity()) { 1170 DLOG(ERROR) << "Safe-browsing side-effect free whitelist database " 1171 << "corrupt."; 1172 } 1173 } 1174 1175 if (corruption_detected_) 1176 return; 1177 1178 // Unroll the transaction if there was a protocol error or if the 1179 // transaction was empty. This will leave the prefix set, the 1180 // pending hashes, and the prefix miss cache in place. 1181 if (!update_succeeded || !change_detected_) { 1182 // Track empty updates to answer questions at http://crbug.com/72216 . 
1183 if (update_succeeded && !change_detected_) 1184 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0); 1185 browse_store_->CancelUpdate(); 1186 if (download_store_.get()) 1187 download_store_->CancelUpdate(); 1188 if (csd_whitelist_store_.get()) 1189 csd_whitelist_store_->CancelUpdate(); 1190 if (download_whitelist_store_.get()) 1191 download_whitelist_store_->CancelUpdate(); 1192 if (extension_blacklist_store_) 1193 extension_blacklist_store_->CancelUpdate(); 1194 if (side_effect_free_whitelist_store_) 1195 side_effect_free_whitelist_store_->CancelUpdate(); 1196 return; 1197 } 1198 1199 if (download_store_) { 1200 int64 size_bytes = UpdateHashPrefixStore( 1201 download_filename_, 1202 download_store_.get(), 1203 FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH); 1204 UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes", 1205 static_cast<int>(size_bytes / 1024)); 1206 } 1207 1208 UpdateBrowseStore(); 1209 UpdateWhitelistStore(csd_whitelist_filename_, 1210 csd_whitelist_store_.get(), 1211 &csd_whitelist_); 1212 UpdateWhitelistStore(download_whitelist_filename_, 1213 download_whitelist_store_.get(), 1214 &download_whitelist_); 1215 1216 if (extension_blacklist_store_) { 1217 int64 size_bytes = UpdateHashPrefixStore( 1218 extension_blacklist_filename_, 1219 extension_blacklist_store_.get(), 1220 FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH); 1221 UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes", 1222 static_cast<int>(size_bytes / 1024)); 1223 } 1224 1225 if (side_effect_free_whitelist_store_) 1226 UpdateSideEffectFreeWhitelistStore(); 1227 } 1228 1229 void SafeBrowsingDatabaseNew::UpdateWhitelistStore( 1230 const base::FilePath& store_filename, 1231 SafeBrowsingStore* store, 1232 SBWhitelist* whitelist) { 1233 if (!store) 1234 return; 1235 1236 // For the whitelists, we don't cache and save full hashes since all 1237 // hashes are already full. 1238 std::vector<SBAddFullHash> empty_add_hashes; 1239 1240 // Not needed for the whitelists. 
1241 std::set<SBPrefix> empty_miss_cache; 1242 1243 // Note: prefixes will not be empty. The current data store implementation 1244 // stores all full-length hashes as both full and prefix hashes. 1245 SBAddPrefixes prefixes; 1246 std::vector<SBAddFullHash> full_hashes; 1247 if (!store->FinishUpdate(empty_add_hashes, empty_miss_cache, &prefixes, 1248 &full_hashes)) { 1249 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); 1250 WhitelistEverything(whitelist); 1251 return; 1252 } 1253 1254 #if defined(OS_MACOSX) 1255 base::mac::SetFileBackupExclusion(store_filename); 1256 #endif 1257 1258 LoadWhitelist(full_hashes, whitelist); 1259 } 1260 1261 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( 1262 const base::FilePath& store_filename, 1263 SafeBrowsingStore* store, 1264 FailureType failure_type) { 1265 // We don't cache and save full hashes. 1266 std::vector<SBAddFullHash> empty_add_hashes; 1267 1268 // Backend lookup happens only if a prefix is in add list. 1269 std::set<SBPrefix> empty_miss_cache; 1270 1271 // These results are not used after this call. Simply ignore the 1272 // returned value after FinishUpdate(...). 1273 SBAddPrefixes add_prefixes_result; 1274 std::vector<SBAddFullHash> add_full_hashes_result; 1275 1276 if (!store->FinishUpdate(empty_add_hashes, 1277 empty_miss_cache, 1278 &add_prefixes_result, 1279 &add_full_hashes_result)) { 1280 RecordFailure(failure_type); 1281 } 1282 1283 #if defined(OS_MACOSX) 1284 base::mac::SetFileBackupExclusion(store_filename); 1285 #endif 1286 1287 return GetFileSizeOrZero(store_filename); 1288 } 1289 1290 void SafeBrowsingDatabaseNew::UpdateBrowseStore() { 1291 // Copy out the pending add hashes. Copy rather than swapping in 1292 // case |ContainsBrowseURL()| is called before the new filter is complete. 
1293 std::vector<SBAddFullHash> pending_add_hashes; 1294 { 1295 base::AutoLock locked(lookup_lock_); 1296 pending_add_hashes.insert(pending_add_hashes.end(), 1297 pending_browse_hashes_.begin(), 1298 pending_browse_hashes_.end()); 1299 } 1300 1301 // Measure the amount of IO during the filter build. 1302 base::IoCounters io_before, io_after; 1303 base::ProcessHandle handle = base::Process::Current().handle(); 1304 scoped_ptr<base::ProcessMetrics> metric( 1305 #if !defined(OS_MACOSX) 1306 base::ProcessMetrics::CreateProcessMetrics(handle) 1307 #else 1308 // Getting stats only for the current process is enough, so NULL is fine. 1309 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) 1310 #endif 1311 ); 1312 1313 // IoCounters are currently not supported on Mac, and may not be 1314 // available for Linux, so we check the result and only show IO 1315 // stats if they are available. 1316 const bool got_counters = metric->GetIOCounters(&io_before); 1317 1318 const base::TimeTicks before = base::TimeTicks::Now(); 1319 1320 SBAddPrefixes add_prefixes; 1321 std::vector<SBAddFullHash> add_full_hashes; 1322 if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_, 1323 &add_prefixes, &add_full_hashes)) { 1324 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); 1325 return; 1326 } 1327 1328 // TODO(shess): If |add_prefixes| were sorted by the prefix, it 1329 // could be passed directly to |PrefixSet()|, removing the need for 1330 // |prefixes|. For now, |prefixes| is useful while debugging 1331 // things. 
1332 std::vector<SBPrefix> prefixes; 1333 prefixes.reserve(add_prefixes.size()); 1334 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); 1335 iter != add_prefixes.end(); ++iter) { 1336 prefixes.push_back(iter->prefix); 1337 } 1338 1339 std::sort(prefixes.begin(), prefixes.end()); 1340 scoped_ptr<safe_browsing::PrefixSet> 1341 prefix_set(new safe_browsing::PrefixSet(prefixes)); 1342 1343 // This needs to be in sorted order by prefix for efficient access. 1344 std::sort(add_full_hashes.begin(), add_full_hashes.end(), 1345 SBAddFullHashPrefixLess); 1346 1347 // Swap in the newly built filter and cache. 1348 { 1349 base::AutoLock locked(lookup_lock_); 1350 full_browse_hashes_.swap(add_full_hashes); 1351 1352 // TODO(shess): If |CacheHashResults()| is posted between the 1353 // earlier lock and this clear, those pending hashes will be lost. 1354 // It could be fixed by only removing hashes which were collected 1355 // at the earlier point. I believe that is fail-safe as-is (the 1356 // hash will be fetched again). 1357 pending_browse_hashes_.clear(); 1358 prefix_miss_cache_.clear(); 1359 browse_prefix_set_.swap(prefix_set); 1360 } 1361 1362 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in " 1363 << (base::TimeTicks::Now() - before).InMilliseconds() 1364 << " ms total. prefix count: " << add_prefixes.size(); 1365 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); 1366 1367 // Persist the prefix set to disk. Since only this thread changes 1368 // |browse_prefix_set_|, there is no need to lock. 1369 WritePrefixSet(); 1370 1371 // Gather statistics. 
1372 if (got_counters && metric->GetIOCounters(&io_after)) { 1373 UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes", 1374 static_cast<int>(io_after.ReadTransferCount - 1375 io_before.ReadTransferCount) / 1024); 1376 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes", 1377 static_cast<int>(io_after.WriteTransferCount - 1378 io_before.WriteTransferCount) / 1024); 1379 UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations", 1380 static_cast<int>(io_after.ReadOperationCount - 1381 io_before.ReadOperationCount)); 1382 UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations", 1383 static_cast<int>(io_after.WriteOperationCount - 1384 io_before.WriteOperationCount)); 1385 } 1386 1387 int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_); 1388 UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes", 1389 static_cast<int>(file_size / 1024)); 1390 file_size = GetFileSizeOrZero(browse_filename_); 1391 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", 1392 static_cast<int>(file_size / 1024)); 1393 1394 #if defined(OS_MACOSX) 1395 base::mac::SetFileBackupExclusion(browse_filename_); 1396 #endif 1397 } 1398 1399 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { 1400 std::vector<SBAddFullHash> empty_add_hashes; 1401 std::set<SBPrefix> empty_miss_cache; 1402 SBAddPrefixes add_prefixes; 1403 std::vector<SBAddFullHash> add_full_hashes_result; 1404 1405 if (!side_effect_free_whitelist_store_->FinishUpdate( 1406 empty_add_hashes, 1407 empty_miss_cache, 1408 &add_prefixes, 1409 &add_full_hashes_result)) { 1410 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); 1411 return; 1412 } 1413 1414 // TODO(shess): If |add_prefixes| were sorted by the prefix, it 1415 // could be passed directly to |PrefixSet()|, removing the need for 1416 // |prefixes|. For now, |prefixes| is useful while debugging 1417 // things. 
1418 std::vector<SBPrefix> prefixes; 1419 prefixes.reserve(add_prefixes.size()); 1420 for (SBAddPrefixes::const_iterator iter = add_prefixes.begin(); 1421 iter != add_prefixes.end(); ++iter) { 1422 prefixes.push_back(iter->prefix); 1423 } 1424 1425 std::sort(prefixes.begin(), prefixes.end()); 1426 scoped_ptr<safe_browsing::PrefixSet> 1427 prefix_set(new safe_browsing::PrefixSet(prefixes)); 1428 1429 // Swap in the newly built prefix set. 1430 { 1431 base::AutoLock locked(lookup_lock_); 1432 side_effect_free_whitelist_prefix_set_.swap(prefix_set); 1433 } 1434 1435 const base::TimeTicks before = base::TimeTicks::Now(); 1436 const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile( 1437 side_effect_free_whitelist_prefix_set_filename_); 1438 DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix " 1439 << "set in " << (base::TimeTicks::Now() - before).InMilliseconds() 1440 << " ms"; 1441 UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite", 1442 base::TimeTicks::Now() - before); 1443 1444 if (!write_ok) 1445 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE); 1446 1447 // Gather statistics. 1448 int64 file_size = GetFileSizeOrZero( 1449 side_effect_free_whitelist_prefix_set_filename_); 1450 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes", 1451 static_cast<int>(file_size / 1024)); 1452 file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_); 1453 UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes", 1454 static_cast<int>(file_size / 1024)); 1455 1456 #if defined(OS_MACOSX) 1457 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_); 1458 base::mac::SetFileBackupExclusion( 1459 side_effect_free_whitelist_prefix_set_filename_); 1460 #endif 1461 } 1462 1463 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() { 1464 // Reset the database after the current task has unwound (but only 1465 // reset once within the scope of a given task). 
1466 if (!reset_factory_.HasWeakPtrs()) { 1467 RecordFailure(FAILURE_DATABASE_CORRUPT); 1468 base::MessageLoop::current()->PostTask(FROM_HERE, 1469 base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase, 1470 reset_factory_.GetWeakPtr())); 1471 } 1472 } 1473 1474 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() { 1475 RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER); 1476 corruption_detected_ = true; // Stop updating the database. 1477 ResetDatabase(); 1478 DLOG(FATAL) << "SafeBrowsing database was corrupt and reset"; 1479 } 1480 1481 // TODO(shess): I'm not clear why this code doesn't have any 1482 // real error-handling. 1483 void SafeBrowsingDatabaseNew::LoadPrefixSet() { 1484 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1485 DCHECK(!browse_prefix_set_filename_.empty()); 1486 1487 // If there is no database, the filter cannot be used. 1488 base::PlatformFileInfo db_info; 1489 if (!file_util::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0) 1490 return; 1491 1492 // Cleanup any stale bloom filter (no longer used). 1493 // TODO(shess): Track failure to delete? 
1494 base::FilePath bloom_filter_filename = 1495 BloomFilterForFilename(browse_filename_); 1496 base::DeleteFile(bloom_filter_filename, false); 1497 1498 const base::TimeTicks before = base::TimeTicks::Now(); 1499 browse_prefix_set_.reset(safe_browsing::PrefixSet::LoadFile( 1500 browse_prefix_set_filename_)); 1501 DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in " 1502 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 1503 UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before); 1504 1505 if (!browse_prefix_set_.get()) 1506 RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ); 1507 } 1508 1509 bool SafeBrowsingDatabaseNew::Delete() { 1510 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1511 1512 const bool r1 = browse_store_->Delete(); 1513 if (!r1) 1514 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1515 1516 const bool r2 = download_store_.get() ? download_store_->Delete() : true; 1517 if (!r2) 1518 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1519 1520 const bool r3 = csd_whitelist_store_.get() ? 1521 csd_whitelist_store_->Delete() : true; 1522 if (!r3) 1523 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1524 1525 const bool r4 = download_whitelist_store_.get() ? 
1526 download_whitelist_store_->Delete() : true; 1527 if (!r4) 1528 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1529 1530 base::FilePath bloom_filter_filename = 1531 BloomFilterForFilename(browse_filename_); 1532 const bool r5 = base::DeleteFile(bloom_filter_filename, false); 1533 if (!r5) 1534 RecordFailure(FAILURE_DATABASE_FILTER_DELETE); 1535 1536 const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false); 1537 if (!r6) 1538 RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE); 1539 1540 const bool r7 = base::DeleteFile(extension_blacklist_filename_, false); 1541 if (!r7) 1542 RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE); 1543 1544 const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_, 1545 false); 1546 if (!r8) 1547 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE); 1548 1549 const bool r9 = base::DeleteFile( 1550 side_effect_free_whitelist_prefix_set_filename_, 1551 false); 1552 if (!r9) 1553 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE); 1554 1555 return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9; 1556 } 1557 1558 void SafeBrowsingDatabaseNew::WritePrefixSet() { 1559 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1560 1561 if (!browse_prefix_set_.get()) 1562 return; 1563 1564 const base::TimeTicks before = base::TimeTicks::Now(); 1565 const bool write_ok = browse_prefix_set_->WriteFile( 1566 browse_prefix_set_filename_); 1567 DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in " 1568 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 1569 UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before); 1570 1571 if (!write_ok) 1572 RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE); 1573 1574 #if defined(OS_MACOSX) 1575 base::mac::SetFileBackupExclusion(browse_prefix_set_filename_); 1576 #endif 1577 } 1578 1579 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) { 1580 base::AutoLock locked(lookup_lock_); 1581 whitelist->second = true; 
1582 whitelist->first.clear(); 1583 } 1584 1585 void SafeBrowsingDatabaseNew::LoadWhitelist( 1586 const std::vector<SBAddFullHash>& full_hashes, 1587 SBWhitelist* whitelist) { 1588 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1589 if (full_hashes.size() > kMaxWhitelistSize) { 1590 WhitelistEverything(whitelist); 1591 return; 1592 } 1593 1594 std::vector<SBFullHash> new_whitelist; 1595 new_whitelist.reserve(full_hashes.size()); 1596 for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin(); 1597 it != full_hashes.end(); ++it) { 1598 new_whitelist.push_back(it->full_hash); 1599 } 1600 std::sort(new_whitelist.begin(), new_whitelist.end()); 1601 1602 SBFullHash kill_switch; 1603 crypto::SHA256HashString(kWhitelistKillSwitchUrl, &kill_switch, 1604 sizeof(kill_switch)); 1605 if (std::binary_search(new_whitelist.begin(), new_whitelist.end(), 1606 kill_switch)) { 1607 // The kill switch is whitelisted hence we whitelist all URLs. 1608 WhitelistEverything(whitelist); 1609 } else { 1610 base::AutoLock locked(lookup_lock_); 1611 whitelist->second = false; 1612 whitelist->first.swap(new_whitelist); 1613 } 1614 } 1615 1616 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() { 1617 SBFullHash malware_kill_switch; 1618 crypto::SHA256HashString(kMalwareIPKillSwitchUrl, &malware_kill_switch, 1619 sizeof(malware_kill_switch)); 1620 std::vector<SBFullHash> full_hashes; 1621 full_hashes.push_back(malware_kill_switch); 1622 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 1623 } 1624