Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
      6 
      7 #include <algorithm>
      8 #include <iterator>
      9 
     10 #include "base/file_util.h"
     11 #include "base/metrics/histogram.h"
     12 #include "base/metrics/stats_counters.h"
     13 #include "base/time.h"
     14 #include "base/message_loop.h"
     15 #include "base/process_util.h"
     16 #include "crypto/sha2.h"
     17 #include "chrome/browser/safe_browsing/bloom_filter.h"
     18 #include "chrome/browser/safe_browsing/prefix_set.h"
     19 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
     20 #include "content/browser/browser_thread.h"
     21 #include "googleurl/src/gurl.h"
     22 
     23 namespace {
     24 
     25 // Filename suffix for the bloom filter.
     26 const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2");
     27 // Filename suffix for download store.
     28 const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download");
     29 // Filename suffix for client-side phishing detection whitelist store.
     30 const FilePath::CharType kCsdWhitelistDBFile[] =
     31     FILE_PATH_LITERAL(" Csd Whitelist");
     32 // Filename suffix for browse store.
     33 // TODO(lzheng): change to a better name when we change the file format.
     34 const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
     35 
     36 // The maximum staleness for a cached entry.
     37 const int kMaxStalenessMinutes = 45;
     38 
     39 // Maximum number of entries we allow in the client-side phishing detection
     40 // whitelist.  If the whitelist on disk contains more entries then
     41 // ContainsCsdWhitelistedUrl will always return true.
     42 const size_t kMaxCsdWhitelistSize = 5000;
     43 
     44 // If the hash of this exact expression is on the csd whitelist then
     45 // ContainsCsdWhitelistedUrl will always return true.
     46 const char kCsdKillSwitchUrl[] =
     47     "sb-ssl.google.com/safebrowsing/csd/killswitch";
     48 
     49 // To save space, the incoming |chunk_id| and |list_id| are combined
     50 // into an |encoded_chunk_id| for storage by shifting the |list_id|
     51 // into the low-order bits.  These functions decode that information.
     52 // TODO(lzheng): It was reasonable when database is saved in sqlite, but
     53 // there should be better ways to save chunk_id and list_id after we use
     54 // SafeBrowsingStoreFile.
     55 int GetListIdBit(const int encoded_chunk_id) {
     56   return encoded_chunk_id & 1;
     57 }
     58 int DecodeChunkId(int encoded_chunk_id) {
     59   return encoded_chunk_id >> 1;
     60 }
     61 int EncodeChunkId(const int chunk, const int list_id) {
     62   DCHECK_NE(list_id, safe_browsing_util::INVALID);
     63   return chunk << 1 | list_id % 2;
     64 }
     65 
     66 // Generate the set of full hashes to check for |url|.  If
     67 // |include_whitelist_hashes| is true we will generate additional path-prefixes
     68 // to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
     69 // whitelist it should also match /foo/bar which is not the case for all the
     70 // other lists.
     71 // TODO(shess): This function is almost the same as
     72 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
     73 // does an early exit on match.  Since match should be the infrequent
     74 // case (phishing or malware found), consider combining this function
     75 // with that one.
     76 void BrowseFullHashesToCheck(const GURL& url,
     77                              bool include_whitelist_hashes,
     78                              std::vector<SBFullHash>* full_hashes) {
     79   std::vector<std::string> hosts;
     80   if (url.HostIsIPAddress()) {
     81     hosts.push_back(url.host());
     82   } else {
     83     safe_browsing_util::GenerateHostsToCheck(url, &hosts);
     84   }
     85 
     86   std::vector<std::string> paths;
     87   safe_browsing_util::GeneratePathsToCheck(url, &paths);
     88 
     89   for (size_t i = 0; i < hosts.size(); ++i) {
     90     for (size_t j = 0; j < paths.size(); ++j) {
     91       const std::string& path = paths[j];
     92       SBFullHash full_hash;
     93       crypto::SHA256HashString(hosts[i] + path, &full_hash,
     94                                sizeof(full_hash));
     95       full_hashes->push_back(full_hash);
     96 
     97       // We may have /foo as path-prefix in the whitelist which should
     98       // also match with /foo/bar and /foo?bar.  Hence, for every path
     99       // that ends in '/' we also add the path without the slash.
    100       if (include_whitelist_hashes &&
    101           path.size() > 1 &&
    102           path[path.size() - 1] == '/') {
    103         crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1),
    104                                  &full_hash, sizeof(full_hash));
    105         full_hashes->push_back(full_hash);
    106       }
    107     }
    108   }
    109 }
    110 
    111 // Get the prefixes matching the download |urls|.
    112 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
    113                             std::vector<SBPrefix>* prefixes) {
    114   std::vector<SBFullHash> full_hashes;
    115   for (size_t i = 0; i < urls.size(); ++i)
    116     BrowseFullHashesToCheck(urls[i], false, &full_hashes);
    117 
    118   for (size_t i = 0; i < full_hashes.size(); ++i)
    119     prefixes->push_back(full_hashes[i].prefix);
    120 }
    121 
    122 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
    123 // add them to |full_hits| if not expired.  "Not expired" is when
    124 // either |last_update| was recent enough, or the item has been
    125 // received recently enough.  Expired items are not deleted because a
    126 // future update may make them acceptable again.
    127 //
    128 // For efficiency reasons the code walks |prefix_hits| and
    129 // |full_hashes| in parallel, so they must be sorted by prefix.
    130 void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
    131                                   const std::vector<SBAddFullHash>& full_hashes,
    132                                   std::vector<SBFullHashResult>* full_hits,
    133                                   base::Time last_update) {
    134   const base::Time expire_time =
    135       base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
    136 
    137   std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
    138   std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
    139 
    140   while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
    141     if (*piter < hiter->full_hash.prefix) {
    142       ++piter;
    143     } else if (hiter->full_hash.prefix < *piter) {
    144       ++hiter;
    145     } else {
    146       if (expire_time < last_update ||
    147           expire_time.ToTimeT() < hiter->received) {
    148         SBFullHashResult result;
    149         const int list_bit = GetListIdBit(hiter->chunk_id);
    150         DCHECK(list_bit == safe_browsing_util::MALWARE ||
    151                list_bit == safe_browsing_util::PHISH);
    152         if (!safe_browsing_util::GetListName(list_bit, &result.list_name))
    153           continue;
    154         result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
    155         result.hash = hiter->full_hash;
    156         full_hits->push_back(result);
    157       }
    158 
    159       // Only increment |hiter|, |piter| might have multiple hits.
    160       ++hiter;
    161     }
    162   }
    163 }
    164 
    165 // This function generates a chunk range string for |chunks|. It
    166 // outputs one chunk range string per list and writes it to the
    167 // |list_ranges| vector.  We expect |list_ranges| to already be of the
    168 // right size.  E.g., if |chunks| contains chunks with two different
    169 // list ids then |list_ranges| must contain two elements.
    170 void GetChunkRanges(const std::vector<int>& chunks,
    171                     std::vector<std::string>* list_ranges) {
    172   DCHECK_GT(list_ranges->size(), 0U);
    173   DCHECK_LE(list_ranges->size(), 2U);
    174   std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
    175   for (std::vector<int>::const_iterator iter = chunks.begin();
    176        iter != chunks.end(); ++iter) {
    177     int mod_list_id = GetListIdBit(*iter);
    178     DCHECK_GE(mod_list_id, 0);
    179     DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
    180     decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
    181   }
    182   for (size_t i = 0; i < decoded_chunks.size(); ++i) {
    183     ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
    184   }
    185 }
    186 
    187 // Helper function to create chunk range lists for Browse related
    188 // lists.
    189 void UpdateChunkRanges(SafeBrowsingStore* store,
    190                        const std::vector<std::string>& listnames,
    191                        std::vector<SBListChunkRanges>* lists) {
    192   DCHECK_GT(listnames.size(), 0U);
    193   DCHECK_LE(listnames.size(), 2U);
    194   std::vector<int> add_chunks;
    195   std::vector<int> sub_chunks;
    196   store->GetAddChunks(&add_chunks);
    197   store->GetSubChunks(&sub_chunks);
    198 
    199   std::vector<std::string> adds(listnames.size());
    200   std::vector<std::string> subs(listnames.size());
    201   GetChunkRanges(add_chunks, &adds);
    202   GetChunkRanges(sub_chunks, &subs);
    203 
    204   for (size_t i = 0; i < listnames.size(); ++i) {
    205     const std::string& listname = listnames[i];
    206     DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
    207               static_cast<int>(i % 2));
    208     DCHECK_NE(safe_browsing_util::GetListId(listname),
    209               safe_browsing_util::INVALID);
    210     lists->push_back(SBListChunkRanges(listname));
    211     lists->back().adds.swap(adds[i]);
    212     lists->back().subs.swap(subs[i]);
    213   }
    214 }
    215 
    216 // Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
    217 // safe_browsing_store.h orders on both chunk-id and prefix.
    218 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
    219   return a.full_hash.prefix < b.full_hash.prefix;
    220 }
    221 
    222 // As compared to the bloom filter, PrefixSet should have these
    223 // properties:
    224 // - Any bloom filter miss should be a prefix set miss.
    225 // - Any prefix set hit should be a bloom filter hit.
    226 // - Bloom filter false positives are prefix set misses.
    227 // The following is to log actual performance to verify this.
    228 enum PrefixSetEvent {
    229   PREFIX_SET_EVENT_HIT,
    230   PREFIX_SET_EVENT_BLOOM_HIT,
    231   PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
    232   PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
    233   PREFIX_SET_GETPREFIXES_BROKEN,
    234   PREFIX_SET_GETPREFIXES_BROKEN_SIZE,
    235   PREFIX_SET_GETPREFIXES_FIRST_BROKEN,
    236   PREFIX_SET_SBPREFIX_WAS_BROKEN,
    237   PREFIX_SET_GETPREFIXES_BROKEN_SORTING,
    238   PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION,
    239   PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA,
    240   PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX,
    241   PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH,
    242 
    243   // Memory space for histograms is determined by the max.  ALWAYS ADD
    244   // NEW VALUES BEFORE THIS ONE.
    245   PREFIX_SET_EVENT_MAX
    246 };
    247 
    248 void RecordPrefixSetInfo(PrefixSetEvent event_type) {
    249   UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type,
    250                             PREFIX_SET_EVENT_MAX);
    251 }
    252 
    253 // Generate a |PrefixSet| instance from the contents of
    254 // |add_prefixes|.  Additionally performs various checks to make sure
    255 // that the resulting prefix set is valid, so that the
    256 // PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
    257 // ContainsBrowseUrl() can be trustworthy.
    258 safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
    259     const std::vector<SBAddPrefix>& add_prefixes) {
    260   // TODO(shess): If |add_prefixes| were sorted by the prefix, it
    261   // could be passed directly to |PrefixSet()|, removing the need for
    262   // |prefixes|.  For now, |prefixes| is useful while debugging
    263   // things.
    264   std::vector<SBPrefix> prefixes;
    265   for (size_t i = 0; i < add_prefixes.size(); ++i) {
    266     prefixes.push_back(add_prefixes[i].prefix);
    267   }
    268 
    269   std::sort(prefixes.begin(), prefixes.end());
    270   prefixes.erase(std::unique(prefixes.begin(), prefixes.end()),
    271                  prefixes.end());
    272 
    273   scoped_ptr<safe_browsing::PrefixSet>
    274       prefix_set(new safe_browsing::PrefixSet(prefixes));
    275 
    276   std::vector<SBPrefix> restored;
    277   prefix_set->GetPrefixes(&restored);
    278 
    279   // Expect them to be equal.
    280   if (restored.size() == prefixes.size() &&
    281       std::equal(prefixes.begin(), prefixes.end(), restored.begin()))
    282     return prefix_set.release();
    283 
    284   // Log BROKEN for continuity with previous release, and SIZE to
    285   // distinguish which test failed.
    286   NOTREACHED();
    287   RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
    288   if (restored.size() != prefixes.size())
    289     RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE);
    290 
    291   // Try to distinguish between updates from one broken user and a
    292   // distributed problem.
    293   static bool logged_broken = false;
    294   if (!logged_broken) {
    295     RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN);
    296     logged_broken = true;
    297   }
    298 
    299   // This seems so very very unlikely.  But if it ever were true, then
    300   // it could explain why GetPrefixes() seemed broken.
    301   if (sizeof(int) != sizeof(int32))
    302     RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN);
    303 
    304   // Check if memory was corrupted during construction.
    305   if (!prefix_set->CheckChecksum())
    306     RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH);
    307 
    308   // Check whether |restored| is unsorted, or has duplication.
    309   if (restored.size()) {
    310     size_t unsorted_count = 0;
    311     bool duplicates = false;
    312     SBPrefix prev = restored[0];
    313     for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) {
    314       if (prev > restored[i]) {
    315         unsorted_count++;
    316         UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference",
    317                              prev - restored[i]);
    318 
    319         // When unsorted, how big is the set, and how far are we into
    320         // it.  If the set is very small or large, that might inform
    321         // pursuit of a degenerate case.  If the percentage is close
    322         // to 0%, 100%, or 50%, then there might be an interesting
    323         // degenerate case to explore.
    324         UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size());
    325         UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent",
    326                                  i * 100 / restored.size());
    327 
    328         if (prefix_set->IsDeltaAt(i)) {
    329           RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA);
    330 
    331           // Histograms require memory on the order of the number of
    332           // buckets, making high-precision logging expensive.  For
    333           // now aim for a sense of the range of the problem.
    334           UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta",
    335                                       prefix_set->DeltaAt(i), 1, 0xFFFF, 50);
    336         } else {
    337           RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX);
    338         }
    339       }
    340       if (prev == restored[i])
    341         duplicates = true;
    342     }
    343 
    344     // Record findings.
    345     if (unsorted_count) {
    346       RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING);
    347       UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count);
    348     }
    349     if (duplicates)
    350       RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION);
    351 
    352     // Fix the problems noted.  If |restored| was unsorted, then
    353     // |duplicates| may give a false negative.
    354     if (unsorted_count)
    355       std::sort(restored.begin(), restored.end());
    356     if (unsorted_count || duplicates)
    357       restored.erase(std::unique(restored.begin(), restored.end()),
    358                      restored.end());
    359   }
    360 
    361   // NOTE(shess): The following could be done using a single
    362   // uber-loop, but it's complicated by needing multiple parallel
    363   // iterators.  Didn't seem worthwhile for something that will only
    364   // live for a short period and only fires for one in a million
    365   // updates.
    366 
    367   // Find elements in |restored| which are not in |prefixes|.
    368   std::vector<SBPrefix> difference;
    369   std::set_difference(restored.begin(), restored.end(),
    370                       prefixes.begin(), prefixes.end(),
    371                       std::back_inserter(difference));
    372   if (difference.size())
    373     UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size());
    374 
    375   // Find elements in |prefixes| which are not in |restored|.
    376   difference.clear();
    377   std::set_difference(prefixes.begin(), prefixes.end(),
    378                       restored.begin(), restored.end(),
    379                       std::back_inserter(difference));
    380   if (difference.size())
    381     UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall",
    382                              difference.size());
    383 
    384   return prefix_set.release();
    385 }
    386 
    387 }  // namespace
    388 
    389 // The default SafeBrowsingDatabaseFactory.
    390 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
    391  public:
    392   virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
    393       bool enable_download_protection,
    394       bool enable_client_side_whitelist) {
    395     return new SafeBrowsingDatabaseNew(
    396         new SafeBrowsingStoreFile,
    397         enable_download_protection ? new SafeBrowsingStoreFile : NULL,
    398         enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL);
    399   }
    400 
    401   SafeBrowsingDatabaseFactoryImpl() { }
    402 
    403  private:
    404   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
    405 };
    406 
    407 // static
    408 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
    409 
    410 // Factory method, non-thread safe. Caller has to make sure this s called
    411 // on SafeBrowsing Thread.
    412 // TODO(shess): There's no need for a factory any longer.  Convert
    413 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
    414 // callers just construct things directly.
    415 SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
    416     bool enable_download_protection,
    417     bool enable_client_side_whitelist) {
    418   if (!factory_)
    419     factory_ = new SafeBrowsingDatabaseFactoryImpl();
    420   return factory_->CreateSafeBrowsingDatabase(enable_download_protection,
    421                                               enable_client_side_whitelist);
    422 }
    423 
    424 SafeBrowsingDatabase::~SafeBrowsingDatabase() {
    425 }
    426 
    427 // static
    428 FilePath SafeBrowsingDatabase::BrowseDBFilename(
    429          const FilePath& db_base_filename) {
    430   return FilePath(db_base_filename.value() + kBrowseDBFile);
    431 }
    432 
    433 // static
    434 FilePath SafeBrowsingDatabase::DownloadDBFilename(
    435     const FilePath& db_base_filename) {
    436   return FilePath(db_base_filename.value() + kDownloadDBFile);
    437 }
    438 
    439 // static
    440 FilePath SafeBrowsingDatabase::BloomFilterForFilename(
    441     const FilePath& db_filename) {
    442   return FilePath(db_filename.value() + kBloomFilterFile);
    443 }
    444 
    445 // static
    446 FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
    447     const FilePath& db_filename) {
    448   return FilePath(db_filename.value() + kCsdWhitelistDBFile);
    449 }
    450 
    451 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
    452   DVLOG(3) << "Get store for list: " << list_id;
    453   if (list_id == safe_browsing_util::PHISH ||
    454       list_id == safe_browsing_util::MALWARE) {
    455     return browse_store_.get();
    456   } else if (list_id == safe_browsing_util::BINURL ||
    457              list_id == safe_browsing_util::BINHASH) {
    458     return download_store_.get();
    459   } else if (list_id == safe_browsing_util::CSDWHITELIST) {
    460     return csd_whitelist_store_.get();
    461   }
    462   return NULL;
    463 }
    464 
    465 // static
    466 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
    467   UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
    468                             FAILURE_DATABASE_MAX);
    469 }
    470 
    471 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
    472     : creation_loop_(MessageLoop::current()),
    473       browse_store_(new SafeBrowsingStoreFile),
    474       download_store_(NULL),
    475       csd_whitelist_store_(NULL),
    476       ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) {
    477   DCHECK(browse_store_.get());
    478   DCHECK(!download_store_.get());
    479   DCHECK(!csd_whitelist_store_.get());
    480 }
    481 
    482 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
    483     SafeBrowsingStore* browse_store,
    484     SafeBrowsingStore* download_store,
    485     SafeBrowsingStore* csd_whitelist_store)
    486     : creation_loop_(MessageLoop::current()),
    487       browse_store_(browse_store),
    488       download_store_(download_store),
    489       csd_whitelist_store_(csd_whitelist_store),
    490       ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)),
    491       corruption_detected_(false) {
    492   DCHECK(browse_store_.get());
    493 }
    494 
    495 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
    496   DCHECK_EQ(creation_loop_, MessageLoop::current());
    497 }
    498 
    499 void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) {
    500   DCHECK_EQ(creation_loop_, MessageLoop::current());
    501   // Ensure we haven't been run before.
    502   DCHECK(browse_filename_.empty());
    503   DCHECK(download_filename_.empty());
    504   DCHECK(csd_whitelist_filename_.empty());
    505 
    506   browse_filename_ = BrowseDBFilename(filename_base);
    507   bloom_filter_filename_ = BloomFilterForFilename(browse_filename_);
    508 
    509   browse_store_->Init(
    510       browse_filename_,
    511       NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
    512   DVLOG(1) << "Init browse store: " << browse_filename_.value();
    513 
    514   {
    515     // NOTE: There is no need to grab the lock in this function, since
    516     // until it returns, there are no pointers to this class on other
    517     // threads.  Then again, that means there is no possibility of
    518     // contention on the lock...
    519     base::AutoLock locked(lookup_lock_);
    520     full_browse_hashes_.clear();
    521     pending_browse_hashes_.clear();
    522     LoadBloomFilter();
    523   }
    524 
    525   if (download_store_.get()) {
    526     download_filename_ = DownloadDBFilename(filename_base);
    527     download_store_->Init(
    528         download_filename_,
    529         NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
    530     DVLOG(1) << "Init download store: " << download_filename_.value();
    531   }
    532 
    533   if (csd_whitelist_store_.get()) {
    534     csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
    535     csd_whitelist_store_->Init(
    536         csd_whitelist_filename_,
    537         NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
    538     DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
    539     std::vector<SBAddFullHash> full_hashes;
    540     if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
    541       LoadCsdWhitelist(full_hashes);
    542     } else {
    543       CsdWhitelistAllUrls();
    544     }
    545   } else {
    546     CsdWhitelistAllUrls();  // Just to be safe.
    547   }
    548 }
    549 
    550 bool SafeBrowsingDatabaseNew::ResetDatabase() {
    551   DCHECK_EQ(creation_loop_, MessageLoop::current());
    552 
    553   // Delete files on disk.
    554   // TODO(shess): Hard to see where one might want to delete without a
    555   // reset.  Perhaps inline |Delete()|?
    556   if (!Delete())
    557     return false;
    558 
    559   // Reset objects in memory.
    560   {
    561     base::AutoLock locked(lookup_lock_);
    562     full_browse_hashes_.clear();
    563     pending_browse_hashes_.clear();
    564     prefix_miss_cache_.clear();
    565     // TODO(shess): This could probably be |bloom_filter_.reset()|.
    566     browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize *
    567                                            BloomFilter::kBloomFilterSizeRatio);
    568     // TODO(shess): It is simpler for the code to assume that presence
    569     // of a bloom filter always implies presence of a prefix set.
    570     prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
    571   }
    572   // Wants to acquire the lock itself.
    573   CsdWhitelistAllUrls();
    574 
    575   return true;
    576 }
    577 
    578 // TODO(lzheng): Remove matching_list, it is not used anywhere.
    579 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
    580     const GURL& url,
    581     std::string* matching_list,
    582     std::vector<SBPrefix>* prefix_hits,
    583     std::vector<SBFullHashResult>* full_hits,
    584     base::Time last_update) {
    585   // Clear the results first.
    586   matching_list->clear();
    587   prefix_hits->clear();
    588   full_hits->clear();
    589 
    590   std::vector<SBFullHash> full_hashes;
    591   BrowseFullHashesToCheck(url, false, &full_hashes);
    592   if (full_hashes.empty())
    593     return false;
    594 
    595   // This function is called on the I/O thread, prevent changes to
    596   // bloom filter and caches.
    597   base::AutoLock locked(lookup_lock_);
    598 
    599   if (!browse_bloom_filter_.get())
    600     return false;
    601   DCHECK(prefix_set_.get());
    602 
    603   // Used to double-check in case of a hit mis-match.
    604   std::vector<SBPrefix> restored;
    605 
    606   size_t miss_count = 0;
    607   for (size_t i = 0; i < full_hashes.size(); ++i) {
    608     bool found = prefix_set_->Exists(full_hashes[i].prefix);
    609 
    610     if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
    611       RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
    612       if (found)
    613         RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
    614       prefix_hits->push_back(full_hashes[i].prefix);
    615       if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
    616         ++miss_count;
    617     } else {
    618       // Bloom filter misses should never be in prefix set.  Re-create
    619       // the original prefixes and manually search for it, to check if
    620       // there's a bug with how |Exists()| is implemented.
    621       // |UpdateBrowseStore()| previously verified that
    622       // |GetPrefixes()| returns the same prefixes as were passed to
    623       // the constructor.
    624       DCHECK(!found);
    625       if (found) {
    626         if (restored.empty())
    627           prefix_set_->GetPrefixes(&restored);
    628 
    629         // If the item is not in the re-created list, then there is an
    630         // error in |PrefixSet::Exists()|.  If the item is in the
    631         // re-created list, then the bloom filter was wrong.
    632         if (std::binary_search(restored.begin(), restored.end(),
    633                                full_hashes[i].prefix)) {
    634           RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
    635         } else {
    636           RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
    637         }
    638       }
    639     }
    640   }
    641 
    642   // If all the prefixes are cached as 'misses', don't issue a GetHash.
    643   if (miss_count == prefix_hits->size())
    644     return false;
    645 
    646   // Find the matching full-hash results.  |full_browse_hashes_| are from the
    647   // database, |pending_browse_hashes_| are from GetHash requests between
    648   // updates.
    649   std::sort(prefix_hits->begin(), prefix_hits->end());
    650 
    651   GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
    652                                full_hits, last_update);
    653   GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
    654                                full_hits, last_update);
    655   return true;
    656 }
    657 
    658 bool SafeBrowsingDatabaseNew::MatchDownloadAddPrefixes(
    659     int list_bit,
    660     const std::vector<SBPrefix>& prefixes,
    661     std::vector<SBPrefix>* prefix_hits) {
    662   prefix_hits->clear();
    663 
    664   std::vector<SBAddPrefix> add_prefixes;
    665   download_store_->GetAddPrefixes(&add_prefixes);
    666   for (size_t i = 0; i < add_prefixes.size(); ++i) {
    667     for (size_t j = 0; j < prefixes.size(); ++j) {
    668       const SBPrefix& prefix = prefixes[j];
    669       if (prefix == add_prefixes[i].prefix &&
    670           GetListIdBit(add_prefixes[i].chunk_id) == list_bit) {
    671         prefix_hits->push_back(prefix);
    672       }
    673     }
    674   }
    675   return !prefix_hits->empty();
    676 }
    677 
    678 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
    679     const std::vector<GURL>& urls,
    680     std::vector<SBPrefix>* prefix_hits) {
    681   DCHECK_EQ(creation_loop_, MessageLoop::current());
    682 
    683   // Ignore this check when download checking is not enabled.
    684   if (!download_store_.get())
    685     return false;
    686 
    687   std::vector<SBPrefix> prefixes;
    688   GetDownloadUrlPrefixes(urls, &prefixes);
    689   return MatchDownloadAddPrefixes(safe_browsing_util::BINURL % 2,
    690                                   prefixes,
    691                                   prefix_hits);
    692 }
    693 
    694 bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix(
    695     const SBPrefix& prefix) {
    696   DCHECK_EQ(creation_loop_, MessageLoop::current());
    697 
    698   // Ignore this check when download store is not available.
    699   if (!download_store_.get())
    700     return false;
    701 
    702   std::vector<SBPrefix> prefixes(1, prefix);
    703   std::vector<SBPrefix> prefix_hits;
    704   return MatchDownloadAddPrefixes(safe_browsing_util::BINHASH % 2,
    705                                   prefixes,
    706                                   &prefix_hits);
    707 }
    708 
    709 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
    710   // This method is theoretically thread-safe but we expect all calls to
    711   // originate from the IO thread.
    712   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    713   base::AutoLock l(lookup_lock_);
    714   if (csd_whitelist_all_urls_)
    715     return true;
    716 
    717   std::vector<SBFullHash> full_hashes;
    718   BrowseFullHashesToCheck(url, true, &full_hashes);
    719   for (std::vector<SBFullHash>::const_iterator it = full_hashes.begin();
    720        it != full_hashes.end(); ++it) {
    721     if (std::binary_search(csd_whitelist_.begin(), csd_whitelist_.end(), *it))
    722       return true;
    723   }
    724   return false;
    725 }
    726 
    727 // Helper to insert entries for all of the prefixes or full hashes in
    728 // |entry| into the store.
    729 void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
    730                                         const SBEntry* entry, int list_id) {
    731   DCHECK_EQ(creation_loop_, MessageLoop::current());
    732 
    733   SafeBrowsingStore* store = GetStore(list_id);
    734   if (!store) return;
    735 
    736   STATS_COUNTER("SB.HostInsert", 1);
    737   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
    738   const int count = entry->prefix_count();
    739 
    740   DCHECK(!entry->IsSub());
    741   if (!count) {
    742     // No prefixes, use host instead.
    743     STATS_COUNTER("SB.PrefixAdd", 1);
    744     store->WriteAddPrefix(encoded_chunk_id, host);
    745   } else if (entry->IsPrefix()) {
    746     // Prefixes only.
    747     for (int i = 0; i < count; i++) {
    748       const SBPrefix prefix = entry->PrefixAt(i);
    749       STATS_COUNTER("SB.PrefixAdd", 1);
    750       store->WriteAddPrefix(encoded_chunk_id, prefix);
    751     }
    752   } else {
    753     // Prefixes and hashes.
    754     const base::Time receive_time = base::Time::Now();
    755     for (int i = 0; i < count; ++i) {
    756       const SBFullHash full_hash = entry->FullHashAt(i);
    757       const SBPrefix prefix = full_hash.prefix;
    758 
    759       STATS_COUNTER("SB.PrefixAdd", 1);
    760       store->WriteAddPrefix(encoded_chunk_id, prefix);
    761 
    762       STATS_COUNTER("SB.PrefixAddFull", 1);
    763       store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
    764     }
    765   }
    766 }
    767 
    768 // Helper to iterate over all the entries in the hosts in |chunks| and
    769 // add them to the store.
    770 void SafeBrowsingDatabaseNew::InsertAddChunks(const int list_id,
    771                                               const SBChunkList& chunks) {
    772   DCHECK_EQ(creation_loop_, MessageLoop::current());
    773 
    774   SafeBrowsingStore* store = GetStore(list_id);
    775   if (!store) return;
    776 
    777   for (SBChunkList::const_iterator citer = chunks.begin();
    778        citer != chunks.end(); ++citer) {
    779     const int chunk_id = citer->chunk_number;
    780 
    781     // The server can give us a chunk that we already have because
    782     // it's part of a range.  Don't add it again.
    783     const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
    784     if (store->CheckAddChunk(encoded_chunk_id))
    785       continue;
    786 
    787     store->SetAddChunk(encoded_chunk_id);
    788     for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
    789          hiter != citer->hosts.end(); ++hiter) {
    790       // NOTE: Could pass |encoded_chunk_id|, but then inserting add
    791       // chunks would look different from inserting sub chunks.
    792       InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
    793     }
    794   }
    795 }
    796 
    797 // Helper to insert entries for all of the prefixes or full hashes in
    798 // |entry| into the store.
    799 void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
    800                                         const SBEntry* entry, int list_id) {
    801   DCHECK_EQ(creation_loop_, MessageLoop::current());
    802 
    803   SafeBrowsingStore* store = GetStore(list_id);
    804   if (!store) return;
    805 
    806   STATS_COUNTER("SB.HostDelete", 1);
    807   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
    808   const int count = entry->prefix_count();
    809 
    810   DCHECK(entry->IsSub());
    811   if (!count) {
    812     // No prefixes, use host instead.
    813     STATS_COUNTER("SB.PrefixSub", 1);
    814     const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
    815     store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
    816   } else if (entry->IsPrefix()) {
    817     // Prefixes only.
    818     for (int i = 0; i < count; i++) {
    819       const SBPrefix prefix = entry->PrefixAt(i);
    820       const int add_chunk_id =
    821           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
    822 
    823       STATS_COUNTER("SB.PrefixSub", 1);
    824       store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
    825     }
    826   } else {
    827     // Prefixes and hashes.
    828     for (int i = 0; i < count; ++i) {
    829       const SBFullHash full_hash = entry->FullHashAt(i);
    830       const int add_chunk_id =
    831           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
    832 
    833       STATS_COUNTER("SB.PrefixSub", 1);
    834       store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
    835 
    836       STATS_COUNTER("SB.PrefixSubFull", 1);
    837       store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
    838     }
    839   }
    840 }
    841 
    842 // Helper to iterate over all the entries in the hosts in |chunks| and
    843 // add them to the store.
    844 void SafeBrowsingDatabaseNew::InsertSubChunks(int list_id,
    845                                               const SBChunkList& chunks) {
    846   DCHECK_EQ(creation_loop_, MessageLoop::current());
    847 
    848   SafeBrowsingStore* store = GetStore(list_id);
    849   if (!store) return;
    850 
    851   for (SBChunkList::const_iterator citer = chunks.begin();
    852        citer != chunks.end(); ++citer) {
    853     const int chunk_id = citer->chunk_number;
    854 
    855     // The server can give us a chunk that we already have because
    856     // it's part of a range.  Don't add it again.
    857     const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
    858     if (store->CheckSubChunk(encoded_chunk_id))
    859       continue;
    860 
    861     store->SetSubChunk(encoded_chunk_id);
    862     for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
    863          hiter != citer->hosts.end(); ++hiter) {
    864       InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
    865     }
    866   }
    867 }
    868 
    869 void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
    870                                            const SBChunkList& chunks) {
    871   DCHECK_EQ(creation_loop_, MessageLoop::current());
    872 
    873   if (corruption_detected_ || chunks.empty())
    874     return;
    875 
    876   const base::Time insert_start = base::Time::Now();
    877 
    878   const int list_id = safe_browsing_util::GetListId(list_name);
    879   DVLOG(2) << list_name << ": " << list_id;
    880 
    881   SafeBrowsingStore* store = GetStore(list_id);
    882   if (!store) return;
    883 
    884   change_detected_ = true;
    885 
    886   store->BeginChunk();
    887   if (chunks.front().is_add) {
    888     InsertAddChunks(list_id, chunks);
    889   } else {
    890     InsertSubChunks(list_id, chunks);
    891   }
    892   store->FinishChunk();
    893 
    894   UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start);
    895 }
    896 
    897 void SafeBrowsingDatabaseNew::DeleteChunks(
    898     const std::vector<SBChunkDelete>& chunk_deletes) {
    899   DCHECK_EQ(creation_loop_, MessageLoop::current());
    900 
    901   if (corruption_detected_ || chunk_deletes.empty())
    902     return;
    903 
    904   const std::string& list_name = chunk_deletes.front().list_name;
    905   const int list_id = safe_browsing_util::GetListId(list_name);
    906 
    907   SafeBrowsingStore* store = GetStore(list_id);
    908   if (!store) return;
    909 
    910   change_detected_ = true;
    911 
    912   for (size_t i = 0; i < chunk_deletes.size(); ++i) {
    913     std::vector<int> chunk_numbers;
    914     RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
    915     for (size_t j = 0; j < chunk_numbers.size(); ++j) {
    916       const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
    917       if (chunk_deletes[i].is_sub_del)
    918         store->DeleteSubChunk(encoded_chunk_id);
    919       else
    920         store->DeleteAddChunk(encoded_chunk_id);
    921     }
    922   }
    923 }
    924 
    925 void SafeBrowsingDatabaseNew::CacheHashResults(
    926     const std::vector<SBPrefix>& prefixes,
    927     const std::vector<SBFullHashResult>& full_hits) {
    928   // This is called on the I/O thread, lock against updates.
    929   base::AutoLock locked(lookup_lock_);
    930 
    931   if (full_hits.empty()) {
    932     prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
    933     return;
    934   }
    935 
    936   // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
    937   // Refactor to make them identical.
    938   const base::Time now = base::Time::Now();
    939   const size_t orig_size = pending_browse_hashes_.size();
    940   for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
    941        iter != full_hits.end(); ++iter) {
    942     const int list_id = safe_browsing_util::GetListId(iter->list_name);
    943     if (list_id == safe_browsing_util::MALWARE ||
    944         list_id == safe_browsing_util::PHISH) {
    945       int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
    946       SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
    947       pending_browse_hashes_.push_back(add_full_hash);
    948     }
    949   }
    950 
    951   // Sort new entries then merge with the previously-sorted entries.
    952   std::vector<SBAddFullHash>::iterator
    953       orig_end = pending_browse_hashes_.begin() + orig_size;
    954   std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
    955   std::inplace_merge(pending_browse_hashes_.begin(),
    956                      orig_end, pending_browse_hashes_.end(),
    957                      SBAddFullHashPrefixLess);
    958 }
    959 
    960 bool SafeBrowsingDatabaseNew::UpdateStarted(
    961     std::vector<SBListChunkRanges>* lists) {
    962   DCHECK_EQ(creation_loop_, MessageLoop::current());
    963   DCHECK(lists);
    964 
    965   // If |BeginUpdate()| fails, reset the database.
    966   if (!browse_store_->BeginUpdate()) {
    967     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
    968     HandleCorruptDatabase();
    969     return false;
    970   }
    971 
    972   if (download_store_.get() && !download_store_->BeginUpdate()) {
    973     RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
    974     HandleCorruptDatabase();
    975     return false;
    976   }
    977 
    978   if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
    979     RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN);
    980     HandleCorruptDatabase();
    981     return false;
    982   }
    983 
    984   std::vector<std::string> browse_listnames;
    985   browse_listnames.push_back(safe_browsing_util::kMalwareList);
    986   browse_listnames.push_back(safe_browsing_util::kPhishingList);
    987   UpdateChunkRanges(browse_store_.get(), browse_listnames, lists);
    988 
    989   if (download_store_.get()) {
    990     std::vector<std::string> download_listnames;
    991     download_listnames.push_back(safe_browsing_util::kBinUrlList);
    992     download_listnames.push_back(safe_browsing_util::kBinHashList);
    993     UpdateChunkRanges(download_store_.get(), download_listnames, lists);
    994   }
    995 
    996   if (csd_whitelist_store_.get()) {
    997     std::vector<std::string> csd_whitelist_listnames;
    998     csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList);
    999     UpdateChunkRanges(csd_whitelist_store_.get(),
   1000                       csd_whitelist_listnames, lists);
   1001   }
   1002 
   1003   corruption_detected_ = false;
   1004   change_detected_ = false;
   1005   return true;
   1006 }
   1007 
   1008 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
   1009   DCHECK_EQ(creation_loop_, MessageLoop::current());
   1010   if (corruption_detected_)
   1011     return;
   1012 
   1013   // Unroll the transaction if there was a protocol error or if the
   1014   // transaction was empty.  This will leave the bloom filter, the
   1015   // pending hashes, and the prefix miss cache in place.
   1016   if (!update_succeeded || !change_detected_) {
   1017     // Track empty updates to answer questions at http://crbug.com/72216 .
   1018     if (update_succeeded && !change_detected_)
   1019       UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
   1020     browse_store_->CancelUpdate();
   1021     if (download_store_.get())
   1022       download_store_->CancelUpdate();
   1023     if (csd_whitelist_store_.get())
   1024       csd_whitelist_store_->CancelUpdate();
   1025     return;
   1026   }
   1027 
   1028   // for download
   1029   UpdateDownloadStore();
   1030   // for browsing
   1031   UpdateBrowseStore();
   1032   // for csd whitelist
   1033   UpdateCsdWhitelistStore();
   1034 }
   1035 
   1036 void SafeBrowsingDatabaseNew::UpdateCsdWhitelistStore() {
   1037   if (!csd_whitelist_store_.get())
   1038     return;
   1039 
   1040   // For the csd whitelist, we don't cache and save full hashes since all
   1041   // hashes are already full.
   1042   std::vector<SBAddFullHash> empty_add_hashes;
   1043 
   1044   // Not needed for the csd whitelist.
   1045   std::set<SBPrefix> empty_miss_cache;
   1046 
   1047   // Note: prefixes will not be empty.  The current data store implementation
   1048   // stores all full-length hashes as both full and prefix hashes.
   1049   std::vector<SBAddPrefix> prefixes;
   1050   std::vector<SBAddFullHash> full_hashes;
   1051   if (!csd_whitelist_store_->FinishUpdate(empty_add_hashes,
   1052                                           empty_miss_cache,
   1053                                           &prefixes,
   1054                                           &full_hashes)) {
   1055     RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH);
   1056     CsdWhitelistAllUrls();
   1057     return;
   1058   }
   1059   LoadCsdWhitelist(full_hashes);
   1060 }
   1061 
   1062 void SafeBrowsingDatabaseNew::UpdateDownloadStore() {
   1063   if (!download_store_.get())
   1064     return;
   1065 
   1066   // For download, we don't cache and save full hashes.
   1067   std::vector<SBAddFullHash> empty_add_hashes;
   1068 
   1069   // For download, backend lookup happens only if a prefix is in add list.
   1070   // No need to pass in miss cache when call FinishUpdate to caculate
   1071   // bloomfilter false positives.
   1072   std::set<SBPrefix> empty_miss_cache;
   1073 
   1074   // These results are not used after this call. Simply ignore the
   1075   // returned value after FinishUpdate(...).
   1076   std::vector<SBAddPrefix> add_prefixes_result;
   1077   std::vector<SBAddFullHash> add_full_hashes_result;
   1078 
   1079   if (!download_store_->FinishUpdate(empty_add_hashes,
   1080                                      empty_miss_cache,
   1081                                      &add_prefixes_result,
   1082                                      &add_full_hashes_result))
   1083     RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
   1084   return;
   1085 }
   1086 
   1087 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
   1088   // Copy out the pending add hashes.  Copy rather than swapping in
   1089   // case |ContainsBrowseURL()| is called before the new filter is complete.
   1090   std::vector<SBAddFullHash> pending_add_hashes;
   1091   {
   1092     base::AutoLock locked(lookup_lock_);
   1093     pending_add_hashes.insert(pending_add_hashes.end(),
   1094                               pending_browse_hashes_.begin(),
   1095                               pending_browse_hashes_.end());
   1096   }
   1097 
   1098   // Measure the amount of IO during the bloom filter build.
   1099   base::IoCounters io_before, io_after;
   1100   base::ProcessHandle handle = base::Process::Current().handle();
   1101   scoped_ptr<base::ProcessMetrics> metric(
   1102 #if !defined(OS_MACOSX)
   1103       base::ProcessMetrics::CreateProcessMetrics(handle)
   1104 #else
   1105       // Getting stats only for the current process is enough, so NULL is fine.
   1106       base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
   1107 #endif
   1108   );
   1109 
   1110   // IoCounters are currently not supported on Mac, and may not be
   1111   // available for Linux, so we check the result and only show IO
   1112   // stats if they are available.
   1113   const bool got_counters = metric->GetIOCounters(&io_before);
   1114 
   1115   const base::Time before = base::Time::Now();
   1116 
   1117   std::vector<SBAddPrefix> add_prefixes;
   1118   std::vector<SBAddFullHash> add_full_hashes;
   1119   if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_,
   1120                                    &add_prefixes, &add_full_hashes)) {
   1121     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
   1122     return;
   1123   }
   1124 
   1125   // Create and populate |filter| from |add_prefixes|.
   1126   // TODO(shess): The bloom filter doesn't need to be a
   1127   // scoped_refptr<> for this code.  Refactor that away.
   1128   const int filter_size =
   1129       BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
   1130   scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size));
   1131   for (size_t i = 0; i < add_prefixes.size(); ++i) {
   1132     filter->Insert(add_prefixes[i].prefix);
   1133   }
   1134 
   1135   scoped_ptr<safe_browsing::PrefixSet>
   1136       prefix_set(PrefixSetFromAddPrefixes(add_prefixes));
   1137 
   1138   // This needs to be in sorted order by prefix for efficient access.
   1139   std::sort(add_full_hashes.begin(), add_full_hashes.end(),
   1140             SBAddFullHashPrefixLess);
   1141 
   1142   // Swap in the newly built filter and cache.
   1143   {
   1144     base::AutoLock locked(lookup_lock_);
   1145     full_browse_hashes_.swap(add_full_hashes);
   1146 
   1147     // TODO(shess): If |CacheHashResults()| is posted between the
   1148     // earlier lock and this clear, those pending hashes will be lost.
   1149     // It could be fixed by only removing hashes which were collected
   1150     // at the earlier point.  I believe that is fail-safe as-is (the
   1151     // hash will be fetched again).
   1152     pending_browse_hashes_.clear();
   1153     prefix_miss_cache_.clear();
   1154     browse_bloom_filter_.swap(filter);
   1155     prefix_set_.swap(prefix_set);
   1156   }
   1157 
   1158   const base::TimeDelta bloom_gen = base::Time::Now() - before;
   1159 
   1160   // Persist the bloom filter to disk.  Since only this thread changes
   1161   // |browse_bloom_filter_|, there is no need to lock.
   1162   WriteBloomFilter();
   1163 
   1164   // Gather statistics.
   1165   if (got_counters && metric->GetIOCounters(&io_after)) {
   1166     UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
   1167                          static_cast<int>(io_after.ReadTransferCount -
   1168                                           io_before.ReadTransferCount) / 1024);
   1169     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
   1170                          static_cast<int>(io_after.WriteTransferCount -
   1171                                           io_before.WriteTransferCount) / 1024);
   1172     UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
   1173                          static_cast<int>(io_after.ReadOperationCount -
   1174                                           io_before.ReadOperationCount));
   1175     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
   1176                          static_cast<int>(io_after.WriteOperationCount -
   1177                                           io_before.WriteOperationCount));
   1178   }
   1179   DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in "
   1180            << bloom_gen.InMilliseconds() << " ms total.  prefix count: "
   1181            << add_prefixes.size();
   1182   UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen);
   1183   UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes",
   1184                        browse_bloom_filter_->size() / 1024);
   1185   int64 size_64;
   1186   if (file_util::GetFileSize(browse_filename_, &size_64))
   1187     UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
   1188                          static_cast<int>(size_64 / 1024));
   1189   if (file_util::GetFileSize(download_filename_, &size_64))
   1190     UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
   1191                          static_cast<int>(size_64 / 1024));
   1192 }
   1193 
   1194 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
   1195   // Reset the database after the current task has unwound (but only
   1196   // reset once within the scope of a given task).
   1197   if (reset_factory_.empty()) {
   1198     RecordFailure(FAILURE_DATABASE_CORRUPT);
   1199     MessageLoop::current()->PostTask(FROM_HERE,
   1200         reset_factory_.NewRunnableMethod(
   1201             &SafeBrowsingDatabaseNew::OnHandleCorruptDatabase));
   1202   }
   1203 }
   1204 
   1205 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
   1206   RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
   1207   corruption_detected_ = true;  // Stop updating the database.
   1208   ResetDatabase();
   1209   DCHECK(false) << "SafeBrowsing database was corrupt and reset";
   1210 }
   1211 
   1212 // TODO(shess): I'm not clear why this code doesn't have any
   1213 // real error-handling.
   1214 void SafeBrowsingDatabaseNew::LoadBloomFilter() {
   1215   DCHECK_EQ(creation_loop_, MessageLoop::current());
   1216   DCHECK(!bloom_filter_filename_.empty());
   1217 
   1218   // If we're missing either of the database or filter files, we wait until the
   1219   // next update to generate a new filter.
   1220   // TODO(paulg): Investigate how often the filter file is missing and how
   1221   // expensive it would be to regenerate it.
   1222   int64 size_64;
   1223   if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0)
   1224     return;
   1225 
   1226   if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) ||
   1227       size_64 == 0) {
   1228     RecordFailure(FAILURE_DATABASE_FILTER_MISSING);
   1229     return;
   1230   }
   1231 
   1232   const base::TimeTicks before = base::TimeTicks::Now();
   1233   browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_);
   1234   DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
   1235            << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
   1236 
   1237   if (!browse_bloom_filter_.get())
   1238     RecordFailure(FAILURE_DATABASE_FILTER_READ);
   1239 
   1240   // Manually re-generate the prefix set from the main database.
   1241   // TODO(shess): Write/read for prefix set.
   1242   std::vector<SBAddPrefix> add_prefixes;
   1243   browse_store_->GetAddPrefixes(&add_prefixes);
   1244   prefix_set_.reset(PrefixSetFromAddPrefixes(add_prefixes));
   1245 }
   1246 
   1247 bool SafeBrowsingDatabaseNew::Delete() {
   1248   DCHECK_EQ(creation_loop_, MessageLoop::current());
   1249 
   1250   const bool r1 = browse_store_->Delete();
   1251   if (!r1)
   1252     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
   1253 
   1254   const bool r2 = download_store_.get() ? download_store_->Delete() : true;
   1255   if (!r2)
   1256     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
   1257 
   1258   const bool r3 = csd_whitelist_store_.get() ?
   1259       csd_whitelist_store_->Delete() : true;
   1260   if (!r3)
   1261     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
   1262 
   1263   const bool r4 = file_util::Delete(bloom_filter_filename_, false);
   1264   if (!r4)
   1265     RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
   1266   return r1 && r2 && r3 && r4;
   1267 }
   1268 
   1269 void SafeBrowsingDatabaseNew::WriteBloomFilter() {
   1270   DCHECK_EQ(creation_loop_, MessageLoop::current());
   1271 
   1272   if (!browse_bloom_filter_.get())
   1273     return;
   1274 
   1275   const base::TimeTicks before = base::TimeTicks::Now();
   1276   const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_);
   1277   DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in "
   1278            << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
   1279 
   1280   if (!write_ok)
   1281     RecordFailure(FAILURE_DATABASE_FILTER_WRITE);
   1282 }
   1283 
   1284 void SafeBrowsingDatabaseNew::CsdWhitelistAllUrls() {
   1285   base::AutoLock locked(lookup_lock_);
   1286   csd_whitelist_all_urls_ = true;
   1287   csd_whitelist_.clear();
   1288 }
   1289 
   1290 void SafeBrowsingDatabaseNew::LoadCsdWhitelist(
   1291     const std::vector<SBAddFullHash>& full_hashes) {
   1292   DCHECK_EQ(creation_loop_, MessageLoop::current());
   1293   if (full_hashes.size() > kMaxCsdWhitelistSize) {
   1294     CsdWhitelistAllUrls();
   1295     return;
   1296   }
   1297 
   1298   std::vector<SBFullHash> new_csd_whitelist;
   1299   for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
   1300        it != full_hashes.end(); ++it) {
   1301     new_csd_whitelist.push_back(it->full_hash);
   1302   }
   1303   std::sort(new_csd_whitelist.begin(), new_csd_whitelist.end());
   1304 
   1305   SBFullHash kill_switch;
   1306   crypto::SHA256HashString(kCsdKillSwitchUrl, &kill_switch,
   1307                            sizeof(kill_switch));
   1308   if (std::binary_search(new_csd_whitelist.begin(), new_csd_whitelist.end(),
   1309                          kill_switch)) {
   1310     // The kill switch is whitelisted hence we whitelist all URLs.
   1311     CsdWhitelistAllUrls();
   1312   } else {
   1313     base::AutoLock locked(lookup_lock_);
   1314     csd_whitelist_all_urls_ = false;
   1315     csd_whitelist_.swap(new_csd_whitelist);
   1316   }
   1317 }
   1318