Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
      6 
      7 #include <algorithm>
      8 #include <iterator>
      9 
     10 #include "base/bind.h"
     11 #include "base/file_util.h"
     12 #include "base/message_loop/message_loop.h"
     13 #include "base/metrics/histogram.h"
     14 #include "base/metrics/stats_counters.h"
     15 #include "base/process/process.h"
     16 #include "base/process/process_metrics.h"
     17 #include "base/sha1.h"
     18 #include "base/strings/string_number_conversions.h"
     19 #include "base/strings/stringprintf.h"
     20 #include "base/time/time.h"
     21 #include "chrome/browser/safe_browsing/prefix_set.h"
     22 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
     23 #include "content/public/browser/browser_thread.h"
     24 #include "crypto/sha2.h"
     25 #include "net/base/net_util.h"
     26 #include "url/gurl.h"
     27 
     28 #if defined(OS_MACOSX)
     29 #include "base/mac/mac_util.h"
     30 #endif
     31 
     32 using content::BrowserThread;
     33 
     34 namespace {
     35 
     36 // Filename suffix for the bloom filter.
     37 const base::FilePath::CharType kBloomFilterFile[] =
     38     FILE_PATH_LITERAL(" Filter 2");
     39 // Filename suffix for the prefix set.
     40 const base::FilePath::CharType kPrefixSetFile[] =
     41     FILE_PATH_LITERAL(" Prefix Set");
     42 // Filename suffix for download store.
     43 const base::FilePath::CharType kDownloadDBFile[] =
     44     FILE_PATH_LITERAL(" Download");
     45 // Filename suffix for client-side phishing detection whitelist store.
     46 const base::FilePath::CharType kCsdWhitelistDBFile[] =
     47     FILE_PATH_LITERAL(" Csd Whitelist");
     48 // Filename suffix for the download whitelist store.
     49 const base::FilePath::CharType kDownloadWhitelistDBFile[] =
     50     FILE_PATH_LITERAL(" Download Whitelist");
     51 // Filename suffix for the extension blacklist store.
     52 const base::FilePath::CharType kExtensionBlacklistDBFile[] =
     53     FILE_PATH_LITERAL(" Extension Blacklist");
     54 // Filename suffix for the side-effect free whitelist store.
     55 const base::FilePath::CharType kSideEffectFreeWhitelistDBFile[] =
     56     FILE_PATH_LITERAL(" Side-Effect Free Whitelist");
     57 // Filename suffix for the csd malware IP blacklist store.
     58 const base::FilePath::CharType kIPBlacklistDBFile[] =
     59     FILE_PATH_LITERAL(" IP Blacklist");
     60 
     61 // Filename suffix for browse store.
     62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
     63 // Unfortunately, to change the name implies lots of transition code
     64 // for little benefit.  If/when file formats change (say to put all
     65 // the data in one file), that would be a convenient point to rectify
     66 // this.
     67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
     68 
     69 // The maximum staleness for a cached entry.
     70 const int kMaxStalenessMinutes = 45;
     71 
     72 // Maximum number of entries we allow in any of the whitelists.
     73 // If a whitelist on disk contains more entries then all lookups to
     74 // the whitelist will be considered a match.
     75 const size_t kMaxWhitelistSize = 5000;
     76 
     77 // If the hash of this exact expression is on a whitelist then all
     78 // lookups to this whitelist will be considered a match.
     79 const char kWhitelistKillSwitchUrl[] =
     80     "sb-ssl.google.com/safebrowsing/csd/killswitch";  // Don't change this!
     81 
     82 // If the hash of this exact expression is on a whitelist then the
     83 // malware IP blacklisting feature will be disabled in csd.
     84 // Don't change this!
     85 const char kMalwareIPKillSwitchUrl[] =
     86     "sb-ssl.google.com/safebrowsing/csd/killswitch_malware";
     87 
     88 const size_t kMaxIpPrefixSize = 128;
     89 const size_t kMinIpPrefixSize = 1;
     90 
     91 // To save space, the incoming |chunk_id| and |list_id| are combined
     92 // into an |encoded_chunk_id| for storage by shifting the |list_id|
     93 // into the low-order bits.  These functions decode that information.
     94 // TODO(lzheng): It was reasonable when database is saved in sqlite, but
     95 // there should be better ways to save chunk_id and list_id after we use
     96 // SafeBrowsingStoreFile.
     97 int GetListIdBit(const int encoded_chunk_id) {
     98   return encoded_chunk_id & 1;
     99 }
    100 int DecodeChunkId(int encoded_chunk_id) {
    101   return encoded_chunk_id >> 1;
    102 }
    103 int EncodeChunkId(const int chunk, const int list_id) {
    104   DCHECK_NE(list_id, safe_browsing_util::INVALID);
    105   return chunk << 1 | list_id % 2;
    106 }
    107 
    108 // Generate the set of full hashes to check for |url|.  If
    109 // |include_whitelist_hashes| is true we will generate additional path-prefixes
    110 // to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
    111 // whitelist it should also match /foo/bar which is not the case for all the
    112 // other lists.  We'll also always add a pattern for the empty path.
    113 // TODO(shess): This function is almost the same as
    114 // |CompareFullHashes()| in safe_browsing_util.cc, except that code
    115 // does an early exit on match.  Since match should be the infrequent
    116 // case (phishing or malware found), consider combining this function
    117 // with that one.
    118 void BrowseFullHashesToCheck(const GURL& url,
    119                              bool include_whitelist_hashes,
    120                              std::vector<SBFullHash>* full_hashes) {
    121   std::vector<std::string> hosts;
    122   if (url.HostIsIPAddress()) {
    123     hosts.push_back(url.host());
    124   } else {
    125     safe_browsing_util::GenerateHostsToCheck(url, &hosts);
    126   }
    127 
    128   std::vector<std::string> paths;
    129   safe_browsing_util::GeneratePathsToCheck(url, &paths);
    130 
    131   for (size_t i = 0; i < hosts.size(); ++i) {
    132     for (size_t j = 0; j < paths.size(); ++j) {
    133       const std::string& path = paths[j];
    134       SBFullHash full_hash;
    135       crypto::SHA256HashString(hosts[i] + path, &full_hash,
    136                                sizeof(full_hash));
    137       full_hashes->push_back(full_hash);
    138 
    139       // We may have /foo as path-prefix in the whitelist which should
    140       // also match with /foo/bar and /foo?bar.  Hence, for every path
    141       // that ends in '/' we also add the path without the slash.
    142       if (include_whitelist_hashes &&
    143           path.size() > 1 &&
    144           path[path.size() - 1] == '/') {
    145         crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1),
    146                                  &full_hash, sizeof(full_hash));
    147         full_hashes->push_back(full_hash);
    148       }
    149     }
    150   }
    151 }
    152 
    153 // Get the prefixes matching the download |urls|.
    154 void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
    155                             std::vector<SBPrefix>* prefixes) {
    156   std::vector<SBFullHash> full_hashes;
    157   for (size_t i = 0; i < urls.size(); ++i)
    158     BrowseFullHashesToCheck(urls[i], false, &full_hashes);
    159 
    160   for (size_t i = 0; i < full_hashes.size(); ++i)
    161     prefixes->push_back(full_hashes[i].prefix);
    162 }
    163 
    164 // Helper function to compare addprefixes in |store| with |prefixes|.
    165 // The |list_bit| indicates which list (url or hash) to compare.
    166 //
    167 // Returns true if there is a match, |*prefix_hits| (if non-NULL) will contain
    168 // the actual matching prefixes.
    169 bool MatchAddPrefixes(SafeBrowsingStore* store,
    170                       int list_bit,
    171                       const std::vector<SBPrefix>& prefixes,
    172                       std::vector<SBPrefix>* prefix_hits) {
    173   prefix_hits->clear();
    174   bool found_match = false;
    175 
    176   SBAddPrefixes add_prefixes;
    177   store->GetAddPrefixes(&add_prefixes);
    178   for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
    179        iter != add_prefixes.end(); ++iter) {
    180     for (size_t j = 0; j < prefixes.size(); ++j) {
    181       const SBPrefix& prefix = prefixes[j];
    182       if (prefix == iter->prefix &&
    183           GetListIdBit(iter->chunk_id) == list_bit) {
    184         prefix_hits->push_back(prefix);
    185         found_match = true;
    186       }
    187     }
    188   }
    189   return found_match;
    190 }
    191 
    192 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
    193 // add them to |full_hits| if not expired.  "Not expired" is when
    194 // either |last_update| was recent enough, or the item has been
    195 // received recently enough.  Expired items are not deleted because a
    196 // future update may make them acceptable again.
    197 //
    198 // For efficiency reasons the code walks |prefix_hits| and
    199 // |full_hashes| in parallel, so they must be sorted by prefix.
    200 void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
    201                                   const std::vector<SBAddFullHash>& full_hashes,
    202                                   std::vector<SBFullHashResult>* full_hits,
    203                                   base::Time last_update) {
    204   const base::Time expire_time =
    205       base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
    206 
    207   std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
    208   std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
    209 
    210   while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
    211     if (*piter < hiter->full_hash.prefix) {
    212       ++piter;
    213     } else if (hiter->full_hash.prefix < *piter) {
    214       ++hiter;
    215     } else {
    216       if (expire_time < last_update ||
    217           expire_time.ToTimeT() < hiter->received) {
    218         SBFullHashResult result;
    219         const int list_bit = GetListIdBit(hiter->chunk_id);
    220         DCHECK(list_bit == safe_browsing_util::MALWARE ||
    221                list_bit == safe_browsing_util::PHISH);
    222         const safe_browsing_util::ListType list_id =
    223             static_cast<safe_browsing_util::ListType>(list_bit);
    224         if (!safe_browsing_util::GetListName(list_id, &result.list_name))
    225           continue;
    226         result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
    227         result.hash = hiter->full_hash;
    228         full_hits->push_back(result);
    229       }
    230 
    231       // Only increment |hiter|, |piter| might have multiple hits.
    232       ++hiter;
    233     }
    234   }
    235 }
    236 
    237 // This function generates a chunk range string for |chunks|. It
    238 // outputs one chunk range string per list and writes it to the
    239 // |list_ranges| vector.  We expect |list_ranges| to already be of the
    240 // right size.  E.g., if |chunks| contains chunks with two different
    241 // list ids then |list_ranges| must contain two elements.
    242 void GetChunkRanges(const std::vector<int>& chunks,
    243                     std::vector<std::string>* list_ranges) {
    244   // Since there are 2 possible list ids, there must be exactly two
    245   // list ranges.  Even if the chunk data should only contain one
    246   // line, this code has to somehow handle corruption.
    247   DCHECK_EQ(2U, list_ranges->size());
    248 
    249   std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
    250   for (std::vector<int>::const_iterator iter = chunks.begin();
    251        iter != chunks.end(); ++iter) {
    252     int mod_list_id = GetListIdBit(*iter);
    253     DCHECK_GE(mod_list_id, 0);
    254     DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
    255     decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
    256   }
    257   for (size_t i = 0; i < decoded_chunks.size(); ++i) {
    258     ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
    259   }
    260 }
    261 
    262 // Helper function to create chunk range lists for Browse related
    263 // lists.
    264 void UpdateChunkRanges(SafeBrowsingStore* store,
    265                        const std::vector<std::string>& listnames,
    266                        std::vector<SBListChunkRanges>* lists) {
    267   DCHECK_GT(listnames.size(), 0U);
    268   DCHECK_LE(listnames.size(), 2U);
    269   std::vector<int> add_chunks;
    270   std::vector<int> sub_chunks;
    271   store->GetAddChunks(&add_chunks);
    272   store->GetSubChunks(&sub_chunks);
    273 
    274   // Always decode 2 ranges, even if only the first one is expected.
    275   // The loop below will only load as many into |lists| as |listnames|
    276   // indicates.
    277   std::vector<std::string> adds(2);
    278   std::vector<std::string> subs(2);
    279   GetChunkRanges(add_chunks, &adds);
    280   GetChunkRanges(sub_chunks, &subs);
    281 
    282   for (size_t i = 0; i < listnames.size(); ++i) {
    283     const std::string& listname = listnames[i];
    284     DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
    285               static_cast<int>(i % 2));
    286     DCHECK_NE(safe_browsing_util::GetListId(listname),
    287               safe_browsing_util::INVALID);
    288     lists->push_back(SBListChunkRanges(listname));
    289     lists->back().adds.swap(adds[i]);
    290     lists->back().subs.swap(subs[i]);
    291   }
    292 }
    293 
    294 // Helper for deleting chunks left over from obsolete lists.
    295 void DeleteChunksFromStore(SafeBrowsingStore* store, int listid){
    296   std::vector<int> add_chunks;
    297   size_t adds_deleted = 0;
    298   store->GetAddChunks(&add_chunks);
    299   for (std::vector<int>::const_iterator iter = add_chunks.begin();
    300        iter != add_chunks.end(); ++iter) {
    301     if (GetListIdBit(*iter) == GetListIdBit(listid)) {
    302       adds_deleted++;
    303       store->DeleteAddChunk(*iter);
    304     }
    305   }
    306   if (adds_deleted > 0)
    307     UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashAddsDeleted", adds_deleted);
    308 
    309   std::vector<int> sub_chunks;
    310   size_t subs_deleted = 0;
    311   store->GetSubChunks(&sub_chunks);
    312   for (std::vector<int>::const_iterator iter = sub_chunks.begin();
    313        iter != sub_chunks.end(); ++iter) {
    314     if (GetListIdBit(*iter) == GetListIdBit(listid)) {
    315       subs_deleted++;
    316       store->DeleteSubChunk(*iter);
    317     }
    318   }
    319   if (subs_deleted > 0)
    320     UMA_HISTOGRAM_COUNTS("SB2.DownloadBinhashSubsDeleted", subs_deleted);
    321 }
    322 
    323 // Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
    324 // safe_browsing_store.h orders on both chunk-id and prefix.
    325 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
    326   return a.full_hash.prefix < b.full_hash.prefix;
    327 }
    328 
    329 // This code always checks for non-zero file size.  This helper makes
    330 // that less verbose.
    331 int64 GetFileSizeOrZero(const base::FilePath& file_path) {
    332   int64 size_64;
    333   if (!base::GetFileSize(file_path, &size_64))
    334     return 0;
    335   return size_64;
    336 }
    337 
    338 }  // namespace
    339 
    340 // The default SafeBrowsingDatabaseFactory.
    341 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
    342  public:
    343   virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
    344       bool enable_download_protection,
    345       bool enable_client_side_whitelist,
    346       bool enable_download_whitelist,
    347       bool enable_extension_blacklist,
    348       bool enable_side_effect_free_whitelist,
    349       bool enable_ip_blacklist) OVERRIDE {
    350     return new SafeBrowsingDatabaseNew(
    351         new SafeBrowsingStoreFile,
    352         enable_download_protection ? new SafeBrowsingStoreFile : NULL,
    353         enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL,
    354         enable_download_whitelist ? new SafeBrowsingStoreFile : NULL,
    355         enable_extension_blacklist ? new SafeBrowsingStoreFile : NULL,
    356         enable_side_effect_free_whitelist ? new SafeBrowsingStoreFile : NULL,
    357         enable_ip_blacklist ? new SafeBrowsingStoreFile : NULL);
    358   }
    359 
    360   SafeBrowsingDatabaseFactoryImpl() { }
    361 
    362  private:
    363   DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
    364 };
    365 
    366 // static
    367 SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;
    368 
    369 // Factory method, non-thread safe. Caller has to make sure this s called
    370 // on SafeBrowsing Thread.
    371 // TODO(shess): There's no need for a factory any longer.  Convert
    372 // SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
    373 // callers just construct things directly.
    374 SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
    375     bool enable_download_protection,
    376     bool enable_client_side_whitelist,
    377     bool enable_download_whitelist,
    378     bool enable_extension_blacklist,
    379     bool enable_side_effect_free_whitelist,
    380     bool enable_ip_blacklist) {
    381   if (!factory_)
    382     factory_ = new SafeBrowsingDatabaseFactoryImpl();
    383   return factory_->CreateSafeBrowsingDatabase(
    384       enable_download_protection,
    385       enable_client_side_whitelist,
    386       enable_download_whitelist,
    387       enable_extension_blacklist,
    388       enable_side_effect_free_whitelist,
    389       enable_ip_blacklist);
    390 }
    391 
    392 SafeBrowsingDatabase::~SafeBrowsingDatabase() {
    393 }
    394 
    395 // static
    396 base::FilePath SafeBrowsingDatabase::BrowseDBFilename(
    397     const base::FilePath& db_base_filename) {
    398   return base::FilePath(db_base_filename.value() + kBrowseDBFile);
    399 }
    400 
    401 // static
    402 base::FilePath SafeBrowsingDatabase::DownloadDBFilename(
    403     const base::FilePath& db_base_filename) {
    404   return base::FilePath(db_base_filename.value() + kDownloadDBFile);
    405 }
    406 
    407 // static
    408 base::FilePath SafeBrowsingDatabase::BloomFilterForFilename(
    409     const base::FilePath& db_filename) {
    410   return base::FilePath(db_filename.value() + kBloomFilterFile);
    411 }
    412 
    413 // static
    414 base::FilePath SafeBrowsingDatabase::PrefixSetForFilename(
    415     const base::FilePath& db_filename) {
    416   return base::FilePath(db_filename.value() + kPrefixSetFile);
    417 }
    418 
    419 // static
    420 base::FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
    421     const base::FilePath& db_filename) {
    422   return base::FilePath(db_filename.value() + kCsdWhitelistDBFile);
    423 }
    424 
    425 // static
    426 base::FilePath SafeBrowsingDatabase::DownloadWhitelistDBFilename(
    427     const base::FilePath& db_filename) {
    428   return base::FilePath(db_filename.value() + kDownloadWhitelistDBFile);
    429 }
    430 
    431 // static
    432 base::FilePath SafeBrowsingDatabase::ExtensionBlacklistDBFilename(
    433     const base::FilePath& db_filename) {
    434   return base::FilePath(db_filename.value() + kExtensionBlacklistDBFile);
    435 }
    436 
    437 // static
    438 base::FilePath SafeBrowsingDatabase::SideEffectFreeWhitelistDBFilename(
    439     const base::FilePath& db_filename) {
    440   return base::FilePath(db_filename.value() + kSideEffectFreeWhitelistDBFile);
    441 }
    442 
    443 // static
    444 base::FilePath SafeBrowsingDatabase::IpBlacklistDBFilename(
    445     const base::FilePath& db_filename) {
    446   return base::FilePath(db_filename.value() + kIPBlacklistDBFile);
    447 }
    448 
    449 SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
    450   if (list_id == safe_browsing_util::PHISH ||
    451       list_id == safe_browsing_util::MALWARE) {
    452     return browse_store_.get();
    453   } else if (list_id == safe_browsing_util::BINURL ||
    454              list_id == safe_browsing_util::BINHASH) {
    455     return download_store_.get();
    456   } else if (list_id == safe_browsing_util::CSDWHITELIST) {
    457     return csd_whitelist_store_.get();
    458   } else if (list_id == safe_browsing_util::DOWNLOADWHITELIST) {
    459     return download_whitelist_store_.get();
    460   } else if (list_id == safe_browsing_util::EXTENSIONBLACKLIST) {
    461     return extension_blacklist_store_.get();
    462   } else if (list_id == safe_browsing_util::SIDEEFFECTFREEWHITELIST) {
    463     return side_effect_free_whitelist_store_.get();
    464   } else if (list_id == safe_browsing_util::IPBLACKLIST) {
    465     return ip_blacklist_store_.get();
    466   }
    467   return NULL;
    468 }
    469 
    470 // static
    471 void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
    472   UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
    473                             FAILURE_DATABASE_MAX);
    474 }
    475 
    476 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
    477     : creation_loop_(base::MessageLoop::current()),
    478       browse_store_(new SafeBrowsingStoreFile),
    479       reset_factory_(this),
    480       corruption_detected_(false),
    481       change_detected_(false) {
    482   DCHECK(browse_store_.get());
    483   DCHECK(!download_store_.get());
    484   DCHECK(!csd_whitelist_store_.get());
    485   DCHECK(!download_whitelist_store_.get());
    486   DCHECK(!extension_blacklist_store_.get());
    487   DCHECK(!side_effect_free_whitelist_store_.get());
    488   DCHECK(!ip_blacklist_store_.get());
    489 }
    490 
    491 SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
    492     SafeBrowsingStore* browse_store,
    493     SafeBrowsingStore* download_store,
    494     SafeBrowsingStore* csd_whitelist_store,
    495     SafeBrowsingStore* download_whitelist_store,
    496     SafeBrowsingStore* extension_blacklist_store,
    497     SafeBrowsingStore* side_effect_free_whitelist_store,
    498     SafeBrowsingStore* ip_blacklist_store)
    499     : creation_loop_(base::MessageLoop::current()),
    500       browse_store_(browse_store),
    501       download_store_(download_store),
    502       csd_whitelist_store_(csd_whitelist_store),
    503       download_whitelist_store_(download_whitelist_store),
    504       extension_blacklist_store_(extension_blacklist_store),
    505       side_effect_free_whitelist_store_(side_effect_free_whitelist_store),
    506       ip_blacklist_store_(ip_blacklist_store),
    507       reset_factory_(this),
    508       corruption_detected_(false) {
    509   DCHECK(browse_store_.get());
    510 }
    511 
    512 SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
    513   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
    514 }
    515 
    516 void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
    517   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
    518   // Ensure we haven't been run before.
    519   DCHECK(browse_filename_.empty());
    520   DCHECK(download_filename_.empty());
    521   DCHECK(csd_whitelist_filename_.empty());
    522   DCHECK(download_whitelist_filename_.empty());
    523   DCHECK(extension_blacklist_filename_.empty());
    524   DCHECK(side_effect_free_whitelist_filename_.empty());
    525   DCHECK(ip_blacklist_filename_.empty());
    526 
    527   browse_filename_ = BrowseDBFilename(filename_base);
    528   browse_prefix_set_filename_ = PrefixSetForFilename(browse_filename_);
    529 
    530   browse_store_->Init(
    531       browse_filename_,
    532       base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
    533                  base::Unretained(this)));
    534   DVLOG(1) << "Init browse store: " << browse_filename_.value();
    535 
    536   {
    537     // NOTE: There is no need to grab the lock in this function, since
    538     // until it returns, there are no pointers to this class on other
    539     // threads.  Then again, that means there is no possibility of
    540     // contention on the lock...
    541     base::AutoLock locked(lookup_lock_);
    542     full_browse_hashes_.clear();
    543     pending_browse_hashes_.clear();
    544     LoadPrefixSet();
    545   }
    546 
    547   if (download_store_.get()) {
    548     download_filename_ = DownloadDBFilename(filename_base);
    549     download_store_->Init(
    550         download_filename_,
    551         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
    552                    base::Unretained(this)));
    553     DVLOG(1) << "Init download store: " << download_filename_.value();
    554   }
    555 
    556   if (csd_whitelist_store_.get()) {
    557     csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
    558     csd_whitelist_store_->Init(
    559         csd_whitelist_filename_,
    560         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
    561                    base::Unretained(this)));
    562     DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
    563     std::vector<SBAddFullHash> full_hashes;
    564     if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
    565       LoadWhitelist(full_hashes, &csd_whitelist_);
    566     } else {
    567       WhitelistEverything(&csd_whitelist_);
    568     }
    569   } else {
    570     WhitelistEverything(&csd_whitelist_);  // Just to be safe.
    571   }
    572 
    573   if (download_whitelist_store_.get()) {
    574     download_whitelist_filename_ = DownloadWhitelistDBFilename(filename_base);
    575     download_whitelist_store_->Init(
    576         download_whitelist_filename_,
    577         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
    578                    base::Unretained(this)));
    579     DVLOG(1) << "Init download whitelist store: "
    580              << download_whitelist_filename_.value();
    581     std::vector<SBAddFullHash> full_hashes;
    582     if (download_whitelist_store_->GetAddFullHashes(&full_hashes)) {
    583       LoadWhitelist(full_hashes, &download_whitelist_);
    584     } else {
    585       WhitelistEverything(&download_whitelist_);
    586     }
    587   } else {
    588     WhitelistEverything(&download_whitelist_);  // Just to be safe.
    589   }
    590 
    591   if (extension_blacklist_store_.get()) {
    592     extension_blacklist_filename_ = ExtensionBlacklistDBFilename(filename_base);
    593     extension_blacklist_store_->Init(
    594         extension_blacklist_filename_,
    595         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
    596                    base::Unretained(this)));
    597     DVLOG(1) << "Init extension blacklist store: "
    598              << extension_blacklist_filename_.value();
    599   }
    600 
    601   if (side_effect_free_whitelist_store_.get()) {
    602     side_effect_free_whitelist_filename_ =
    603         SideEffectFreeWhitelistDBFilename(filename_base);
    604     side_effect_free_whitelist_prefix_set_filename_ =
    605         PrefixSetForFilename(side_effect_free_whitelist_filename_);
    606     side_effect_free_whitelist_store_->Init(
    607         side_effect_free_whitelist_filename_,
    608         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
    609                    base::Unretained(this)));
    610     DVLOG(1) << "Init side-effect free whitelist store: "
    611              << side_effect_free_whitelist_filename_.value();
    612 
    613     // If there is no database, the filter cannot be used.
    614     base::PlatformFileInfo db_info;
    615     if (base::GetFileInfo(side_effect_free_whitelist_filename_, &db_info)
    616         && db_info.size != 0) {
    617       const base::TimeTicks before = base::TimeTicks::Now();
    618       side_effect_free_whitelist_prefix_set_.reset(
    619           safe_browsing::PrefixSet::LoadFile(
    620               side_effect_free_whitelist_prefix_set_filename_));
    621       DVLOG(1) << "SafeBrowsingDatabaseNew read side-effect free whitelist "
    622                << "prefix set in "
    623                << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
    624       UMA_HISTOGRAM_TIMES("SB2.SideEffectFreeWhitelistPrefixSetLoad",
    625                           base::TimeTicks::Now() - before);
    626       if (!side_effect_free_whitelist_prefix_set_.get())
    627         RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_READ);
    628     }
    629   } else {
    630     // Delete any files of the side-effect free sidelist that may be around
    631     // from when it was previously enabled.
    632     SafeBrowsingStoreFile::DeleteStore(
    633         SideEffectFreeWhitelistDBFilename(filename_base));
    634   }
    635 
    636   if (ip_blacklist_store_.get()) {
    637     ip_blacklist_filename_ = IpBlacklistDBFilename(filename_base);
    638     ip_blacklist_store_->Init(
    639         ip_blacklist_filename_,
    640         base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
    641                    base::Unretained(this)));
    642     DVLOG(1) << "SafeBrowsingDatabaseNew read ip blacklist: "
    643              << ip_blacklist_filename_.value();
    644     std::vector<SBAddFullHash> full_hashes;
    645     if (ip_blacklist_store_->GetAddFullHashes(&full_hashes)) {
    646       LoadIpBlacklist(full_hashes);
    647     } else {
    648       DVLOG(1) << "Unable to load full hashes from the IP blacklist.";
    649       LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
    650     }
    651   }
    652 }
    653 
    654 bool SafeBrowsingDatabaseNew::ResetDatabase() {
    655   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
    656 
    657   // Delete files on disk.
    658   // TODO(shess): Hard to see where one might want to delete without a
    659   // reset.  Perhaps inline |Delete()|?
    660   if (!Delete())
    661     return false;
    662 
    663   // Reset objects in memory.
    664   {
    665     base::AutoLock locked(lookup_lock_);
    666     full_browse_hashes_.clear();
    667     pending_browse_hashes_.clear();
    668     prefix_miss_cache_.clear();
    669     browse_prefix_set_.reset();
    670     side_effect_free_whitelist_prefix_set_.reset();
    671     ip_blacklist_.clear();
    672   }
    673   // Wants to acquire the lock itself.
    674   WhitelistEverything(&csd_whitelist_);
    675   WhitelistEverything(&download_whitelist_);
    676   return true;
    677 }
    678 
    679 // TODO(lzheng): Remove matching_list, it is not used anywhere.
    680 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
    681     const GURL& url,
    682     std::string* matching_list,
    683     std::vector<SBPrefix>* prefix_hits,
    684     std::vector<SBFullHashResult>* full_hits,
    685     base::Time last_update) {
    686   // Clear the results first.
    687   matching_list->clear();
    688   prefix_hits->clear();
    689   full_hits->clear();
    690 
    691   std::vector<SBFullHash> full_hashes;
    692   BrowseFullHashesToCheck(url, false, &full_hashes);
    693   if (full_hashes.empty())
    694     return false;
    695 
    696   // This function is called on the I/O thread, prevent changes to
    697   // filter and caches.
    698   base::AutoLock locked(lookup_lock_);
    699 
    700   // |browse_prefix_set_| is empty until it is either read from disk, or the
    701   // first update populates it.  Bail out without a hit if not yet
    702   // available.
    703   if (!browse_prefix_set_.get())
    704     return false;
    705 
    706   size_t miss_count = 0;
    707   for (size_t i = 0; i < full_hashes.size(); ++i) {
    708     const SBPrefix prefix = full_hashes[i].prefix;
    709     if (browse_prefix_set_->Exists(prefix)) {
    710       prefix_hits->push_back(prefix);
    711       if (prefix_miss_cache_.count(prefix) > 0)
    712         ++miss_count;
    713     }
    714   }
    715 
    716   // If all the prefixes are cached as 'misses', don't issue a GetHash.
    717   if (miss_count == prefix_hits->size())
    718     return false;
    719 
    720   // Find the matching full-hash results.  |full_browse_hashes_| are from the
    721   // database, |pending_browse_hashes_| are from GetHash requests between
    722   // updates.
    723   std::sort(prefix_hits->begin(), prefix_hits->end());
    724 
    725   GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
    726                                full_hits, last_update);
    727   GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
    728                                full_hits, last_update);
    729   return true;
    730 }
    731 
    732 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
    733     const std::vector<GURL>& urls,
    734     std::vector<SBPrefix>* prefix_hits) {
    735   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
    736 
    737   // Ignore this check when download checking is not enabled.
    738   if (!download_store_.get())
    739     return false;
    740 
    741   std::vector<SBPrefix> prefixes;
    742   GetDownloadUrlPrefixes(urls, &prefixes);
    743   return MatchAddPrefixes(download_store_.get(),
    744                           safe_browsing_util::BINURL % 2,
    745                           prefixes,
    746                           prefix_hits);
    747 }
    748 
    749 bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix(
    750     const SBPrefix& prefix) {
    751   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
    752 
    753   // Ignore this check when download store is not available.
    754   if (!download_store_.get())
    755     return false;
    756 
    757   std::vector<SBPrefix> prefix_hits;
    758   return MatchAddPrefixes(download_store_.get(),
    759                           safe_browsing_util::BINHASH % 2,
    760                           std::vector<SBPrefix>(1, prefix),
    761                           &prefix_hits);
    762 }
    763 
    764 bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
    765   // This method is theoretically thread-safe but we expect all calls to
    766   // originate from the IO thread.
    767   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    768   std::vector<SBFullHash> full_hashes;
    769   BrowseFullHashesToCheck(url, true, &full_hashes);
    770   return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
    771 }
    772 
    773 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedUrl(const GURL& url) {
    774   std::vector<SBFullHash> full_hashes;
    775   BrowseFullHashesToCheck(url, true, &full_hashes);
    776   return ContainsWhitelistedHashes(download_whitelist_, full_hashes);
    777 }
    778 
    779 bool SafeBrowsingDatabaseNew::ContainsExtensionPrefixes(
    780     const std::vector<SBPrefix>& prefixes,
    781     std::vector<SBPrefix>* prefix_hits) {
    782   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
    783   if (!extension_blacklist_store_)
    784     return false;
    785 
    786   return MatchAddPrefixes(extension_blacklist_store_.get(),
    787                           safe_browsing_util::EXTENSIONBLACKLIST % 2,
    788                           prefixes,
    789                           prefix_hits);
    790 }
    791 
    792 bool SafeBrowsingDatabaseNew::ContainsSideEffectFreeWhitelistUrl(
    793     const GURL& url) {
    794   SBFullHash full_hash;
    795   std::string host;
    796   std::string path;
    797   std::string query;
    798   safe_browsing_util::CanonicalizeUrl(url, &host, &path, &query);
    799   std::string url_to_check = host + path;
    800   if (!query.empty())
    801     url_to_check +=  "?" + query;
    802   crypto::SHA256HashString(url_to_check, &full_hash, sizeof(full_hash));
    803 
    804   // This function can be called on any thread, so lock against any changes
    805   base::AutoLock locked(lookup_lock_);
    806 
    807   // |side_effect_free_whitelist_prefix_set_| is empty until it is either read
    808   // from disk, or the first update populates it.  Bail out without a hit if
    809   // not yet available.
    810   if (!side_effect_free_whitelist_prefix_set_.get())
    811     return false;
    812 
    813   return side_effect_free_whitelist_prefix_set_->Exists(full_hash.prefix);
    814 }
    815 
    816 bool SafeBrowsingDatabaseNew::ContainsMalwareIP(const std::string& ip_address) {
    817   net::IPAddressNumber ip_number;
    818   if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) {
    819     DVLOG(2) << "Unable to parse IP address: '" << ip_address << "'";
    820     return false;
    821   }
    822   if (ip_number.size() == net::kIPv4AddressSize) {
    823     ip_number = net::ConvertIPv4NumberToIPv6Number(ip_number);
    824   }
    825   if (ip_number.size() != net::kIPv6AddressSize) {
    826     DVLOG(2) << "Unable to convert IPv4 address to IPv6: '"
    827              << ip_address << "'";
    828     return false;  // better safe than sorry.
    829   }
    830   // This function can be called from any thread.
    831   base::AutoLock locked(lookup_lock_);
    832   for (IPBlacklist::const_iterator it = ip_blacklist_.begin();
    833        it != ip_blacklist_.end();
    834        ++it) {
    835     const std::string& mask = it->first;
    836     DCHECK_EQ(mask.size(), ip_number.size());
    837     std::string subnet(net::kIPv6AddressSize, '\0');
    838     for (size_t i = 0; i < net::kIPv6AddressSize; ++i) {
    839       subnet[i] = ip_number[i] & mask[i];
    840     }
    841     const std::string hash = base::SHA1HashString(subnet);
    842     DVLOG(2) << "Lookup Malware IP: "
    843              << " ip:" << ip_address
    844              << " mask:" << base::HexEncode(mask.data(), mask.size())
    845              << " subnet:" << base::HexEncode(subnet.data(), subnet.size())
    846              << " hash:" << base::HexEncode(hash.data(), hash.size());
    847     if (it->second.count(hash) > 0) {
    848       return true;
    849     }
    850   }
    851   return false;
    852 }
    853 
    854 bool SafeBrowsingDatabaseNew::ContainsDownloadWhitelistedString(
    855     const std::string& str) {
    856   SBFullHash hash;
    857   crypto::SHA256HashString(str, &hash, sizeof(hash));
    858   std::vector<SBFullHash> hashes;
    859   hashes.push_back(hash);
    860   return ContainsWhitelistedHashes(download_whitelist_, hashes);
    861 }
    862 
    863 bool SafeBrowsingDatabaseNew::ContainsWhitelistedHashes(
    864     const SBWhitelist& whitelist,
    865     const std::vector<SBFullHash>& hashes) {
    866   base::AutoLock l(lookup_lock_);
    867   if (whitelist.second)
    868     return true;
    869   for (std::vector<SBFullHash>::const_iterator it = hashes.begin();
    870        it != hashes.end(); ++it) {
    871     if (std::binary_search(whitelist.first.begin(), whitelist.first.end(), *it))
    872       return true;
    873   }
    874   return false;
    875 }
    876 
    877 // Helper to insert entries for all of the prefixes or full hashes in
    878 // |entry| into the store.
    879 void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
    880                                         const SBEntry* entry, int list_id) {
    881   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
    882 
    883   SafeBrowsingStore* store = GetStore(list_id);
    884   if (!store) return;
    885 
    886   STATS_COUNTER("SB.HostInsert", 1);
    887   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
    888   const int count = entry->prefix_count();
    889 
    890   DCHECK(!entry->IsSub());
    891   if (!count) {
    892     // No prefixes, use host instead.
    893     STATS_COUNTER("SB.PrefixAdd", 1);
    894     store->WriteAddPrefix(encoded_chunk_id, host);
    895   } else if (entry->IsPrefix()) {
    896     // Prefixes only.
    897     for (int i = 0; i < count; i++) {
    898       const SBPrefix prefix = entry->PrefixAt(i);
    899       STATS_COUNTER("SB.PrefixAdd", 1);
    900       store->WriteAddPrefix(encoded_chunk_id, prefix);
    901     }
    902   } else {
    903     // Prefixes and hashes.
    904     const base::Time receive_time = base::Time::Now();
    905     for (int i = 0; i < count; ++i) {
    906       const SBFullHash full_hash = entry->FullHashAt(i);
    907       const SBPrefix prefix = full_hash.prefix;
    908 
    909       STATS_COUNTER("SB.PrefixAdd", 1);
    910       store->WriteAddPrefix(encoded_chunk_id, prefix);
    911 
    912       STATS_COUNTER("SB.PrefixAddFull", 1);
    913       store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
    914     }
    915   }
    916 }
    917 
    918 // Helper to iterate over all the entries in the hosts in |chunks| and
    919 // add them to the store.
    920 void SafeBrowsingDatabaseNew::InsertAddChunks(
    921     const safe_browsing_util::ListType list_id,
    922     const SBChunkList& chunks) {
    923   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
    924 
    925   SafeBrowsingStore* store = GetStore(list_id);
    926   if (!store) return;
    927 
    928   for (SBChunkList::const_iterator citer = chunks.begin();
    929        citer != chunks.end(); ++citer) {
    930     const int chunk_id = citer->chunk_number;
    931 
    932     // The server can give us a chunk that we already have because
    933     // it's part of a range.  Don't add it again.
    934     const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
    935     if (store->CheckAddChunk(encoded_chunk_id))
    936       continue;
    937 
    938     store->SetAddChunk(encoded_chunk_id);
    939     for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
    940          hiter != citer->hosts.end(); ++hiter) {
    941       // NOTE: Could pass |encoded_chunk_id|, but then inserting add
    942       // chunks would look different from inserting sub chunks.
    943       InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
    944     }
    945   }
    946 }
    947 
    948 // Helper to insert entries for all of the prefixes or full hashes in
    949 // |entry| into the store.
    950 void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
    951                                         const SBEntry* entry, int list_id) {
    952   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
    953 
    954   SafeBrowsingStore* store = GetStore(list_id);
    955   if (!store) return;
    956 
    957   STATS_COUNTER("SB.HostDelete", 1);
    958   const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
    959   const int count = entry->prefix_count();
    960 
    961   DCHECK(entry->IsSub());
    962   if (!count) {
    963     // No prefixes, use host instead.
    964     STATS_COUNTER("SB.PrefixSub", 1);
    965     const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
    966     store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
    967   } else if (entry->IsPrefix()) {
    968     // Prefixes only.
    969     for (int i = 0; i < count; i++) {
    970       const SBPrefix prefix = entry->PrefixAt(i);
    971       const int add_chunk_id =
    972           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
    973 
    974       STATS_COUNTER("SB.PrefixSub", 1);
    975       store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
    976     }
    977   } else {
    978     // Prefixes and hashes.
    979     for (int i = 0; i < count; ++i) {
    980       const SBFullHash full_hash = entry->FullHashAt(i);
    981       const int add_chunk_id =
    982           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
    983 
    984       STATS_COUNTER("SB.PrefixSub", 1);
    985       store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
    986 
    987       STATS_COUNTER("SB.PrefixSubFull", 1);
    988       store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
    989     }
    990   }
    991 }
    992 
    993 // Helper to iterate over all the entries in the hosts in |chunks| and
    994 // add them to the store.
    995 void SafeBrowsingDatabaseNew::InsertSubChunks(
    996     safe_browsing_util::ListType list_id,
    997     const SBChunkList& chunks) {
    998   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
    999 
   1000   SafeBrowsingStore* store = GetStore(list_id);
   1001   if (!store) return;
   1002 
   1003   for (SBChunkList::const_iterator citer = chunks.begin();
   1004        citer != chunks.end(); ++citer) {
   1005     const int chunk_id = citer->chunk_number;
   1006 
   1007     // The server can give us a chunk that we already have because
   1008     // it's part of a range.  Don't add it again.
   1009     const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
   1010     if (store->CheckSubChunk(encoded_chunk_id))
   1011       continue;
   1012 
   1013     store->SetSubChunk(encoded_chunk_id);
   1014     for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
   1015          hiter != citer->hosts.end(); ++hiter) {
   1016       InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
   1017     }
   1018   }
   1019 }
   1020 
   1021 void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
   1022                                            const SBChunkList& chunks) {
   1023   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
   1024 
   1025   if (corruption_detected_ || chunks.empty())
   1026     return;
   1027 
   1028   const base::TimeTicks before = base::TimeTicks::Now();
   1029 
   1030   const safe_browsing_util::ListType list_id =
   1031       safe_browsing_util::GetListId(list_name);
   1032   DVLOG(2) << list_name << ": " << list_id;
   1033 
   1034   SafeBrowsingStore* store = GetStore(list_id);
   1035   if (!store) return;
   1036 
   1037   change_detected_ = true;
   1038 
   1039   store->BeginChunk();
   1040   if (chunks.front().is_add) {
   1041     InsertAddChunks(list_id, chunks);
   1042   } else {
   1043     InsertSubChunks(list_id, chunks);
   1044   }
   1045   store->FinishChunk();
   1046 
   1047   UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::TimeTicks::Now() - before);
   1048 }
   1049 
   1050 void SafeBrowsingDatabaseNew::DeleteChunks(
   1051     const std::vector<SBChunkDelete>& chunk_deletes) {
   1052   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
   1053 
   1054   if (corruption_detected_ || chunk_deletes.empty())
   1055     return;
   1056 
   1057   const std::string& list_name = chunk_deletes.front().list_name;
   1058   const safe_browsing_util::ListType list_id =
   1059       safe_browsing_util::GetListId(list_name);
   1060 
   1061   SafeBrowsingStore* store = GetStore(list_id);
   1062   if (!store) return;
   1063 
   1064   change_detected_ = true;
   1065 
   1066   for (size_t i = 0; i < chunk_deletes.size(); ++i) {
   1067     std::vector<int> chunk_numbers;
   1068     RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
   1069     for (size_t j = 0; j < chunk_numbers.size(); ++j) {
   1070       const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
   1071       if (chunk_deletes[i].is_sub_del)
   1072         store->DeleteSubChunk(encoded_chunk_id);
   1073       else
   1074         store->DeleteAddChunk(encoded_chunk_id);
   1075     }
   1076   }
   1077 }
   1078 
   1079 void SafeBrowsingDatabaseNew::CacheHashResults(
   1080     const std::vector<SBPrefix>& prefixes,
   1081     const std::vector<SBFullHashResult>& full_hits) {
   1082   // This is called on the I/O thread, lock against updates.
   1083   base::AutoLock locked(lookup_lock_);
   1084 
   1085   if (full_hits.empty()) {
   1086     prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
   1087     return;
   1088   }
   1089 
   1090   // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
   1091   // Refactor to make them identical.
   1092   const base::Time now = base::Time::Now();
   1093   const size_t orig_size = pending_browse_hashes_.size();
   1094   for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
   1095        iter != full_hits.end(); ++iter) {
   1096     const int list_id = safe_browsing_util::GetListId(iter->list_name);
   1097     if (list_id == safe_browsing_util::MALWARE ||
   1098         list_id == safe_browsing_util::PHISH) {
   1099       int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
   1100       SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
   1101       pending_browse_hashes_.push_back(add_full_hash);
   1102     }
   1103   }
   1104 
   1105   // Sort new entries then merge with the previously-sorted entries.
   1106   std::vector<SBAddFullHash>::iterator
   1107       orig_end = pending_browse_hashes_.begin() + orig_size;
   1108   std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
   1109   std::inplace_merge(pending_browse_hashes_.begin(),
   1110                      orig_end, pending_browse_hashes_.end(),
   1111                      SBAddFullHashPrefixLess);
   1112 }
   1113 
   1114 bool SafeBrowsingDatabaseNew::UpdateStarted(
   1115     std::vector<SBListChunkRanges>* lists) {
   1116   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
   1117   DCHECK(lists);
   1118 
   1119   // If |BeginUpdate()| fails, reset the database.
   1120   if (!browse_store_->BeginUpdate()) {
   1121     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
   1122     HandleCorruptDatabase();
   1123     return false;
   1124   }
   1125 
   1126   if (download_store_.get() && !download_store_->BeginUpdate()) {
   1127     RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
   1128     HandleCorruptDatabase();
   1129     return false;
   1130   }
   1131 
   1132   if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
   1133     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
   1134     HandleCorruptDatabase();
   1135     return false;
   1136   }
   1137 
   1138   if (download_whitelist_store_.get() &&
   1139       !download_whitelist_store_->BeginUpdate()) {
   1140     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_BEGIN);
   1141     HandleCorruptDatabase();
   1142     return false;
   1143   }
   1144 
   1145   if (extension_blacklist_store_ &&
   1146       !extension_blacklist_store_->BeginUpdate()) {
   1147     RecordFailure(FAILURE_EXTENSION_BLACKLIST_UPDATE_BEGIN);
   1148     HandleCorruptDatabase();
   1149     return false;
   1150   }
   1151 
   1152   if (side_effect_free_whitelist_store_ &&
   1153       !side_effect_free_whitelist_store_->BeginUpdate()) {
   1154     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_BEGIN);
   1155     HandleCorruptDatabase();
   1156     return false;
   1157   }
   1158 
   1159   if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
   1160     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
   1161     HandleCorruptDatabase();
   1162     return false;
   1163   }
   1164 
   1165   std::vector<std::string> browse_listnames;
   1166   browse_listnames.push_back(safe_browsing_util::kMalwareList);
   1167   browse_listnames.push_back(safe_browsing_util::kPhishingList);
   1168   UpdateChunkRanges(browse_store_.get(), browse_listnames, lists);
   1169 
   1170   if (download_store_.get()) {
   1171     // This store used to contain kBinHashList in addition to
   1172     // kBinUrlList.  Strip the stale data before generating the chunk
   1173     // ranges to request.  UpdateChunkRanges() will traverse the chunk
   1174     // list, so this is very cheap if there are no kBinHashList chunks.
   1175     const int listid =
   1176         safe_browsing_util::GetListId(safe_browsing_util::kBinHashList);
   1177     DeleteChunksFromStore(download_store_.get(), listid);
   1178 
   1179     // The above marks the chunks for deletion, but they are not
   1180     // actually deleted until the database is rewritten.  The
   1181     // following code removes the kBinHashList part of the request
   1182     // before continuing so that UpdateChunkRanges() doesn't break.
   1183     std::vector<std::string> download_listnames;
   1184     download_listnames.push_back(safe_browsing_util::kBinUrlList);
   1185     download_listnames.push_back(safe_browsing_util::kBinHashList);
   1186     UpdateChunkRanges(download_store_.get(), download_listnames, lists);
   1187     DCHECK_EQ(lists->back().name,
   1188               std::string(safe_browsing_util::kBinHashList));
   1189     lists->pop_back();
   1190 
   1191     // TODO(shess): This problem could also be handled in
   1192     // BeginUpdate() by detecting the chunks to delete and rewriting
   1193     // the database before it's used.  When I implemented that, it
   1194     // felt brittle, it might be easier to just wait for some future
   1195     // format change.
   1196   }
   1197 
   1198   if (csd_whitelist_store_.get()) {
   1199     std::vector<std::string> csd_whitelist_listnames;
   1200     csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList);
   1201     UpdateChunkRanges(csd_whitelist_store_.get(),
   1202                       csd_whitelist_listnames, lists);
   1203   }
   1204 
   1205   if (download_whitelist_store_.get()) {
   1206     std::vector<std::string> download_whitelist_listnames;
   1207     download_whitelist_listnames.push_back(
   1208         safe_browsing_util::kDownloadWhiteList);
   1209     UpdateChunkRanges(download_whitelist_store_.get(),
   1210                       download_whitelist_listnames, lists);
   1211   }
   1212 
   1213   if (extension_blacklist_store_) {
   1214     UpdateChunkRanges(
   1215         extension_blacklist_store_.get(),
   1216         std::vector<std::string>(1, safe_browsing_util::kExtensionBlacklist),
   1217         lists);
   1218   }
   1219 
   1220   if (side_effect_free_whitelist_store_) {
   1221     UpdateChunkRanges(
   1222         side_effect_free_whitelist_store_.get(),
   1223         std::vector<std::string>(
   1224             1, safe_browsing_util::kSideEffectFreeWhitelist),
   1225         lists);
   1226   }
   1227 
   1228   if (ip_blacklist_store_) {
   1229     UpdateChunkRanges(
   1230         ip_blacklist_store_.get(),
   1231         std::vector<std::string>(1, safe_browsing_util::kIPBlacklist),
   1232         lists);
   1233   }
   1234 
   1235   corruption_detected_ = false;
   1236   change_detected_ = false;
   1237   return true;
   1238 }
   1239 
   1240 void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
   1241   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
   1242 
   1243   // The update may have failed due to corrupt storage (for instance,
   1244   // an excessive number of invalid add_chunks and sub_chunks).
   1245   // Double-check that the databases are valid.
   1246   // TODO(shess): Providing a checksum for the add_chunk and sub_chunk
   1247   // sections would allow throwing a corruption error in
   1248   // UpdateStarted().
   1249   if (!update_succeeded) {
   1250     if (!browse_store_->CheckValidity())
   1251       DLOG(ERROR) << "Safe-browsing browse database corrupt.";
   1252 
   1253     if (download_store_.get() && !download_store_->CheckValidity())
   1254       DLOG(ERROR) << "Safe-browsing download database corrupt.";
   1255 
   1256     if (csd_whitelist_store_.get() && !csd_whitelist_store_->CheckValidity())
   1257       DLOG(ERROR) << "Safe-browsing csd whitelist database corrupt.";
   1258 
   1259     if (download_whitelist_store_.get() &&
   1260         !download_whitelist_store_->CheckValidity()) {
   1261       DLOG(ERROR) << "Safe-browsing download whitelist database corrupt.";
   1262     }
   1263 
   1264     if (extension_blacklist_store_ &&
   1265         !extension_blacklist_store_->CheckValidity()) {
   1266       DLOG(ERROR) << "Safe-browsing extension blacklist database corrupt.";
   1267     }
   1268 
   1269     if (side_effect_free_whitelist_store_ &&
   1270         !side_effect_free_whitelist_store_->CheckValidity()) {
   1271       DLOG(ERROR) << "Safe-browsing side-effect free whitelist database "
   1272                   << "corrupt.";
   1273     }
   1274 
   1275     if (ip_blacklist_store_ && !ip_blacklist_store_->CheckValidity()) {
   1276       DLOG(ERROR) << "Safe-browsing IP blacklist database corrupt.";
   1277     }
   1278   }
   1279 
   1280   if (corruption_detected_)
   1281     return;
   1282 
   1283   // Unroll the transaction if there was a protocol error or if the
   1284   // transaction was empty.  This will leave the prefix set, the
   1285   // pending hashes, and the prefix miss cache in place.
   1286   if (!update_succeeded || !change_detected_) {
   1287     // Track empty updates to answer questions at http://crbug.com/72216 .
   1288     if (update_succeeded && !change_detected_)
   1289       UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
   1290     browse_store_->CancelUpdate();
   1291     if (download_store_.get())
   1292       download_store_->CancelUpdate();
   1293     if (csd_whitelist_store_.get())
   1294       csd_whitelist_store_->CancelUpdate();
   1295     if (download_whitelist_store_.get())
   1296       download_whitelist_store_->CancelUpdate();
   1297     if (extension_blacklist_store_)
   1298       extension_blacklist_store_->CancelUpdate();
   1299     if (side_effect_free_whitelist_store_)
   1300       side_effect_free_whitelist_store_->CancelUpdate();
   1301     if (ip_blacklist_store_)
   1302       ip_blacklist_store_->CancelUpdate();
   1303     return;
   1304   }
   1305 
   1306   if (download_store_) {
   1307     int64 size_bytes = UpdateHashPrefixStore(
   1308         download_filename_,
   1309         download_store_.get(),
   1310         FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
   1311     UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
   1312                          static_cast<int>(size_bytes / 1024));
   1313   }
   1314 
   1315   UpdateBrowseStore();
   1316   UpdateWhitelistStore(csd_whitelist_filename_,
   1317                        csd_whitelist_store_.get(),
   1318                        &csd_whitelist_);
   1319   UpdateWhitelistStore(download_whitelist_filename_,
   1320                        download_whitelist_store_.get(),
   1321                        &download_whitelist_);
   1322 
   1323   if (extension_blacklist_store_) {
   1324     int64 size_bytes = UpdateHashPrefixStore(
   1325         extension_blacklist_filename_,
   1326         extension_blacklist_store_.get(),
   1327         FAILURE_EXTENSION_BLACKLIST_UPDATE_FINISH);
   1328     UMA_HISTOGRAM_COUNTS("SB2.ExtensionBlacklistKilobytes",
   1329                          static_cast<int>(size_bytes / 1024));
   1330   }
   1331 
   1332   if (side_effect_free_whitelist_store_)
   1333     UpdateSideEffectFreeWhitelistStore();
   1334 
   1335   if (ip_blacklist_store_)
   1336     UpdateIpBlacklistStore();
   1337 }
   1338 
   1339 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
   1340     const base::FilePath& store_filename,
   1341     SafeBrowsingStore* store,
   1342     SBWhitelist* whitelist) {
   1343   if (!store)
   1344     return;
   1345 
   1346   // For the whitelists, we don't cache and save full hashes since all
   1347   // hashes are already full.
   1348   std::vector<SBAddFullHash> empty_add_hashes;
   1349 
   1350   // Note: prefixes will not be empty.  The current data store implementation
   1351   // stores all full-length hashes as both full and prefix hashes.
   1352   SBAddPrefixes prefixes;
   1353   std::vector<SBAddFullHash> full_hashes;
   1354   if (!store->FinishUpdate(empty_add_hashes, &prefixes, &full_hashes)) {
   1355     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
   1356     WhitelistEverything(whitelist);
   1357     return;
   1358   }
   1359 
   1360 #if defined(OS_MACOSX)
   1361   base::mac::SetFileBackupExclusion(store_filename);
   1362 #endif
   1363 
   1364   LoadWhitelist(full_hashes, whitelist);
   1365 }
   1366 
   1367 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
   1368     const base::FilePath& store_filename,
   1369     SafeBrowsingStore* store,
   1370     FailureType failure_type) {
   1371   // We don't cache and save full hashes.
   1372   std::vector<SBAddFullHash> empty_add_hashes;
   1373 
   1374   // These results are not used after this call. Simply ignore the
   1375   // returned value after FinishUpdate(...).
   1376   SBAddPrefixes add_prefixes_result;
   1377   std::vector<SBAddFullHash> add_full_hashes_result;
   1378 
   1379   if (!store->FinishUpdate(empty_add_hashes,
   1380                            &add_prefixes_result,
   1381                            &add_full_hashes_result)) {
   1382     RecordFailure(failure_type);
   1383   }
   1384 
   1385 #if defined(OS_MACOSX)
   1386   base::mac::SetFileBackupExclusion(store_filename);
   1387 #endif
   1388 
   1389   return GetFileSizeOrZero(store_filename);
   1390 }
   1391 
   1392 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
   1393   // Copy out the pending add hashes.  Copy rather than swapping in
   1394   // case |ContainsBrowseURL()| is called before the new filter is complete.
   1395   std::vector<SBAddFullHash> pending_add_hashes;
   1396   {
   1397     base::AutoLock locked(lookup_lock_);
   1398     pending_add_hashes.insert(pending_add_hashes.end(),
   1399                               pending_browse_hashes_.begin(),
   1400                               pending_browse_hashes_.end());
   1401   }
   1402 
   1403   // Measure the amount of IO during the filter build.
   1404   base::IoCounters io_before, io_after;
   1405   base::ProcessHandle handle = base::Process::Current().handle();
   1406   scoped_ptr<base::ProcessMetrics> metric(
   1407 #if !defined(OS_MACOSX)
   1408       base::ProcessMetrics::CreateProcessMetrics(handle)
   1409 #else
   1410       // Getting stats only for the current process is enough, so NULL is fine.
   1411       base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
   1412 #endif
   1413   );
   1414 
   1415   // IoCounters are currently not supported on Mac, and may not be
   1416   // available for Linux, so we check the result and only show IO
   1417   // stats if they are available.
   1418   const bool got_counters = metric->GetIOCounters(&io_before);
   1419 
   1420   const base::TimeTicks before = base::TimeTicks::Now();
   1421 
   1422   SBAddPrefixes add_prefixes;
   1423   std::vector<SBAddFullHash> add_full_hashes;
   1424   if (!browse_store_->FinishUpdate(pending_add_hashes,
   1425                                    &add_prefixes, &add_full_hashes)) {
   1426     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
   1427     return;
   1428   }
   1429 
   1430   // TODO(shess): If |add_prefixes| were sorted by the prefix, it
   1431   // could be passed directly to |PrefixSet()|, removing the need for
   1432   // |prefixes|.  For now, |prefixes| is useful while debugging
   1433   // things.
   1434   std::vector<SBPrefix> prefixes;
   1435   prefixes.reserve(add_prefixes.size());
   1436   for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
   1437        iter != add_prefixes.end(); ++iter) {
   1438     prefixes.push_back(iter->prefix);
   1439   }
   1440 
   1441   std::sort(prefixes.begin(), prefixes.end());
   1442   scoped_ptr<safe_browsing::PrefixSet>
   1443       prefix_set(new safe_browsing::PrefixSet(prefixes));
   1444 
   1445   // This needs to be in sorted order by prefix for efficient access.
   1446   std::sort(add_full_hashes.begin(), add_full_hashes.end(),
   1447             SBAddFullHashPrefixLess);
   1448 
   1449   // Swap in the newly built filter and cache.
   1450   {
   1451     base::AutoLock locked(lookup_lock_);
   1452     full_browse_hashes_.swap(add_full_hashes);
   1453 
   1454     // TODO(shess): If |CacheHashResults()| is posted between the
   1455     // earlier lock and this clear, those pending hashes will be lost.
   1456     // It could be fixed by only removing hashes which were collected
   1457     // at the earlier point.  I believe that is fail-safe as-is (the
   1458     // hash will be fetched again).
   1459     pending_browse_hashes_.clear();
   1460     prefix_miss_cache_.clear();
   1461     browse_prefix_set_.swap(prefix_set);
   1462   }
   1463 
   1464   DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
   1465            << (base::TimeTicks::Now() - before).InMilliseconds()
   1466            << " ms total.  prefix count: " << add_prefixes.size();
   1467   UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
   1468 
   1469   // Persist the prefix set to disk.  Since only this thread changes
   1470   // |browse_prefix_set_|, there is no need to lock.
   1471   WritePrefixSet();
   1472 
   1473   // Gather statistics.
   1474   if (got_counters && metric->GetIOCounters(&io_after)) {
   1475     UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
   1476                          static_cast<int>(io_after.ReadTransferCount -
   1477                                           io_before.ReadTransferCount) / 1024);
   1478     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
   1479                          static_cast<int>(io_after.WriteTransferCount -
   1480                                           io_before.WriteTransferCount) / 1024);
   1481     UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
   1482                          static_cast<int>(io_after.ReadOperationCount -
   1483                                           io_before.ReadOperationCount));
   1484     UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
   1485                          static_cast<int>(io_after.WriteOperationCount -
   1486                                           io_before.WriteOperationCount));
   1487   }
   1488 
   1489   int64 file_size = GetFileSizeOrZero(browse_prefix_set_filename_);
   1490   UMA_HISTOGRAM_COUNTS("SB2.PrefixSetKilobytes",
   1491                        static_cast<int>(file_size / 1024));
   1492   file_size = GetFileSizeOrZero(browse_filename_);
   1493   UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
   1494                        static_cast<int>(file_size / 1024));
   1495 
   1496 #if defined(OS_MACOSX)
   1497   base::mac::SetFileBackupExclusion(browse_filename_);
   1498 #endif
   1499 }
   1500 
   1501 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
   1502   std::vector<SBAddFullHash> empty_add_hashes;
   1503   SBAddPrefixes add_prefixes;
   1504   std::vector<SBAddFullHash> add_full_hashes_result;
   1505 
   1506   if (!side_effect_free_whitelist_store_->FinishUpdate(
   1507           empty_add_hashes,
   1508           &add_prefixes,
   1509           &add_full_hashes_result)) {
   1510     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
   1511     return;
   1512   }
   1513 
   1514   // TODO(shess): If |add_prefixes| were sorted by the prefix, it
   1515   // could be passed directly to |PrefixSet()|, removing the need for
   1516   // |prefixes|.  For now, |prefixes| is useful while debugging
   1517   // things.
   1518   std::vector<SBPrefix> prefixes;
   1519   prefixes.reserve(add_prefixes.size());
   1520   for (SBAddPrefixes::const_iterator iter = add_prefixes.begin();
   1521        iter != add_prefixes.end(); ++iter) {
   1522     prefixes.push_back(iter->prefix);
   1523   }
   1524 
   1525   std::sort(prefixes.begin(), prefixes.end());
   1526   scoped_ptr<safe_browsing::PrefixSet>
   1527       prefix_set(new safe_browsing::PrefixSet(prefixes));
   1528 
   1529   // Swap in the newly built prefix set.
   1530   {
   1531     base::AutoLock locked(lookup_lock_);
   1532     side_effect_free_whitelist_prefix_set_.swap(prefix_set);
   1533   }
   1534 
   1535   const base::TimeTicks before = base::TimeTicks::Now();
   1536   const bool write_ok = side_effect_free_whitelist_prefix_set_->WriteFile(
   1537       side_effect_free_whitelist_prefix_set_filename_);
   1538   DVLOG(1) << "SafeBrowsingDatabaseNew wrote side-effect free whitelist prefix "
   1539            << "set in " << (base::TimeTicks::Now() - before).InMilliseconds()
   1540            << " ms";
   1541   UMA_HISTOGRAM_TIMES("SB2.SideEffectFreePrefixSetWrite",
   1542                       base::TimeTicks::Now() - before);
   1543 
   1544   if (!write_ok)
   1545     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_WRITE);
   1546 
   1547   // Gather statistics.
   1548   int64 file_size = GetFileSizeOrZero(
   1549       side_effect_free_whitelist_prefix_set_filename_);
   1550   UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistPrefixSetKilobytes",
   1551                        static_cast<int>(file_size / 1024));
   1552   file_size = GetFileSizeOrZero(side_effect_free_whitelist_filename_);
   1553   UMA_HISTOGRAM_COUNTS("SB2.SideEffectFreeWhitelistDatabaseKilobytes",
   1554                        static_cast<int>(file_size / 1024));
   1555 
   1556 #if defined(OS_MACOSX)
   1557   base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
   1558   base::mac::SetFileBackupExclusion(
   1559       side_effect_free_whitelist_prefix_set_filename_);
   1560 #endif
   1561 }
   1562 
   1563 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
   1564   // For the IP blacklist, we don't cache and save full hashes since all
   1565   // hashes are already full.
   1566   std::vector<SBAddFullHash> empty_add_hashes;
   1567 
   1568   // Note: prefixes will not be empty.  The current data store implementation
   1569   // stores all full-length hashes as both full and prefix hashes.
   1570   SBAddPrefixes prefixes;
   1571   std::vector<SBAddFullHash> full_hashes;
   1572   if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes,
   1573                                          &prefixes, &full_hashes)) {
   1574     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
   1575     LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
   1576     return;
   1577   }
   1578 
   1579 #if defined(OS_MACOSX)
   1580   base::mac::SetFileBackupExclusion(ip_blacklist_filename_);
   1581 #endif
   1582 
   1583   LoadIpBlacklist(full_hashes);
   1584 }
   1585 
   1586 void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
   1587   // Reset the database after the current task has unwound (but only
   1588   // reset once within the scope of a given task).
   1589   if (!reset_factory_.HasWeakPtrs()) {
   1590     RecordFailure(FAILURE_DATABASE_CORRUPT);
   1591     base::MessageLoop::current()->PostTask(FROM_HERE,
   1592         base::Bind(&SafeBrowsingDatabaseNew::OnHandleCorruptDatabase,
   1593                    reset_factory_.GetWeakPtr()));
   1594   }
   1595 }
   1596 
   1597 void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
   1598   RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
   1599   corruption_detected_ = true;  // Stop updating the database.
   1600   ResetDatabase();
   1601   DLOG(FATAL) << "SafeBrowsing database was corrupt and reset";
   1602 }
   1603 
   1604 // TODO(shess): I'm not clear why this code doesn't have any
   1605 // real error-handling.
   1606 void SafeBrowsingDatabaseNew::LoadPrefixSet() {
   1607   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
   1608   DCHECK(!browse_prefix_set_filename_.empty());
   1609 
   1610   // If there is no database, the filter cannot be used.
   1611   base::PlatformFileInfo db_info;
   1612   if (!base::GetFileInfo(browse_filename_, &db_info) || db_info.size == 0)
   1613     return;
   1614 
   1615   // Cleanup any stale bloom filter (no longer used).
   1616   // TODO(shess): Track failure to delete?
   1617   base::FilePath bloom_filter_filename =
   1618       BloomFilterForFilename(browse_filename_);
   1619   base::DeleteFile(bloom_filter_filename, false);
   1620 
   1621   const base::TimeTicks before = base::TimeTicks::Now();
   1622   browse_prefix_set_.reset(safe_browsing::PrefixSet::LoadFile(
   1623       browse_prefix_set_filename_));
   1624   DVLOG(1) << "SafeBrowsingDatabaseNew read prefix set in "
   1625            << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
   1626   UMA_HISTOGRAM_TIMES("SB2.PrefixSetLoad", base::TimeTicks::Now() - before);
   1627 
   1628   if (!browse_prefix_set_.get())
   1629     RecordFailure(FAILURE_BROWSE_PREFIX_SET_READ);
   1630 }
   1631 
   1632 bool SafeBrowsingDatabaseNew::Delete() {
   1633   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
   1634 
   1635   const bool r1 = browse_store_->Delete();
   1636   if (!r1)
   1637     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
   1638 
   1639   const bool r2 = download_store_.get() ? download_store_->Delete() : true;
   1640   if (!r2)
   1641     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
   1642 
   1643   const bool r3 = csd_whitelist_store_.get() ?
   1644       csd_whitelist_store_->Delete() : true;
   1645   if (!r3)
   1646     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
   1647 
   1648   const bool r4 = download_whitelist_store_.get() ?
   1649       download_whitelist_store_->Delete() : true;
   1650   if (!r4)
   1651     RecordFailure(FAILURE_DATABASE_STORE_DELETE);
   1652 
   1653   base::FilePath bloom_filter_filename =
   1654       BloomFilterForFilename(browse_filename_);
   1655   const bool r5 = base::DeleteFile(bloom_filter_filename, false);
   1656   if (!r5)
   1657     RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
   1658 
   1659   const bool r6 = base::DeleteFile(browse_prefix_set_filename_, false);
   1660   if (!r6)
   1661     RecordFailure(FAILURE_BROWSE_PREFIX_SET_DELETE);
   1662 
   1663   const bool r7 = base::DeleteFile(extension_blacklist_filename_, false);
   1664   if (!r7)
   1665     RecordFailure(FAILURE_EXTENSION_BLACKLIST_DELETE);
   1666 
   1667   const bool r8 = base::DeleteFile(side_effect_free_whitelist_filename_,
   1668                                     false);
   1669   if (!r8)
   1670     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_DELETE);
   1671 
   1672   const bool r9 = base::DeleteFile(
   1673       side_effect_free_whitelist_prefix_set_filename_,
   1674       false);
   1675   if (!r9)
   1676     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_PREFIX_SET_DELETE);
   1677 
   1678   const bool r10 = base::DeleteFile(ip_blacklist_filename_, false);
   1679   if (!r10)
   1680     RecordFailure(FAILURE_IP_BLACKLIST_DELETE);
   1681 
   1682   return r1 && r2 && r3 && r4 && r5 && r6 && r7 && r8 && r9 && r10;
   1683 }
   1684 
   1685 void SafeBrowsingDatabaseNew::WritePrefixSet() {
   1686   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
   1687 
   1688   if (!browse_prefix_set_.get())
   1689     return;
   1690 
   1691   const base::TimeTicks before = base::TimeTicks::Now();
   1692   const bool write_ok = browse_prefix_set_->WriteFile(
   1693       browse_prefix_set_filename_);
   1694   DVLOG(1) << "SafeBrowsingDatabaseNew wrote prefix set in "
   1695            << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
   1696   UMA_HISTOGRAM_TIMES("SB2.PrefixSetWrite", base::TimeTicks::Now() - before);
   1697 
   1698   if (!write_ok)
   1699     RecordFailure(FAILURE_BROWSE_PREFIX_SET_WRITE);
   1700 
   1701 #if defined(OS_MACOSX)
   1702   base::mac::SetFileBackupExclusion(browse_prefix_set_filename_);
   1703 #endif
   1704 }
   1705 
   1706 void SafeBrowsingDatabaseNew::WhitelistEverything(SBWhitelist* whitelist) {
   1707   base::AutoLock locked(lookup_lock_);
   1708   whitelist->second = true;
   1709   whitelist->first.clear();
   1710 }
   1711 
   1712 void SafeBrowsingDatabaseNew::LoadWhitelist(
   1713     const std::vector<SBAddFullHash>& full_hashes,
   1714     SBWhitelist* whitelist) {
   1715   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
   1716   if (full_hashes.size() > kMaxWhitelistSize) {
   1717     WhitelistEverything(whitelist);
   1718     return;
   1719   }
   1720 
   1721   std::vector<SBFullHash> new_whitelist;
   1722   new_whitelist.reserve(full_hashes.size());
   1723   for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
   1724        it != full_hashes.end(); ++it) {
   1725     new_whitelist.push_back(it->full_hash);
   1726   }
   1727   std::sort(new_whitelist.begin(), new_whitelist.end());
   1728 
   1729   SBFullHash kill_switch;
   1730   crypto::SHA256HashString(kWhitelistKillSwitchUrl, &kill_switch,
   1731                            sizeof(kill_switch));
   1732   if (std::binary_search(new_whitelist.begin(), new_whitelist.end(),
   1733                          kill_switch)) {
   1734     // The kill switch is whitelisted hence we whitelist all URLs.
   1735     WhitelistEverything(whitelist);
   1736   } else {
   1737     base::AutoLock locked(lookup_lock_);
   1738     whitelist->second = false;
   1739     whitelist->first.swap(new_whitelist);
   1740   }
   1741 }
   1742 
   1743 void SafeBrowsingDatabaseNew::LoadIpBlacklist(
   1744     const std::vector<SBAddFullHash>& full_hashes) {
   1745   DCHECK_EQ(creation_loop_, base::MessageLoop::current());
   1746   IPBlacklist new_blacklist;
   1747   DVLOG(2) << "Writing IP blacklist of size: " << full_hashes.size();
   1748   for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
   1749        it != full_hashes.end();
   1750        ++it) {
   1751     const char* full_hash = it->full_hash.full_hash;
   1752     DCHECK_EQ(crypto::kSHA256Length, arraysize(it->full_hash.full_hash));
   1753     // The format of the IP blacklist is:
   1754     // SHA-1(IPv6 prefix) + uint8(prefix size) + 11 unused bytes.
   1755     std::string hashed_ip_prefix(full_hash, base::kSHA1Length);
   1756     size_t prefix_size = static_cast<uint8>(full_hash[base::kSHA1Length]);
   1757     if (prefix_size > kMaxIpPrefixSize || prefix_size < kMinIpPrefixSize) {
   1758       DVLOG(2) << "Invalid IP prefix size in IP blacklist: " << prefix_size;
   1759       RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_INVALID);
   1760       new_blacklist.clear();  // Load empty blacklist.
   1761       break;
   1762     }
   1763 
   1764     // We precompute the mask for the given subnet size to speed up lookups.
   1765     // Basically we need to create a 16B long string which has the highest
   1766     // |size| bits sets to one.
   1767     std::string mask(net::kIPv6AddressSize, '\0');
   1768     mask.replace(0, prefix_size / 8, prefix_size / 8, '\xFF');
   1769     if ((prefix_size % 8) != 0) {
   1770       mask[prefix_size / 8] = 0xFF << (8 - (prefix_size % 8));
   1771     }
   1772     DVLOG(2) << "Inserting malicious IP: "
   1773              << " raw:" << base::HexEncode(full_hash, crypto::kSHA256Length)
   1774              << " mask:" << base::HexEncode(mask.data(), mask.size())
   1775              << " prefix_size:" << prefix_size
   1776              << " hashed_ip:" << base::HexEncode(hashed_ip_prefix.data(),
   1777                                                  hashed_ip_prefix.size());
   1778     new_blacklist[mask].insert(hashed_ip_prefix);
   1779   }
   1780 
   1781   base::AutoLock locked(lookup_lock_);
   1782   ip_blacklist_.swap(new_blacklist);
   1783 }
   1784 
   1785 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
   1786   SBFullHash malware_kill_switch;
   1787   crypto::SHA256HashString(kMalwareIPKillSwitchUrl, &malware_kill_switch,
   1788                            sizeof(malware_kill_switch));
   1789   std::vector<SBFullHash> full_hashes;
   1790   full_hashes.push_back(malware_kill_switch);
   1791   return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
   1792 }
   1793