// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"

#include "base/file_util.h"
#include "base/files/scoped_file.h"
#include "base/md5.h"
#include "base/metrics/histogram.h"
#include "base/metrics/sparse_histogram.h"

namespace {

// NOTE(shess): kFileMagic should not be a byte-wise palindrome, so
// that byte-order changes force corruption.
const int32 kFileMagic = 0x600D71FE;

// Version history:
// Version 6: aad08754/r2814 by erikkay@google.com on 2008-10-02 (sqlite)
// Version 7: 6afe28a5/r37435 by shess@chromium.org on 2010-01-28
// Version 8: d3dd0715/r259791 by shess@chromium.org on 2014-03-27
const int32 kFileVersion = 8;

// ReadAndVerifyHeader() returns this in case of error.
const int32 kInvalidVersion = -1;

// Header at the front of the main database file.
struct FileHeaderV7 {
  int32 magic, version;
  uint32 add_chunk_count, sub_chunk_count;
  uint32 add_prefix_count, sub_prefix_count;
  uint32 add_hash_count, sub_hash_count;
};

// Starting with version 8, the storage is sorted and can be sharded to allow
// updates to be done with lower memory requirements.  Newly written files will
// be sharded to need less than this amount of memory during update.  Larger
// values are preferred to minimize looping overhead during processing.
const int64 kUpdateStorageBytes = 100 * 1024;

// Prevent excessive sharding by setting a lower limit on the shard stride.
// Smaller values should work fine, but very small values will probably lead to
// poor performance.  Shard stride is indirectly related to
// |kUpdateStorageBytes|, setting that very small will bump against this.
const uint32 kMinShardStride = 1 << 24;

// Strides over the entire SBPrefix space.
const uint64 kMaxShardStride = 1ULL << 32;

// Maximum SBPrefix value.
const SBPrefix kMaxSBPrefix = ~0;
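// (SBPrefix is an unsigned 32-bit type, so this is 0xFFFFFFFF, one less than
// kMaxShardStride.)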

// Header at the front of the main database file.
struct FileHeaderV8 {
  int32 magic, version;
  uint32 add_chunk_count, sub_chunk_count;
  uint32 shard_stride;
  // TODO(shess): Is this where 64-bit will bite me?  Perhaps write a
  // specialized read/write?
};

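// Both header versions share the leading magic/version fields, so a file can
// be read as V8 first and re-read as V7 when the version field says so (see
// ReadAndVerifyHeader()).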
union FileHeader {
  struct FileHeaderV7 v7;
  struct FileHeaderV8 v8;
};

// Header for each chunk in the chunk-accumulation file.
struct ChunkHeader {
  uint32 add_prefix_count, sub_prefix_count;
  uint32 add_hash_count, sub_hash_count;
};

// Header for each shard of data in the main database file.
struct ShardHeader {
  uint32 add_prefix_count, sub_prefix_count;
  uint32 add_hash_count, sub_hash_count;
};

// Enumerate different format-change events for histogramming
// purposes.  DO NOT CHANGE THE ORDERING OF THESE VALUES.
enum FormatEventType {
  // Corruption detected, broken down by file format.
  FORMAT_EVENT_FILE_CORRUPT,
  FORMAT_EVENT_SQLITE_CORRUPT,  // Obsolete

  // The type of format found in the file.  The expected case (new
  // file format) is intentionally not covered.
  FORMAT_EVENT_FOUND_SQLITE,
  FORMAT_EVENT_FOUND_UNKNOWN,

  // The number of SQLite-format files deleted should be the same as
  // FORMAT_EVENT_FOUND_SQLITE.  It can differ if the delete fails,
  // or if a failure prevents the update from succeeding.
  FORMAT_EVENT_SQLITE_DELETED,  // Obsolete
  FORMAT_EVENT_SQLITE_DELETE_FAILED,  // Obsolete

  // Found and deleted (or failed to delete) the ancient "Safe
  // Browsing" file.
  FORMAT_EVENT_DELETED_ORIGINAL,
  FORMAT_EVENT_DELETED_ORIGINAL_FAILED,

  // The checksum did not check out in CheckValidity() or in
  // FinishUpdate().  This most likely indicates that the machine
  // crashed before the file was fully sync'ed to disk.
  FORMAT_EVENT_VALIDITY_CHECKSUM_FAILURE,
  FORMAT_EVENT_UPDATE_CHECKSUM_FAILURE,

  // The header checksum was incorrect in ReadAndVerifyHeader().  Likely
  // indicates that the system crashed while writing an update.
  FORMAT_EVENT_HEADER_CHECKSUM_FAILURE,

  // Memory space for histograms is determined by the max.  ALWAYS
  // ADD NEW VALUES BEFORE THIS ONE.
  FORMAT_EVENT_MAX
};

void RecordFormatEvent(FormatEventType event_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.FormatEvent", event_type, FORMAT_EVENT_MAX);
}

// Rewind the file.  Using fseek(2) because rewind(3) errors are
// weird.
bool FileRewind(FILE* fp) {
  int rv = fseek(fp, 0, SEEK_SET);
  DCHECK_EQ(rv, 0);
  return rv == 0;
}

// Read from |fp| into |item|, and fold the input data into the
// checksum in |context|, if non-NULL.  Return true on success.
template <class T>
bool ReadItem(T* item, FILE* fp, base::MD5Context* context) {
  const size_t ret = fread(item, sizeof(T), 1, fp);
  if (ret != 1)
    return false;

  if (context) {
    base::MD5Update(context,
                    base::StringPiece(reinterpret_cast<char*>(item),
                                      sizeof(T)));
  }
  return true;
}

// Write |item| to |fp|, and fold the output data into the checksum in
// |context|, if non-NULL.  Return true on success.
template <class T>
bool WriteItem(const T& item, FILE* fp, base::MD5Context* context) {
  const size_t ret = fwrite(&item, sizeof(T), 1, fp);
  if (ret != 1)
    return false;

  if (context) {
    base::MD5Update(context,
                    base::StringPiece(reinterpret_cast<const char*>(&item),
                                      sizeof(T)));
  }

  return true;
}

// Read |count| items into |values| from |fp|, and fold them into the
// checksum in |context|.  Returns true on success.
template <typename CT>
bool ReadToContainer(CT* values, size_t count, FILE* fp,
                     base::MD5Context* context) {
  if (!count)
    return true;

  for (size_t i = 0; i < count; ++i) {
    typename CT::value_type value;
    if (!ReadItem(&value, fp, context))
      return false;

    // push_back() is more obvious, but coded this way std::set can
    // also be read.
    values->insert(values->end(), value);
  }

  return true;
}

// Write values between |beg| and |end| to |fp|, and fold the data into the
// checksum in |context|, if non-NULL.  Returns true if all items were written.
template <typename CTI>
bool WriteRange(const CTI& beg, const CTI& end,
                FILE* fp, base::MD5Context* context) {
  for (CTI iter = beg; iter != end; ++iter) {
    if (!WriteItem(*iter, fp, context))
      return false;
  }
  return true;
}

// Write all of |values| to |fp|, and fold the data into the checksum
// in |context|, if non-NULL.  Returns true if all items were written.
template <typename CT>
bool WriteContainer(const CT& values, FILE* fp,
                    base::MD5Context* context) {
  return WriteRange(values.begin(), values.end(), fp, context);
}

// Delete the chunks in |deleted| from |chunks|.
void DeleteChunksFromSet(const base::hash_set<int32>& deleted,
                         std::set<int32>* chunks) {
  for (std::set<int32>::iterator iter = chunks->begin();
       iter != chunks->end();) {
    std::set<int32>::iterator prev = iter++;
    if (deleted.count(*prev) > 0)
      chunks->erase(prev);
  }
}

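// Read the stored digest from |fp| and compare it against the running
// checksum in |context|.  MD5IntermediateFinal() snapshots |context| without
// finalizing it, and the stored digest bytes are folded back into |context|,
// so a later checksum over the rest of the stream also covers them.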
bool ReadAndVerifyChecksum(FILE* fp, base::MD5Context* context) {
  base::MD5Digest calculated_digest;
  base::MD5IntermediateFinal(&calculated_digest, context);

  base::MD5Digest file_digest;
  if (!ReadItem(&file_digest, fp, context))
    return false;

  return memcmp(&file_digest, &calculated_digest, sizeof(file_digest)) == 0;
}

// Sanity-check the header against the file's size to make sure our
// vectors aren't gigantic.  This doubles as a cheap way to detect
// corruption without having to checksum the entire file.
bool FileHeaderV7SanityCheck(const base::FilePath& filename,
                             const FileHeaderV7& header) {
  DCHECK_EQ(header.version, 7);

  int64 size = 0;
  if (!base::GetFileSize(filename, &size))
    return false;

  int64 expected_size = sizeof(FileHeaderV7);
  expected_size += header.add_chunk_count * sizeof(int32);
  expected_size += header.sub_chunk_count * sizeof(int32);
  expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
  expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
  expected_size += header.add_hash_count * sizeof(SBAddFullHash);
  expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
  expected_size += sizeof(base::MD5Digest);
  if (size != expected_size)
    return false;

  return true;
}

// Helper function to read the file header and chunk TOC.  Rewinds |fp| and
// initializes |context|.  The header is left in |header|, with the version
// returned.  kInvalidVersion is returned on sanity-check or checksum failure.
int ReadAndVerifyHeader(const base::FilePath& filename,
                        FileHeader* header,
                        std::set<int32>* add_chunks,
                        std::set<int32>* sub_chunks,
                        FILE* fp,
                        base::MD5Context* context) {
  DCHECK(header);
  DCHECK(add_chunks);
  DCHECK(sub_chunks);
  DCHECK(fp);
  DCHECK(context);

  int version = kInvalidVersion;

  base::MD5Init(context);
  if (!FileRewind(fp))
    return kInvalidVersion;
  if (!ReadItem(&header->v8, fp, context))
    return kInvalidVersion;
  if (header->v8.magic != kFileMagic)
    return kInvalidVersion;

  size_t add_chunks_count = 0;
  size_t sub_chunks_count = 0;

  // Track version read to inform removal of support for older versions.
  UMA_HISTOGRAM_SPARSE_SLOWLY("SB2.StoreVersionRead", header->v8.version);

  if (header->v8.version == 7) {
    version = 7;

    // Reset the context and re-read the v7 header.
    base::MD5Init(context);
    if (!FileRewind(fp))
      return kInvalidVersion;
    if (!ReadItem(&header->v7, fp, context))
      return kInvalidVersion;
    if (header->v7.magic != kFileMagic || header->v7.version != 7)
      return kInvalidVersion;
    if (!FileHeaderV7SanityCheck(filename, header->v7))
      return kInvalidVersion;

    add_chunks_count = header->v7.add_chunk_count;
    sub_chunks_count = header->v7.sub_chunk_count;
  } else if (header->v8.version == kFileVersion) {
    version = 8;
    add_chunks_count = header->v8.add_chunk_count;
    sub_chunks_count = header->v8.sub_chunk_count;
  } else {
    return kInvalidVersion;
  }

  if (!ReadToContainer(add_chunks, add_chunks_count, fp, context) ||
      !ReadToContainer(sub_chunks, sub_chunks_count, fp, context)) {
    return kInvalidVersion;
  }

  // v8 includes a checksum to validate the header.
  if (version > 7 && !ReadAndVerifyChecksum(fp, context)) {
    RecordFormatEvent(FORMAT_EVENT_HEADER_CHECKSUM_FAILURE);
    return kInvalidVersion;
  }

  return version;
}

// Helper function to write out the initial header and chunks-contained data.
// Rewinds |fp|, initializes |context|, then writes a file header and
// |add_chunks| and |sub_chunks|.
bool WriteHeader(uint32 out_stride,
                 const std::set<int32>& add_chunks,
                 const std::set<int32>& sub_chunks,
                 FILE* fp,
                 base::MD5Context* context) {
  if (!FileRewind(fp))
    return false;

  base::MD5Init(context);
  FileHeaderV8 header;
  header.magic = kFileMagic;
  header.version = kFileVersion;
  header.add_chunk_count = add_chunks.size();
  header.sub_chunk_count = sub_chunks.size();
  header.shard_stride = out_stride;
  if (!WriteItem(header, fp, context))
    return false;

  if (!WriteContainer(add_chunks, fp, context) ||
      !WriteContainer(sub_chunks, fp, context))
    return false;

  // Write out the header digest.
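  // As with ReadAndVerifyChecksum(), this is a snapshot of the running
  // context, and the digest bytes themselves are folded into |context| so
  // that the final file checksum also covers them.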
  base::MD5Digest header_digest;
  base::MD5IntermediateFinal(&header_digest, context);
  if (!WriteItem(header_digest, fp, context))
    return false;

  return true;
}

// Return |true| if the range is sorted by the given comparator.
template <typename CTI, typename LESS>
bool sorted(CTI beg, CTI end, LESS less) {
  while ((end - beg) > 1) {
    CTI n = beg++;
    DCHECK(!less(*beg, *n));
    if (less(*beg, *n))
      return false;
  }
  return true;
}

// Merge |beg|..|end| into |container|.  Both should be sorted by the given
// comparator, and the range iterators should not be derived from |container|.
// Differs from std::inplace_merge() in that additional memory is not required
// for linear performance.
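// For example, merging the range (2, 4) into a container holding (1, 3)
// yields (1, 2, 3, 4), moving each element at most once.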
template <typename CT, typename CTI, typename COMP>
void container_merge(CT* container, CTI beg, CTI end, const COMP& less) {
  DCHECK(sorted(container->begin(), container->end(), less));
  DCHECK(sorted(beg, end, less));

  // Size the container to fit the results.
  const size_t c_size = container->size();
  container->resize(c_size + (end - beg));

  // |c_end| marks the end of the original data, while |c_out| is the write
  // cursor that scans from back to front as elements are merged.
  typename CT::iterator c_end = container->begin() + c_size;
  typename CT::iterator c_out = container->end();

  // While both inputs have data, move the greater to |c_out|.
  while (c_end != container->begin() && end != beg) {
    if (less(*(c_end - 1), *(end - 1))) {
      *(--c_out) = *(--end);
    } else {
      *(--c_out) = *(--c_end);
    }
  }

  // Copy any data remaining in the new range.
  if (end != beg) {
    // The original container data has been fully shifted.
    DCHECK(c_end == container->begin());

    // There is exactly the correct amount of space left.
    DCHECK_EQ(c_out - c_end, end - beg);

    std::copy(beg, end, container->begin());
  }

  DCHECK(sorted(container->begin(), container->end(), less));
}

// Collection of iterators used while stepping through StateInternal (see
// below).
class StateInternalPos {
 public:
  StateInternalPos(SBAddPrefixes::iterator add_prefixes_iter,
                   SBSubPrefixes::iterator sub_prefixes_iter,
                   std::vector<SBAddFullHash>::iterator add_hashes_iter,
                   std::vector<SBSubFullHash>::iterator sub_hashes_iter)
      : add_prefixes_iter_(add_prefixes_iter),
        sub_prefixes_iter_(sub_prefixes_iter),
        add_hashes_iter_(add_hashes_iter),
        sub_hashes_iter_(sub_hashes_iter) {
  }

  SBAddPrefixes::iterator add_prefixes_iter_;
  SBSubPrefixes::iterator sub_prefixes_iter_;
  std::vector<SBAddFullHash>::iterator add_hashes_iter_;
  std::vector<SBSubFullHash>::iterator sub_hashes_iter_;
};

// Helper to find the next shard boundary.
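// It has the (value, element) signature that std::upper_bound() expects for
// its comparator, letting ShardEnd() binary-search each sorted container.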
template <class T>
bool prefix_bounder(SBPrefix val, const T& elt) {
  return val < elt.GetAddPrefix();
}

// Container for partial database state.  Includes add/sub prefixes/hashes, plus
// aggregate operations on same.
class StateInternal {
 public:
  // Append indicated amount of data from |fp|.
  bool AppendData(size_t add_prefix_count, size_t sub_prefix_count,
                  size_t add_hash_count, size_t sub_hash_count,
                  FILE* fp, base::MD5Context* context) {
    return
        ReadToContainer(&add_prefixes_, add_prefix_count, fp, context) &&
        ReadToContainer(&sub_prefixes_, sub_prefix_count, fp, context) &&
        ReadToContainer(&add_full_hashes_, add_hash_count, fp, context) &&
        ReadToContainer(&sub_full_hashes_, sub_hash_count, fp, context);
  }

  void ClearData() {
    add_prefixes_.clear();
    sub_prefixes_.clear();
    add_full_hashes_.clear();
    sub_full_hashes_.clear();
  }

  // Merge data from |beg|..|end| into receiver's state, then process the state.
  // The current state and the range given should correspond to the same sorted
  // shard of data from different sources.  |add_del_cache| and |sub_del_cache|
  // indicate the chunk ids which should be deleted during processing (see
  // SBProcessSubs).
  void MergeDataAndProcess(const StateInternalPos& beg,
                           const StateInternalPos& end,
                           const base::hash_set<int32>& add_del_cache,
                           const base::hash_set<int32>& sub_del_cache) {
    container_merge(&add_prefixes_,
                    beg.add_prefixes_iter_,
                    end.add_prefixes_iter_,
                    SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);

    container_merge(&sub_prefixes_,
                    beg.sub_prefixes_iter_,
                    end.sub_prefixes_iter_,
                    SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);

    container_merge(&add_full_hashes_,
                    beg.add_hashes_iter_,
                    end.add_hashes_iter_,
                    SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);

    container_merge(&sub_full_hashes_,
                    beg.sub_hashes_iter_,
                    end.sub_hashes_iter_,
                    SBAddPrefixHashLess<SBSubFullHash, SBSubFullHash>);

    SBProcessSubs(&add_prefixes_, &sub_prefixes_,
                  &add_full_hashes_, &sub_full_hashes_,
                  add_del_cache, sub_del_cache);
  }

  // Sort the data appropriately for the sharding, merging, and processing
  // operations.
  void SortData() {
    std::sort(add_prefixes_.begin(), add_prefixes_.end(),
              SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);
    std::sort(sub_prefixes_.begin(), sub_prefixes_.end(),
              SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);
    std::sort(add_full_hashes_.begin(), add_full_hashes_.end(),
              SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>);
    std::sort(sub_full_hashes_.begin(), sub_full_hashes_.end(),
              SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>);
  }

  // Iterator from the beginning of the state's data.
  StateInternalPos StateBegin() {
    return StateInternalPos(add_prefixes_.begin(),
                            sub_prefixes_.begin(),
                            add_full_hashes_.begin(),
                            sub_full_hashes_.begin());
  }

  // An iterator pointing just after the last possible element of the shard
  // indicated by |shard_max|.  Used to step through the state by shard.
  // TODO(shess): Verify whether binary search really improves over linear.
  // Merging or writing will immediately touch all of these elements.
  StateInternalPos ShardEnd(const StateInternalPos& beg, SBPrefix shard_max) {
    return StateInternalPos(
        std::upper_bound(beg.add_prefixes_iter_, add_prefixes_.end(),
                         shard_max, prefix_bounder<SBAddPrefix>),
        std::upper_bound(beg.sub_prefixes_iter_, sub_prefixes_.end(),
                         shard_max, prefix_bounder<SBSubPrefix>),
        std::upper_bound(beg.add_hashes_iter_, add_full_hashes_.end(),
                         shard_max, prefix_bounder<SBAddFullHash>),
        std::upper_bound(beg.sub_hashes_iter_, sub_full_hashes_.end(),
                         shard_max, prefix_bounder<SBSubFullHash>));
  }

  // Write a shard header and data for the shard starting at |beg| and ending at
  // the element before |end|.
  bool WriteShard(const StateInternalPos& beg, const StateInternalPos& end,
                  FILE* fp, base::MD5Context* context) {
    ShardHeader shard_header;
    shard_header.add_prefix_count =
        end.add_prefixes_iter_ - beg.add_prefixes_iter_;
    shard_header.sub_prefix_count =
        end.sub_prefixes_iter_ - beg.sub_prefixes_iter_;
    shard_header.add_hash_count =
        end.add_hashes_iter_ - beg.add_hashes_iter_;
    shard_header.sub_hash_count =
        end.sub_hashes_iter_ - beg.sub_hashes_iter_;

    return
        WriteItem(shard_header, fp, context) &&
        WriteRange(beg.add_prefixes_iter_, end.add_prefixes_iter_,
                   fp, context) &&
        WriteRange(beg.sub_prefixes_iter_, end.sub_prefixes_iter_,
                   fp, context) &&
        WriteRange(beg.add_hashes_iter_, end.add_hashes_iter_,
                   fp, context) &&
        WriteRange(beg.sub_hashes_iter_, end.sub_hashes_iter_,
                   fp, context);
  }

  SBAddPrefixes add_prefixes_;
  SBSubPrefixes sub_prefixes_;
  std::vector<SBAddFullHash> add_full_hashes_;
  std::vector<SBSubFullHash> sub_full_hashes_;
};

// True if |val| is a power of two.
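// (val & (val - 1)) clears the lowest set bit, so it is zero exactly when at
// most one bit is set; the leading |val| test rejects zero.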
template <typename T>
bool IsPowerOfTwo(const T& val) {
  return val && (val & (val - 1)) == 0;
}

// Helper to read the entire database state, used by GetAddPrefixes() and
// GetAddFullHashes().  Those functions are generally used only for smaller
// files.  Returns false in case of errors reading the data.
bool ReadDbStateHelper(const base::FilePath& filename,
                       StateInternal* db_state) {
  base::ScopedFILE file(base::OpenFile(filename, "rb"));
  if (file.get() == NULL)
    return false;

  std::set<int32> add_chunks;
  std::set<int32> sub_chunks;

  base::MD5Context context;
  FileHeader header;
  const int version =
      ReadAndVerifyHeader(filename, &header, &add_chunks, &sub_chunks,
                          file.get(), &context);
  if (version == kInvalidVersion)
    return false;

  if (version == 7) {
    if (!db_state->AppendData(header.v7.add_prefix_count,
                              header.v7.sub_prefix_count,
                              header.v7.add_hash_count,
                              header.v7.sub_hash_count,
                              file.get(), &context)) {
      return false;
    }

    // v7 data was not stored sorted.
    db_state->SortData();
  } else {
    // Read until the shard start overflows, always at least one pass.
    uint64 in_min = 0;
    uint64 in_stride = header.v8.shard_stride;
    if (!in_stride)
      in_stride = kMaxShardStride;
    if (!IsPowerOfTwo(in_stride))
      return false;

    do {
      ShardHeader shard_header;
      if (!ReadItem(&shard_header, file.get(), &context))
        return false;

      if (!db_state->AppendData(shard_header.add_prefix_count,
                                shard_header.sub_prefix_count,
                                shard_header.add_hash_count,
                                shard_header.sub_hash_count,
                                file.get(), &context)) {
        return false;
      }

      in_min += in_stride;
    } while (in_min <= kMaxSBPrefix);
  }

  if (!ReadAndVerifyChecksum(file.get(), &context))
    return false;

  int64 size = 0;
  if (!base::GetFileSize(filename, &size))
    return false;

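  // The entire file should have been consumed exactly; trailing bytes
  // indicate corruption.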
  return static_cast<int64>(ftell(file.get())) == size;
}

}  // namespace

// static
void SafeBrowsingStoreFile::CheckForOriginalAndDelete(
    const base::FilePath& current_filename) {
  const base::FilePath original_filename(
      current_filename.DirName().AppendASCII("Safe Browsing"));
  if (base::PathExists(original_filename)) {
    int64 size = 0;
    if (base::GetFileSize(original_filename, &size)) {
      UMA_HISTOGRAM_COUNTS("SB2.OldDatabaseKilobytes",
                           static_cast<int>(size / 1024));
    }

    if (base::DeleteFile(original_filename, false)) {
      RecordFormatEvent(FORMAT_EVENT_DELETED_ORIGINAL);
    } else {
      RecordFormatEvent(FORMAT_EVENT_DELETED_ORIGINAL_FAILED);
    }

    // Just best-effort on the journal file, don't want to get lost in
    // the weeds.
    const base::FilePath journal_filename(
        current_filename.DirName().AppendASCII("Safe Browsing-journal"));
    base::DeleteFile(journal_filename, false);
  }
}

SafeBrowsingStoreFile::SafeBrowsingStoreFile()
    : chunks_written_(0), empty_(false), corruption_seen_(false) {}

SafeBrowsingStoreFile::~SafeBrowsingStoreFile() {
  Close();
}

bool SafeBrowsingStoreFile::Delete() {
  // The database should not be open at this point.  But, just in
  // case, close everything before deleting.
  if (!Close()) {
    NOTREACHED();
    return false;
  }

  return DeleteStore(filename_);
}

bool SafeBrowsingStoreFile::CheckValidity() {
  // The file was either empty or never opened.  The empty case is
  // presumed not to be invalid.  The never-opened case can happen if
  // BeginUpdate() fails for any database, and should already have
  // caused the corruption callback to fire.
  if (!file_.get())
    return true;

  if (!FileRewind(file_.get()))
    return OnCorruptDatabase();

  int64 size = 0;
  if (!base::GetFileSize(filename_, &size))
    return OnCorruptDatabase();

  base::MD5Context context;
  base::MD5Init(&context);

  // Read everything except the final digest.
  size_t bytes_left = static_cast<size_t>(size);
  CHECK(size == static_cast<int64>(bytes_left));
  if (bytes_left < sizeof(base::MD5Digest))
    return OnCorruptDatabase();
  bytes_left -= sizeof(base::MD5Digest);

  // Fold the contents of the file into the checksum.
  while (bytes_left > 0) {
    char buf[4096];
    const size_t c = std::min(sizeof(buf), bytes_left);
    const size_t ret = fread(buf, 1, c, file_.get());

    // The file's size changed while reading, give up.
    if (ret != c)
      return OnCorruptDatabase();
    base::MD5Update(&context, base::StringPiece(buf, c));
    bytes_left -= c;
  }

  if (!ReadAndVerifyChecksum(file_.get(), &context)) {
    RecordFormatEvent(FORMAT_EVENT_VALIDITY_CHECKSUM_FAILURE);
    return OnCorruptDatabase();
  }

  return true;
}

void SafeBrowsingStoreFile::Init(const base::FilePath& filename,
                                 const base::Closure& corruption_callback) {
  filename_ = filename;
  corruption_callback_ = corruption_callback;
}

bool SafeBrowsingStoreFile::BeginChunk() {
  return ClearChunkBuffers();
}

bool SafeBrowsingStoreFile::WriteAddPrefix(int32 chunk_id, SBPrefix prefix) {
  add_prefixes_.push_back(SBAddPrefix(chunk_id, prefix));
  return true;
}

bool SafeBrowsingStoreFile::GetAddPrefixes(SBAddPrefixes* add_prefixes) {
  add_prefixes->clear();
  if (!base::PathExists(filename_))
    return true;

  StateInternal db_state;
  if (!ReadDbStateHelper(filename_, &db_state))
    return OnCorruptDatabase();

  add_prefixes->swap(db_state.add_prefixes_);
  return true;
}

bool SafeBrowsingStoreFile::GetAddFullHashes(
    std::vector<SBAddFullHash>* add_full_hashes) {
  add_full_hashes->clear();
  if (!base::PathExists(filename_))
    return true;

  StateInternal db_state;
  if (!ReadDbStateHelper(filename_, &db_state))
    return OnCorruptDatabase();

  add_full_hashes->swap(db_state.add_full_hashes_);
  return true;
}

bool SafeBrowsingStoreFile::WriteAddHash(int32 chunk_id,
                                         const SBFullHash& full_hash) {
  add_hashes_.push_back(SBAddFullHash(chunk_id, full_hash));
  return true;
}

bool SafeBrowsingStoreFile::WriteSubPrefix(int32 chunk_id,
                                           int32 add_chunk_id,
                                           SBPrefix prefix) {
  sub_prefixes_.push_back(SBSubPrefix(chunk_id, add_chunk_id, prefix));
  return true;
}

bool SafeBrowsingStoreFile::WriteSubHash(int32 chunk_id, int32 add_chunk_id,
                                         const SBFullHash& full_hash) {
  sub_hashes_.push_back(SBSubFullHash(chunk_id, add_chunk_id, full_hash));
  return true;
}

bool SafeBrowsingStoreFile::OnCorruptDatabase() {
  if (!corruption_seen_)
    RecordFormatEvent(FORMAT_EVENT_FILE_CORRUPT);
  corruption_seen_ = true;

  corruption_callback_.Run();

  // Return false as a convenience to callers.
  return false;
}

bool SafeBrowsingStoreFile::Close() {
  ClearUpdateBuffers();

  // Make sure the files are closed.
  file_.reset();
  new_file_.reset();
  return true;
}

bool SafeBrowsingStoreFile::BeginUpdate() {
  DCHECK(!file_.get() && !new_file_.get());

  // Structures should all be clear unless something bad happened.
  DCHECK(add_chunks_cache_.empty());
  DCHECK(sub_chunks_cache_.empty());
  DCHECK(add_del_cache_.empty());
  DCHECK(sub_del_cache_.empty());
  DCHECK(add_prefixes_.empty());
  DCHECK(sub_prefixes_.empty());
  DCHECK(add_hashes_.empty());
  DCHECK(sub_hashes_.empty());
  DCHECK_EQ(chunks_written_, 0);

  // Since the following code will already hit the profile looking for
  // database files, this is a reasonable time to delete any old
  // files.
  CheckForOriginalAndDelete(filename_);

  corruption_seen_ = false;

  const base::FilePath new_filename = TemporaryFileForFilename(filename_);
  base::ScopedFILE new_file(base::OpenFile(new_filename, "wb+"));
  if (new_file.get() == NULL)
    return false;

  base::ScopedFILE file(base::OpenFile(filename_, "rb"));
  empty_ = (file.get() == NULL);
  if (empty_) {
    // If the file exists but cannot be opened, treat it as corrupt rather
    // than deleting it directly, because the bloom filter needs to be
    // deleted, too.
    if (base::PathExists(filename_))
      return OnCorruptDatabase();

    new_file_.swap(new_file);
    return true;
  }

  base::MD5Context context;
  FileHeader header;
  const int version =
      ReadAndVerifyHeader(filename_, &header,
                          &add_chunks_cache_, &sub_chunks_cache_,
                          file.get(), &context);
  if (version == kInvalidVersion) {
    FileHeaderV8 retry_header;
    if (FileRewind(file.get()) && ReadItem(&retry_header, file.get(), NULL) &&
        (retry_header.magic != kFileMagic ||
         (retry_header.version != 8 && retry_header.version != 7))) {
      // TODO(shess): Think on whether these histograms are generating any
      // actionable data.  I kid you not, SQLITE happens many thousands of times
      // per day, UNKNOWN about 3x higher than that.
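      // A SQLite database begins with the 16-byte header string
      // "SQLite format 3" (NUL-terminated), which fits within the
      // just re-read header bytes.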
      if (!strcmp(reinterpret_cast<char*>(&retry_header.magic),
                  "SQLite format 3")) {
        RecordFormatEvent(FORMAT_EVENT_FOUND_SQLITE);
      } else {
        RecordFormatEvent(FORMAT_EVENT_FOUND_UNKNOWN);
      }
    }

    // Close the file so that it can be deleted.
    file.reset();

    return OnCorruptDatabase();
  }

  file_.swap(file);
  new_file_.swap(new_file);
  return true;
}

bool SafeBrowsingStoreFile::FinishChunk() {
  if (!add_prefixes_.size() && !sub_prefixes_.size() &&
      !add_hashes_.size() && !sub_hashes_.size())
    return true;

  ChunkHeader header;
  header.add_prefix_count = add_prefixes_.size();
  header.sub_prefix_count = sub_prefixes_.size();
  header.add_hash_count = add_hashes_.size();
  header.sub_hash_count = sub_hashes_.size();
  if (!WriteItem(header, new_file_.get(), NULL))
    return false;

  if (!WriteContainer(add_prefixes_, new_file_.get(), NULL) ||
      !WriteContainer(sub_prefixes_, new_file_.get(), NULL) ||
      !WriteContainer(add_hashes_, new_file_.get(), NULL) ||
      !WriteContainer(sub_hashes_, new_file_.get(), NULL))
    return false;

  ++chunks_written_;

  // Clear everything to save memory.
  return ClearChunkBuffers();
}

bool SafeBrowsingStoreFile::DoUpdate(
    safe_browsing::PrefixSetBuilder* builder,
    std::vector<SBAddFullHash>* add_full_hashes_result) {
  DCHECK(file_.get() || empty_);
  DCHECK(new_file_.get());
  CHECK(builder);
  CHECK(add_full_hashes_result);

  // Rewind the temporary storage.
  if (!FileRewind(new_file_.get()))
    return false;

  // Get chunk file's size for validating counts.
  int64 update_size = 0;
  if (!base::GetFileSize(TemporaryFileForFilename(filename_), &update_size))
    return OnCorruptDatabase();

  // Track update size to answer questions at http://crbug.com/72216.
  // Log small updates as 1k so that the 0 (underflow) bucket can be
  // used for "empty" in SafeBrowsingDatabase.
  UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes",
                       std::max(static_cast<int>(update_size / 1024), 1));

  // Chunk updates to integrate.
  StateInternal new_state;

  // Read update chunks.
  for (int i = 0; i < chunks_written_; ++i) {
    ChunkHeader header;

    int64 ofs = ftell(new_file_.get());
    if (ofs == -1)
      return false;

    if (!ReadItem(&header, new_file_.get(), NULL))
      return false;

    // As a safety measure, make sure that the header describes a sane
    // chunk, given the remaining file size.
    int64 expected_size = ofs + sizeof(ChunkHeader);
    expected_size += header.add_prefix_count * sizeof(SBAddPrefix);
    expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);
    expected_size += header.add_hash_count * sizeof(SBAddFullHash);
    expected_size += header.sub_hash_count * sizeof(SBSubFullHash);
    if (expected_size > update_size)
      return false;

    if (!new_state.AppendData(header.add_prefix_count, header.sub_prefix_count,
                              header.add_hash_count, header.sub_hash_count,
                              new_file_.get(), NULL)) {
      return false;
    }
  }

  // The state was accumulated by chunk, sort by prefix.
  new_state.SortData();

  // These strides control how much data is loaded into memory per pass.
  // Strides must be a power of two.  |in_stride| will be derived from the
  // input file.  |out_stride| will be derived from an estimate of the resulting
  // file's size.  |process_stride| will be the max of both.
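  // For example (illustrative values): with in_stride 2^28 and out_stride
  // 2^26, process_stride is 2^28, so each pass reads one input shard and
  // writes four output shards.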
  uint64 in_stride = kMaxShardStride;
  uint64 out_stride = kMaxShardStride;
  uint64 process_stride = 0;

  // The header info is only used later if |!empty_|.  The v8 read loop only
  // needs |in_stride|, while v7 needs to refer to header information.
  base::MD5Context in_context;
  int version = kInvalidVersion;
  FileHeader header;

  if (!empty_) {
    DCHECK(file_.get());

    version = ReadAndVerifyHeader(filename_, &header,
                                  &add_chunks_cache_, &sub_chunks_cache_,
                                  file_.get(), &in_context);
    if (version == kInvalidVersion)
      return OnCorruptDatabase();

    if (version == 8 && header.v8.shard_stride)
      in_stride = header.v8.shard_stride;

    // The header checksum should have prevented this case, but the code will be
    // broken if this is not correct.
    if (!IsPowerOfTwo(in_stride))
      return OnCorruptDatabase();
  }

  // We no longer need to track deleted chunks.
  DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);
  DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);

  // Calculate |out_stride| to break the file down into reasonable shards.
  {
    int64 original_size = 0;
    if (!empty_ && !base::GetFileSize(filename_, &original_size))
      return OnCorruptDatabase();

    // Approximate the final size as everything.  Subs and deletes will reduce
    // the size, but modest over-sharding won't hurt much.
    int64 shard_size = original_size + update_size;

    // Keep splitting until a single stride of data fits the target.
    size_t shifts = 0;
    while (out_stride > kMinShardStride && shard_size > kUpdateStorageBytes) {
      out_stride >>= 1;
      shard_size >>= 1;
      ++shifts;
    }
    UMA_HISTOGRAM_COUNTS("SB2.OutShardShifts", shifts);

    DCHECK(IsPowerOfTwo(out_stride));
  }

  // Outer loop strides by the max of the input stride (to read integral shards)
  // and the output stride (to write integral shards).
  process_stride = std::max(in_stride, out_stride);
  DCHECK(IsPowerOfTwo(process_stride));
  DCHECK_EQ(0u, process_stride % in_stride);
  DCHECK_EQ(0u, process_stride % out_stride);

  // Start writing the new data to |new_file_|.
  base::MD5Context out_context;
  if (!WriteHeader(out_stride, add_chunks_cache_, sub_chunks_cache_,
                   new_file_.get(), &out_context)) {
    return false;
  }

  // Start at the beginning of the SBPrefix space.
  uint64 in_min = 0;
  uint64 out_min = 0;
  uint64 process_min = 0;

  // Start at the beginning of the updates.
  StateInternalPos new_pos = new_state.StateBegin();

  // Re-usable container for shard processing.
  StateInternal db_state;

  // Track aggregate counts for histograms.
  size_t add_prefix_count = 0;
  size_t sub_prefix_count = 0;

  do {
    // Maximum element in the current shard.
    SBPrefix process_max =
        static_cast<SBPrefix>(process_min + process_stride - 1);
    DCHECK_GT(process_max, process_min);

    // Drop the data from previous pass.
    db_state.ClearData();

    // Fill the processing shard with one or more input shards.
    if (!empty_) {
      if (version == 7) {
        // Treat v7 as a single-shard file.
        DCHECK_EQ(in_min, 0u);
        DCHECK_EQ(in_stride, kMaxShardStride);
        DCHECK_EQ(process_stride, kMaxShardStride);
        if (!db_state.AppendData(header.v7.add_prefix_count,
                                 header.v7.sub_prefix_count,
                                 header.v7.add_hash_count,
                                 header.v7.sub_hash_count,
                                 file_.get(), &in_context))
          return OnCorruptDatabase();

        // v7 data is not sorted correctly.
        db_state.SortData();
      } else {
        do {
          ShardHeader shard_header;
          if (!ReadItem(&shard_header, file_.get(), &in_context))
            return OnCorruptDatabase();

          if (!db_state.AppendData(shard_header.add_prefix_count,
                                   shard_header.sub_prefix_count,
                                   shard_header.add_hash_count,
                                   shard_header.sub_hash_count,
                                   file_.get(), &in_context))
            return OnCorruptDatabase();

          in_min += in_stride;
        } while (in_min <= kMaxSBPrefix && in_min < process_max);
      }
    }

    // Shard the update data to match the database data, then merge the update
    // data and process the results.
    {
      StateInternalPos new_end = new_state.ShardEnd(new_pos, process_max);
      db_state.MergeDataAndProcess(new_pos, new_end,
                                   add_del_cache_, sub_del_cache_);
      new_pos = new_end;
    }

    // Collect the processed data for return to caller.
    for (size_t i = 0; i < db_state.add_prefixes_.size(); ++i) {
      builder->AddPrefix(db_state.add_prefixes_[i].prefix);
    }
    add_full_hashes_result->insert(add_full_hashes_result->end(),
                                   db_state.add_full_hashes_.begin(),
                                   db_state.add_full_hashes_.end());
    add_prefix_count += db_state.add_prefixes_.size();
    sub_prefix_count += db_state.sub_prefixes_.size();

    // Write one or more shards of processed output.
    StateInternalPos out_pos = db_state.StateBegin();
    do {
      SBPrefix out_max = static_cast<SBPrefix>(out_min + out_stride - 1);
      DCHECK_GT(out_max, out_min);

      StateInternalPos out_end = db_state.ShardEnd(out_pos, out_max);
      if (!db_state.WriteShard(out_pos, out_end, new_file_.get(), &out_context))
        return false;
      out_pos = out_end;

      out_min += out_stride;
    } while (out_min == static_cast<SBPrefix>(out_min) &&
             out_min < process_max);

    process_min += process_stride;
  } while (process_min <= kMaxSBPrefix);

  // Verify the overall checksum.
  if (!empty_) {
    if (!ReadAndVerifyChecksum(file_.get(), &in_context)) {
      RecordFormatEvent(FORMAT_EVENT_UPDATE_CHECKSUM_FAILURE);
      return OnCorruptDatabase();
    }

    // TODO(shess): Verify EOF?

    // Close the input file so the new file can be renamed over it.
    file_.reset();
  }
  DCHECK(!file_.get());

  // Write the overall checksum.
  base::MD5Digest out_digest;
  base::MD5Final(&out_digest, &out_context);
  if (!WriteItem(out_digest, new_file_.get(), NULL))
    return false;

  // Trim any excess left over from the temporary chunk data.
  if (!base::TruncateFile(new_file_.get()))
    return false;

  // Close the file handle and swizzle the file into place.
  new_file_.reset();
  if (!base::DeleteFile(filename_, false) &&
      base::PathExists(filename_))
    return false;

  const base::FilePath new_filename = TemporaryFileForFilename(filename_);
  if (!base::Move(new_filename, filename_))
    return false;

  // Record aggregate prefix counts for histograms.
  UMA_HISTOGRAM_COUNTS("SB2.AddPrefixes", add_prefix_count);
  UMA_HISTOGRAM_COUNTS("SB2.SubPrefixes", sub_prefix_count);

  return true;
}

bool SafeBrowsingStoreFile::FinishUpdate(
    safe_browsing::PrefixSetBuilder* builder,
    std::vector<SBAddFullHash>* add_full_hashes_result) {
  DCHECK(builder);
  DCHECK(add_full_hashes_result);

  if (!DoUpdate(builder, add_full_hashes_result)) {
    CancelUpdate();
    return false;
  }

  DCHECK(!new_file_.get());
  DCHECK(!file_.get());

  return Close();
}

bool SafeBrowsingStoreFile::CancelUpdate() {
  return Close();
}

void SafeBrowsingStoreFile::SetAddChunk(int32 chunk_id) {
  add_chunks_cache_.insert(chunk_id);
}

bool SafeBrowsingStoreFile::CheckAddChunk(int32 chunk_id) {
  return add_chunks_cache_.count(chunk_id) > 0;
}

void SafeBrowsingStoreFile::GetAddChunks(std::vector<int32>* out) {
  out->clear();
  out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end());
}

void SafeBrowsingStoreFile::SetSubChunk(int32 chunk_id) {
  sub_chunks_cache_.insert(chunk_id);
}

bool SafeBrowsingStoreFile::CheckSubChunk(int32 chunk_id) {
  return sub_chunks_cache_.count(chunk_id) > 0;
}

void SafeBrowsingStoreFile::GetSubChunks(std::vector<int32>* out) {
  out->clear();
  out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end());
}

void SafeBrowsingStoreFile::DeleteAddChunk(int32 chunk_id) {
  add_del_cache_.insert(chunk_id);
}

void SafeBrowsingStoreFile::DeleteSubChunk(int32 chunk_id) {
  sub_del_cache_.insert(chunk_id);
}

// static
bool SafeBrowsingStoreFile::DeleteStore(const base::FilePath& basename) {
  if (!base::DeleteFile(basename, false) &&
      base::PathExists(basename)) {
    NOTREACHED();
    return false;
  }

  const base::FilePath new_filename = TemporaryFileForFilename(basename);
  if (!base::DeleteFile(new_filename, false) &&
      base::PathExists(new_filename)) {
    NOTREACHED();
    return false;
  }

  // With SQLite support gone, one way to get to this code is if the
  // existing file is a SQLite file.  Make sure the journal file is
  // also removed.
  const base::FilePath journal_filename(
      basename.value() + FILE_PATH_LITERAL("-journal"));
  if (base::PathExists(journal_filename))
    base::DeleteFile(journal_filename, false);

  return true;
}