Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
      6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
      7 
#include <string.h>

#include <deque>
#include <set>
#include <vector>

#include "base/basictypes.h"
#include "base/callback_forward.h"
#include "base/containers/hash_tables.h"
#include "base/time/time.h"
#include "chrome/browser/safe_browsing/prefix_set.h"
#include "chrome/browser/safe_browsing/safe_browsing_util.h"
     18 
     19 namespace base {
     20 class FilePath;
     21 }
     22 
     23 // SafeBrowsingStore provides a storage abstraction for the
     24 // safe-browsing data used to build the bloom filter.  The items
     25 // stored are:
     26 //   The set of add and sub chunks seen.
     27 //   List of SBAddPrefix (chunk_id and SBPrefix).
     28 //   List of SBSubPrefix (chunk_id and the target SBAddPrefix).
     29 //   List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash).
     30 //   List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash).
     31 //
     32 // The store is geared towards updating the data, not runtime access
     33 // to the data (that is handled by SafeBrowsingDatabase).  Updates are
     34 // handled similar to a SQL transaction cycle, with the new data being
     35 // returned from FinishUpdate() (the COMMIT).  Data is not persistent
     36 // until FinishUpdate() returns successfully.
     37 //
// FinishUpdate() also handles dropping items whose chunk has been
     39 // deleted, and netting out the add/sub lists (when a sub matches an
     40 // add, both are dropped).
     41 
     42 // GetAddChunkId(), GetAddPrefix() and GetFullHash() are exposed so
     43 // that these items can be generically compared with each other by
     44 // SBAddPrefixLess() and SBAddPrefixHashLess().
     45 
     46 struct SBAddPrefix {
     47   int32 chunk_id;
     48   SBPrefix prefix;
     49 
     50   SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {}
     51   SBAddPrefix() : chunk_id(), prefix() {}
     52 
     53   int32 GetAddChunkId() const { return chunk_id; }
     54   SBPrefix GetAddPrefix() const { return prefix; }
     55 };
     56 
     57 // TODO(shess): Measure the performance impact of switching this back to
     58 // std::vector<> once the v8 file format dominates.  Also SBSubPrefixes.
     59 typedef std::deque<SBAddPrefix> SBAddPrefixes;
     60 
     61 struct SBSubPrefix {
     62   int32 chunk_id;
     63   int32 add_chunk_id;
     64   SBPrefix add_prefix;
     65 
     66   SBSubPrefix(int32 id, int32 add_id, SBPrefix prefix)
     67       : chunk_id(id), add_chunk_id(add_id), add_prefix(prefix) {}
     68   SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {}
     69 
     70   int32 GetAddChunkId() const { return add_chunk_id; }
     71   SBPrefix GetAddPrefix() const { return add_prefix; }
     72 };
     73 
     74 typedef std::deque<SBSubPrefix> SBSubPrefixes;
     75 
     76 struct SBAddFullHash {
     77   int32 chunk_id;
     78   // Received field is not used anymore, but is kept for DB compatability.
     79   // TODO(shess): Deprecate and remove.
     80   int32 deprecated_received;
     81   SBFullHash full_hash;
     82 
     83   SBAddFullHash(int32 id, const SBFullHash& h)
     84       : chunk_id(id), deprecated_received(), full_hash(h) {}
     85 
     86   SBAddFullHash() : chunk_id(), deprecated_received(), full_hash() {}
     87 
     88   int32 GetAddChunkId() const { return chunk_id; }
     89   SBPrefix GetAddPrefix() const { return full_hash.prefix; }
     90 };
     91 
     92 struct SBSubFullHash {
     93   int32 chunk_id;
     94   int32 add_chunk_id;
     95   SBFullHash full_hash;
     96 
     97   SBSubFullHash(int32 id, int32 add_id, const SBFullHash& h)
     98       : chunk_id(id), add_chunk_id(add_id), full_hash(h) {}
     99   SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {}
    100 
    101   int32 GetAddChunkId() const { return add_chunk_id; }
    102   SBPrefix GetAddPrefix() const { return full_hash.prefix; }
    103 };
    104 
    105 // Determine less-than based on prefix and add chunk.
    106 template <class T, class U>
    107 bool SBAddPrefixLess(const T& a, const U& b) {
    108   if (a.GetAddPrefix() != b.GetAddPrefix())
    109     return a.GetAddPrefix() < b.GetAddPrefix();
    110 
    111   return a.GetAddChunkId() < b.GetAddChunkId();
    112 }
    113 
    114 // Determine less-than based on prefix, add chunk, and full hash.
    115 // Prefix can compare differently than hash due to byte ordering,
    116 // so it must take precedence.
    117 template <class T, class U>
    118 bool SBAddPrefixHashLess(const T& a, const U& b) {
    119   if (SBAddPrefixLess(a, b))
    120     return true;
    121 
    122   if (SBAddPrefixLess(b, a))
    123     return false;
    124 
    125   return memcmp(a.full_hash.full_hash, b.full_hash.full_hash,
    126                 sizeof(a.full_hash.full_hash)) < 0;
    127 }
    128 
// Process the lists for subs which knock out adds.  For any item in
// |sub_prefixes| which has a match in |add_prefixes|, knock out the
// matched items from all four containers (the prefix lists are deques,
// the full-hash lists are vectors).  Additionally remove items from
// deleted chunks, as named by |add_chunks_deleted| and
// |sub_chunks_deleted|.
//
// The four containers are passed by non-const pointer and are modified
// in place; the deleted-chunk sets are read-only.
//
// The inputs must be sorted by SBAddPrefixLess or SBAddPrefixHashLess.
// NOTE(review): presumably the prefix lists use SBAddPrefixLess and the
// full-hash lists use SBAddPrefixHashLess -- confirm against the
// implementation.
void SBProcessSubs(SBAddPrefixes* add_prefixes,
                   SBSubPrefixes* sub_prefixes,
                   std::vector<SBAddFullHash>* add_full_hashes,
                   std::vector<SBSubFullHash>* sub_full_hashes,
                   const base::hash_set<int32>& add_chunks_deleted,
                   const base::hash_set<int32>& sub_chunks_deleted);
    141 
// Abstract interface for storing data.  Every operation is pure
// virtual; concrete stores supply the actual persistence.
class SafeBrowsingStore {
 public:
  SafeBrowsingStore() {}
  virtual ~SafeBrowsingStore() {}

  // Sets up the information for later use, but does not necessarily
  // check whether the underlying file exists, or is valid.  If
  // |corruption_callback| is non-NULL it will be called if corruption
  // is detected, which could happen as part of any call other than
  // Delete().  The appropriate action is to use Delete() to clear the
  // store.
  virtual void Init(const base::FilePath& filename,
                    const base::Closure& corruption_callback) = 0;

  // Deletes the files which back the store, returning true if
  // successful.
  virtual bool Delete() = 0;

  // Get all Add prefixes out from the store.
  // NOTE(review): the return value presumably indicates success --
  // confirm against implementations.
  virtual bool GetAddPrefixes(SBAddPrefixes* add_prefixes) = 0;

  // Get all add full-length hashes.
  // NOTE(review): the return value presumably indicates success --
  // confirm against implementations.
  virtual bool GetAddFullHashes(
      std::vector<SBAddFullHash>* add_full_hashes) = 0;

  // Start an update.  None of the following methods should be called
  // unless this returns true.  If this returns true, the update
  // should be terminated by FinishUpdate() or CancelUpdate().
  virtual bool BeginUpdate() = 0;

  // Start a chunk of data.  None of the methods through FinishChunk()
  // should be called unless this returns true.
  // TODO(shess): Would it make sense for this to accept |chunk_id|?
  // Possibly not, because of possible confusion between sub_chunk_id
  // and add_chunk_id.
  virtual bool BeginChunk() = 0;

  // Write one item of the named kind.  Per the BeginChunk() contract
  // above, these should only be called after BeginChunk() has returned
  // true.  For the sub variants, |chunk_id| is the sub item's own chunk
  // and |add_chunk_id| is the chunk of the add being targeted.
  // NOTE(review): the bool results presumably report success -- confirm
  // against implementations.
  virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0;
  virtual bool WriteAddHash(int32 chunk_id,
                            const SBFullHash& full_hash) = 0;
  virtual bool WriteSubPrefix(int32 chunk_id,
                              int32 add_chunk_id, SBPrefix prefix) = 0;
  virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id,
                            const SBFullHash& full_hash) = 0;

  // Collect the chunk data and preferably store it on disk to
  // release memory.  Should not modify the data in-place.
  virtual bool FinishChunk() = 0;

  // Track the chunks which have been seen.
  // NOTE(review): presumably Set*() records |chunk_id| as seen,
  // Check*() reports whether it has been seen, and Get*() reports the
  // seen ids through |out| -- confirm against implementations.
  virtual void SetAddChunk(int32 chunk_id) = 0;
  virtual bool CheckAddChunk(int32 chunk_id) = 0;
  virtual void GetAddChunks(std::vector<int32>* out) = 0;
  virtual void SetSubChunk(int32 chunk_id) = 0;
  virtual bool CheckSubChunk(int32 chunk_id) = 0;
  virtual void GetSubChunks(std::vector<int32>* out) = 0;

  // Delete the indicated chunk_id.  The chunk will continue to be
  // visible until the end of the transaction.
  virtual void DeleteAddChunk(int32 chunk_id) = 0;
  virtual void DeleteSubChunk(int32 chunk_id) = 0;

  // May be called during update to verify that the storage is valid.
  // Return true if the store seems valid.  If corruption is detected,
  // calls the corruption callback and return false.
  // NOTE(shess): When storage was SQLite, there was no guarantee that
  // a structurally sound database actually contained valid data,
  // whereas SafeBrowsingStoreFile checksums the data.  For now, this
  // distinction doesn't matter.
  virtual bool CheckValidity() = 0;

  // Pass the collected chunks through SBProcessSubs() and commit to
  // permanent storage.  The resulting add full hashes will be stored
  // in |add_full_hashes_result|.
  // NOTE(review): this comment used to name an |add_prefixes_result|
  // parameter which does not exist in the signature; presumably the
  // resulting add prefixes are now delivered through |builder| --
  // confirm against implementations.
  virtual bool FinishUpdate(
      safe_browsing::PrefixSetBuilder* builder,
      std::vector<SBAddFullHash>* add_full_hashes_result) = 0;

  // Cancel the update in process and remove any temporary disk
  // storage, leaving the original data unmodified.
  virtual bool CancelUpdate() = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore);
};
    228 
    229 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
    230