// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_

#include <string.h>

#include <deque>
#include <set>
#include <vector>

#include "base/basictypes.h"
#include "base/callback_forward.h"
#include "base/containers/hash_tables.h"
#include "base/time/time.h"
#include "chrome/browser/safe_browsing/prefix_set.h"
#include "chrome/browser/safe_browsing/safe_browsing_util.h"

namespace base {
class FilePath;
}

// SafeBrowsingStore provides a storage abstraction for the
// safe-browsing data used to build the bloom filter. The items
// stored are:
//   The set of add and sub chunks seen.
//   List of SBAddPrefix (chunk_id and SBPrefix).
//   List of SBSubPrefix (chunk_id and the target SBAddPrefix).
//   List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash).
//   List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash).
//
// The store is geared towards updating the data, not runtime access
// to the data (that is handled by SafeBrowsingDatabase). Updates are
// handled similar to a SQL transaction cycle, with the new data being
// returned from FinishUpdate() (the COMMIT). Data is not persistent
// until FinishUpdate() returns successfully.
//
// FinishUpdate() also handles dropping items whose chunk has been
// deleted, and netting out the add/sub lists (when a sub matches an
// add, both are dropped).

// GetAddChunkId(), GetAddPrefix() and GetFullHash() are exposed so
// that these items can be generically compared with each other by
// SBAddPrefixLess() and SBAddPrefixHashLess().
45 46 struct SBAddPrefix { 47 int32 chunk_id; 48 SBPrefix prefix; 49 50 SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {} 51 SBAddPrefix() : chunk_id(), prefix() {} 52 53 int32 GetAddChunkId() const { return chunk_id; } 54 SBPrefix GetAddPrefix() const { return prefix; } 55 }; 56 57 // TODO(shess): Measure the performance impact of switching this back to 58 // std::vector<> once the v8 file format dominates. Also SBSubPrefixes. 59 typedef std::deque<SBAddPrefix> SBAddPrefixes; 60 61 struct SBSubPrefix { 62 int32 chunk_id; 63 int32 add_chunk_id; 64 SBPrefix add_prefix; 65 66 SBSubPrefix(int32 id, int32 add_id, SBPrefix prefix) 67 : chunk_id(id), add_chunk_id(add_id), add_prefix(prefix) {} 68 SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {} 69 70 int32 GetAddChunkId() const { return add_chunk_id; } 71 SBPrefix GetAddPrefix() const { return add_prefix; } 72 }; 73 74 typedef std::deque<SBSubPrefix> SBSubPrefixes; 75 76 struct SBAddFullHash { 77 int32 chunk_id; 78 // Received field is not used anymore, but is kept for DB compatability. 79 // TODO(shess): Deprecate and remove. 80 int32 deprecated_received; 81 SBFullHash full_hash; 82 83 SBAddFullHash(int32 id, const SBFullHash& h) 84 : chunk_id(id), deprecated_received(), full_hash(h) {} 85 86 SBAddFullHash() : chunk_id(), deprecated_received(), full_hash() {} 87 88 int32 GetAddChunkId() const { return chunk_id; } 89 SBPrefix GetAddPrefix() const { return full_hash.prefix; } 90 }; 91 92 struct SBSubFullHash { 93 int32 chunk_id; 94 int32 add_chunk_id; 95 SBFullHash full_hash; 96 97 SBSubFullHash(int32 id, int32 add_id, const SBFullHash& h) 98 : chunk_id(id), add_chunk_id(add_id), full_hash(h) {} 99 SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {} 100 101 int32 GetAddChunkId() const { return add_chunk_id; } 102 SBPrefix GetAddPrefix() const { return full_hash.prefix; } 103 }; 104 105 // Determine less-than based on prefix and add chunk. 
106 template <class T, class U> 107 bool SBAddPrefixLess(const T& a, const U& b) { 108 if (a.GetAddPrefix() != b.GetAddPrefix()) 109 return a.GetAddPrefix() < b.GetAddPrefix(); 110 111 return a.GetAddChunkId() < b.GetAddChunkId(); 112 } 113 114 // Determine less-than based on prefix, add chunk, and full hash. 115 // Prefix can compare differently than hash due to byte ordering, 116 // so it must take precedence. 117 template <class T, class U> 118 bool SBAddPrefixHashLess(const T& a, const U& b) { 119 if (SBAddPrefixLess(a, b)) 120 return true; 121 122 if (SBAddPrefixLess(b, a)) 123 return false; 124 125 return memcmp(a.full_hash.full_hash, b.full_hash.full_hash, 126 sizeof(a.full_hash.full_hash)) < 0; 127 } 128 129 // Process the lists for subs which knock out adds. For any item in 130 // |sub_prefixes| which has a match in |add_prefixes|, knock out the 131 // matched items from all vectors. Additionally remove items from 132 // deleted chunks. 133 // 134 // The inputs must be sorted by SBAddPrefixLess or SBAddPrefixHashLess. 135 void SBProcessSubs(SBAddPrefixes* add_prefixes, 136 SBSubPrefixes* sub_prefixes, 137 std::vector<SBAddFullHash>* add_full_hashes, 138 std::vector<SBSubFullHash>* sub_full_hashes, 139 const base::hash_set<int32>& add_chunks_deleted, 140 const base::hash_set<int32>& sub_chunks_deleted); 141 142 // Abstract interface for storing data. 143 class SafeBrowsingStore { 144 public: 145 SafeBrowsingStore() {} 146 virtual ~SafeBrowsingStore() {} 147 148 // Sets up the information for later use, but does not necessarily 149 // check whether the underlying file exists, or is valid. If 150 // |curruption_callback| is non-NULL it will be called if corruption 151 // is detected, which could happen as part of any call other than 152 // Delete(). The appropriate action is to use Delete() to clear the 153 // store. 
154 virtual void Init(const base::FilePath& filename, 155 const base::Closure& corruption_callback) = 0; 156 157 // Deletes the files which back the store, returning true if 158 // successful. 159 virtual bool Delete() = 0; 160 161 // Get all Add prefixes out from the store. 162 virtual bool GetAddPrefixes(SBAddPrefixes* add_prefixes) = 0; 163 164 // Get all add full-length hashes. 165 virtual bool GetAddFullHashes( 166 std::vector<SBAddFullHash>* add_full_hashes) = 0; 167 168 // Start an update. None of the following methods should be called 169 // unless this returns true. If this returns true, the update 170 // should be terminated by FinishUpdate() or CancelUpdate(). 171 virtual bool BeginUpdate() = 0; 172 173 // Start a chunk of data. None of the methods through FinishChunk() 174 // should be called unless this returns true. 175 // TODO(shess): Would it make sense for this to accept |chunk_id|? 176 // Possibly not, because of possible confusion between sub_chunk_id 177 // and add_chunk_id. 178 virtual bool BeginChunk() = 0; 179 180 virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0; 181 virtual bool WriteAddHash(int32 chunk_id, 182 const SBFullHash& full_hash) = 0; 183 virtual bool WriteSubPrefix(int32 chunk_id, 184 int32 add_chunk_id, SBPrefix prefix) = 0; 185 virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id, 186 const SBFullHash& full_hash) = 0; 187 188 // Collect the chunk data and preferrably store it on disk to 189 // release memory. Shoul not modify the data in-place. 190 virtual bool FinishChunk() = 0; 191 192 // Track the chunks which have been seen. 193 virtual void SetAddChunk(int32 chunk_id) = 0; 194 virtual bool CheckAddChunk(int32 chunk_id) = 0; 195 virtual void GetAddChunks(std::vector<int32>* out) = 0; 196 virtual void SetSubChunk(int32 chunk_id) = 0; 197 virtual bool CheckSubChunk(int32 chunk_id) = 0; 198 virtual void GetSubChunks(std::vector<int32>* out) = 0; 199 200 // Delete the indicated chunk_id. 
The chunk will continue to be 201 // visible until the end of the transaction. 202 virtual void DeleteAddChunk(int32 chunk_id) = 0; 203 virtual void DeleteSubChunk(int32 chunk_id) = 0; 204 205 // May be called during update to verify that the storage is valid. 206 // Return true if the store seems valid. If corruption is detected, 207 // calls the corruption callback and return false. 208 // NOTE(shess): When storage was SQLite, there was no guarantee that 209 // a structurally sound database actually contained valid data, 210 // whereas SafeBrowsingStoreFile checksums the data. For now, this 211 // distinction doesn't matter. 212 virtual bool CheckValidity() = 0; 213 214 // Pass the collected chunks through SBPRocessSubs() and commit to 215 // permanent storage. The resulting add prefixes and hashes will be 216 // stored in |add_prefixes_result| and |add_full_hashes_result|. 217 virtual bool FinishUpdate( 218 safe_browsing::PrefixSetBuilder* builder, 219 std::vector<SBAddFullHash>* add_full_hashes_result) = 0; 220 221 // Cancel the update in process and remove any temporary disk 222 // storage, leaving the original data unmodified. 223 virtual bool CancelUpdate() = 0; 224 225 private: 226 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore); 227 }; 228 229 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ 230