1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ 7 8 #include <set> 9 #include <vector> 10 11 #include "base/basictypes.h" 12 #include "base/callback_forward.h" 13 #include "base/containers/hash_tables.h" 14 #include "base/time/time.h" 15 #include "chrome/browser/safe_browsing/safe_browsing_util.h" 16 17 namespace base { 18 class FilePath; 19 } 20 21 // SafeBrowsingStore provides a storage abstraction for the 22 // safe-browsing data used to build the bloom filter. The items 23 // stored are: 24 // The set of add and sub chunks seen. 25 // List of SBAddPrefix (chunk_id and SBPrefix). 26 // List of SBSubPrefix (chunk_id and the target SBAddPrefix). 27 // List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash). 28 // List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash). 29 // 30 // The store is geared towards updating the data, not runtime access 31 // to the data (that is handled by SafeBrowsingDatabase). Updates are 32 // handled similar to a SQL transaction cycle, with the new data being 33 // returned from FinishUpdate() (the COMMIT). Data is not persistent 34 // until FinishUpdate() returns successfully. 35 // 36 // FinishUpdate() also handles dropping items who's chunk has been 37 // deleted, and netting out the add/sub lists (when a sub matches an 38 // add, both are dropped). 39 40 // GetAddChunkId(), GetAddPrefix() and GetFullHash() are exposed so 41 // that these items can be generically compared with each other by 42 // SBAddPrefixLess() and SBAddPrefixHashLess(). 43 44 struct SBAddPrefix { 45 int32 chunk_id; 46 SBPrefix prefix; 47 48 SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {} 49 SBAddPrefix() : chunk_id(), prefix() {} 50 51 int32 GetAddChunkId() const { return chunk_id; } 52 SBPrefix GetAddPrefix() const { return prefix; } 53 }; 54 55 typedef std::deque<SBAddPrefix> SBAddPrefixes; 56 57 struct SBSubPrefix { 58 int32 chunk_id; 59 int32 add_chunk_id; 60 SBPrefix add_prefix; 61 62 SBSubPrefix(int32 id, int32 add_id, int prefix) 63 : chunk_id(id), add_chunk_id(add_id), add_prefix(prefix) {} 64 SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {} 65 66 int32 GetAddChunkId() const { return add_chunk_id; } 67 SBPrefix GetAddPrefix() const { return add_prefix; } 68 }; 69 70 typedef std::deque<SBSubPrefix> SBSubPrefixes; 71 72 struct SBAddFullHash { 73 int32 chunk_id; 74 int32 received; 75 SBFullHash full_hash; 76 77 SBAddFullHash(int32 id, base::Time r, const SBFullHash& h) 78 : chunk_id(id), 79 received(static_cast<int32>(r.ToTimeT())), 80 full_hash(h) { 81 } 82 83 // Provided for ReadAddHashes() implementations, which already have 84 // an int32 for the time. 85 SBAddFullHash(int32 id, int32 r, const SBFullHash& h) 86 : chunk_id(id), received(r), full_hash(h) {} 87 88 SBAddFullHash() : chunk_id(), received(), full_hash() {} 89 90 int32 GetAddChunkId() const { return chunk_id; } 91 SBPrefix GetAddPrefix() const { return full_hash.prefix; } 92 }; 93 94 struct SBSubFullHash { 95 int32 chunk_id; 96 int32 add_chunk_id; 97 SBFullHash full_hash; 98 99 SBSubFullHash(int32 id, int32 add_id, const SBFullHash& h) 100 : chunk_id(id), add_chunk_id(add_id), full_hash(h) {} 101 SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {} 102 103 int32 GetAddChunkId() const { return add_chunk_id; } 104 SBPrefix GetAddPrefix() const { return full_hash.prefix; } 105 }; 106 107 // Determine less-than based on add chunk and prefix. 108 template <class T, class U> 109 bool SBAddPrefixLess(const T& a, const U& b) { 110 if (a.GetAddChunkId() != b.GetAddChunkId()) 111 return a.GetAddChunkId() < b.GetAddChunkId(); 112 113 return a.GetAddPrefix() < b.GetAddPrefix(); 114 } 115 116 // Determine less-than based on add chunk, prefix, and full hash. 117 // Prefix can compare differently than hash due to byte ordering, 118 // so it must take precedence. 119 template <class T, class U> 120 bool SBAddPrefixHashLess(const T& a, const U& b) { 121 if (SBAddPrefixLess(a, b)) 122 return true; 123 124 if (SBAddPrefixLess(b, a)) 125 return false; 126 127 return memcmp(a.full_hash.full_hash, b.full_hash.full_hash, 128 sizeof(a.full_hash.full_hash)) < 0; 129 } 130 131 // Process the lists for subs which knock out adds. For any item in 132 // |sub_prefixes| which has a match in |add_prefixes|, knock out the 133 // matched items from all vectors. Additionally remove items from 134 // deleted chunks. 135 // 136 // TODO(shess): Since the prefixes are uniformly-distributed hashes, 137 // there aren't many ways to organize the inputs for efficient 138 // processing. For this reason, the vectors are sorted and processed 139 // in parallel. At this time this code does the sorting internally, 140 // but it might make sense to make sorting an API requirement so that 141 // the storage can optimize for it. 142 void SBProcessSubs(SBAddPrefixes* add_prefixes, 143 SBSubPrefixes* sub_prefixes, 144 std::vector<SBAddFullHash>* add_full_hashes, 145 std::vector<SBSubFullHash>* sub_full_hashes, 146 const base::hash_set<int32>& add_chunks_deleted, 147 const base::hash_set<int32>& sub_chunks_deleted); 148 149 // TODO(shess): This uses int32 rather than int because it's writing 150 // specifically-sized items to files. SBPrefix should likewise be 151 // explicitly sized. 152 153 // Abstract interface for storing data. 154 class SafeBrowsingStore { 155 public: 156 SafeBrowsingStore() {} 157 virtual ~SafeBrowsingStore() {} 158 159 // Sets up the information for later use, but does not necessarily 160 // check whether the underlying file exists, or is valid. If 161 // |curruption_callback| is non-NULL it will be called if corruption 162 // is detected, which could happen as part of any call other than 163 // Delete(). The appropriate action is to use Delete() to clear the 164 // store. 165 virtual void Init(const base::FilePath& filename, 166 const base::Closure& corruption_callback) = 0; 167 168 // Deletes the files which back the store, returning true if 169 // successful. 170 virtual bool Delete() = 0; 171 172 // Get all Add prefixes out from the store. 173 virtual bool GetAddPrefixes(SBAddPrefixes* add_prefixes) = 0; 174 175 // Get all add full-length hashes. 176 virtual bool GetAddFullHashes( 177 std::vector<SBAddFullHash>* add_full_hashes) = 0; 178 179 // Start an update. None of the following methods should be called 180 // unless this returns true. If this returns true, the update 181 // should be terminated by FinishUpdate() or CancelUpdate(). 182 virtual bool BeginUpdate() = 0; 183 184 // Start a chunk of data. None of the methods through FinishChunk() 185 // should be called unless this returns true. 186 // TODO(shess): Would it make sense for this to accept |chunk_id|? 187 // Possibly not, because of possible confusion between sub_chunk_id 188 // and add_chunk_id. 189 virtual bool BeginChunk() = 0; 190 191 virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0; 192 virtual bool WriteAddHash(int32 chunk_id, 193 base::Time receive_time, 194 const SBFullHash& full_hash) = 0; 195 virtual bool WriteSubPrefix(int32 chunk_id, 196 int32 add_chunk_id, SBPrefix prefix) = 0; 197 virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id, 198 const SBFullHash& full_hash) = 0; 199 200 // Collect the chunk data and preferrably store it on disk to 201 // release memory. Shoul not modify the data in-place. 202 virtual bool FinishChunk() = 0; 203 204 // Track the chunks which have been seen. 205 virtual void SetAddChunk(int32 chunk_id) = 0; 206 virtual bool CheckAddChunk(int32 chunk_id) = 0; 207 virtual void GetAddChunks(std::vector<int32>* out) = 0; 208 virtual void SetSubChunk(int32 chunk_id) = 0; 209 virtual bool CheckSubChunk(int32 chunk_id) = 0; 210 virtual void GetSubChunks(std::vector<int32>* out) = 0; 211 212 // Delete the indicated chunk_id. The chunk will continue to be 213 // visible until the end of the transaction. 214 virtual void DeleteAddChunk(int32 chunk_id) = 0; 215 virtual void DeleteSubChunk(int32 chunk_id) = 0; 216 217 // May be called during update to verify that the storage is valid. 218 // Return true if the store seems valid. If corruption is detected, 219 // calls the corruption callback and return false. 220 // NOTE(shess): When storage was SQLite, there was no guarantee that 221 // a structurally sound database actually contained valid data, 222 // whereas SafeBrowsingStoreFile checksums the data. For now, this 223 // distinction doesn't matter. 224 virtual bool CheckValidity() = 0; 225 226 // Pass the collected chunks through SBPRocessSubs() and commit to 227 // permanent storage. The resulting add prefixes and hashes will be 228 // stored in |add_prefixes_result| and |add_full_hashes_result|. 229 // |pending_adds| is the set of full hashes which have been received 230 // since the previous update, and is provided as a convenience 231 // (could be written via WriteAddHash(), but that would flush the 232 // chunk to disk). 233 virtual bool FinishUpdate( 234 const std::vector<SBAddFullHash>& pending_adds, 235 SBAddPrefixes* add_prefixes_result, 236 std::vector<SBAddFullHash>* add_full_hashes_result) = 0; 237 238 // Cancel the update in process and remove any temporary disk 239 // storage, leaving the original data unmodified. 240 virtual bool CancelUpdate() = 0; 241 242 private: 243 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore); 244 }; 245 246 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_ 247