1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Utilities for the SafeBrowsing code. 6 7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9 10 #include <cstring> 11 #include <deque> 12 #include <set> 13 #include <string> 14 #include <vector> 15 16 #include "base/basictypes.h" 17 #include "chrome/browser/safe_browsing/chunk_range.h" 18 19 class GURL; 20 21 class SBEntry; 22 23 // A truncated hash's type. 24 typedef int32 SBPrefix; 25 26 // Container for holding a chunk URL and the list it belongs to. 27 struct ChunkUrl { 28 std::string url; 29 std::string list_name; 30 }; 31 32 // A full hash. 33 union SBFullHash { 34 char full_hash[32]; 35 SBPrefix prefix; 36 }; 37 38 inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) { 39 return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0; 40 } 41 42 inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) { 43 return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0; 44 } 45 46 // Container for information about a specific host in an add/sub chunk. 47 struct SBChunkHost { 48 SBPrefix host; 49 SBEntry* entry; 50 }; 51 52 // Container for an add/sub chunk. 53 struct SBChunk { 54 SBChunk(); 55 ~SBChunk(); 56 57 int chunk_number; 58 int list_id; 59 bool is_add; 60 std::deque<SBChunkHost> hosts; 61 }; 62 63 // Container for a set of chunks. Interim wrapper to replace use of 64 // |std::deque<SBChunk>| with something having safer memory semantics. 65 // management. 66 // TODO(shess): |SBEntry| is currently a very roundabout way to hold 67 // things pending storage. It could be replaced with the structures 68 // used in SafeBrowsingStore, then lots of bridging code could 69 // dissappear. 70 class SBChunkList { 71 public: 72 SBChunkList(); 73 ~SBChunkList(); 74 75 // Implement that subset of the |std::deque<>| interface which 76 // callers expect. 77 bool empty() const { return chunks_.empty(); } 78 size_t size() { return chunks_.size(); } 79 80 void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); } 81 SBChunk& back() { return chunks_.back(); } 82 SBChunk& front() { return chunks_.front(); } 83 const SBChunk& front() const { return chunks_.front(); } 84 85 typedef std::vector<SBChunk>::const_iterator const_iterator; 86 const_iterator begin() const { return chunks_.begin(); } 87 const_iterator end() const { return chunks_.end(); } 88 89 typedef std::vector<SBChunk>::iterator iterator; 90 iterator begin() { return chunks_.begin(); } 91 iterator end() { return chunks_.end(); } 92 93 SBChunk& operator[](size_t n) { return chunks_[n]; } 94 const SBChunk& operator[](size_t n) const { return chunks_[n]; } 95 96 // Calls |SBEvent::Destroy()| before clearing |chunks_|. 97 void clear(); 98 99 private: 100 std::vector<SBChunk> chunks_; 101 102 DISALLOW_COPY_AND_ASSIGN(SBChunkList); 103 }; 104 105 // Used when we get a gethash response. 106 struct SBFullHashResult { 107 SBFullHash hash; 108 std::string list_name; 109 int add_chunk_id; 110 }; 111 112 // Contains information about a list in the database. 113 struct SBListChunkRanges { 114 explicit SBListChunkRanges(const std::string& n); 115 116 std::string name; // The list name. 117 std::string adds; // The ranges for add chunks. 118 std::string subs; // The ranges for sub chunks. 119 }; 120 121 // Container for deleting chunks from the database. 122 struct SBChunkDelete { 123 SBChunkDelete(); 124 ~SBChunkDelete(); 125 126 std::string list_name; 127 bool is_sub_del; 128 std::vector<ChunkRange> chunk_del; 129 }; 130 131 // Different types of threats that SafeBrowsing protects against. 132 enum SBThreatType { 133 // No threat at all. 134 SB_THREAT_TYPE_SAFE, 135 136 // The URL is being used for phishing. 137 SB_THREAT_TYPE_URL_PHISHING, 138 139 // The URL hosts malware. 140 SB_THREAT_TYPE_URL_MALWARE, 141 142 // The download URL is malware. 143 SB_THREAT_TYPE_BINARY_MALWARE_URL, 144 145 // The hash of the download contents is malware. 146 SB_THREAT_TYPE_BINARY_MALWARE_HASH, 147 148 // Url detected by the client-side phishing model. Note that unlike the 149 // above values, this does not correspond to a downloaded list. 150 SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL, 151 152 // The Chrome extension or app (given by its ID) is malware. 153 SB_THREAT_TYPE_EXTENSION, 154 }; 155 156 // SBEntry --------------------------------------------------------------------- 157 158 // Holds information about the prefixes for a hostkey. prefixes can either be 159 // 4 bytes (truncated hash) or 32 bytes (full hash). 160 // For adds: 161 // [list id ][chunk id][prefix count (0..n)][prefix1][prefix2] 162 // For subs: 163 // [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)] 164 // [add chunk][prefix][add chunk][prefix] 165 class SBEntry { 166 public: 167 enum Type { 168 ADD_PREFIX, // 4 byte add entry. 169 SUB_PREFIX, // 4 byte sub entry. 170 ADD_FULL_HASH, // 32 byte add entry. 171 SUB_FULL_HASH, // 32 byte sub entry. 172 }; 173 174 // Creates a SBEntry with the necessary size for the given number of prefixes. 175 // Caller ownes the object and needs to free it by calling Destroy. 176 static SBEntry* Create(Type type, int prefix_count); 177 178 // Frees the entry's memory. 179 void Destroy(); 180 181 void set_list_id(int list_id) { data_.list_id = list_id; } 182 int list_id() const { return data_.list_id; } 183 void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; } 184 int chunk_id() const { return data_.chunk_id; } 185 int prefix_count() const { return data_.prefix_count; } 186 187 // Returns true if this is a prefix as opposed to a full hash. 188 bool IsPrefix() const { 189 return type() == ADD_PREFIX || type() == SUB_PREFIX; 190 } 191 192 // Returns true if this is an add entry. 193 bool IsAdd() const { 194 return type() == ADD_PREFIX || type() == ADD_FULL_HASH; 195 } 196 197 // Returns true if this is a sub entry. 198 bool IsSub() const { 199 return type() == SUB_PREFIX || type() == SUB_FULL_HASH; 200 } 201 202 // Helper to return the size of the prefixes. 203 int HashLen() const { 204 return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash); 205 } 206 207 // For add entries, returns the add chunk id. For sub entries, returns the 208 // add_chunk id for the prefix at the given index. 209 int ChunkIdAtPrefix(int index) const; 210 211 // Used for sub chunks to set the chunk id at a given index. 212 void SetChunkIdAtPrefix(int index, int chunk_id); 213 214 // Return the prefix/full hash at the given index. Caller is expected to 215 // call the right function based on the hash length. 216 const SBPrefix& PrefixAt(int index) const; 217 const SBFullHash& FullHashAt(int index) const; 218 219 // Return the prefix/full hash at the given index. Caller is expected to 220 // call the right function based on the hash length. 221 void SetPrefixAt(int index, const SBPrefix& prefix); 222 void SetFullHashAt(int index, const SBFullHash& full_hash); 223 224 private: 225 // Container for a sub prefix. 226 struct SBSubPrefix { 227 int add_chunk; 228 SBPrefix prefix; 229 }; 230 231 // Container for a sub full hash. 232 struct SBSubFullHash { 233 int add_chunk; 234 SBFullHash prefix; 235 }; 236 237 // Keep the fixed data together in one struct so that we can get its size 238 // easily. If any of this is modified, the database will have to be cleared. 239 struct Data { 240 int list_id; 241 // For adds, this is the add chunk number. 242 // For subs: if prefix_count is 0 then this is the add chunk that this sub 243 // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes 244 // or sub_full_hashes is used for each corresponding prefix. 245 int chunk_id; 246 Type type; 247 int prefix_count; 248 }; 249 250 SBEntry(); 251 ~SBEntry(); 252 253 // Helper to return the size of each prefix entry (i.e. for subs this 254 // includes an add chunk id). 255 static int PrefixSize(Type type); 256 257 // Helper to return how much memory a given Entry would require. 258 static int Size(Type type, int prefix_count); 259 260 // Returns how many bytes this entry is. 261 int Size() const; 262 263 Type type() const { return data_.type; } 264 265 void set_prefix_count(int count) { data_.prefix_count = count; } 266 void set_type(Type type) { data_.type = type; } 267 268 // The prefixes union must follow the fixed data so that they're contiguous 269 // in memory. 270 Data data_; 271 union { 272 SBPrefix add_prefixes_[1]; 273 SBSubPrefix sub_prefixes_[1]; 274 SBFullHash add_full_hashes_[1]; 275 SBSubFullHash sub_full_hashes_[1]; 276 }; 277 }; 278 279 280 // Utility functions ----------------------------------------------------------- 281 282 namespace safe_browsing_util { 283 284 // SafeBrowsing list names. 285 extern const char kMalwareList[]; 286 extern const char kPhishingList[]; 287 // Binary Download list names. 288 extern const char kBinUrlList[]; 289 extern const char kBinHashList[]; 290 // SafeBrowsing client-side detection whitelist list name. 291 extern const char kCsdWhiteList[]; 292 // SafeBrowsing download whitelist list name. 293 extern const char kDownloadWhiteList[]; 294 // SafeBrowsing extension list name. 295 extern const char kExtensionBlacklist[]; 296 // SafeBrowsing side-effect free whitelist name. 297 extern const char kSideEffectFreeWhitelist[]; 298 299 enum ListType { 300 INVALID = -1, 301 MALWARE = 0, 302 PHISH = 1, 303 BINURL = 2, 304 BINHASH = 3, 305 CSDWHITELIST = 4, 306 // SafeBrowsing lists are stored in pairs. Keep ListType 5 307 // available for a potential second list that we would store in the 308 // csd-whitelist store file. 309 DOWNLOADWHITELIST = 6, 310 // See above comment. Leave 7 available. 311 EXTENSIONBLACKLIST = 8, 312 // See above comment. Leave 9 available. 313 SIDEEFFECTFREEWHITELIST = 10, 314 // See above comment. Leave 11 available. 315 }; 316 317 // Maps a list name to ListType. 318 ListType GetListId(const std::string& name); 319 320 // Maps a ListId to list name. Return false if fails. 321 bool GetListName(ListType list_id, std::string* list); 322 323 // Canonicalizes url as per Google Safe Browsing Specification. 324 // See section 6.1 in 325 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 326 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 327 std::string* canonicalized_path, 328 std::string* canonicalized_query); 329 330 // Given a URL, returns all the hosts we need to check. They are returned 331 // in order of size (i.e. b.c is first, then a.b.c). 332 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 333 334 // Given a URL, returns all the paths we need to check. 335 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 336 337 // Given a URL, returns all the patterns we need to check. 338 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls); 339 340 int GetHashIndex(const SBFullHash& hash, 341 const std::vector<SBFullHashResult>& full_hashes); 342 343 // Given a URL, compare all the possible host + path full hashes to the set of 344 // provided full hashes. Returns the index of the match if one is found, or -1 345 // otherwise. 346 int GetUrlHashIndex(const GURL& url, 347 const std::vector<SBFullHashResult>& full_hashes); 348 349 bool IsPhishingList(const std::string& list_name); 350 bool IsMalwareList(const std::string& list_name); 351 bool IsBadbinurlList(const std::string& list_name); 352 bool IsBadbinhashList(const std::string& list_name); 353 bool IsExtensionList(const std::string& list_name); 354 355 GURL GeneratePhishingReportUrl(const std::string& report_page, 356 const std::string& url_to_report, 357 bool is_client_side_detection); 358 359 SBFullHash StringToSBFullHash(const std::string& hash_in); 360 std::string SBFullHashToString(const SBFullHash& hash_out); 361 362 } // namespace safe_browsing_util 363 364 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 365