1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Utilities for the SafeBrowsing code. 6 7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9 10 #include <cstring> 11 #include <deque> 12 #include <set> 13 #include <string> 14 #include <vector> 15 16 #include "base/basictypes.h" 17 #include "chrome/browser/safe_browsing/chunk_range.h" 18 19 class GURL; 20 21 class SBEntry; 22 23 // A truncated hash's type. 24 typedef int32 SBPrefix; 25 26 // Container for holding a chunk URL and the list it belongs to. 27 struct ChunkUrl { 28 std::string url; 29 std::string list_name; 30 }; 31 32 // A full hash. 33 union SBFullHash { 34 char full_hash[32]; 35 SBPrefix prefix; 36 }; 37 38 inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) { 39 return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0; 40 } 41 42 inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) { 43 return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0; 44 } 45 46 // Container for information about a specific host in an add/sub chunk. 47 struct SBChunkHost { 48 SBPrefix host; 49 SBEntry* entry; 50 }; 51 52 // Container for an add/sub chunk. 53 struct SBChunk { 54 SBChunk(); 55 ~SBChunk(); 56 57 int chunk_number; 58 int list_id; 59 bool is_add; 60 std::deque<SBChunkHost> hosts; 61 }; 62 63 // Container for a set of chunks. Interim wrapper to replace use of 64 // |std::deque<SBChunk>| with something having safer memory semantics. 65 // management. 66 // TODO(shess): |SBEntry| is currently a very roundabout way to hold 67 // things pending storage. It could be replaced with the structures 68 // used in SafeBrowsingStore, then lots of bridging code could 69 // dissappear. 70 class SBChunkList { 71 public: 72 SBChunkList(); 73 ~SBChunkList(); 74 75 // Implement that subset of the |std::deque<>| interface which 76 // callers expect. 77 bool empty() const { return chunks_.empty(); } 78 size_t size() { return chunks_.size(); } 79 80 void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); } 81 SBChunk& back() { return chunks_.back(); } 82 SBChunk& front() { return chunks_.front(); } 83 const SBChunk& front() const { return chunks_.front(); } 84 85 typedef std::vector<SBChunk>::const_iterator const_iterator; 86 const_iterator begin() const { return chunks_.begin(); } 87 const_iterator end() const { return chunks_.end(); } 88 89 typedef std::vector<SBChunk>::iterator iterator; 90 iterator begin() { return chunks_.begin(); } 91 iterator end() { return chunks_.end(); } 92 93 SBChunk& operator[](size_t n) { return chunks_[n]; } 94 const SBChunk& operator[](size_t n) const { return chunks_[n]; } 95 96 // Calls |SBEvent::Destroy()| before clearing |chunks_|. 97 void clear(); 98 99 private: 100 std::vector<SBChunk> chunks_; 101 102 DISALLOW_COPY_AND_ASSIGN(SBChunkList); 103 }; 104 105 // Used when we get a gethash response. 106 struct SBFullHashResult { 107 SBFullHash hash; 108 std::string list_name; 109 int add_chunk_id; 110 }; 111 112 // Contains information about a list in the database. 113 struct SBListChunkRanges { 114 explicit SBListChunkRanges(const std::string& n); 115 116 std::string name; // The list name. 117 std::string adds; // The ranges for add chunks. 118 std::string subs; // The ranges for sub chunks. 119 }; 120 121 // Container for deleting chunks from the database. 122 struct SBChunkDelete { 123 SBChunkDelete(); 124 ~SBChunkDelete(); 125 126 std::string list_name; 127 bool is_sub_del; 128 std::vector<ChunkRange> chunk_del; 129 }; 130 131 // Different types of threats that SafeBrowsing protects against. 132 enum SBThreatType { 133 // No threat at all. 134 SB_THREAT_TYPE_SAFE, 135 136 // The URL is being used for phishing. 137 SB_THREAT_TYPE_URL_PHISHING, 138 139 // The URL hosts malware. 140 SB_THREAT_TYPE_URL_MALWARE, 141 142 // The download URL is malware. 143 SB_THREAT_TYPE_BINARY_MALWARE_URL, 144 145 // The hash of the download contents is malware. 146 SB_THREAT_TYPE_BINARY_MALWARE_HASH, 147 148 // Url detected by the client-side phishing model. Note that unlike the 149 // above values, this does not correspond to a downloaded list. 150 SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL, 151 152 // The Chrome extension or app (given by its ID) is malware. 153 SB_THREAT_TYPE_EXTENSION, 154 155 // Url detected by the client-side malware IP list. This IP list is part 156 // of the client side detection model. 157 SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL, 158 }; 159 160 // SBEntry --------------------------------------------------------------------- 161 162 // Holds information about the prefixes for a hostkey. prefixes can either be 163 // 4 bytes (truncated hash) or 32 bytes (full hash). 164 // For adds: 165 // [list id ][chunk id][prefix count (0..n)][prefix1][prefix2] 166 // For subs: 167 // [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)] 168 // [add chunk][prefix][add chunk][prefix] 169 class SBEntry { 170 public: 171 enum Type { 172 ADD_PREFIX, // 4 byte add entry. 173 SUB_PREFIX, // 4 byte sub entry. 174 ADD_FULL_HASH, // 32 byte add entry. 175 SUB_FULL_HASH, // 32 byte sub entry. 176 }; 177 178 // Creates a SBEntry with the necessary size for the given number of prefixes. 179 // Caller ownes the object and needs to free it by calling Destroy. 180 static SBEntry* Create(Type type, int prefix_count); 181 182 // Frees the entry's memory. 183 void Destroy(); 184 185 void set_list_id(int list_id) { data_.list_id = list_id; } 186 int list_id() const { return data_.list_id; } 187 void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; } 188 int chunk_id() const { return data_.chunk_id; } 189 int prefix_count() const { return data_.prefix_count; } 190 191 // Returns true if this is a prefix as opposed to a full hash. 192 bool IsPrefix() const { 193 return type() == ADD_PREFIX || type() == SUB_PREFIX; 194 } 195 196 // Returns true if this is an add entry. 197 bool IsAdd() const { 198 return type() == ADD_PREFIX || type() == ADD_FULL_HASH; 199 } 200 201 // Returns true if this is a sub entry. 202 bool IsSub() const { 203 return type() == SUB_PREFIX || type() == SUB_FULL_HASH; 204 } 205 206 // Helper to return the size of the prefixes. 207 int HashLen() const { 208 return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash); 209 } 210 211 // For add entries, returns the add chunk id. For sub entries, returns the 212 // add_chunk id for the prefix at the given index. 213 int ChunkIdAtPrefix(int index) const; 214 215 // Used for sub chunks to set the chunk id at a given index. 216 void SetChunkIdAtPrefix(int index, int chunk_id); 217 218 // Return the prefix/full hash at the given index. Caller is expected to 219 // call the right function based on the hash length. 220 const SBPrefix& PrefixAt(int index) const; 221 const SBFullHash& FullHashAt(int index) const; 222 223 // Return the prefix/full hash at the given index. Caller is expected to 224 // call the right function based on the hash length. 225 void SetPrefixAt(int index, const SBPrefix& prefix); 226 void SetFullHashAt(int index, const SBFullHash& full_hash); 227 228 private: 229 // Container for a sub prefix. 230 struct SBSubPrefix { 231 int add_chunk; 232 SBPrefix prefix; 233 }; 234 235 // Container for a sub full hash. 236 struct SBSubFullHash { 237 int add_chunk; 238 SBFullHash prefix; 239 }; 240 241 // Keep the fixed data together in one struct so that we can get its size 242 // easily. If any of this is modified, the database will have to be cleared. 243 struct Data { 244 int list_id; 245 // For adds, this is the add chunk number. 246 // For subs: if prefix_count is 0 then this is the add chunk that this sub 247 // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes 248 // or sub_full_hashes is used for each corresponding prefix. 249 int chunk_id; 250 Type type; 251 int prefix_count; 252 }; 253 254 SBEntry(); 255 ~SBEntry(); 256 257 // Helper to return the size of each prefix entry (i.e. for subs this 258 // includes an add chunk id). 259 static int PrefixSize(Type type); 260 261 // Helper to return how much memory a given Entry would require. 262 static int Size(Type type, int prefix_count); 263 264 // Returns how many bytes this entry is. 265 int Size() const; 266 267 Type type() const { return data_.type; } 268 269 void set_prefix_count(int count) { data_.prefix_count = count; } 270 void set_type(Type type) { data_.type = type; } 271 272 // The prefixes union must follow the fixed data so that they're contiguous 273 // in memory. 274 Data data_; 275 union { 276 SBPrefix add_prefixes_[1]; 277 SBSubPrefix sub_prefixes_[1]; 278 SBFullHash add_full_hashes_[1]; 279 SBSubFullHash sub_full_hashes_[1]; 280 }; 281 }; 282 283 284 // Utility functions ----------------------------------------------------------- 285 286 namespace safe_browsing_util { 287 288 // SafeBrowsing list names. 289 extern const char kMalwareList[]; 290 extern const char kPhishingList[]; 291 // Binary Download list names. 292 extern const char kBinUrlList[]; 293 extern const char kBinHashList[]; 294 // SafeBrowsing client-side detection whitelist list name. 295 extern const char kCsdWhiteList[]; 296 // SafeBrowsing download whitelist list name. 297 extern const char kDownloadWhiteList[]; 298 // SafeBrowsing extension list name. 299 extern const char kExtensionBlacklist[]; 300 // SafeBrowsing side-effect free whitelist name. 301 extern const char kSideEffectFreeWhitelist[]; 302 // SafeBrowsing csd malware IP blacklist name. 303 extern const char kIPBlacklist[]; 304 305 // This array must contain all Safe Browsing lists. 306 extern const char* kAllLists[10]; 307 308 enum ListType { 309 INVALID = -1, 310 MALWARE = 0, 311 PHISH = 1, 312 BINURL = 2, 313 BINHASH = 3, 314 CSDWHITELIST = 4, 315 // SafeBrowsing lists are stored in pairs. Keep ListType 5 316 // available for a potential second list that we would store in the 317 // csd-whitelist store file. 318 DOWNLOADWHITELIST = 6, 319 // See above comment. Leave 7 available. 320 EXTENSIONBLACKLIST = 8, 321 // See above comment. Leave 9 available. 322 SIDEEFFECTFREEWHITELIST = 10, 323 // See above comment. Leave 11 available. 324 IPBLACKLIST = 12, 325 // See above comment. Leave 13 available. 326 }; 327 328 // Maps a list name to ListType. 329 ListType GetListId(const std::string& name); 330 331 // Maps a ListId to list name. Return false if fails. 332 bool GetListName(ListType list_id, std::string* list); 333 334 // Canonicalizes url as per Google Safe Browsing Specification. 335 // See section 6.1 in 336 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 337 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 338 std::string* canonicalized_path, 339 std::string* canonicalized_query); 340 341 // Given a URL, returns all the hosts we need to check. They are returned 342 // in order of size (i.e. b.c is first, then a.b.c). 343 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 344 345 // Given a URL, returns all the paths we need to check. 346 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 347 348 // Given a URL, returns all the patterns we need to check. 349 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls); 350 351 int GetHashIndex(const SBFullHash& hash, 352 const std::vector<SBFullHashResult>& full_hashes); 353 354 // Given a URL, compare all the possible host + path full hashes to the set of 355 // provided full hashes. Returns the index of the match if one is found, or -1 356 // otherwise. 357 int GetUrlHashIndex(const GURL& url, 358 const std::vector<SBFullHashResult>& full_hashes); 359 360 bool IsPhishingList(const std::string& list_name); 361 bool IsMalwareList(const std::string& list_name); 362 bool IsBadbinurlList(const std::string& list_name); 363 bool IsBadbinhashList(const std::string& list_name); 364 bool IsExtensionList(const std::string& list_name); 365 366 GURL GeneratePhishingReportUrl(const std::string& report_page, 367 const std::string& url_to_report, 368 bool is_client_side_detection); 369 370 SBFullHash StringToSBFullHash(const std::string& hash_in); 371 std::string SBFullHashToString(const SBFullHash& hash_out); 372 373 } // namespace safe_browsing_util 374 375 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 376