1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Utilities for the SafeBrowsing code. 6 7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9 10 #include <cstring> 11 #include <set> 12 #include <string> 13 #include <vector> 14 15 #include "base/basictypes.h" 16 #include "base/memory/scoped_ptr.h" 17 #include "base/strings/string_piece.h" 18 #include "base/time/time.h" 19 #include "chrome/browser/safe_browsing/chunk_range.h" 20 21 namespace safe_browsing { 22 class ChunkData; 23 }; 24 25 class GURL; 26 27 // A truncated hash's type. 28 typedef uint32 SBPrefix; 29 30 // Container for holding a chunk URL and the list it belongs to. 31 struct ChunkUrl { 32 std::string url; 33 std::string list_name; 34 }; 35 36 // A full hash. 37 union SBFullHash { 38 char full_hash[32]; 39 SBPrefix prefix; 40 }; 41 42 inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) { 43 return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)); 44 } 45 46 inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) { 47 return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0; 48 } 49 50 // Generate full hash for the given string. 51 SBFullHash SBFullHashForString(const base::StringPiece& str); 52 53 // Data for an individual chunk sent from the server. 54 class SBChunkData { 55 public: 56 SBChunkData(); 57 ~SBChunkData(); 58 59 // Create with manufactured data, for testing only. 60 // TODO(shess): Right now the test code calling this is in an anonymous 61 // namespace. Figure out how to shift this into private:. 62 explicit SBChunkData(safe_browsing::ChunkData* chunk_data); 63 64 // Read serialized ChunkData, returning true if the parse suceeded. 65 bool ParseFrom(const unsigned char* data, size_t length); 66 67 // Access the chunk data. |AddChunkNumberAt()| can only be called if 68 // |IsSub()| returns true. |Prefix*()| and |FullHash*()| can only be called 69 // if the corrosponding |Is*()| returned true. 70 int ChunkNumber() const; 71 bool IsAdd() const; 72 bool IsSub() const; 73 int AddChunkNumberAt(size_t i) const; 74 bool IsPrefix() const; 75 size_t PrefixCount() const; 76 SBPrefix PrefixAt(size_t i) const; 77 bool IsFullHash() const; 78 size_t FullHashCount() const; 79 SBFullHash FullHashAt(size_t i) const; 80 81 private: 82 // Protocol buffer sent from server. 83 scoped_ptr<safe_browsing::ChunkData> chunk_data_; 84 85 DISALLOW_COPY_AND_ASSIGN(SBChunkData); 86 }; 87 88 // Used when we get a gethash response. 89 struct SBFullHashResult { 90 SBFullHash hash; 91 // TODO(shess): Refactor to allow ListType here. 92 int list_id; 93 std::string metadata; 94 }; 95 96 // Caches individual response from GETHASH request. 97 struct SBCachedFullHashResult { 98 SBCachedFullHashResult(); 99 explicit SBCachedFullHashResult(const base::Time& in_expire_after); 100 ~SBCachedFullHashResult(); 101 102 base::Time expire_after; 103 std::vector<SBFullHashResult> full_hashes; 104 }; 105 106 // Contains information about a list in the database. 107 struct SBListChunkRanges { 108 explicit SBListChunkRanges(const std::string& n); 109 110 std::string name; // The list name. 111 std::string adds; // The ranges for add chunks. 112 std::string subs; // The ranges for sub chunks. 113 }; 114 115 // Container for deleting chunks from the database. 116 struct SBChunkDelete { 117 SBChunkDelete(); 118 ~SBChunkDelete(); 119 120 std::string list_name; 121 bool is_sub_del; 122 std::vector<ChunkRange> chunk_del; 123 }; 124 125 // Different types of threats that SafeBrowsing protects against. 126 enum SBThreatType { 127 // No threat at all. 128 SB_THREAT_TYPE_SAFE, 129 130 // The URL is being used for phishing. 131 SB_THREAT_TYPE_URL_PHISHING, 132 133 // The URL hosts malware. 134 SB_THREAT_TYPE_URL_MALWARE, 135 136 // The URL hosts harmful programs. 137 SB_THREAT_TYPE_URL_HARMFUL, 138 139 // The download URL is malware. 140 SB_THREAT_TYPE_BINARY_MALWARE_URL, 141 142 // Url detected by the client-side phishing model. Note that unlike the 143 // above values, this does not correspond to a downloaded list. 144 SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL, 145 146 // The Chrome extension or app (given by its ID) is malware. 147 SB_THREAT_TYPE_EXTENSION, 148 149 // Url detected by the client-side malware IP list. This IP list is part 150 // of the client side detection model. 151 SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL, 152 }; 153 154 // Utility functions ----------------------------------------------------------- 155 156 namespace safe_browsing_util { 157 158 // SafeBrowsing list names. 159 extern const char kMalwareList[]; 160 extern const char kPhishingList[]; 161 // Binary Download list name. 162 extern const char kBinUrlList[]; 163 // SafeBrowsing client-side detection whitelist list name. 164 extern const char kCsdWhiteList[]; 165 // SafeBrowsing download whitelist list name. 166 extern const char kDownloadWhiteList[]; 167 // SafeBrowsing extension list name. 168 extern const char kExtensionBlacklist[]; 169 // SafeBrowsing side-effect free whitelist name. 170 extern const char kSideEffectFreeWhitelist[]; 171 // SafeBrowsing csd malware IP blacklist name. 172 extern const char kIPBlacklist[]; 173 174 // This array must contain all Safe Browsing lists. 175 extern const char* kAllLists[8]; 176 177 enum ListType { 178 INVALID = -1, 179 MALWARE = 0, 180 PHISH = 1, 181 BINURL = 2, 182 // Obsolete BINHASH = 3, 183 CSDWHITELIST = 4, 184 // SafeBrowsing lists are stored in pairs. Keep ListType 5 185 // available for a potential second list that we would store in the 186 // csd-whitelist store file. 187 DOWNLOADWHITELIST = 6, 188 // See above comment. Leave 7 available. 189 EXTENSIONBLACKLIST = 8, 190 // See above comment. Leave 9 available. 191 SIDEEFFECTFREEWHITELIST = 10, 192 // See above comment. Leave 11 available. 193 IPBLACKLIST = 12, 194 // See above comment. Leave 13 available. 195 }; 196 197 // Maps a list name to ListType. 198 ListType GetListId(const base::StringPiece& name); 199 200 // Maps a ListId to list name. Return false if fails. 201 bool GetListName(ListType list_id, std::string* list); 202 203 // Canonicalizes url as per Google Safe Browsing Specification. 204 // See section 6.1 in 205 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 206 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 207 std::string* canonicalized_path, 208 std::string* canonicalized_query); 209 210 // Given a URL, returns all the hosts we need to check. They are returned 211 // in order of size (i.e. b.c is first, then a.b.c). 212 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 213 214 // Given a URL, returns all the paths we need to check. 215 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 216 217 // Given a URL, returns all the patterns we need to check. 218 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls); 219 220 GURL GeneratePhishingReportUrl(const std::string& report_page, 221 const std::string& url_to_report, 222 bool is_client_side_detection); 223 224 SBFullHash StringToSBFullHash(const std::string& hash_in); 225 std::string SBFullHashToString(const SBFullHash& hash_out); 226 227 } // namespace safe_browsing_util 228 229 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 230