1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Utilities for the SafeBrowsing code. 6 7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 9 10 #include <cstring> 11 #include <set> 12 #include <string> 13 #include <vector> 14 15 #include "base/basictypes.h" 16 #include "base/memory/scoped_ptr.h" 17 #include "base/strings/string_piece.h" 18 #include "chrome/browser/safe_browsing/chunk_range.h" 19 20 namespace safe_browsing { 21 class ChunkData; 22 }; 23 24 class GURL; 25 26 // A truncated hash's type. 27 typedef uint32 SBPrefix; 28 29 // Container for holding a chunk URL and the list it belongs to. 30 struct ChunkUrl { 31 std::string url; 32 std::string list_name; 33 }; 34 35 // A full hash. 36 union SBFullHash { 37 char full_hash[32]; 38 SBPrefix prefix; 39 }; 40 41 inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) { 42 return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)); 43 } 44 45 inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) { 46 return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0; 47 } 48 49 // Generate full hash for the given string. 50 SBFullHash SBFullHashForString(const base::StringPiece& str); 51 52 // Data for an individual chunk sent from the server. 53 class SBChunkData { 54 public: 55 SBChunkData(); 56 ~SBChunkData(); 57 58 // Create with manufactured data, for testing only. 59 // TODO(shess): Right now the test code calling this is in an anonymous 60 // namespace. Figure out how to shift this into private:. 61 explicit SBChunkData(safe_browsing::ChunkData* chunk_data); 62 63 // Read serialized ChunkData, returning true if the parse suceeded. 64 bool ParseFrom(const unsigned char* data, size_t length); 65 66 // Access the chunk data. |AddChunkNumberAt()| can only be called if 67 // |IsSub()| returns true. |Prefix*()| and |FullHash*()| can only be called 68 // if the corrosponding |Is*()| returned true. 69 int ChunkNumber() const; 70 bool IsAdd() const; 71 bool IsSub() const; 72 int AddChunkNumberAt(size_t i) const; 73 bool IsPrefix() const; 74 size_t PrefixCount() const; 75 SBPrefix PrefixAt(size_t i) const; 76 bool IsFullHash() const; 77 size_t FullHashCount() const; 78 SBFullHash FullHashAt(size_t i) const; 79 80 private: 81 // Protocol buffer sent from server. 82 scoped_ptr<safe_browsing::ChunkData> chunk_data_; 83 84 DISALLOW_COPY_AND_ASSIGN(SBChunkData); 85 }; 86 87 // Used when we get a gethash response. 88 struct SBFullHashResult { 89 SBFullHash hash; 90 // TODO(shess): Refactor to allow ListType here. 91 int list_id; 92 }; 93 94 // Contains information about a list in the database. 95 struct SBListChunkRanges { 96 explicit SBListChunkRanges(const std::string& n); 97 98 std::string name; // The list name. 99 std::string adds; // The ranges for add chunks. 100 std::string subs; // The ranges for sub chunks. 101 }; 102 103 // Container for deleting chunks from the database. 104 struct SBChunkDelete { 105 SBChunkDelete(); 106 ~SBChunkDelete(); 107 108 std::string list_name; 109 bool is_sub_del; 110 std::vector<ChunkRange> chunk_del; 111 }; 112 113 // Different types of threats that SafeBrowsing protects against. 114 enum SBThreatType { 115 // No threat at all. 116 SB_THREAT_TYPE_SAFE, 117 118 // The URL is being used for phishing. 119 SB_THREAT_TYPE_URL_PHISHING, 120 121 // The URL hosts malware. 122 SB_THREAT_TYPE_URL_MALWARE, 123 124 // The download URL is malware. 125 SB_THREAT_TYPE_BINARY_MALWARE_URL, 126 127 // Url detected by the client-side phishing model. Note that unlike the 128 // above values, this does not correspond to a downloaded list. 129 SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL, 130 131 // The Chrome extension or app (given by its ID) is malware. 132 SB_THREAT_TYPE_EXTENSION, 133 134 // Url detected by the client-side malware IP list. This IP list is part 135 // of the client side detection model. 136 SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL, 137 }; 138 139 // Utility functions ----------------------------------------------------------- 140 141 namespace safe_browsing_util { 142 143 // SafeBrowsing list names. 144 extern const char kMalwareList[]; 145 extern const char kPhishingList[]; 146 // Binary Download list name. 147 extern const char kBinUrlList[]; 148 // SafeBrowsing client-side detection whitelist list name. 149 extern const char kCsdWhiteList[]; 150 // SafeBrowsing download whitelist list name. 151 extern const char kDownloadWhiteList[]; 152 // SafeBrowsing extension list name. 153 extern const char kExtensionBlacklist[]; 154 // SafeBrowsing side-effect free whitelist name. 155 extern const char kSideEffectFreeWhitelist[]; 156 // SafeBrowsing csd malware IP blacklist name. 157 extern const char kIPBlacklist[]; 158 159 // This array must contain all Safe Browsing lists. 160 extern const char* kAllLists[8]; 161 162 enum ListType { 163 INVALID = -1, 164 MALWARE = 0, 165 PHISH = 1, 166 BINURL = 2, 167 // Obsolete BINHASH = 3, 168 CSDWHITELIST = 4, 169 // SafeBrowsing lists are stored in pairs. Keep ListType 5 170 // available for a potential second list that we would store in the 171 // csd-whitelist store file. 172 DOWNLOADWHITELIST = 6, 173 // See above comment. Leave 7 available. 174 EXTENSIONBLACKLIST = 8, 175 // See above comment. Leave 9 available. 176 SIDEEFFECTFREEWHITELIST = 10, 177 // See above comment. Leave 11 available. 178 IPBLACKLIST = 12, 179 // See above comment. Leave 13 available. 180 }; 181 182 // Maps a list name to ListType. 183 ListType GetListId(const base::StringPiece& name); 184 185 // Maps a ListId to list name. Return false if fails. 186 bool GetListName(ListType list_id, std::string* list); 187 188 // Canonicalizes url as per Google Safe Browsing Specification. 189 // See section 6.1 in 190 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec. 191 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname, 192 std::string* canonicalized_path, 193 std::string* canonicalized_query); 194 195 // Given a URL, returns all the hosts we need to check. They are returned 196 // in order of size (i.e. b.c is first, then a.b.c). 197 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts); 198 199 // Given a URL, returns all the paths we need to check. 200 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths); 201 202 // Given a URL, returns all the patterns we need to check. 203 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls); 204 205 GURL GeneratePhishingReportUrl(const std::string& report_page, 206 const std::string& url_to_report, 207 bool is_client_side_detection); 208 209 SBFullHash StringToSBFullHash(const std::string& hash_in); 210 std::string SBFullHashToString(const SBFullHash& hash_out); 211 212 } // namespace safe_browsing_util 213 214 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_ 215