Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Utilities for the SafeBrowsing code.
      6 
      7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
      8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
      9 
     10 #include <cstring>
     11 #include <set>
     12 #include <string>
     13 #include <vector>
     14 
     15 #include "base/basictypes.h"
     16 #include "base/memory/scoped_ptr.h"
     17 #include "base/strings/string_piece.h"
     18 #include "chrome/browser/safe_browsing/chunk_range.h"
     19 
     20 namespace safe_browsing {
     21 class ChunkData;
     22 };
     23 
     24 class GURL;
     25 
     26 // A truncated hash's type.
     27 typedef uint32 SBPrefix;
     28 
     29 // Container for holding a chunk URL and the list it belongs to.
     30 struct ChunkUrl {
     31   std::string url;
     32   std::string list_name;
     33 };
     34 
     35 // A full hash.
     36 union SBFullHash {
     37   char full_hash[32];
     38   SBPrefix prefix;
     39 };
     40 
     41 inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) {
     42   return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash));
     43 }
     44 
     45 inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
     46   return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
     47 }
     48 
     49 // Generate full hash for the given string.
     50 SBFullHash SBFullHashForString(const base::StringPiece& str);
     51 
     52 // Data for an individual chunk sent from the server.
     53 class SBChunkData {
     54  public:
     55   SBChunkData();
     56   ~SBChunkData();
     57 
     58   // Create with manufactured data, for testing only.
     59   // TODO(shess): Right now the test code calling this is in an anonymous
     60   // namespace.  Figure out how to shift this into private:.
     61   explicit SBChunkData(safe_browsing::ChunkData* chunk_data);
     62 
     63   // Read serialized ChunkData, returning true if the parse suceeded.
     64   bool ParseFrom(const unsigned char* data, size_t length);
     65 
     66   // Access the chunk data.  |AddChunkNumberAt()| can only be called if
     67   // |IsSub()| returns true.  |Prefix*()| and |FullHash*()| can only be called
     68   // if the corrosponding |Is*()| returned true.
     69   int ChunkNumber() const;
     70   bool IsAdd() const;
     71   bool IsSub() const;
     72   int AddChunkNumberAt(size_t i) const;
     73   bool IsPrefix() const;
     74   size_t PrefixCount() const;
     75   SBPrefix PrefixAt(size_t i) const;
     76   bool IsFullHash() const;
     77   size_t FullHashCount() const;
     78   SBFullHash FullHashAt(size_t i) const;
     79 
     80  private:
     81   // Protocol buffer sent from server.
     82   scoped_ptr<safe_browsing::ChunkData> chunk_data_;
     83 
     84   DISALLOW_COPY_AND_ASSIGN(SBChunkData);
     85 };
     86 
     87 // Used when we get a gethash response.
     88 struct SBFullHashResult {
     89   SBFullHash hash;
     90   // TODO(shess): Refactor to allow ListType here.
     91   int list_id;
     92 };
     93 
     94 // Contains information about a list in the database.
     95 struct SBListChunkRanges {
     96   explicit SBListChunkRanges(const std::string& n);
     97 
     98   std::string name;  // The list name.
     99   std::string adds;  // The ranges for add chunks.
    100   std::string subs;  // The ranges for sub chunks.
    101 };
    102 
    103 // Container for deleting chunks from the database.
    104 struct SBChunkDelete {
    105   SBChunkDelete();
    106   ~SBChunkDelete();
    107 
    108   std::string list_name;
    109   bool is_sub_del;
    110   std::vector<ChunkRange> chunk_del;
    111 };
    112 
    113 // Different types of threats that SafeBrowsing protects against.
    114 enum SBThreatType {
    115   // No threat at all.
    116   SB_THREAT_TYPE_SAFE,
    117 
    118   // The URL is being used for phishing.
    119   SB_THREAT_TYPE_URL_PHISHING,
    120 
    121   // The URL hosts malware.
    122   SB_THREAT_TYPE_URL_MALWARE,
    123 
    124   // The download URL is malware.
    125   SB_THREAT_TYPE_BINARY_MALWARE_URL,
    126 
    127   // Url detected by the client-side phishing model.  Note that unlike the
    128   // above values, this does not correspond to a downloaded list.
    129   SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL,
    130 
    131   // The Chrome extension or app (given by its ID) is malware.
    132   SB_THREAT_TYPE_EXTENSION,
    133 
    134   // Url detected by the client-side malware IP list. This IP list is part
    135   // of the client side detection model.
    136   SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL,
    137 };
    138 
    139 // Utility functions -----------------------------------------------------------
    140 
    141 namespace safe_browsing_util {
    142 
    143 // SafeBrowsing list names.
    144 extern const char kMalwareList[];
    145 extern const char kPhishingList[];
    146 // Binary Download list name.
    147 extern const char kBinUrlList[];
    148 // SafeBrowsing client-side detection whitelist list name.
    149 extern const char kCsdWhiteList[];
    150 // SafeBrowsing download whitelist list name.
    151 extern const char kDownloadWhiteList[];
    152 // SafeBrowsing extension list name.
    153 extern const char kExtensionBlacklist[];
    154 // SafeBrowsing side-effect free whitelist name.
    155 extern const char kSideEffectFreeWhitelist[];
    156 // SafeBrowsing csd malware IP blacklist name.
    157 extern const char kIPBlacklist[];
    158 
    159 // This array must contain all Safe Browsing lists.
    160 extern const char* kAllLists[8];
    161 
    162 enum ListType {
    163   INVALID = -1,
    164   MALWARE = 0,
    165   PHISH = 1,
    166   BINURL = 2,
    167   // Obsolete BINHASH = 3,
    168   CSDWHITELIST = 4,
    169   // SafeBrowsing lists are stored in pairs.  Keep ListType 5
    170   // available for a potential second list that we would store in the
    171   // csd-whitelist store file.
    172   DOWNLOADWHITELIST = 6,
    173   // See above comment. Leave 7 available.
    174   EXTENSIONBLACKLIST = 8,
    175   // See above comment. Leave 9 available.
    176   SIDEEFFECTFREEWHITELIST = 10,
    177   // See above comment. Leave 11 available.
    178   IPBLACKLIST = 12,
    179   // See above comment.  Leave 13 available.
    180 };
    181 
    182 // Maps a list name to ListType.
    183 ListType GetListId(const base::StringPiece& name);
    184 
    185 // Maps a ListId to list name. Return false if fails.
    186 bool GetListName(ListType list_id, std::string* list);
    187 
    188 // Canonicalizes url as per Google Safe Browsing Specification.
    189 // See section 6.1 in
    190 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
    191 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
    192                      std::string* canonicalized_path,
    193                      std::string* canonicalized_query);
    194 
    195 // Given a URL, returns all the hosts we need to check.  They are returned
    196 // in order of size (i.e. b.c is first, then a.b.c).
    197 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
    198 
    199 // Given a URL, returns all the paths we need to check.
    200 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
    201 
    202 // Given a URL, returns all the patterns we need to check.
    203 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
    204 
    205 GURL GeneratePhishingReportUrl(const std::string& report_page,
    206                                const std::string& url_to_report,
    207                                bool is_client_side_detection);
    208 
    209 SBFullHash StringToSBFullHash(const std::string& hash_in);
    210 std::string SBFullHashToString(const SBFullHash& hash_out);
    211 
    212 }  // namespace safe_browsing_util
    213 
    214 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
    215