Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Utilities for the SafeBrowsing code.
      6 
      7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
      8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
      9 
     10 #include <cstring>
     11 #include <set>
     12 #include <string>
     13 #include <vector>
     14 
     15 #include "base/basictypes.h"
     16 #include "base/memory/scoped_ptr.h"
     17 #include "base/strings/string_piece.h"
     18 #include "base/time/time.h"
     19 #include "chrome/browser/safe_browsing/chunk_range.h"
     20 
     21 namespace safe_browsing {
     22 class ChunkData;
     23 };
     24 
     25 class GURL;
     26 
     27 // A truncated hash's type.
     28 typedef uint32 SBPrefix;
     29 
     30 // Container for holding a chunk URL and the list it belongs to.
     31 struct ChunkUrl {
     32   std::string url;
     33   std::string list_name;
     34 };
     35 
     36 // A full hash.
     37 union SBFullHash {
     38   char full_hash[32];
     39   SBPrefix prefix;
     40 };
     41 
     42 inline bool SBFullHashEqual(const SBFullHash& a, const SBFullHash& b) {
     43   return !memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash));
     44 }
     45 
     46 inline bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
     47   return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
     48 }
     49 
     50 // Generate full hash for the given string.
     51 SBFullHash SBFullHashForString(const base::StringPiece& str);
     52 
     53 // Data for an individual chunk sent from the server.
     54 class SBChunkData {
     55  public:
     56   SBChunkData();
     57   ~SBChunkData();
     58 
     59   // Create with manufactured data, for testing only.
     60   // TODO(shess): Right now the test code calling this is in an anonymous
     61   // namespace.  Figure out how to shift this into private:.
     62   explicit SBChunkData(safe_browsing::ChunkData* chunk_data);
     63 
     64   // Read serialized ChunkData, returning true if the parse suceeded.
     65   bool ParseFrom(const unsigned char* data, size_t length);
     66 
     67   // Access the chunk data.  |AddChunkNumberAt()| can only be called if
     68   // |IsSub()| returns true.  |Prefix*()| and |FullHash*()| can only be called
     69   // if the corrosponding |Is*()| returned true.
     70   int ChunkNumber() const;
     71   bool IsAdd() const;
     72   bool IsSub() const;
     73   int AddChunkNumberAt(size_t i) const;
     74   bool IsPrefix() const;
     75   size_t PrefixCount() const;
     76   SBPrefix PrefixAt(size_t i) const;
     77   bool IsFullHash() const;
     78   size_t FullHashCount() const;
     79   SBFullHash FullHashAt(size_t i) const;
     80 
     81  private:
     82   // Protocol buffer sent from server.
     83   scoped_ptr<safe_browsing::ChunkData> chunk_data_;
     84 
     85   DISALLOW_COPY_AND_ASSIGN(SBChunkData);
     86 };
     87 
     88 // Used when we get a gethash response.
     89 struct SBFullHashResult {
     90   SBFullHash hash;
     91   // TODO(shess): Refactor to allow ListType here.
     92   int list_id;
     93   std::string metadata;
     94 };
     95 
     96 // Caches individual response from GETHASH request.
     97 struct SBCachedFullHashResult {
     98   SBCachedFullHashResult();
     99   explicit SBCachedFullHashResult(const base::Time& in_expire_after);
    100   ~SBCachedFullHashResult();
    101 
    102   base::Time expire_after;
    103   std::vector<SBFullHashResult> full_hashes;
    104 };
    105 
    106 // Contains information about a list in the database.
    107 struct SBListChunkRanges {
    108   explicit SBListChunkRanges(const std::string& n);
    109 
    110   std::string name;  // The list name.
    111   std::string adds;  // The ranges for add chunks.
    112   std::string subs;  // The ranges for sub chunks.
    113 };
    114 
    115 // Container for deleting chunks from the database.
    116 struct SBChunkDelete {
    117   SBChunkDelete();
    118   ~SBChunkDelete();
    119 
    120   std::string list_name;
    121   bool is_sub_del;
    122   std::vector<ChunkRange> chunk_del;
    123 };
    124 
    125 // Different types of threats that SafeBrowsing protects against.
    126 enum SBThreatType {
    127   // No threat at all.
    128   SB_THREAT_TYPE_SAFE,
    129 
    130   // The URL is being used for phishing.
    131   SB_THREAT_TYPE_URL_PHISHING,
    132 
    133   // The URL hosts malware.
    134   SB_THREAT_TYPE_URL_MALWARE,
    135 
    136   // The URL hosts harmful programs.
    137   SB_THREAT_TYPE_URL_HARMFUL,
    138 
    139   // The download URL is malware.
    140   SB_THREAT_TYPE_BINARY_MALWARE_URL,
    141 
    142   // Url detected by the client-side phishing model.  Note that unlike the
    143   // above values, this does not correspond to a downloaded list.
    144   SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL,
    145 
    146   // The Chrome extension or app (given by its ID) is malware.
    147   SB_THREAT_TYPE_EXTENSION,
    148 
    149   // Url detected by the client-side malware IP list. This IP list is part
    150   // of the client side detection model.
    151   SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL,
    152 };
    153 
    154 // Utility functions -----------------------------------------------------------
    155 
    156 namespace safe_browsing_util {
    157 
    158 // SafeBrowsing list names.
    159 extern const char kMalwareList[];
    160 extern const char kPhishingList[];
    161 // Binary Download list name.
    162 extern const char kBinUrlList[];
    163 // SafeBrowsing client-side detection whitelist list name.
    164 extern const char kCsdWhiteList[];
    165 // SafeBrowsing download whitelist list name.
    166 extern const char kDownloadWhiteList[];
    167 // SafeBrowsing extension list name.
    168 extern const char kExtensionBlacklist[];
    169 // SafeBrowsing side-effect free whitelist name.
    170 extern const char kSideEffectFreeWhitelist[];
    171 // SafeBrowsing csd malware IP blacklist name.
    172 extern const char kIPBlacklist[];
    173 
    174 // This array must contain all Safe Browsing lists.
    175 extern const char* kAllLists[8];
    176 
    177 enum ListType {
    178   INVALID = -1,
    179   MALWARE = 0,
    180   PHISH = 1,
    181   BINURL = 2,
    182   // Obsolete BINHASH = 3,
    183   CSDWHITELIST = 4,
    184   // SafeBrowsing lists are stored in pairs.  Keep ListType 5
    185   // available for a potential second list that we would store in the
    186   // csd-whitelist store file.
    187   DOWNLOADWHITELIST = 6,
    188   // See above comment. Leave 7 available.
    189   EXTENSIONBLACKLIST = 8,
    190   // See above comment. Leave 9 available.
    191   SIDEEFFECTFREEWHITELIST = 10,
    192   // See above comment. Leave 11 available.
    193   IPBLACKLIST = 12,
    194   // See above comment.  Leave 13 available.
    195 };
    196 
    197 // Maps a list name to ListType.
    198 ListType GetListId(const base::StringPiece& name);
    199 
    200 // Maps a ListId to list name. Return false if fails.
    201 bool GetListName(ListType list_id, std::string* list);
    202 
    203 // Canonicalizes url as per Google Safe Browsing Specification.
    204 // See section 6.1 in
    205 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
    206 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
    207                      std::string* canonicalized_path,
    208                      std::string* canonicalized_query);
    209 
    210 // Given a URL, returns all the hosts we need to check.  They are returned
    211 // in order of size (i.e. b.c is first, then a.b.c).
    212 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
    213 
    214 // Given a URL, returns all the paths we need to check.
    215 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
    216 
    217 // Given a URL, returns all the patterns we need to check.
    218 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
    219 
    220 GURL GeneratePhishingReportUrl(const std::string& report_page,
    221                                const std::string& url_to_report,
    222                                bool is_client_side_detection);
    223 
    224 SBFullHash StringToSBFullHash(const std::string& hash_in);
    225 std::string SBFullHashToString(const SBFullHash& hash_out);
    226 
    227 }  // namespace safe_browsing_util
    228 
    229 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
    230