Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Utilities for the SafeBrowsing code.
      6 
      7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
      8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
      9 
     10 #include <cstring>
     11 #include <deque>
     12 #include <set>
     13 #include <string>
     14 #include <vector>
     15 
     16 #include "base/basictypes.h"
     17 #include "chrome/browser/safe_browsing/chunk_range.h"
     18 
     19 class GURL;
     20 
     21 class SBEntry;
     22 
     23 // A truncated hash's type.
     24 typedef int32 SBPrefix;
     25 
     // Pairs the URL of a chunk with the name of the list that chunk belongs to.
     struct ChunkUrl {
       std::string url;        // The chunk URL.
       std::string list_name;  // The list the chunk belongs to.
     };
     31 
     32 // A full hash.
     33 union SBFullHash {
     34   char full_hash[32];
     35   SBPrefix prefix;
     36 };
     37 
     38 inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) {
     39   return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0;
     40 }
     41 
     42 inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) {
     43   return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0;
     44 }
     45 
     46 // Container for information about a specific host in an add/sub chunk.
     47 struct SBChunkHost {
     48   SBPrefix host;
     49   SBEntry* entry;
     50 };
     51 
     52 // Container for an add/sub chunk.
     53 struct SBChunk {
     54   SBChunk();
     55   ~SBChunk();
     56 
     57   int chunk_number;
     58   int list_id;
     59   bool is_add;
     60   std::deque<SBChunkHost> hosts;
     61 };
     62 
     63 // Container for a set of chunks.  Interim wrapper to replace use of
     64 // |std::deque<SBChunk>| with something having safer memory semantics.
     65 // management.
     66 // TODO(shess): |SBEntry| is currently a very roundabout way to hold
     67 // things pending storage.  It could be replaced with the structures
     68 // used in SafeBrowsingStore, then lots of bridging code could
     69 // dissappear.
     70 class SBChunkList {
     71  public:
     72   SBChunkList();
     73   ~SBChunkList();
     74 
     75   // Implement that subset of the |std::deque<>| interface which
     76   // callers expect.
     77   bool empty() const { return chunks_.empty(); }
     78   size_t size() { return chunks_.size(); }
     79 
     80   void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
     81   SBChunk& back() { return chunks_.back(); }
     82   SBChunk& front() { return chunks_.front(); }
     83   const SBChunk& front() const { return chunks_.front(); }
     84 
     85   typedef std::vector<SBChunk>::const_iterator const_iterator;
     86   const_iterator begin() const { return chunks_.begin(); }
     87   const_iterator end() const { return chunks_.end(); }
     88 
     89   typedef std::vector<SBChunk>::iterator iterator;
     90   iterator begin() { return chunks_.begin(); }
     91   iterator end() { return chunks_.end(); }
     92 
     93   SBChunk& operator[](size_t n) { return chunks_[n]; }
     94   const SBChunk& operator[](size_t n) const { return chunks_[n]; }
     95 
     96   // Calls |SBEvent::Destroy()| before clearing |chunks_|.
     97   void clear();
     98 
     99  private:
    100   std::vector<SBChunk> chunks_;
    101 
    102   DISALLOW_COPY_AND_ASSIGN(SBChunkList);
    103 };
    104 
    105 // Used when we get a gethash response.
    106 struct SBFullHashResult {
    107   SBFullHash hash;
    108   std::string list_name;
    109   int add_chunk_id;
    110 };
    111 
    // Describes one list in the database: its name plus the add and sub chunk
    // ranges, each kept as a string.
    struct SBListChunkRanges {
      // |n| is the list name.  Explicit, so a std::string does not silently
      // convert to an SBListChunkRanges.
      explicit SBListChunkRanges(const std::string& n);

      // The list name.
      std::string name;
      // The ranges for add chunks.
      std::string adds;
      // The ranges for sub chunks.
      std::string subs;
    };
    120 
    121 // Container for deleting chunks from the database.
    122 struct SBChunkDelete {
    123   SBChunkDelete();
    124   ~SBChunkDelete();
    125 
    126   std::string list_name;
    127   bool is_sub_del;
    128   std::vector<ChunkRange> chunk_del;
    129 };
    130 
    // The kinds of threat that SafeBrowsing can report for a resource.
    enum SBThreatType {
      // Nothing harmful was found.
      SB_THREAT_TYPE_SAFE,

      // The URL is used for phishing.
      SB_THREAT_TYPE_URL_PHISHING,

      // The URL hosts malware.
      SB_THREAT_TYPE_URL_MALWARE,

      // The URL of a download is malware.
      SB_THREAT_TYPE_BINARY_MALWARE_URL,

      // The hash of a download's contents is malware.
      SB_THREAT_TYPE_BINARY_MALWARE_HASH,

      // The URL was flagged by the client-side phishing model.  Unlike the
      // values above, this one does not correspond to a downloaded list.
      SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL,

      // The Chrome extension or app (identified by its ID) is malware.
      SB_THREAT_TYPE_EXTENSION,
    };
    155 
    156 // SBEntry ---------------------------------------------------------------------
    157 
    158 // Holds information about the prefixes for a hostkey.  prefixes can either be
    159 // 4 bytes (truncated hash) or 32 bytes (full hash).
    160 // For adds:
    161 //   [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
    162 // For subs:
    163 //   [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)]
    164 //       [add chunk][prefix][add chunk][prefix]
    165 class SBEntry {
    166  public:
    167   enum Type {
    168     ADD_PREFIX,     // 4 byte add entry.
    169     SUB_PREFIX,     // 4 byte sub entry.
    170     ADD_FULL_HASH,  // 32 byte add entry.
    171     SUB_FULL_HASH,  // 32 byte sub entry.
    172   };
    173 
    174   // Creates a SBEntry with the necessary size for the given number of prefixes.
    175   // Caller ownes the object and needs to free it by calling Destroy.
    176   static SBEntry* Create(Type type, int prefix_count);
    177 
    178   // Frees the entry's memory.
    179   void Destroy();
    180 
    181   void set_list_id(int list_id) { data_.list_id = list_id; }
    182   int list_id() const { return data_.list_id; }
    183   void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
    184   int chunk_id() const { return data_.chunk_id; }
    185   int prefix_count() const { return data_.prefix_count; }
    186 
    187   // Returns true if this is a prefix as opposed to a full hash.
    188   bool IsPrefix() const {
    189     return type() == ADD_PREFIX || type() == SUB_PREFIX;
    190   }
    191 
    192   // Returns true if this is an add entry.
    193   bool IsAdd() const {
    194     return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
    195   }
    196 
    197   // Returns true if this is a sub entry.
    198   bool IsSub() const {
    199     return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
    200   }
    201 
    202   // Helper to return the size of the prefixes.
    203   int HashLen() const {
    204     return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
    205   }
    206 
    207   // For add entries, returns the add chunk id.  For sub entries, returns the
    208   // add_chunk id for the prefix at the given index.
    209   int ChunkIdAtPrefix(int index) const;
    210 
    211   // Used for sub chunks to set the chunk id at a given index.
    212   void SetChunkIdAtPrefix(int index, int chunk_id);
    213 
    214   // Return the prefix/full hash at the given index.  Caller is expected to
    215   // call the right function based on the hash length.
    216   const SBPrefix& PrefixAt(int index) const;
    217   const SBFullHash& FullHashAt(int index) const;
    218 
    219   // Return the prefix/full hash at the given index.  Caller is expected to
    220   // call the right function based on the hash length.
    221   void SetPrefixAt(int index, const SBPrefix& prefix);
    222   void SetFullHashAt(int index, const SBFullHash& full_hash);
    223 
    224  private:
    225   // Container for a sub prefix.
    226   struct SBSubPrefix {
    227     int add_chunk;
    228     SBPrefix prefix;
    229   };
    230 
    231   // Container for a sub full hash.
    232   struct SBSubFullHash {
    233     int add_chunk;
    234     SBFullHash prefix;
    235   };
    236 
    237   // Keep the fixed data together in one struct so that we can get its size
    238   // easily.  If any of this is modified, the database will have to be cleared.
    239   struct Data {
    240     int list_id;
    241     // For adds, this is the add chunk number.
    242     // For subs: if prefix_count is 0 then this is the add chunk that this sub
    243     //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
    244     //     or sub_full_hashes is used for each corresponding prefix.
    245     int chunk_id;
    246     Type type;
    247     int prefix_count;
    248   };
    249 
    250   SBEntry();
    251   ~SBEntry();
    252 
    253   // Helper to return the size of each prefix entry (i.e. for subs this
    254   // includes an add chunk id).
    255   static int PrefixSize(Type type);
    256 
    257   // Helper to return how much memory a given Entry would require.
    258   static int Size(Type type, int prefix_count);
    259 
    260   // Returns how many bytes this entry is.
    261   int Size() const;
    262 
    263   Type type() const { return data_.type; }
    264 
    265   void set_prefix_count(int count) { data_.prefix_count = count; }
    266   void set_type(Type type) { data_.type = type; }
    267 
    268   // The prefixes union must follow the fixed data so that they're contiguous
    269   // in memory.
    270   Data data_;
    271   union {
    272     SBPrefix add_prefixes_[1];
    273     SBSubPrefix sub_prefixes_[1];
    274     SBFullHash add_full_hashes_[1];
    275     SBSubFullHash sub_full_hashes_[1];
    276   };
    277 };
    278 
    279 
    280 // Utility functions -----------------------------------------------------------
    281 
    282 namespace safe_browsing_util {
    283 
    284 // SafeBrowsing list names.
    285 extern const char kMalwareList[];
    286 extern const char kPhishingList[];
    287 // Binary Download list names.
    288 extern const char kBinUrlList[];
    289 extern const char kBinHashList[];
    290 // SafeBrowsing client-side detection whitelist list name.
    291 extern const char kCsdWhiteList[];
    292 // SafeBrowsing download whitelist list name.
    293 extern const char kDownloadWhiteList[];
    294 // SafeBrowsing extension list name.
    295 extern const char kExtensionBlacklist[];
    296 // SafeBrowsing side-effect free whitelist name.
    297 extern const char kSideEffectFreeWhitelist[];
    298 
    299 enum ListType {
    300   INVALID = -1,
    301   MALWARE = 0,
    302   PHISH = 1,
    303   BINURL = 2,
    304   BINHASH = 3,
    305   CSDWHITELIST = 4,
    306   // SafeBrowsing lists are stored in pairs.  Keep ListType 5
    307   // available for a potential second list that we would store in the
    308   // csd-whitelist store file.
    309   DOWNLOADWHITELIST = 6,
    310   // See above comment. Leave 7 available.
    311   EXTENSIONBLACKLIST = 8,
    312   // See above comment. Leave 9 available.
    313   SIDEEFFECTFREEWHITELIST = 10,
    314   // See above comment. Leave 11 available.
    315 };
    316 
    317 // Maps a list name to ListType.
    318 ListType GetListId(const std::string& name);
    319 
    320 // Maps a ListId to list name. Return false if fails.
    321 bool GetListName(ListType list_id, std::string* list);
    322 
    323 // Canonicalizes url as per Google Safe Browsing Specification.
    324 // See section 6.1 in
    325 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
    326 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
    327                      std::string* canonicalized_path,
    328                      std::string* canonicalized_query);
    329 
    330 // Given a URL, returns all the hosts we need to check.  They are returned
    331 // in order of size (i.e. b.c is first, then a.b.c).
    332 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
    333 
    334 // Given a URL, returns all the paths we need to check.
    335 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
    336 
    337 // Given a URL, returns all the patterns we need to check.
    338 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
    339 
    340 int GetHashIndex(const SBFullHash& hash,
    341                  const std::vector<SBFullHashResult>& full_hashes);
    342 
    343 // Given a URL, compare all the possible host + path full hashes to the set of
    344 // provided full hashes.  Returns the index of the match if one is found, or -1
    345 // otherwise.
    346 int GetUrlHashIndex(const GURL& url,
    347                     const std::vector<SBFullHashResult>& full_hashes);
    348 
    349 bool IsPhishingList(const std::string& list_name);
    350 bool IsMalwareList(const std::string& list_name);
    351 bool IsBadbinurlList(const std::string& list_name);
    352 bool IsBadbinhashList(const std::string& list_name);
    353 bool IsExtensionList(const std::string& list_name);
    354 
    355 GURL GeneratePhishingReportUrl(const std::string& report_page,
    356                                const std::string& url_to_report,
    357                                bool is_client_side_detection);
    358 
    359 SBFullHash StringToSBFullHash(const std::string& hash_in);
    360 std::string SBFullHashToString(const SBFullHash& hash_out);
    361 
    362 }  // namespace safe_browsing_util
    363 
    364 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
    365