Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Utilities for the SafeBrowsing code.
      6 
      7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
      8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
      9 #pragma once
     10 
     11 #include <cstring>
     12 #include <deque>
     13 #include <string>
     14 #include <vector>
     15 
     16 #include "base/basictypes.h"
     17 #include "chrome/browser/safe_browsing/chunk_range.h"
     18 
     19 class GURL;
     20 
     21 class SBEntry;
     22 
     23 // A truncated hash's type.
     24 typedef int32 SBPrefix;
     25 
     26 // Container for holding a chunk URL and the MAC of the contents of the URL.
     27 struct ChunkUrl {
     28   std::string url;
     29   std::string mac;
     30   std::string list_name;
     31 };
     32 
     33 // A full hash.
     34 union SBFullHash {
     35   char full_hash[32];
     36   SBPrefix prefix;
     37 };
     38 
     39 inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) {
     40   return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0;
     41 }
     42 
     43 inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) {
     44   return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0;
     45 }
     46 
     47 // Container for information about a specific host in an add/sub chunk.
     48 struct SBChunkHost {
     49   SBPrefix host;
     50   SBEntry* entry;
     51 };
     52 
     53 // Container for an add/sub chunk.
     54 struct SBChunk {
     55   SBChunk();
     56   ~SBChunk();
     57 
     58   int chunk_number;
     59   int list_id;
     60   bool is_add;
     61   std::deque<SBChunkHost> hosts;
     62 };
     63 
     64 // Container for a set of chunks.  Interim wrapper to replace use of
     65 // |std::deque<SBChunk>| with something having safer memory semantics.
     66 // management.
     67 // TODO(shess): |SBEntry| is currently a very roundabout way to hold
     68 // things pending storage.  It could be replaced with the structures
     69 // used in SafeBrowsingStore, then lots of bridging code could
     70 // dissappear.
     71 class SBChunkList {
     72  public:
     73   SBChunkList();
     74   ~SBChunkList();
     75 
     76   // Implement that subset of the |std::deque<>| interface which
     77   // callers expect.
     78   bool empty() const { return chunks_.empty(); }
     79   size_t size() { return chunks_.size(); }
     80 
     81   void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
     82   SBChunk& back() { return chunks_.back(); }
     83   SBChunk& front() { return chunks_.front(); }
     84   const SBChunk& front() const { return chunks_.front(); }
     85 
     86   typedef std::vector<SBChunk>::const_iterator const_iterator;
     87   const_iterator begin() const { return chunks_.begin(); }
     88   const_iterator end() const { return chunks_.end(); }
     89 
     90   typedef std::vector<SBChunk>::iterator iterator;
     91   iterator begin() { return chunks_.begin(); }
     92   iterator end() { return chunks_.end(); }
     93 
     94   SBChunk& operator[](size_t n) { return chunks_[n]; }
     95   const SBChunk& operator[](size_t n) const { return chunks_[n]; }
     96 
     97   // Calls |SBEvent::Destroy()| before clearing |chunks_|.
     98   void clear();
     99 
    100  private:
    101   std::vector<SBChunk> chunks_;
    102 
    103   DISALLOW_COPY_AND_ASSIGN(SBChunkList);
    104 };
    105 
    106 // Used when we get a gethash response.
    107 struct SBFullHashResult {
    108   SBFullHash hash;
    109   std::string list_name;
    110   int add_chunk_id;
    111 };
    112 
    113 // Contains information about a list in the database.
    114 struct SBListChunkRanges {
    115   explicit SBListChunkRanges(const std::string& n);
    116 
    117   std::string name;  // The list name.
    118   std::string adds;  // The ranges for add chunks.
    119   std::string subs;  // The ranges for sub chunks.
    120 };
    121 
    122 // Container for deleting chunks from the database.
    123 struct SBChunkDelete {
    124   SBChunkDelete();
    125   ~SBChunkDelete();
    126 
    127   std::string list_name;
    128   bool is_sub_del;
    129   std::vector<ChunkRange> chunk_del;
    130 };
    131 
    132 
    133 // SBEntry ---------------------------------------------------------------------
    134 
    135 // Holds information about the prefixes for a hostkey.  prefixes can either be
    136 // 4 bytes (truncated hash) or 32 bytes (full hash).
    137 // For adds:
    138 //   [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
    139 // For subs:
    140 //   [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)]
    141 //       [add chunk][prefix][add chunk][prefix]
    142 class SBEntry {
    143  public:
    144   enum Type {
    145     ADD_PREFIX,     // 4 byte add entry.
    146     SUB_PREFIX,     // 4 byte sub entry.
    147     ADD_FULL_HASH,  // 32 byte add entry.
    148     SUB_FULL_HASH,  // 32 byte sub entry.
    149   };
    150 
    151   // Creates a SBEntry with the necessary size for the given number of prefixes.
    152   // Caller ownes the object and needs to free it by calling Destroy.
    153   static SBEntry* Create(Type type, int prefix_count);
    154 
    155   // Frees the entry's memory.
    156   void Destroy();
    157 
    158   void set_list_id(int list_id) { data_.list_id = list_id; }
    159   int list_id() const { return data_.list_id; }
    160   void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
    161   int chunk_id() const { return data_.chunk_id; }
    162   int prefix_count() const { return data_.prefix_count; }
    163 
    164   // Returns true if this is a prefix as opposed to a full hash.
    165   bool IsPrefix() const {
    166     return type() == ADD_PREFIX || type() == SUB_PREFIX;
    167   }
    168 
    169   // Returns true if this is an add entry.
    170   bool IsAdd() const {
    171     return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
    172   }
    173 
    174   // Returns true if this is a sub entry.
    175   bool IsSub() const {
    176     return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
    177   }
    178 
    179   // Helper to return the size of the prefixes.
    180   int HashLen() const {
    181     return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
    182   }
    183 
    184   // For add entries, returns the add chunk id.  For sub entries, returns the
    185   // add_chunk id for the prefix at the given index.
    186   int ChunkIdAtPrefix(int index) const;
    187 
    188   // Used for sub chunks to set the chunk id at a given index.
    189   void SetChunkIdAtPrefix(int index, int chunk_id);
    190 
    191   // Return the prefix/full hash at the given index.  Caller is expected to
    192   // call the right function based on the hash length.
    193   const SBPrefix& PrefixAt(int index) const;
    194   const SBFullHash& FullHashAt(int index) const;
    195 
    196   // Return the prefix/full hash at the given index.  Caller is expected to
    197   // call the right function based on the hash length.
    198   void SetPrefixAt(int index, const SBPrefix& prefix);
    199   void SetFullHashAt(int index, const SBFullHash& full_hash);
    200 
    201  private:
    202   // Container for a sub prefix.
    203   struct SBSubPrefix {
    204     int add_chunk;
    205     SBPrefix prefix;
    206   };
    207 
    208   // Container for a sub full hash.
    209   struct SBSubFullHash {
    210     int add_chunk;
    211     SBFullHash prefix;
    212   };
    213 
    214   // Keep the fixed data together in one struct so that we can get its size
    215   // easily.  If any of this is modified, the database will have to be cleared.
    216   struct Data {
    217     int list_id;
    218     // For adds, this is the add chunk number.
    219     // For subs: if prefix_count is 0 then this is the add chunk that this sub
    220     //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
    221     //     or sub_full_hashes is used for each corresponding prefix.
    222     int chunk_id;
    223     Type type;
    224     int prefix_count;
    225   };
    226 
    227   SBEntry();
    228   ~SBEntry();
    229 
    230   // Helper to return the size of each prefix entry (i.e. for subs this
    231   // includes an add chunk id).
    232   static int PrefixSize(Type type);
    233 
    234   // Helper to return how much memory a given Entry would require.
    235   static int Size(Type type, int prefix_count);
    236 
    237   // Returns how many bytes this entry is.
    238   int Size() const;
    239 
    240   Type type() const { return data_.type; }
    241 
    242   void set_prefix_count(int count) { data_.prefix_count = count; }
    243   void set_type(Type type) { data_.type = type; }
    244 
    245   // The prefixes union must follow the fixed data so that they're contiguous
    246   // in memory.
    247   Data data_;
    248   union {
    249     SBPrefix add_prefixes_[1];
    250     SBSubPrefix sub_prefixes_[1];
    251     SBFullHash add_full_hashes_[1];
    252     SBSubFullHash sub_full_hashes_[1];
    253   };
    254 };
    255 
    256 
    257 // Utility functions -----------------------------------------------------------
    258 
    259 namespace safe_browsing_util {
    260 
    261 // SafeBrowsing list names.
    262 extern const char kMalwareList[];
    263 extern const char kPhishingList[];
    264 // Binary Download list names.
    265 extern const char kBinUrlList[];
    266 extern const char kBinHashList[];
    267 // SafeBrowsing client-side detection whitelist list name.
    268 extern const char kCsdWhiteList[];
    269 
    270 enum ListType {
    271   INVALID = -1,
    272   MALWARE = 0,
    273   PHISH = 1,
    274   BINURL = 2,
    275   BINHASH = 3,
    276   CSDWHITELIST = 4,
    277 };
    278 
    279 // Maps a list name to ListType.
    280 int GetListId(const std::string& name);
    281 // Maps a ListId to list name. Return false if fails.
    282 bool GetListName(int list_id, std::string* list);
    283 
    284 
    285 // Canonicalizes url as per Google Safe Browsing Specification.
    286 // See section 6.1 in
    287 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
    288 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
    289                      std::string* canonicalized_path,
    290                      std::string* canonicalized_query);
    291 
    292 // Given a URL, returns all the hosts we need to check.  They are returned
    293 // in order of size (i.e. b.c is first, then a.b.c).
    294 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
    295 
    296 // Given a URL, returns all the paths we need to check.
    297 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
    298 
    299 int GetHashIndex(const SBFullHash& hash,
    300                  const std::vector<SBFullHashResult>& full_hashes);
    301 
    302 // Given a URL, compare all the possible host + path full hashes to the set of
    303 // provided full hashes.  Returns the index of the match if one is found, or -1
    304 // otherwise.
    305 int GetUrlHashIndex(const GURL& url,
    306                     const std::vector<SBFullHashResult>& full_hashes);
    307 
    308 bool IsPhishingList(const std::string& list_name);
    309 bool IsMalwareList(const std::string& list_name);
    310 bool IsBadbinurlList(const std::string& list_name);
    311 bool IsBadbinhashList(const std::string& list_name);
    312 
    313 // Returns 'true' if 'mac' can be verified using 'key' and 'data'.
    314 bool VerifyMAC(const std::string& key,
    315                const std::string& mac,
    316                const char* data,
    317                int data_length);
    318 
    319 GURL GeneratePhishingReportUrl(const std::string& report_page,
    320                                const std::string& url_to_report);
    321 
    322 void StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out);
    323 std::string SBFullHashToString(const SBFullHash& hash_out);
    324 }  // namespace safe_browsing_util
    325 
    326 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
    327