Home | History | Annotate | Download | only in safe_browsing
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Utilities for the SafeBrowsing code.
      6 
      7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
      8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
      9 
     10 #include <cstring>
     11 #include <deque>
     12 #include <set>
     13 #include <string>
     14 #include <vector>
     15 
     16 #include "base/basictypes.h"
     17 #include "chrome/browser/safe_browsing/chunk_range.h"
     18 
     19 class GURL;
     20 
     21 class SBEntry;
     22 
     23 // A truncated hash's type.
     24 typedef int32 SBPrefix;
     25 
     26 // Container for holding a chunk URL and the list it belongs to.
     27 struct ChunkUrl {
     28   std::string url;
     29   std::string list_name;
     30 };
     31 
     32 // A full hash.
     33 union SBFullHash {
     34   char full_hash[32];
     35   SBPrefix prefix;
     36 };
     37 
     38 inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) {
     39   return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0;
     40 }
     41 
     42 inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) {
     43   return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0;
     44 }
     45 
     46 // Container for information about a specific host in an add/sub chunk.
     47 struct SBChunkHost {
     48   SBPrefix host;
     49   SBEntry* entry;
     50 };
     51 
     52 // Container for an add/sub chunk.
     53 struct SBChunk {
     54   SBChunk();
     55   ~SBChunk();
     56 
     57   int chunk_number;
     58   int list_id;
     59   bool is_add;
     60   std::deque<SBChunkHost> hosts;
     61 };
     62 
     63 // Container for a set of chunks.  Interim wrapper to replace use of
     64 // |std::deque<SBChunk>| with something having safer memory semantics.
     65 // management.
     66 // TODO(shess): |SBEntry| is currently a very roundabout way to hold
     67 // things pending storage.  It could be replaced with the structures
     68 // used in SafeBrowsingStore, then lots of bridging code could
     69 // dissappear.
     70 class SBChunkList {
     71  public:
     72   SBChunkList();
     73   ~SBChunkList();
     74 
     75   // Implement that subset of the |std::deque<>| interface which
     76   // callers expect.
     77   bool empty() const { return chunks_.empty(); }
     78   size_t size() { return chunks_.size(); }
     79 
     80   void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
     81   SBChunk& back() { return chunks_.back(); }
     82   SBChunk& front() { return chunks_.front(); }
     83   const SBChunk& front() const { return chunks_.front(); }
     84 
     85   typedef std::vector<SBChunk>::const_iterator const_iterator;
     86   const_iterator begin() const { return chunks_.begin(); }
     87   const_iterator end() const { return chunks_.end(); }
     88 
     89   typedef std::vector<SBChunk>::iterator iterator;
     90   iterator begin() { return chunks_.begin(); }
     91   iterator end() { return chunks_.end(); }
     92 
     93   SBChunk& operator[](size_t n) { return chunks_[n]; }
     94   const SBChunk& operator[](size_t n) const { return chunks_[n]; }
     95 
     96   // Calls |SBEvent::Destroy()| before clearing |chunks_|.
     97   void clear();
     98 
     99  private:
    100   std::vector<SBChunk> chunks_;
    101 
    102   DISALLOW_COPY_AND_ASSIGN(SBChunkList);
    103 };
    104 
    105 // Used when we get a gethash response.
    106 struct SBFullHashResult {
    107   SBFullHash hash;
    108   std::string list_name;
    109   int add_chunk_id;
    110 };
    111 
    112 // Contains information about a list in the database.
    113 struct SBListChunkRanges {
    114   explicit SBListChunkRanges(const std::string& n);
    115 
    116   std::string name;  // The list name.
    117   std::string adds;  // The ranges for add chunks.
    118   std::string subs;  // The ranges for sub chunks.
    119 };
    120 
    121 // Container for deleting chunks from the database.
    122 struct SBChunkDelete {
    123   SBChunkDelete();
    124   ~SBChunkDelete();
    125 
    126   std::string list_name;
    127   bool is_sub_del;
    128   std::vector<ChunkRange> chunk_del;
    129 };
    130 
    131 // Different types of threats that SafeBrowsing protects against.
    132 enum SBThreatType {
    133   // No threat at all.
    134   SB_THREAT_TYPE_SAFE,
    135 
    136   // The URL is being used for phishing.
    137   SB_THREAT_TYPE_URL_PHISHING,
    138 
    139   // The URL hosts malware.
    140   SB_THREAT_TYPE_URL_MALWARE,
    141 
    142   // The download URL is malware.
    143   SB_THREAT_TYPE_BINARY_MALWARE_URL,
    144 
    145   // The hash of the download contents is malware.
    146   SB_THREAT_TYPE_BINARY_MALWARE_HASH,
    147 
    148   // Url detected by the client-side phishing model.  Note that unlike the
    149   // above values, this does not correspond to a downloaded list.
    150   SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL,
    151 
    152   // The Chrome extension or app (given by its ID) is malware.
    153   SB_THREAT_TYPE_EXTENSION,
    154 
    155   // Url detected by the client-side malware IP list. This IP list is part
    156   // of the client side detection model.
    157   SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL,
    158 };
    159 
    160 // SBEntry ---------------------------------------------------------------------
    161 
    162 // Holds information about the prefixes for a hostkey.  prefixes can either be
    163 // 4 bytes (truncated hash) or 32 bytes (full hash).
    164 // For adds:
    165 //   [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
    166 // For subs:
    167 //   [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)]
    168 //       [add chunk][prefix][add chunk][prefix]
    169 class SBEntry {
    170  public:
    171   enum Type {
    172     ADD_PREFIX,     // 4 byte add entry.
    173     SUB_PREFIX,     // 4 byte sub entry.
    174     ADD_FULL_HASH,  // 32 byte add entry.
    175     SUB_FULL_HASH,  // 32 byte sub entry.
    176   };
    177 
    178   // Creates a SBEntry with the necessary size for the given number of prefixes.
    179   // Caller ownes the object and needs to free it by calling Destroy.
    180   static SBEntry* Create(Type type, int prefix_count);
    181 
    182   // Frees the entry's memory.
    183   void Destroy();
    184 
    185   void set_list_id(int list_id) { data_.list_id = list_id; }
    186   int list_id() const { return data_.list_id; }
    187   void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
    188   int chunk_id() const { return data_.chunk_id; }
    189   int prefix_count() const { return data_.prefix_count; }
    190 
    191   // Returns true if this is a prefix as opposed to a full hash.
    192   bool IsPrefix() const {
    193     return type() == ADD_PREFIX || type() == SUB_PREFIX;
    194   }
    195 
    196   // Returns true if this is an add entry.
    197   bool IsAdd() const {
    198     return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
    199   }
    200 
    201   // Returns true if this is a sub entry.
    202   bool IsSub() const {
    203     return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
    204   }
    205 
    206   // Helper to return the size of the prefixes.
    207   int HashLen() const {
    208     return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
    209   }
    210 
    211   // For add entries, returns the add chunk id.  For sub entries, returns the
    212   // add_chunk id for the prefix at the given index.
    213   int ChunkIdAtPrefix(int index) const;
    214 
    215   // Used for sub chunks to set the chunk id at a given index.
    216   void SetChunkIdAtPrefix(int index, int chunk_id);
    217 
    218   // Return the prefix/full hash at the given index.  Caller is expected to
    219   // call the right function based on the hash length.
    220   const SBPrefix& PrefixAt(int index) const;
    221   const SBFullHash& FullHashAt(int index) const;
    222 
    223   // Return the prefix/full hash at the given index.  Caller is expected to
    224   // call the right function based on the hash length.
    225   void SetPrefixAt(int index, const SBPrefix& prefix);
    226   void SetFullHashAt(int index, const SBFullHash& full_hash);
    227 
    228  private:
    229   // Container for a sub prefix.
    230   struct SBSubPrefix {
    231     int add_chunk;
    232     SBPrefix prefix;
    233   };
    234 
    235   // Container for a sub full hash.
    236   struct SBSubFullHash {
    237     int add_chunk;
    238     SBFullHash prefix;
    239   };
    240 
    241   // Keep the fixed data together in one struct so that we can get its size
    242   // easily.  If any of this is modified, the database will have to be cleared.
    243   struct Data {
    244     int list_id;
    245     // For adds, this is the add chunk number.
    246     // For subs: if prefix_count is 0 then this is the add chunk that this sub
    247     //     refers to.  Otherwise it's ignored, and the add_chunk in sub_prefixes
    248     //     or sub_full_hashes is used for each corresponding prefix.
    249     int chunk_id;
    250     Type type;
    251     int prefix_count;
    252   };
    253 
    254   SBEntry();
    255   ~SBEntry();
    256 
    257   // Helper to return the size of each prefix entry (i.e. for subs this
    258   // includes an add chunk id).
    259   static int PrefixSize(Type type);
    260 
    261   // Helper to return how much memory a given Entry would require.
    262   static int Size(Type type, int prefix_count);
    263 
    264   // Returns how many bytes this entry is.
    265   int Size() const;
    266 
    267   Type type() const { return data_.type; }
    268 
    269   void set_prefix_count(int count) { data_.prefix_count = count; }
    270   void set_type(Type type) { data_.type = type; }
    271 
    272   // The prefixes union must follow the fixed data so that they're contiguous
    273   // in memory.
    274   Data data_;
    275   union {
    276     SBPrefix add_prefixes_[1];
    277     SBSubPrefix sub_prefixes_[1];
    278     SBFullHash add_full_hashes_[1];
    279     SBSubFullHash sub_full_hashes_[1];
    280   };
    281 };
    282 
    283 
    284 // Utility functions -----------------------------------------------------------
    285 
    286 namespace safe_browsing_util {
    287 
    288 // SafeBrowsing list names.
    289 extern const char kMalwareList[];
    290 extern const char kPhishingList[];
    291 // Binary Download list names.
    292 extern const char kBinUrlList[];
    293 extern const char kBinHashList[];
    294 // SafeBrowsing client-side detection whitelist list name.
    295 extern const char kCsdWhiteList[];
    296 // SafeBrowsing download whitelist list name.
    297 extern const char kDownloadWhiteList[];
    298 // SafeBrowsing extension list name.
    299 extern const char kExtensionBlacklist[];
    300 // SafeBrowsing side-effect free whitelist name.
    301 extern const char kSideEffectFreeWhitelist[];
    302 // SafeBrowsing csd malware IP blacklist name.
    303 extern const char kIPBlacklist[];
    304 
    305 // This array must contain all Safe Browsing lists.
    306 extern const char* kAllLists[10];
    307 
    308 enum ListType {
    309   INVALID = -1,
    310   MALWARE = 0,
    311   PHISH = 1,
    312   BINURL = 2,
    313   BINHASH = 3,
    314   CSDWHITELIST = 4,
    315   // SafeBrowsing lists are stored in pairs.  Keep ListType 5
    316   // available for a potential second list that we would store in the
    317   // csd-whitelist store file.
    318   DOWNLOADWHITELIST = 6,
    319   // See above comment. Leave 7 available.
    320   EXTENSIONBLACKLIST = 8,
    321   // See above comment. Leave 9 available.
    322   SIDEEFFECTFREEWHITELIST = 10,
    323   // See above comment. Leave 11 available.
    324   IPBLACKLIST = 12,
    325   // See above comment.  Leave 13 available.
    326 };
    327 
    328 // Maps a list name to ListType.
    329 ListType GetListId(const std::string& name);
    330 
    331 // Maps a ListId to list name. Return false if fails.
    332 bool GetListName(ListType list_id, std::string* list);
    333 
    334 // Canonicalizes url as per Google Safe Browsing Specification.
    335 // See section 6.1 in
    336 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
    337 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
    338                      std::string* canonicalized_path,
    339                      std::string* canonicalized_query);
    340 
    341 // Given a URL, returns all the hosts we need to check.  They are returned
    342 // in order of size (i.e. b.c is first, then a.b.c).
    343 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
    344 
    345 // Given a URL, returns all the paths we need to check.
    346 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
    347 
    348 // Given a URL, returns all the patterns we need to check.
    349 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
    350 
    351 int GetHashIndex(const SBFullHash& hash,
    352                  const std::vector<SBFullHashResult>& full_hashes);
    353 
    354 // Given a URL, compare all the possible host + path full hashes to the set of
    355 // provided full hashes.  Returns the index of the match if one is found, or -1
    356 // otherwise.
    357 int GetUrlHashIndex(const GURL& url,
    358                     const std::vector<SBFullHashResult>& full_hashes);
    359 
    360 bool IsPhishingList(const std::string& list_name);
    361 bool IsMalwareList(const std::string& list_name);
    362 bool IsBadbinurlList(const std::string& list_name);
    363 bool IsBadbinhashList(const std::string& list_name);
    364 bool IsExtensionList(const std::string& list_name);
    365 
    366 GURL GeneratePhishingReportUrl(const std::string& report_page,
    367                                const std::string& url_to_report,
    368                                bool is_client_side_detection);
    369 
    370 SBFullHash StringToSBFullHash(const std::string& hash_in);
    371 std::string SBFullHashToString(const SBFullHash& hash_out);
    372 
    373 }  // namespace safe_browsing_util
    374 
    375 #endif  // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
    376