1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Helper class which handles communication with the SafeBrowsing backends for 6 // client-side phishing detection. This class is used to fetch the client-side 7 // model and send it to all renderers. This class is also used to send a ping 8 // back to Google to verify if a particular site is really phishing or not. 9 // 10 // This class is not thread-safe and expects all calls to be made on the UI 11 // thread. We also expect that the calling thread runs a message loop. 12 13 #ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_ 14 #define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_ 15 16 #include <map> 17 #include <queue> 18 #include <set> 19 #include <string> 20 #include <utility> 21 #include <vector> 22 23 #include "base/basictypes.h" 24 #include "base/callback_forward.h" 25 #include "base/gtest_prod_util.h" 26 #include "base/memory/linked_ptr.h" 27 #include "base/memory/ref_counted.h" 28 #include "base/memory/scoped_ptr.h" 29 #include "base/memory/weak_ptr.h" 30 #include "base/time/time.h" 31 #include "content/public/browser/notification_observer.h" 32 #include "content/public/browser/notification_registrar.h" 33 #include "net/base/net_util.h" 34 #include "net/url_request/url_fetcher_delegate.h" 35 #include "url/gurl.h" 36 37 class SafeBrowsingService; 38 39 namespace base { 40 class TimeDelta; 41 } 42 43 namespace content { 44 class RenderProcessHost; 45 } 46 47 namespace net { 48 class URLFetcher; 49 class URLRequestContextGetter; 50 class URLRequestStatus; 51 typedef std::vector<std::string> ResponseCookies; 52 } // namespace net 53 54 namespace safe_browsing { 55 class ClientMalwareRequest; 56 class ClientPhishingRequest; 57 class ClientPhishingResponse; 58 class ClientSideModel; 59 60 class ClientSideDetectionService : public net::URLFetcherDelegate, 61 public content::NotificationObserver { 62 public: 63 // void(GURL phishing_url, bool is_phishing). 64 typedef base::Callback<void(GURL, bool)> ClientReportPhishingRequestCallback; 65 typedef base::Callback<void(GURL, bool)> ClientReportMalwareRequestCallback; 66 67 virtual ~ClientSideDetectionService(); 68 69 // Creates a client-side detection service. The service is initially 70 // disabled, use SetEnabledAndRefreshState() to start it. The caller takes 71 // ownership of the object. This function may return NULL. 72 static ClientSideDetectionService* Create( 73 net::URLRequestContextGetter* request_context_getter); 74 75 // Enables or disables the service, and refreshes the state of all renderers. 76 // This is usually called by the SafeBrowsingService, which tracks whether 77 // any profile uses these services at all. Disabling cancels any pending 78 // requests; existing ClientSideDetectionHosts will have their callbacks 79 // called with "false" verdicts. Enabling starts downloading the model after 80 // a delay. In all cases, each render process is updated to match the state 81 // of the SafeBrowsing preference for that profile. 82 void SetEnabledAndRefreshState(bool enabled); 83 84 bool enabled() const { 85 return enabled_; 86 } 87 88 // From the net::URLFetcherDelegate interface. 89 virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE; 90 91 // content::NotificationObserver overrides: 92 virtual void Observe(int type, 93 const content::NotificationSource& source, 94 const content::NotificationDetails& details) OVERRIDE; 95 96 // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest. 97 // The URL scheme of the |url()| in the request should be HTTP. This method 98 // takes ownership of the |verdict| as well as the |callback| and calls the 99 // the callback once the result has come back from the server or if an error 100 // occurs during the fetch. If the service is disabled or an error occurs 101 // the phishing verdict will always be false. The callback is always called 102 // after SendClientReportPhishingRequest() returns and on the same thread as 103 // SendClientReportPhishingRequest() was called. You may set |callback| to 104 // NULL if you don't care about the server verdict. 105 virtual void SendClientReportPhishingRequest( 106 ClientPhishingRequest* verdict, 107 const ClientReportPhishingRequestCallback& callback); 108 109 // Similar to above one, instead send ClientMalwareRequest 110 virtual void SendClientReportMalwareRequest( 111 ClientMalwareRequest* verdict, 112 const ClientReportMalwareRequestCallback& callback); 113 114 // Returns true if the given IP address string falls within a private 115 // (unroutable) network block. Pages which are hosted on these IP addresses 116 // are exempt from client-side phishing detection. This is called by the 117 // ClientSideDetectionHost prior to sending the renderer a 118 // SafeBrowsingMsg_StartPhishingDetection IPC. 119 // 120 // ip_address should be a dotted IPv4 address, or an unbracketed IPv6 121 // address. 122 virtual bool IsPrivateIPAddress(const std::string& ip_address) const; 123 124 // Returns true if the given IP address is on the list of known bad IPs. 125 // ip_address should be a dotted IPv4 address, or an unbracketed IPv6 126 // address. 127 virtual bool IsBadIpAddress(const std::string& ip_address) const; 128 129 // Returns true and sets is_phishing if url is in the cache and valid. 130 virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing); 131 132 // Returns true if the url is in the cache. 133 virtual bool IsInCache(const GURL& url); 134 135 // Returns true if we have sent more than kMaxReportsPerInterval phishing 136 // reports in the last kReportsInterval. 137 virtual bool OverPhishingReportLimit(); 138 139 // Returns true if we have sent more than kMaxReportsPerInterval malware 140 // reports in the last kReportsInterval. 141 virtual bool OverMalwareReportLimit(); 142 143 protected: 144 // Use Create() method to create an instance of this object. 145 explicit ClientSideDetectionService( 146 net::URLRequestContextGetter* request_context_getter); 147 148 // Enum used to keep stats about why we fail to get the client model. 149 enum ClientModelStatus { 150 MODEL_SUCCESS, 151 MODEL_NOT_CHANGED, 152 MODEL_FETCH_FAILED, 153 MODEL_EMPTY, 154 MODEL_TOO_LARGE, 155 MODEL_PARSE_ERROR, 156 MODEL_MISSING_FIELDS, 157 MODEL_INVALID_VERSION_NUMBER, 158 MODEL_BAD_HASH_IDS, 159 MODEL_STATUS_MAX // Always add new values before this one. 160 }; 161 162 // Starts fetching the model from the network or the cache. This method 163 // is called periodically to check whether a new client model is available 164 // for download. 165 void StartFetchModel(); 166 167 // Schedules the next fetch of the model. 168 virtual void ScheduleFetchModel(int64 delay_ms); // Virtual for testing. 169 170 // This method is called when we're done fetching the model either because 171 // we hit an error somewhere or because we're actually done fetch and 172 // validating the model. 173 virtual void EndFetchModel(ClientModelStatus status); // Virtual for testing. 174 175 private: 176 friend class ClientSideDetectionServiceTest; 177 FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, FetchModelTest); 178 FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, SetBadSubnets); 179 FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, 180 SetEnabledAndRefreshState); 181 FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress); 182 FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, 183 ModelHasValidHashIds); 184 185 // CacheState holds all information necessary to respond to a caller without 186 // actually making a HTTP request. 187 struct CacheState { 188 bool is_phishing; 189 base::Time timestamp; 190 191 CacheState(bool phish, base::Time time); 192 }; 193 typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache; 194 195 // A tuple of (IP address block, prefix size) representing a private 196 // IP address range. 197 typedef std::pair<net::IPAddressNumber, size_t> AddressRange; 198 199 // Maps a IPv6 subnet mask to a set of hashed IPv6 subnets. The IPv6 200 // subnets are in network order and hashed with sha256. 201 typedef std::map<std::string /* subnet mask */, 202 std::set<std::string /* hashed subnet */> > BadSubnetMap; 203 204 static const char kClientReportMalwareUrl[]; 205 static const char kClientReportPhishingUrl[]; 206 static const char kClientModelUrl[]; 207 static const size_t kMaxModelSizeBytes; 208 static const int kMaxReportsPerInterval; 209 static const int kClientModelFetchIntervalMs; 210 static const int kInitialClientModelFetchDelayMs; 211 static const int kReportsIntervalDays; 212 static const int kNegativeCacheIntervalDays; 213 static const int kPositiveCacheIntervalMinutes; 214 215 // Starts sending the request to the client-side detection frontends. 216 // This method takes ownership of both pointers. 217 void StartClientReportPhishingRequest( 218 ClientPhishingRequest* verdict, 219 const ClientReportPhishingRequestCallback& callback); 220 221 void StartClientReportMalwareRequest( 222 ClientMalwareRequest* verdict, 223 const ClientReportMalwareRequestCallback& callback); 224 225 // Called by OnURLFetchComplete to handle the response from fetching the 226 // model. 227 void HandleModelResponse(const net::URLFetcher* source, 228 const GURL& url, 229 const net::URLRequestStatus& status, 230 int response_code, 231 const net::ResponseCookies& cookies, 232 const std::string& data); 233 234 // Called by OnURLFetchComplete to handle the server response from 235 // sending the client-side phishing request. 236 void HandlePhishingVerdict(const net::URLFetcher* source, 237 const GURL& url, 238 const net::URLRequestStatus& status, 239 int response_code, 240 const net::ResponseCookies& cookies, 241 const std::string& data); 242 243 // Called by OnURLFetchComplete to handle the server response from 244 // sending the client-side malware request. 245 void HandleMalwareVerdict(const net::URLFetcher* source, 246 const GURL& url, 247 const net::URLRequestStatus& status, 248 int response_code, 249 const net::ResponseCookies& cookies, 250 const std::string& data); 251 252 // Invalidate cache results which are no longer useful. 253 void UpdateCache(); 254 255 // Get the number of malware reports that we have sent over kReportsInterval. 256 int GetMalwareNumReports(); 257 258 // Get the number of phishing reports that we have sent over kReportsInterval. 259 int GetPhishingNumReports(); 260 261 // Get the number of reports that we have sent over kReportsInterval, and 262 // trims off the old elements. 263 int GetNumReports(std::queue<base::Time>* report_times); 264 265 // Initializes the |private_networks_| vector with the network blocks 266 // that we consider non-public IP addresses. Returns true on success. 267 bool InitializePrivateNetworks(); 268 269 // Send the model to the given renderer. 270 void SendModelToProcess(content::RenderProcessHost* process); 271 272 // Same as above but sends the model to all rendereres. 273 void SendModelToRenderers(); 274 275 // Reads the bad subnets from the client model and inserts them into 276 // |bad_subnets| for faster lookups. This method is static to simplify 277 // testing. 278 static void SetBadSubnets(const ClientSideModel& model, 279 BadSubnetMap* bad_subnets); 280 281 282 // Returns true iff all the hash id's in the client-side model point to 283 // valid hashes in the model. 284 static bool ModelHasValidHashIds(const ClientSideModel& model); 285 286 // Returns the URL that will be used for phishing requests. 287 static std::string GetClientReportUrl(const std::string& report_url); 288 289 // Whether the service is running or not. When the service is not running, 290 // it won't download the model nor report detected phishing URLs. 291 bool enabled_; 292 293 std::string model_str_; 294 scoped_ptr<ClientSideModel> model_; 295 scoped_ptr<base::TimeDelta> model_max_age_; 296 scoped_ptr<net::URLFetcher> model_fetcher_; 297 298 // Map of client report phishing request to the corresponding callback that 299 // has to be invoked when the request is done. 300 struct ClientReportInfo; 301 std::map<const net::URLFetcher*, ClientReportInfo*> 302 client_phishing_reports_; 303 std::map<const net::URLFetcher*, ClientReportInfo*> 304 client_malware_reports_; 305 306 // Cache of completed requests. Used to satisfy requests for the same urls 307 // as long as the next request falls within our caching window (which is 308 // determined by kNegativeCacheInterval and kPositiveCacheInterval). The 309 // size of this cache is limited by kMaxReportsPerDay * 310 // ceil(InDays(max(kNegativeCacheInterval, kPositiveCacheInterval))). 311 // TODO(gcasto): Serialize this so that it doesn't reset on browser restart. 312 PhishingCache cache_; 313 314 // Timestamp of when we sent a phishing request. Used to limit the number 315 // of phishing requests that we send in a day. 316 // TODO(gcasto): Serialize this so that it doesn't reset on browser restart. 317 std::queue<base::Time> phishing_report_times_; 318 319 // Timestamp of when we sent a malware request. Used to limit the number 320 // of malware requests that we send in a day. 321 std::queue<base::Time> malware_report_times_; 322 323 // Used to asynchronously call the callbacks for 324 // SendClientReportPhishingRequest. 325 base::WeakPtrFactory<ClientSideDetectionService> weak_factory_; 326 327 // The context we use to issue network requests. 328 scoped_refptr<net::URLRequestContextGetter> request_context_getter_; 329 330 // The network blocks that we consider private IP address ranges. 331 std::vector<AddressRange> private_networks_; 332 333 // Map of bad subnets which are copied from the client model and put into 334 // this map to speed up lookups. 335 BadSubnetMap bad_subnets_; 336 337 content::NotificationRegistrar registrar_; 338 339 DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService); 340 }; 341 } // namepsace safe_browsing 342 343 #endif // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_ 344