// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Helper class which handles communication with the SafeBrowsing backends for
// client-side phishing detection.  This class is used to fetch the client-side
// model and send it to all renderers.  This class is also used to send a ping
// back to Google to verify if a particular site is really phishing or not.
// It additionally exposes a parallel entry point for malware reports (see
// SendClientReportMalwareRequest()).
//
// This class is not thread-safe and expects all calls to be made on the UI
// thread.  We also expect that the calling thread runs a message loop.

#ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_
#define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_

#include <map>
#include <queue>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "base/basictypes.h"
#include "base/callback_forward.h"
#include "base/gtest_prod_util.h"
#include "base/memory/linked_ptr.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/time/time.h"
#include "content/public/browser/notification_observer.h"
#include "content/public/browser/notification_registrar.h"
#include "net/base/net_util.h"
#include "net/url_request/url_fetcher_delegate.h"
#include "url/gurl.h"

class SafeBrowsingService;

namespace base {
class TimeDelta;
}

namespace content {
class RenderProcessHost;
}

namespace net {
class URLFetcher;
class URLRequestContextGetter;
class URLRequestStatus;
typedef std::vector<std::string> ResponseCookies;
}  // namespace net

namespace safe_browsing {
class ClientMalwareRequest;
class ClientPhishingRequest;
class ClientPhishingResponse;
class ClientSideModel;

// Service object that fetches the client-side detection model, pushes it to
// renderers, and sends phishing / malware report requests to the SafeBrowsing
// servers.  Instances are obtained through Create(); the constructor is
// protected.
class ClientSideDetectionService : public net::URLFetcherDelegate,
                                   public content::NotificationObserver {
 public:
  // Callback invoked with the server's phishing verdict.
  // void(GURL phishing_url, bool is_phishing).
  typedef base::Callback<void(GURL, bool)> ClientReportPhishingRequestCallback;
  // Callback invoked with the server's malware verdict.
  // void(GURL original_url, GURL malware_url, bool is_malware).
  typedef base::Callback<void(GURL, GURL, bool)>
      ClientReportMalwareRequestCallback;

  virtual ~ClientSideDetectionService();

  // Creates a client-side detection service.  The service is initially
  // disabled, use SetEnabledAndRefreshState() to start it.  The caller takes
  // ownership of the object.  This function may return NULL, so callers must
  // check the result before using it.
  static ClientSideDetectionService* Create(
      net::URLRequestContextGetter* request_context_getter);

  // Enables or disables the service, and refreshes the state of all renderers.
  // This is usually called by the SafeBrowsingService, which tracks whether
  // any profile uses these services at all.  Disabling cancels any pending
  // requests; existing ClientSideDetectionHosts will have their callbacks
  // called with "false" verdicts.  Enabling starts downloading the model after
  // a delay.  In all cases, each render process is updated to match the state
  // of the SafeBrowsing preference for that profile.
  void SetEnabledAndRefreshState(bool enabled);

  // Returns whether the service is currently enabled (the value of
  // |enabled_|).
  bool enabled() const {
    return enabled_;
  }

  // From the net::URLFetcherDelegate interface.
  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;

  // content::NotificationObserver overrides:
  virtual void Observe(int type,
                       const content::NotificationSource& source,
                       const content::NotificationDetails& details) OVERRIDE;

  // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest.
  // The URL scheme of the |url()| in the request should be HTTP.  This method
  // takes ownership of the |verdict| as well as the |callback| and calls the
  // callback once the result has come back from the server or if an error
  // occurs during the fetch.  If the service is disabled or an error occurs
  // the phishing verdict will always be false.  The callback is always called
  // after SendClientReportPhishingRequest() returns and on the same thread as
  // SendClientReportPhishingRequest() was called.  You may set |callback| to
  // NULL if you don't care about the server verdict.
  virtual void SendClientReportPhishingRequest(
      ClientPhishingRequest* verdict,
      const ClientReportPhishingRequestCallback& callback);

  // Malware counterpart of SendClientReportPhishingRequest(): sends a
  // ClientMalwareRequest instead of a ClientPhishingRequest.
  // NOTE(review): the ownership and callback-timing rules are presumably the
  // same as for the phishing variant -- confirm against the implementation.
  virtual void SendClientReportMalwareRequest(
      ClientMalwareRequest* verdict,
      const ClientReportMalwareRequestCallback& callback);

  // Returns true if the given IP address string falls within a private
  // (unroutable) network block.  Pages which are hosted on these IP addresses
  // are exempt from client-side phishing detection.  This is called by the
  // ClientSideDetectionHost prior to sending the renderer a
  // SafeBrowsingMsg_StartPhishingDetection IPC.
  //
  // ip_address should be a dotted IPv4 address, or an unbracketed IPv6
  // address.
  virtual bool IsPrivateIPAddress(const std::string& ip_address) const;

  // Returns true and sets |is_phishing| if |url| is in the cache and the
  // cached entry is still valid.
  virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing);

  // Returns true if |url| is in the cache.
  virtual bool IsInCache(const GURL& url);

  // Returns true if we have sent more than kMaxReportsPerInterval phishing
  // reports in the last reporting interval (see kReportsIntervalDays).
  virtual bool OverPhishingReportLimit();

  // Returns true if we have sent more than kMaxReportsPerInterval malware
  // reports in the last reporting interval (see kReportsIntervalDays).
  virtual bool OverMalwareReportLimit();

 protected:
  // Use Create() method to create an instance of this object.
  explicit ClientSideDetectionService(
      net::URLRequestContextGetter* request_context_getter);

  // Enum used to keep stats about why we fail to get the client model.
  // The enumerator values are implicit, so their order matters; only append
  // new values immediately before MODEL_STATUS_MAX.
  enum ClientModelStatus {
    MODEL_SUCCESS,
    MODEL_NOT_CHANGED,
    MODEL_FETCH_FAILED,
    MODEL_EMPTY,
    MODEL_TOO_LARGE,
    MODEL_PARSE_ERROR,
    MODEL_MISSING_FIELDS,
    MODEL_INVALID_VERSION_NUMBER,
    MODEL_BAD_HASH_IDS,
    MODEL_STATUS_MAX  // Always add new values before this one.
  };

  // Starts fetching the model from the network or the cache.  This method
  // is called periodically to check whether a new client model is available
  // for download.
  void StartFetchModel();

  // Schedules the next fetch of the model.  |delay_ms| is, per its name, the
  // delay in milliseconds before that fetch.
  virtual void ScheduleFetchModel(int64 delay_ms);  // Virtual for testing.

  // This method is called when we're done fetching the model either because
  // we hit an error somewhere or because we're actually done fetching and
  // validating the model.  |status| records which of those outcomes occurred.
  virtual void EndFetchModel(ClientModelStatus status);  // Virtual for testing.

 private:
  // Tests that need access to private members and constants.
  friend class ClientSideDetectionServiceTest;
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, FetchModelTest);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, SetBadSubnets);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           SetEnabledAndRefreshState);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest, IsBadIpAddress);
  FRIEND_TEST_ALL_PREFIXES(ClientSideDetectionServiceTest,
                           ModelHasValidHashIds);

  // CacheState holds all information necessary to respond to a caller without
  // actually making a HTTP request.
  struct CacheState {
    // The cached verdict for the URL.
    bool is_phishing;
    // Time associated with this entry; presumably when the verdict was
    // received -- verify against the implementation.
    base::Time timestamp;

    CacheState(bool phish, base::Time time);
  };
  // Maps a URL to its cached verdict.
  typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache;

  // A tuple of (IP address block, prefix size) representing a private
  // IP address range.
  typedef std::pair<net::IPAddressNumber, size_t> AddressRange;

  // Maps an IPv6 subnet mask to a set of hashed IPv6 subnets.  The IPv6
  // subnets are in network order and hashed with sha256.
  typedef std::map<std::string /* subnet mask */,
                   std::set<std::string /* hashed subnet */> > BadSubnetMap;

  // Endpoint URLs and tuning constants; the values are defined outside this
  // header.  The unit suffixes (Bytes, Ms, Days, Minutes) are part of the
  // constant names.
  static const char kClientReportMalwareUrl[];
  static const char kClientReportPhishingUrl[];
  static const char kClientModelUrl[];
  static const size_t kMaxModelSizeBytes;
  static const int kMaxReportsPerInterval;
  static const int kClientModelFetchIntervalMs;
  static const int kInitialClientModelFetchDelayMs;
  static const int kReportsIntervalDays;
  static const int kNegativeCacheIntervalDays;
  static const int kPositiveCacheIntervalMinutes;

  // Starts sending the request to the client-side detection frontends.
  // This method takes ownership of |verdict|.  |callback| is received by
  // const reference, so it is not an owned pointer; presumably it is copied
  // and retained until the request completes -- verify in the implementation.
  void StartClientReportPhishingRequest(
      ClientPhishingRequest* verdict,
      const ClientReportPhishingRequestCallback& callback);

  // Malware counterpart of StartClientReportPhishingRequest().
  void StartClientReportMalwareRequest(
      ClientMalwareRequest* verdict,
      const ClientReportMalwareRequestCallback& callback);

  // Called by OnURLFetchComplete to handle the response from fetching the
  // model.
  void HandleModelResponse(const net::URLFetcher* source,
                           const GURL& url,
                           const net::URLRequestStatus& status,
                           int response_code,
                           const net::ResponseCookies& cookies,
                           const std::string& data);

  // Called by OnURLFetchComplete to handle the server response from
  // sending the client-side phishing request.
  void HandlePhishingVerdict(const net::URLFetcher* source,
                             const GURL& url,
                             const net::URLRequestStatus& status,
                             int response_code,
                             const net::ResponseCookies& cookies,
                             const std::string& data);

  // Called by OnURLFetchComplete to handle the server response from
  // sending the client-side malware request.
  void HandleMalwareVerdict(const net::URLFetcher* source,
                            const GURL& url,
                            const net::URLRequestStatus& status,
                            int response_code,
                            const net::ResponseCookies& cookies,
                            const std::string& data);

  // Invalidate cache results which are no longer useful.
  void UpdateCache();

  // Get the number of malware reports that we have sent over the reporting
  // interval (see kReportsIntervalDays).
  int GetMalwareNumReports();

  // Get the number of phishing reports that we have sent over the reporting
  // interval (see kReportsIntervalDays).
  int GetPhishingNumReports();

  // Get the number of reports that we have sent over the reporting interval
  // (see kReportsIntervalDays), and trims the old elements off
  // |report_times|.
  int GetNumReports(std::queue<base::Time>* report_times);

  // Send the model to the given renderer.
  void SendModelToProcess(content::RenderProcessHost* process);

  // Same as above but sends the model to all renderers.
  void SendModelToRenderers();

  // Reads the bad subnets from the client model and inserts them into
  // |bad_subnets| for faster lookups.  This method is static to simplify
  // testing.
  static void SetBadSubnets(const ClientSideModel& model,
                            BadSubnetMap* bad_subnets);


  // Returns true iff all the hash id's in the client-side model point to
  // valid hashes in the model.
  static bool ModelHasValidHashIds(const ClientSideModel& model);

  // Returns the URL that will be used for phishing requests, built from
  // |report_url|.
  // NOTE(review): presumably also used with kClientReportMalwareUrl for
  // malware requests -- confirm against the callers.
  static GURL GetClientReportUrl(const std::string& report_url);

  // Whether the service is running or not.  When the service is not running,
  // it won't download the model nor report detected phishing URLs.
  bool enabled_;

  // Model state.  NOTE(review): presumably |model_str_| is the serialized
  // form of |model_|, |model_max_age_| bounds how long the model may be used
  // before refetching, and |model_fetcher_| is the in-flight model download
  // -- confirm against the implementation.
  std::string model_str_;
  scoped_ptr<ClientSideModel> model_;
  scoped_ptr<base::TimeDelta> model_max_age_;
  scoped_ptr<net::URLFetcher> model_fetcher_;

  // Map of client report phishing request to the corresponding callback that
  // has to be invoked when the request is done.  The mapped values are raw
  // pointers, so their lifetime is managed manually by the implementation.
  struct ClientReportInfo;
  std::map<const net::URLFetcher*, ClientReportInfo*>
      client_phishing_reports_;
  // Map of client malware ip request to the corresponding callback that
  // has to be invoked when the request is done.  The mapped values are raw
  // pointers, so their lifetime is managed manually by the implementation.
  struct ClientMalwareReportInfo;
  std::map<const net::URLFetcher*, ClientMalwareReportInfo*>
      client_malware_reports_;

  // Cache of completed requests.  Used to satisfy requests for the same urls
  // as long as the next request falls within our caching window (which is
  // determined by kNegativeCacheIntervalDays and
  // kPositiveCacheIntervalMinutes).  The size of this cache is limited by
  // kMaxReportsPerInterval *
  // ceil(InDays(max(negative cache interval, positive cache interval))).
  // NOTE(review): this bound was originally stated in terms of a per-day
  // report limit; confirm it still holds for kReportsIntervalDays != 1.
  // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
  PhishingCache cache_;

  // Timestamp of when we sent a phishing request.  Used to limit the number
  // of phishing requests that we send per reporting interval.
  // TODO(gcasto): Serialize this so that it doesn't reset on browser restart.
  std::queue<base::Time> phishing_report_times_;

  // Timestamp of when we sent a malware request.  Used to limit the number
  // of malware requests that we send per reporting interval.
  std::queue<base::Time> malware_report_times_;

  // The context we use to issue network requests.
  scoped_refptr<net::URLRequestContextGetter> request_context_getter_;

  // Map of bad subnets which are copied from the client model and put into
  // this map to speed up lookups.
  BadSubnetMap bad_subnets_;

  // Registrar for the notifications delivered to Observe().
  content::NotificationRegistrar registrar_;

  // Used to asynchronously call the callbacks for
  // SendClientReportPhishingRequest.  This is the last data member, so it is
  // the first one destroyed.
  base::WeakPtrFactory<ClientSideDetectionService> weak_factory_;

  DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService);
};
}  // namespace safe_browsing

#endif  // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_