1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Helper class which handles communication with the SafeBrowsing backends for 6 // client-side phishing detection. This class can be used to get a file 7 // descriptor to the client-side phishing model and also to send a ping back to 8 // Google to verify if a particular site is really phishing or not. 9 // 10 // This class is not thread-safe and expects all calls to GetModelFile() and 11 // SendClientReportPhishingRequest() to be made on the UI thread. We also 12 // expect that the calling thread runs a message loop and that there is a FILE 13 // thread running to execute asynchronous file operations. 14 15 #ifndef CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_ 16 #define CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_ 17 #pragma once 18 19 #include <map> 20 #include <queue> 21 #include <string> 22 #include <utility> 23 #include <vector> 24 25 #include "base/basictypes.h" 26 #include "base/callback.h" 27 #include "base/file_path.h" 28 #include "base/gtest_prod_util.h" 29 #include "base/memory/linked_ptr.h" 30 #include "base/memory/ref_counted.h" 31 #include "base/memory/scoped_callback_factory.h" 32 #include "base/memory/scoped_ptr.h" 33 #include "base/platform_file.h" 34 #include "base/task.h" 35 #include "base/time.h" 36 #include "chrome/common/net/url_fetcher.h" 37 #include "googleurl/src/gurl.h" 38 #include "net/base/net_util.h" 39 40 namespace net { 41 class URLRequestContextGetter; 42 class URLRequestStatus; 43 } // namespace net 44 45 namespace safe_browsing { 46 class ClientPhishingRequest; 47 48 class ClientSideDetectionService : public URLFetcher::Delegate { 49 public: 50 typedef Callback1<base::PlatformFile>::Type OpenModelDoneCallback; 51 52 typedef Callback2<GURL /* phishing URL */, bool /* is phishing */>::Type 53 ClientReportPhishingRequestCallback; 54 55 virtual ~ClientSideDetectionService(); 56 57 // Creates a client-side detection service and starts fetching the client-side 58 // detection model if necessary. The model will be stored in |model_path|. 59 // The caller takes ownership of the object. This function may return NULL. 60 static ClientSideDetectionService* Create( 61 const FilePath& model_path, 62 net::URLRequestContextGetter* request_context_getter); 63 64 // From the URLFetcher::Delegate interface. 65 virtual void OnURLFetchComplete(const URLFetcher* source, 66 const GURL& url, 67 const net::URLRequestStatus& status, 68 int response_code, 69 const ResponseCookies& cookies, 70 const std::string& data); 71 72 // Gets the model file descriptor once the model is ready and stored 73 // on disk. If there was an error the callback is called and the 74 // platform file is set to kInvalidPlatformFileValue. The 75 // ClientSideDetectionService takes ownership of the |callback|. 76 // The callback is always called after GetModelFile() returns and on the 77 // same thread as GetModelFile() was called. 78 void GetModelFile(OpenModelDoneCallback* callback); 79 80 // Sends a request to the SafeBrowsing servers with the ClientPhishingRequest. 81 // The URL scheme of the |url()| in the request should be HTTP. This method 82 // takes ownership of the |verdict| as well as the |callback| and calls the 83 // the callback once the result has come back from the server or if an error 84 // occurs during the fetch. If an error occurs the phishing verdict will 85 // always be false. The callback is always called after 86 // SendClientReportPhishingRequest() returns and on the same thread as 87 // SendClientReportPhishingRequest() was called. 88 virtual void SendClientReportPhishingRequest( 89 ClientPhishingRequest* verdict, 90 ClientReportPhishingRequestCallback* callback); 91 92 // Returns true if the given IP address string falls within a private 93 // (unroutable) network block. Pages which are hosted on these IP addresses 94 // are exempt from client-side phishing detection. This is called by the 95 // ClientSideDetectionHost prior to sending the renderer a 96 // SafeBrowsingMsg_StartPhishingDetection IPC. 97 // 98 // ip_address should be a dotted IPv4 address, or an unbracketed IPv6 99 // address. 100 virtual bool IsPrivateIPAddress(const std::string& ip_address) const; 101 102 // Returns true and sets is_phishing if url is in the cache and valid. 103 virtual bool GetValidCachedResult(const GURL& url, bool* is_phishing); 104 105 // Returns true if the url is in the cache. 106 virtual bool IsInCache(const GURL& url); 107 108 // Returns true if we have sent more than kMaxReportsPerInterval in the last 109 // kReportsInterval. 110 virtual bool OverReportLimit(); 111 112 protected: 113 // Use Create() method to create an instance of this object. 114 ClientSideDetectionService( 115 const FilePath& model_path, 116 net::URLRequestContextGetter* request_context_getter); 117 118 private: 119 friend class ClientSideDetectionServiceTest; 120 121 enum ModelStatus { 122 // It's unclear whether or not the model was already fetched. 123 UNKNOWN_STATUS, 124 // Model is fetched and is stored on disk. 125 READY_STATUS, 126 // Error occured during fetching or writing. 127 ERROR_STATUS, 128 }; 129 130 // CacheState holds all information necessary to respond to a caller without 131 // actually making a HTTP request. 132 struct CacheState { 133 bool is_phishing; 134 base::Time timestamp; 135 136 CacheState(bool phish, base::Time time); 137 }; 138 typedef std::map<GURL, linked_ptr<CacheState> > PhishingCache; 139 140 // A tuple of (IP address block, prefix size) representing a private 141 // IP address range. 142 typedef std::pair<net::IPAddressNumber, size_t> AddressRange; 143 144 static const char kClientReportPhishingUrl[]; 145 static const char kClientModelUrl[]; 146 static const int kMaxReportsPerInterval; 147 static const base::TimeDelta kReportsInterval; 148 static const base::TimeDelta kNegativeCacheInterval; 149 static const base::TimeDelta kPositiveCacheInterval; 150 151 // Sets the model status and invokes all the pending callbacks in 152 // |open_callbacks_| with the current |model_file_| as parameter. 153 void SetModelStatus(ModelStatus status); 154 155 // Called once the initial open() of the model file is done. If the file 156 // exists we're done and we can call all the pending callbacks. If the 157 // file doesn't exist this method will asynchronously fetch the model 158 // from the server by invoking StartFetchingModel(). 159 void OpenModelFileDone(base::PlatformFileError error_code, 160 base::PassPlatformFile file, 161 bool created); 162 163 // Callback that is invoked once the attempt to create the model 164 // file on disk is done. If the file was created successfully we 165 // start writing the model to disk (asynchronously). Otherwise, we 166 // give up and send an invalid platform file to all the pending callbacks. 167 void CreateModelFileDone(base::PlatformFileError error_code, 168 base::PassPlatformFile file, 169 bool created); 170 171 // Callback is invoked once we're done writing the model file to disk. 172 // If everything went well then |model_file_| is a valid platform file 173 // that can be sent to all the pending callbacks. If an error occurs 174 // we give up and send an invalid platform file to all the pending callbacks. 175 void WriteModelFileDone(base::PlatformFileError error_code, 176 int bytes_written); 177 178 // Helper function which closes the |model_file_| if necessary. 179 void CloseModelFile(); 180 181 // Starts sending the request to the client-side detection frontends. 182 // This method takes ownership of both pointers. 183 void StartClientReportPhishingRequest( 184 ClientPhishingRequest* verdict, 185 ClientReportPhishingRequestCallback* callback); 186 187 // Starts getting the model file. 188 void StartGetModelFile(OpenModelDoneCallback* callback); 189 190 // Called by OnURLFetchComplete to handle the response from fetching the 191 // model. 192 void HandleModelResponse(const URLFetcher* source, 193 const GURL& url, 194 const net::URLRequestStatus& status, 195 int response_code, 196 const ResponseCookies& cookies, 197 const std::string& data); 198 199 // Called by OnURLFetchComplete to handle the server response from 200 // sending the client-side phishing request. 201 void HandlePhishingVerdict(const URLFetcher* source, 202 const GURL& url, 203 const net::URLRequestStatus& status, 204 int response_code, 205 const ResponseCookies& cookies, 206 const std::string& data); 207 208 // Invalidate cache results which are no longer useful. 209 void UpdateCache(); 210 211 // Get the number of phishing reports that we have sent over kReportsInterval 212 int GetNumReports(); 213 214 // Initializes the |private_networks_| vector with the network blocks 215 // that we consider non-public IP addresses. Returns true on success. 216 bool InitializePrivateNetworks(); 217 218 FilePath model_path_; 219 ModelStatus model_status_; 220 base::PlatformFile model_file_; 221 scoped_ptr<URLFetcher> model_fetcher_; 222 scoped_ptr<std::string> tmp_model_string_; 223 std::vector<OpenModelDoneCallback*> open_callbacks_; 224 225 // Map of client report phishing request to the corresponding callback that 226 // has to be invoked when the request is done. 227 struct ClientReportInfo; 228 std::map<const URLFetcher*, ClientReportInfo*> client_phishing_reports_; 229 230 // Cache of completed requests. Used to satisfy requests for the same urls 231 // as long as the next request falls within our caching window (which is 232 // determined by kNegativeCacheInterval and kPositiveCacheInterval). The 233 // size of this cache is limited by kMaxReportsPerDay * 234 // ceil(InDays(max(kNegativeCacheInterval, kPositiveCacheInterval))). 235 // TODO(gcasto): Serialize this so that it doesn't reset on browser restart. 236 PhishingCache cache_; 237 238 // Timestamp of when we sent a phishing request. Used to limit the number 239 // of phishing requests that we send in a day. 240 // TODO(gcasto): Serialize this so that it doesn't reset on browser restart. 241 std::queue<base::Time> phishing_report_times_; 242 243 // Used to asynchronously call the callbacks for GetModelFile and 244 // SendClientReportPhishingRequest. 245 ScopedRunnableMethodFactory<ClientSideDetectionService> method_factory_; 246 247 // The client-side detection service object (this) might go away before some 248 // of the callbacks are done (e.g., asynchronous file operations). The 249 // callback factory will revoke all pending callbacks if this goes away to 250 // avoid a crash. 251 base::ScopedCallbackFactory<ClientSideDetectionService> callback_factory_; 252 253 // The context we use to issue network requests. 254 scoped_refptr<net::URLRequestContextGetter> request_context_getter_; 255 256 // The network blocks that we consider private IP address ranges. 257 std::vector<AddressRange> private_networks_; 258 259 DISALLOW_COPY_AND_ASSIGN(ClientSideDetectionService); 260 }; 261 262 } // namepsace safe_browsing 263 264 #endif // CHROME_BROWSER_SAFE_BROWSING_CLIENT_SIDE_DETECTION_SERVICE_H_ 265