1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // The Safe Browsing service is responsible for downloading anti-phishing and 6 // anti-malware tables and checking urls against them. 7 8 #ifndef CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_ 9 #define CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_ 10 11 #include <deque> 12 #include <map> 13 #include <set> 14 #include <string> 15 #include <vector> 16 17 #include "base/callback.h" 18 #include "base/containers/hash_tables.h" 19 #include "base/memory/ref_counted.h" 20 #include "base/memory/scoped_ptr.h" 21 #include "base/synchronization/lock.h" 22 #include "base/time/time.h" 23 #include "chrome/browser/safe_browsing/protocol_manager.h" 24 #include "chrome/browser/safe_browsing/safe_browsing_util.h" 25 #include "url/gurl.h" 26 27 class SafeBrowsingService; 28 class SafeBrowsingDatabase; 29 30 namespace base { 31 class Thread; 32 } 33 34 namespace net { 35 class URLRequestContext; 36 class URLRequestContextGetter; 37 } 38 39 namespace safe_browsing { 40 class ClientSideDetectionService; 41 class DownloadProtectionService; 42 } 43 44 // Construction needs to happen on the main thread. 45 class SafeBrowsingDatabaseManager 46 : public base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>, 47 public SafeBrowsingProtocolManagerDelegate { 48 public: 49 class Client; 50 51 // Bundle of SafeBrowsing state while performing a URL or hash prefix check. 52 struct SafeBrowsingCheck { 53 // |check_type| should correspond to the type of item that is being 54 // checked, either a URL or a binary hash/URL. We store this for two 55 // purposes: to know which of Client's methods to call when a result is 56 // known, and for logging purposes. It *isn't* used to predict the response 57 // list type, that is information that the server gives us. 58 SafeBrowsingCheck(const std::vector<GURL>& urls, 59 const std::vector<SBFullHash>& full_hashes, 60 Client* client, 61 safe_browsing_util::ListType check_type, 62 const std::vector<SBThreatType>& expected_threats); 63 ~SafeBrowsingCheck(); 64 65 // Either |urls| or |full_hashes| is used to lookup database. |*_results| 66 // are parallel vectors containing the results. They are initialized to 67 // contain SB_THREAT_TYPE_SAFE. 68 std::vector<GURL> urls; 69 std::vector<SBThreatType> url_results; 70 std::vector<SBFullHash> full_hashes; 71 std::vector<SBThreatType> full_hash_results; 72 73 Client* client; 74 bool need_get_hash; 75 base::TimeTicks start; // When check was sent to SB service. 76 safe_browsing_util::ListType check_type; // See comment in constructor. 77 std::vector<SBThreatType> expected_threats; 78 std::vector<SBPrefix> prefix_hits; 79 std::vector<SBFullHashResult> full_hits; 80 81 // Vends weak pointers for TimeoutCallback(). If the response is 82 // received before the timeout fires, factory is destructed and 83 // the timeout won't be fired. 84 // TODO(lzheng): We should consider to use this time out check 85 // for browsing too (instead of implementin in 86 // safe_browsing_resource_handler.cc). 87 scoped_ptr<base::WeakPtrFactory< 88 SafeBrowsingDatabaseManager> > timeout_factory_; 89 90 private: 91 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingCheck); 92 }; 93 94 class Client { 95 public: 96 void OnSafeBrowsingResult(const SafeBrowsingCheck& check); 97 98 protected: 99 virtual ~Client() {} 100 101 // Called when the result of checking a browse URL is known. 102 virtual void OnCheckBrowseUrlResult(const GURL& url, 103 SBThreatType threat_type) {} 104 105 // Called when the result of checking a download URL is known. 106 virtual void OnCheckDownloadUrlResult(const std::vector<GURL>& url_chain, 107 SBThreatType threat_type) {} 108 109 // Called when the result of checking a download binary hash is known. 110 virtual void OnCheckDownloadHashResult(const std::string& hash, 111 SBThreatType threat_type) {} 112 113 // Called when the result of checking a set of extensions is known. 114 virtual void OnCheckExtensionsResult( 115 const std::set<std::string>& threats) {} 116 }; 117 118 // Creates the safe browsing service. Need to initialize before using. 119 explicit SafeBrowsingDatabaseManager( 120 const scoped_refptr<SafeBrowsingService>& service); 121 122 // Returns true if the url's scheme can be checked. 123 bool CanCheckUrl(const GURL& url) const; 124 125 // Returns whether download protection is enabled. 126 bool download_protection_enabled() const { 127 return enable_download_protection_; 128 } 129 130 // Called on the IO thread to check if the given url is safe or not. If we 131 // can synchronously determine that the url is safe, CheckUrl returns true. 132 // Otherwise it returns false, and "client" is called asynchronously with the 133 // result when it is ready. 134 virtual bool CheckBrowseUrl(const GURL& url, Client* client); 135 136 // Check if the prefix for |url| is in safebrowsing download add lists. 137 // Result will be passed to callback in |client|. 138 virtual bool CheckDownloadUrl(const std::vector<GURL>& url_chain, 139 Client* client); 140 141 // Check if the prefix for |full_hash| is in safebrowsing binhash add lists. 142 // Result will be passed to callback in |client|. 143 virtual bool CheckDownloadHash(const std::string& full_hash, Client* client); 144 145 // Check which prefixes in |extension_ids| are in the safebrowsing blacklist. 146 // Returns true if not, false if further checks need to be made in which case 147 // the result will be passed to |client|. 148 virtual bool CheckExtensionIDs(const std::set<std::string>& extension_ids, 149 Client* client); 150 151 // Check if the given url is on the side-effect free whitelist. 152 // Can be called on any thread. Returns false if the check cannot be performed 153 // (e.g. because we are disabled or because of an invalid scheme in the URL). 154 // Otherwise, returns true if the URL is on the whitelist based on matching 155 // the hash prefix only (so there may be false positives). 156 virtual bool CheckSideEffectFreeWhitelistUrl(const GURL& url); 157 158 // Check if the |url| matches any of the full-length hashes from the 159 // client-side phishing detection whitelist. Returns true if there was a 160 // match and false otherwise. To make sure we are conservative we will return 161 // true if an error occurs. This method is expected to be called on the IO 162 // thread. 163 virtual bool MatchCsdWhitelistUrl(const GURL& url); 164 165 // Check if the given IP address (either IPv4 or IPv6) matches the malware 166 // IP blacklist. 167 virtual bool MatchMalwareIP(const std::string& ip_address); 168 169 // Check if the |url| matches any of the full-length hashes from the 170 // download whitelist. Returns true if there was a match and false otherwise. 171 // To make sure we are conservative we will return true if an error occurs. 172 // This method is expected to be called on the IO thread. 173 virtual bool MatchDownloadWhitelistUrl(const GURL& url); 174 175 // Check if |str| matches any of the full-length hashes from the download 176 // whitelist. Returns true if there was a match and false otherwise. 177 // To make sure we are conservative we will return true if an error occurs. 178 // This method is expected to be called on the IO thread. 179 virtual bool MatchDownloadWhitelistString(const std::string& str); 180 181 // Check if the CSD malware IP matching kill switch is turned on. 182 virtual bool IsMalwareKillSwitchOn(); 183 184 // Called on the IO thread to cancel a pending check if the result is no 185 // longer needed. 186 void CancelCheck(Client* client); 187 188 // Called on the IO thread when the SafeBrowsingProtocolManager has received 189 // the full hash results for prefix hits detected in the database. 190 void HandleGetHashResults( 191 SafeBrowsingCheck* check, 192 const std::vector<SBFullHashResult>& full_hashes, 193 bool can_cache); 194 195 // Called on the IO thread to release memory. 196 void PurgeMemory(); 197 198 // Log the user perceived delay caused by SafeBrowsing. This delay is the time 199 // delta starting from when we would have started reading data from the 200 // network, and ending when the SafeBrowsing check completes indicating that 201 // the current page is 'safe'. 202 void LogPauseDelay(base::TimeDelta time); 203 204 // Called to initialize objects that are used on the io_thread. This may be 205 // called multiple times during the life of the DatabaseManager. Should be 206 // called on IO thread. 207 void StartOnIOThread(); 208 209 // Called to stop or shutdown operations on the io_thread. This may be called 210 // multiple times during the life of the DatabaseManager. Should be called 211 // on IO thread. If shutdown is true, the manager is disabled permanently. 212 void StopOnIOThread(bool shutdown); 213 214 protected: 215 virtual ~SafeBrowsingDatabaseManager(); 216 217 // protected for tests. 218 void NotifyDatabaseUpdateFinished(bool update_succeeded); 219 220 private: 221 friend class base::RefCountedThreadSafe<SafeBrowsingDatabaseManager>; 222 friend class SafeBrowsingServerTest; 223 friend class SafeBrowsingServiceTest; 224 friend class SafeBrowsingServiceTestHelper; 225 friend class SafeBrowsingDatabaseManagerTest; 226 227 typedef std::set<SafeBrowsingCheck*> CurrentChecks; 228 typedef std::vector<SafeBrowsingCheck*> GetHashRequestors; 229 typedef base::hash_map<SBPrefix, GetHashRequestors> GetHashRequests; 230 231 // Clients that we've queued up for checking later once the database is ready. 232 struct QueuedCheck { 233 QueuedCheck(const safe_browsing_util::ListType check_type, 234 Client* client, 235 const GURL& url, 236 const std::vector<SBThreatType>& expected_threats, 237 const base::TimeTicks& start); 238 ~QueuedCheck(); 239 safe_browsing_util::ListType check_type; 240 Client* client; 241 GURL url; 242 std::vector<SBThreatType> expected_threats; 243 base::TimeTicks start; // When check was queued. 244 }; 245 246 // Called to stop operations on the io_thread. This may be called multiple 247 // times during the life of the DatabaseManager. Should be called on IO 248 // thread. 249 void DoStopOnIOThread(); 250 251 // Returns whether |database_| exists and is accessible. 252 bool DatabaseAvailable() const; 253 254 // Called on the IO thread. If the database does not exist, queues up a call 255 // on the db thread to create it. Returns whether the database is available. 256 // 257 // Note that this is only needed outside the db thread, since functions on the 258 // db thread can call GetDatabase() directly. 259 bool MakeDatabaseAvailable(); 260 261 // Called on the IO thread to try to close the database, freeing the memory 262 // associated with it. The database will be automatically reopened as needed. 263 // 264 // NOTE: Actual database closure is asynchronous, and until it happens, the IO 265 // thread is not allowed to access it; may not actually trigger a close if one 266 // is already pending or doing so would cause problems. 267 void CloseDatabase(); 268 269 // Should only be called on db thread as SafeBrowsingDatabase is not 270 // threadsafe. 271 SafeBrowsingDatabase* GetDatabase(); 272 273 // Called on the IO thread with the check result. 274 void OnCheckDone(SafeBrowsingCheck* info); 275 276 // Called on the database thread to retrieve chunks. 277 void GetAllChunksFromDatabase(GetChunksCallback callback); 278 279 // Called on the IO thread with the results of all chunks. 280 void OnGetAllChunksFromDatabase(const std::vector<SBListChunkRanges>& lists, 281 bool database_error, 282 GetChunksCallback callback); 283 284 // Called on the IO thread after the database reports that it added a chunk. 285 void OnAddChunksComplete(AddChunksCallback callback); 286 287 // Notification that the database is done loading its bloom filter. We may 288 // have had to queue checks until the database is ready, and if so, this 289 // checks them. 290 void DatabaseLoadComplete(); 291 292 // Called on the database thread to add/remove chunks and host keys. 293 // Callee will free the data when it's done. 294 void AddDatabaseChunks(const std::string& list, SBChunkList* chunks, 295 AddChunksCallback callback); 296 297 void DeleteDatabaseChunks(std::vector<SBChunkDelete>* chunk_deletes); 298 299 static SBThreatType GetThreatTypeFromListname(const std::string& list_name); 300 301 void NotifyClientBlockingComplete(Client* client, bool proceed); 302 303 void DatabaseUpdateFinished(bool update_succeeded); 304 305 // Called on the db thread to close the database. See CloseDatabase(). 306 void OnCloseDatabase(); 307 308 // Runs on the db thread to reset the database. We assume that resetting the 309 // database is a synchronous operation. 310 void OnResetDatabase(); 311 312 // Store in-memory the GetHash response. Runs on the database thread. 313 void CacheHashResults(const std::vector<SBPrefix>& prefixes, 314 const std::vector<SBFullHashResult>& full_hashes); 315 316 // Internal worker function for processing full hashes. 317 void OnHandleGetHashResults(SafeBrowsingCheck* check, 318 const std::vector<SBFullHashResult>& full_hashes); 319 320 // Run one check against |full_hashes|. Returns |true| if the check 321 // finds a match in |full_hashes|. 322 bool HandleOneCheck(SafeBrowsingCheck* check, 323 const std::vector<SBFullHashResult>& full_hashes); 324 325 // Checks the download hash on safe_browsing_thread_. 326 void CheckDownloadHashOnSBThread(SafeBrowsingCheck* check); 327 328 // Invoked by CheckDownloadUrl. It checks the download URL on 329 // safe_browsing_thread_. 330 void CheckDownloadUrlOnSBThread(SafeBrowsingCheck* check); 331 332 // The callback function when a safebrowsing check is timed out. Client will 333 // be notified that the safebrowsing check is SAFE when this happens. 334 void TimeoutCallback(SafeBrowsingCheck* check); 335 336 // Calls the Client's callback on IO thread after CheckDownloadUrl finishes. 337 void CheckDownloadUrlDone(SafeBrowsingCheck* check); 338 339 // Calls the Client's callback on IO thread after CheckDownloadHash finishes. 340 void CheckDownloadHashDone(SafeBrowsingCheck* check); 341 342 // Checks all extension ID hashes on safe_browsing_thread_. 343 void CheckExtensionIDsOnSBThread(SafeBrowsingCheck* check); 344 345 // Helper function that calls safe browsing client and cleans up |checks_|. 346 void SafeBrowsingCheckDone(SafeBrowsingCheck* check); 347 348 // Helper function to set |check| with default values and start a safe 349 // browsing check with timeout of |timeout|. |task| will be called on 350 // success, otherwise TimeoutCallback will be called. 351 void StartSafeBrowsingCheck(SafeBrowsingCheck* check, 352 const base::Closure& task); 353 354 // SafeBrowsingProtocolManageDelegate override 355 virtual void ResetDatabase() OVERRIDE; 356 virtual void UpdateStarted() OVERRIDE; 357 virtual void UpdateFinished(bool success) OVERRIDE; 358 virtual void GetChunks(GetChunksCallback callback) OVERRIDE; 359 virtual void AddChunks(const std::string& list, SBChunkList* chunks, 360 AddChunksCallback callback) OVERRIDE; 361 virtual void DeleteChunks( 362 std::vector<SBChunkDelete>* delete_chunks) OVERRIDE; 363 364 scoped_refptr<SafeBrowsingService> sb_service_; 365 366 CurrentChecks checks_; 367 368 // Used for issuing only one GetHash request for a given prefix. 369 GetHashRequests gethash_requests_; 370 371 // The persistent database. We don't use a scoped_ptr because it 372 // needs to be destroyed on a different thread than this object. 373 SafeBrowsingDatabase* database_; 374 375 // Lock used to prevent possible data races due to compiler optimizations. 376 mutable base::Lock database_lock_; 377 378 // Whether the service is running. 'enabled_' is used by the 379 // SafeBrowsingDatabaseManager on the IO thread during normal operations. 380 bool enabled_; 381 382 // Indicate if download_protection is enabled by command switch 383 // so we allow this feature to be exersized. 384 bool enable_download_protection_; 385 386 // Indicate if client-side phishing detection whitelist should be enabled 387 // or not. 388 bool enable_csd_whitelist_; 389 390 // Indicate if the download whitelist should be enabled or not. 391 bool enable_download_whitelist_; 392 393 // Indicate if the extension blacklist should be enabled. 394 bool enable_extension_blacklist_; 395 396 // Indicate if the side effect free whitelist should be enabled. 397 bool enable_side_effect_free_whitelist_; 398 399 // Indicate if the csd malware IP blacklist should be enabled. 400 bool enable_ip_blacklist_; 401 402 // The SafeBrowsing thread that runs database operations. 403 // 404 // Note: Functions that run on this thread should run synchronously and return 405 // to the IO thread, not post additional tasks back to this thread, lest we 406 // cause a race condition at shutdown time that leads to a database leak. 407 scoped_ptr<base::Thread> safe_browsing_thread_; 408 409 // Indicates if we're currently in an update cycle. 410 bool update_in_progress_; 411 412 // When true, newly fetched chunks may not in the database yet since the 413 // database is still updating. 414 bool database_update_in_progress_; 415 416 // Indicates if we're in the midst of trying to close the database. If this 417 // is true, nothing on the IO thread should access the database. 418 bool closing_database_; 419 420 std::deque<QueuedCheck> queued_checks_; 421 422 // Timeout to use for safe browsing checks. 423 base::TimeDelta check_timeout_; 424 425 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseManager); 426 }; 427 428 #endif // CHROME_BROWSER_SAFE_BROWSING_DATABASE_MANAGER_H_ 429