// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/client_side_detection_host.h"

#include <set>
#include <string>
#include <utility>
#include <vector>

#include "base/bind.h"
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/metrics/histogram.h"
#include "base/prefs/pref_service.h"
#include "base/sequenced_task_runner_helpers.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/safe_browsing/browser_feature_extractor.h"
#include "chrome/browser/safe_browsing/client_side_detection_service.h"
#include "chrome/browser/safe_browsing/database_manager.h"
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/common/chrome_version_info.h"
#include "chrome/common/pref_names.h"
#include "chrome/common/safe_browsing/csd.pb.h"
#include "chrome/common/safe_browsing/safebrowsing_messages.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/navigation_controller.h"
#include "content/public/browser/navigation_details.h"
#include "content/public/browser/navigation_entry.h"
#include "content/public/browser/notification_details.h"
#include "content/public/browser/notification_source.h"
#include "content/public/browser/notification_types.h"
#include "content/public/browser/render_process_host.h"
#include "content/public/browser/render_view_host.h"
#include "content/public/browser/resource_request_details.h"
#include "content/public/browser/web_contents.h"
#include "content/public/common/frame_navigate_params.h"
#include "url/gurl.h"

using content::BrowserThread;
using content::NavigationEntry;
using content::ResourceRequestDetails;
using content::WebContents;
44 45 namespace safe_browsing { 46 47 const int ClientSideDetectionHost::kMaxUrlsPerIP = 20; 48 const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200; 49 50 namespace { 51 52 void EmptyUrlCheckCallback(bool processed) { 53 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 54 } 55 56 } // namespace 57 58 // This class is instantiated each time a new toplevel URL loads, and 59 // asynchronously checks whether the phishing classifier should run for this 60 // URL. If so, it notifies the renderer with a StartPhishingDetection IPC. 61 // Objects of this class are ref-counted and will be destroyed once nobody 62 // uses it anymore. If |web_contents|, |csd_service| or |host| go away you need 63 // to call Cancel(). We keep the |database_manager| alive in a ref pointer for 64 // as long as it takes. 65 class ClientSideDetectionHost::ShouldClassifyUrlRequest 66 : public base::RefCountedThreadSafe< 67 ClientSideDetectionHost::ShouldClassifyUrlRequest> { 68 public: 69 ShouldClassifyUrlRequest(const content::FrameNavigateParams& params, 70 WebContents* web_contents, 71 ClientSideDetectionService* csd_service, 72 SafeBrowsingDatabaseManager* database_manager, 73 ClientSideDetectionHost* host) 74 : canceled_(false), 75 params_(params), 76 web_contents_(web_contents), 77 csd_service_(csd_service), 78 database_manager_(database_manager), 79 host_(host) { 80 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 81 DCHECK(web_contents_); 82 DCHECK(csd_service_); 83 DCHECK(database_manager_.get()); 84 DCHECK(host_); 85 } 86 87 void Start() { 88 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 89 90 // We start by doing some simple checks that can run on the UI thread. 91 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1); 92 93 // Only classify [X]HTML documents. 
94 if (params_.contents_mime_type != "text/html" && 95 params_.contents_mime_type != "application/xhtml+xml") { 96 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 97 << " because it has an unsupported MIME type: " 98 << params_.contents_mime_type; 99 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 100 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, 101 NO_CLASSIFY_MAX); 102 return; 103 } 104 105 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) { 106 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 107 << " because of hosting on private IP: " 108 << params_.socket_address.host(); 109 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 110 NO_CLASSIFY_PRIVATE_IP, 111 NO_CLASSIFY_MAX); 112 return; 113 } 114 115 // Don't run the phishing classifier if the tab is incognito. 116 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) { 117 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 118 << " because we're browsing incognito."; 119 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 120 NO_CLASSIFY_OFF_THE_RECORD, 121 NO_CLASSIFY_MAX); 122 123 return; 124 } 125 126 // We lookup the csd-whitelist before we lookup the cache because 127 // a URL may have recently been whitelisted. If the URL matches 128 // the csd-whitelist we won't start classification. The 129 // csd-whitelist check has to be done on the IO thread because it 130 // uses the SafeBrowsing service class. 131 BrowserThread::PostTask( 132 BrowserThread::IO, 133 FROM_HERE, 134 base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist, 135 this, params_.url)); 136 } 137 138 void Cancel() { 139 canceled_ = true; 140 // Just to make sure we don't do anything stupid we reset all these 141 // pointers except for the safebrowsing service class which may be 142 // accessed by CheckCsdWhitelist(). 
143 web_contents_ = NULL; 144 csd_service_ = NULL; 145 host_ = NULL; 146 } 147 148 private: 149 friend class base::RefCountedThreadSafe< 150 ClientSideDetectionHost::ShouldClassifyUrlRequest>; 151 152 // Enum used to keep stats about why the pre-classification check failed. 153 enum PreClassificationCheckFailures { 154 OBSOLETE_NO_CLASSIFY_PROXY_FETCH, 155 NO_CLASSIFY_PRIVATE_IP, 156 NO_CLASSIFY_OFF_THE_RECORD, 157 NO_CLASSIFY_MATCH_CSD_WHITELIST, 158 NO_CLASSIFY_TOO_MANY_REPORTS, 159 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, 160 161 NO_CLASSIFY_MAX // Always add new values before this one. 162 }; 163 164 // The destructor can be called either from the UI or the IO thread. 165 virtual ~ShouldClassifyUrlRequest() { } 166 167 void CheckCsdWhitelist(const GURL& url) { 168 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 169 if (!database_manager_.get() || 170 database_manager_->MatchCsdWhitelistUrl(url)) { 171 // We're done. There is no point in going back to the UI thread. 172 VLOG(1) << "Skipping phishing classification for URL: " << url 173 << " because it matches the csd whitelist"; 174 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 175 NO_CLASSIFY_MATCH_CSD_WHITELIST, 176 NO_CLASSIFY_MAX); 177 return; 178 } 179 180 bool malware_killswitch_on = database_manager_->IsMalwareKillSwitchOn(); 181 182 BrowserThread::PostTask( 183 BrowserThread::UI, 184 FROM_HERE, 185 base::Bind(&ShouldClassifyUrlRequest::CheckCache, this, 186 malware_killswitch_on)); 187 } 188 189 void CheckCache(bool malware_killswitch_on) { 190 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 191 if (canceled_) { 192 return; 193 } 194 195 host_->SetMalwareKillSwitch(malware_killswitch_on); 196 // If result is cached, we don't want to run classification again 197 bool is_phishing; 198 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) { 199 VLOG(1) << "Satisfying request for " << params_.url << " from cache"; 200 
UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); 201 // Since we are already on the UI thread, this is safe. 202 host_->MaybeShowPhishingWarning(params_.url, is_phishing); 203 return; 204 } 205 206 // We want to limit the number of requests, though we will ignore the 207 // limit for urls in the cache. We don't want to start classifying 208 // too many pages as phishing, but for those that we already think are 209 // phishing we want to give ourselves a chance to fix false positives. 210 if (csd_service_->IsInCache(params_.url)) { 211 VLOG(1) << "Reporting limit skipped for " << params_.url 212 << " as it was in the cache."; 213 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1); 214 } else if (csd_service_->OverPhishingReportLimit()) { 215 VLOG(1) << "Too many report phishing requests sent recently, " 216 << "not running classification for " << params_.url; 217 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 218 NO_CLASSIFY_TOO_MANY_REPORTS, 219 NO_CLASSIFY_MAX); 220 return; 221 } 222 223 // Everything checks out, so start classification. 224 // |web_contents_| is safe to call as we will be destructed 225 // before it is. 226 VLOG(1) << "Instruct renderer to start phishing detection for URL: " 227 << params_.url; 228 content::RenderViewHost* rvh = web_contents_->GetRenderViewHost(); 229 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection( 230 rvh->GetRoutingID(), params_.url)); 231 } 232 233 // No need to protect |canceled_| with a lock because it is only read and 234 // written by the UI thread. 235 bool canceled_; 236 content::FrameNavigateParams params_; 237 WebContents* web_contents_; 238 ClientSideDetectionService* csd_service_; 239 // We keep a ref pointer here just to make sure the safe browsing 240 // database manager stays alive long enough. 
241 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_; 242 ClientSideDetectionHost* host_; 243 244 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest); 245 }; 246 247 // static 248 ClientSideDetectionHost* ClientSideDetectionHost::Create( 249 WebContents* tab) { 250 return new ClientSideDetectionHost(tab); 251 } 252 253 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab) 254 : content::WebContentsObserver(tab), 255 csd_service_(NULL), 256 weak_factory_(this), 257 unsafe_unique_page_id_(-1), 258 malware_killswitch_on_(false), 259 malware_report_enabled_(false) { 260 DCHECK(tab); 261 // Note: csd_service_ and sb_service will be NULL here in testing. 262 csd_service_ = g_browser_process->safe_browsing_detection_service(); 263 feature_extractor_.reset(new BrowserFeatureExtractor(tab, csd_service_)); 264 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED, 265 content::Source<WebContents>(tab)); 266 267 scoped_refptr<SafeBrowsingService> sb_service = 268 g_browser_process->safe_browsing_service(); 269 if (sb_service.get()) { 270 ui_manager_ = sb_service->ui_manager(); 271 database_manager_ = sb_service->database_manager(); 272 ui_manager_->AddObserver(this); 273 } 274 275 // Only enable the malware bad IP matching and report feature for canary 276 // and dev channel. 
277 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel(); 278 malware_report_enabled_ = ( 279 channel == chrome::VersionInfo::CHANNEL_DEV || 280 channel == chrome::VersionInfo::CHANNEL_CANARY); 281 } 282 283 ClientSideDetectionHost::~ClientSideDetectionHost() { 284 if (ui_manager_.get()) 285 ui_manager_->RemoveObserver(this); 286 } 287 288 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) { 289 bool handled = true; 290 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message) 291 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone, 292 OnPhishingDetectionDone) 293 IPC_MESSAGE_UNHANDLED(handled = false) 294 IPC_END_MESSAGE_MAP() 295 return handled; 296 } 297 298 void ClientSideDetectionHost::DidNavigateMainFrame( 299 const content::LoadCommittedDetails& details, 300 const content::FrameNavigateParams& params) { 301 // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests 302 // that don't call this method on the UI thread. 303 // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 304 if (details.is_in_page) { 305 // If the navigation is within the same page, the user isn't really 306 // navigating away. We don't need to cancel a pending callback or 307 // begin a new classification. 308 return; 309 } 310 // If we navigate away and there currently is a pending phishing 311 // report request we have to cancel it to make sure we don't display 312 // an interstitial for the wrong page. Note that this won't cancel 313 // the server ping back but only cancel the showing of the 314 // interstial. 315 weak_factory_.InvalidateWeakPtrs(); 316 317 if (!csd_service_) { 318 return; 319 } 320 321 // Cancel any pending classification request. 322 if (classification_request_.get()) { 323 classification_request_->Cancel(); 324 } 325 browse_info_.reset(new BrowseInfo); 326 327 // Store redirect chain information. 
328 if (params.url.host() != cur_host_) { 329 cur_host_ = params.url.host(); 330 cur_host_redirects_ = params.redirects; 331 } 332 browse_info_->host_redirects = cur_host_redirects_; 333 browse_info_->url_redirects = params.redirects; 334 browse_info_->http_status_code = details.http_status_code; 335 336 // Notify the renderer if it should classify this URL. 337 classification_request_ = new ShouldClassifyUrlRequest( 338 params, web_contents(), csd_service_, database_manager_.get(), this); 339 classification_request_->Start(); 340 } 341 342 void ClientSideDetectionHost::OnSafeBrowsingHit( 343 const SafeBrowsingUIManager::UnsafeResource& resource) { 344 // Check that this notification is really for us and that it corresponds to 345 // either a malware or phishing hit. In this case we store the unique page 346 // ID for later. 347 if (web_contents() && 348 web_contents()->GetRenderProcessHost()->GetID() == 349 resource.render_process_host_id && 350 web_contents()->GetRenderViewHost()->GetRoutingID() == 351 resource.render_view_id && 352 (resource.threat_type == SB_THREAT_TYPE_URL_PHISHING || 353 resource.threat_type == SB_THREAT_TYPE_URL_MALWARE) && 354 web_contents()->GetController().GetActiveEntry()) { 355 unsafe_unique_page_id_ = 356 web_contents()->GetController().GetActiveEntry()->GetUniqueID(); 357 // We also keep the resource around in order to be able to send the 358 // malicious URL to the server. 359 unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource)); 360 unsafe_resource_->callback.Reset(); // Don't do anything stupid. 361 } 362 } 363 364 void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) { 365 DCHECK(tab); 366 // Tell any pending classification request that it is being canceled. 367 if (classification_request_.get()) { 368 classification_request_->Cancel(); 369 } 370 // Cancel all pending feature extractions. 
371 feature_extractor_.reset(); 372 } 373 374 void ClientSideDetectionHost::OnPhishingDetectionDone( 375 const std::string& verdict_str) { 376 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 377 // There is something seriously wrong if there is no service class but 378 // this method is called. The renderer should not start phishing detection 379 // if there isn't any service class in the browser. 380 DCHECK(csd_service_); 381 // There shouldn't be any pending requests because we revoke them everytime 382 // we navigate away. 383 DCHECK(!weak_factory_.HasWeakPtrs()); 384 DCHECK(browse_info_.get()); 385 386 // We parse the protocol buffer here. If we're unable to parse it we won't 387 // send the verdict further. 388 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest); 389 if (csd_service_ && 390 !weak_factory_.HasWeakPtrs() && 391 browse_info_.get() && 392 verdict->ParseFromString(verdict_str) && 393 verdict->IsInitialized()) { 394 // We do the malware IP matching and request sending if the feature 395 // is enabled. 396 if (malware_report_enabled_ && !MalwareKillSwitchIsOn()) { 397 scoped_ptr<ClientMalwareRequest> malware_verdict( 398 new ClientMalwareRequest); 399 // Start browser-side malware feature extraction. Once we're done it will 400 // send the malware client verdict request. 401 malware_verdict->set_url(verdict->url()); 402 feature_extractor_->ExtractMalwareFeatures( 403 browse_info_.get(), malware_verdict.get()); 404 MalwareFeatureExtractionDone(malware_verdict.Pass()); 405 } 406 407 // We only send phishing verdict to the server if the verdict is phishing or 408 // if a SafeBrowsing interstitial was already shown for this site. E.g., a 409 // malware or phishing interstitial was shown but the user clicked 410 // through. 
411 if (verdict->is_phishing() || DidShowSBInterstitial()) { 412 if (DidShowSBInterstitial()) { 413 browse_info_->unsafe_resource.reset(unsafe_resource_.release()); 414 } 415 // Start browser-side feature extraction. Once we're done it will send 416 // the client verdict request. 417 feature_extractor_->ExtractFeatures( 418 browse_info_.get(), 419 verdict.release(), 420 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone, 421 weak_factory_.GetWeakPtr())); 422 } 423 } 424 browse_info_.reset(); 425 } 426 427 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url, 428 bool is_phishing) { 429 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 430 VLOG(2) << "Received server phishing verdict for URL:" << phishing_url 431 << " is_phishing:" << is_phishing; 432 if (is_phishing) { 433 DCHECK(web_contents()); 434 if (ui_manager_.get()) { 435 SafeBrowsingUIManager::UnsafeResource resource; 436 resource.url = phishing_url; 437 resource.original_url = phishing_url; 438 resource.is_subresource = false; 439 resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL; 440 resource.render_process_host_id = 441 web_contents()->GetRenderProcessHost()->GetID(); 442 resource.render_view_id = 443 web_contents()->GetRenderViewHost()->GetRoutingID(); 444 if (!ui_manager_->IsWhitelisted(resource)) { 445 // We need to stop any pending navigations, otherwise the interstital 446 // might not get created properly. 447 web_contents()->GetController().DiscardNonCommittedEntries(); 448 resource.callback = base::Bind(&EmptyUrlCheckCallback); 449 ui_manager_->DoDisplayBlockingPage(resource); 450 } 451 } 452 } 453 } 454 455 void ClientSideDetectionHost::FeatureExtractionDone( 456 bool success, 457 ClientPhishingRequest* request) { 458 if (!request) { 459 DLOG(FATAL) << "Invalid request object in FeatureExtractionDone"; 460 return; 461 } 462 VLOG(2) << "Feature extraction done (success:" << success << ") for URL: " 463 << request->url() << ". 
Start sending client phishing request."; 464 ClientSideDetectionService::ClientReportPhishingRequestCallback callback; 465 // If the client-side verdict isn't phishing we don't care about the server 466 // response because we aren't going to display a warning. 467 if (request->is_phishing()) { 468 callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning, 469 weak_factory_.GetWeakPtr()); 470 } 471 // Send ping even if the browser feature extraction failed. 472 csd_service_->SendClientReportPhishingRequest( 473 request, // The service takes ownership of the request object. 474 callback); 475 } 476 477 void ClientSideDetectionHost::MalwareFeatureExtractionDone( 478 scoped_ptr<ClientMalwareRequest> request) { 479 if (!request) { 480 DLOG(FATAL) << "Invalid request object in MalwareFeatureExtractionDone"; 481 return; 482 } 483 VLOG(2) << "Malware Feature extraction done for URL: " << request->url() 484 << ", with features count:" << request->feature_map_size(); 485 486 // Send ping if there is matching features. 
487 if (request->feature_map_size() > 0) { 488 VLOG(1) << "Start sending client malware request."; 489 ClientSideDetectionService::ClientReportMalwareRequestCallback callback; 490 csd_service_->SendClientReportMalwareRequest( 491 request.release(), // The service takes ownership of the request object 492 callback); // no action after request sent for now 493 } 494 } 495 496 void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip, 497 const std::string& url) { 498 if (ip.empty() || url.empty()) 499 return; 500 501 IPUrlMap::iterator it = browse_info_->ips.find(ip); 502 if (it == browse_info_->ips.end()) { 503 if (int(browse_info_->ips.size()) < kMaxIPsPerBrowse) { 504 std::set<std::string> urls; 505 urls.insert(url); 506 browse_info_->ips.insert(make_pair(ip, urls)); 507 } 508 } else if (int(it->second.size()) < kMaxUrlsPerIP) { 509 it->second.insert(url); 510 } 511 } 512 513 void ClientSideDetectionHost::Observe( 514 int type, 515 const content::NotificationSource& source, 516 const content::NotificationDetails& details) { 517 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 518 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED); 519 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>( 520 details).ptr(); 521 if (req && browse_info_.get() && malware_report_enabled_ && 522 !MalwareKillSwitchIsOn()) { 523 if (req->url.is_valid()) { 524 UpdateIPUrlMap(req->socket_address.host() /* ip */, 525 req->url.spec() /* url */); 526 } 527 } 528 } 529 530 bool ClientSideDetectionHost::DidShowSBInterstitial() { 531 if (unsafe_unique_page_id_ <= 0 || !web_contents()) { 532 return false; 533 } 534 const NavigationEntry* nav_entry = 535 web_contents()->GetController().GetActiveEntry(); 536 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_); 537 } 538 539 void ClientSideDetectionHost::set_client_side_detection_service( 540 ClientSideDetectionService* service) { 541 csd_service_ = service; 542 } 543 544 void 
ClientSideDetectionHost::set_safe_browsing_managers( 545 SafeBrowsingUIManager* ui_manager, 546 SafeBrowsingDatabaseManager* database_manager) { 547 if (ui_manager_.get()) 548 ui_manager_->RemoveObserver(this); 549 550 ui_manager_ = ui_manager; 551 if (ui_manager) 552 ui_manager_->AddObserver(this); 553 554 database_manager_ = database_manager; 555 } 556 557 bool ClientSideDetectionHost::MalwareKillSwitchIsOn() { 558 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 559 return malware_killswitch_on_; 560 } 561 562 void ClientSideDetectionHost::SetMalwareKillSwitch(bool killswitch_on) { 563 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 564 malware_killswitch_on_ = killswitch_on; 565 } 566 567 } // namespace safe_browsing 568