1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/safe_browsing/client_side_detection_host.h" 6 7 #include <vector> 8 9 #include "base/command_line.h" 10 #include "base/logging.h" 11 #include "base/memory/ref_counted.h" 12 #include "base/memory/scoped_ptr.h" 13 #include "base/metrics/histogram.h" 14 #include "base/task.h" 15 #include "chrome/browser/browser_process.h" 16 #include "chrome/browser/profiles/profile.h" 17 #include "chrome/browser/safe_browsing/client_side_detection_service.h" 18 #include "chrome/browser/safe_browsing/safe_browsing_service.h" 19 #include "chrome/common/chrome_switches.h" 20 #include "chrome/common/safe_browsing/csd.pb.h" 21 #include "chrome/common/safe_browsing/safebrowsing_messages.h" 22 #include "content/browser/browser_thread.h" 23 #include "content/browser/renderer_host/render_process_host.h" 24 #include "content/browser/renderer_host/render_view_host.h" 25 #include "content/browser/renderer_host/resource_dispatcher_host.h" 26 #include "content/browser/tab_contents/navigation_controller.h" 27 #include "content/browser/tab_contents/tab_contents.h" 28 #include "content/common/notification_service.h" 29 #include "content/common/notification_type.h" 30 #include "content/common/view_messages.h" 31 #include "googleurl/src/gurl.h" 32 33 namespace safe_browsing { 34 35 // This class is instantiated each time a new toplevel URL loads, and 36 // asynchronously checks whether the phishing classifier should run for this 37 // URL. If so, it notifies the renderer with a StartPhishingDetection IPC. 38 // Objects of this class are ref-counted and will be destroyed once nobody 39 // uses it anymore. If |tab_contents|, |csd_service| or |host| go away you need 40 // to call Cancel(). We keep the |sb_service| alive in a ref pointer for as 41 // long as it takes. 42 class ClientSideDetectionHost::ShouldClassifyUrlRequest 43 : public base::RefCountedThreadSafe< 44 ClientSideDetectionHost::ShouldClassifyUrlRequest> { 45 public: 46 ShouldClassifyUrlRequest(const ViewHostMsg_FrameNavigate_Params& params, 47 TabContents* tab_contents, 48 ClientSideDetectionService* csd_service, 49 SafeBrowsingService* sb_service, 50 ClientSideDetectionHost* host) 51 : canceled_(false), 52 params_(params), 53 tab_contents_(tab_contents), 54 csd_service_(csd_service), 55 sb_service_(sb_service), 56 host_(host) { 57 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 58 DCHECK(tab_contents_); 59 DCHECK(csd_service_); 60 DCHECK(sb_service_); 61 DCHECK(host_); 62 } 63 64 void Start() { 65 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 66 67 // We start by doing some simple checks that can run on the UI thread. 68 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1); 69 70 // Only classify [X]HTML documents. 71 if (params_.contents_mime_type != "text/html" && 72 params_.contents_mime_type != "application/xhtml+xml") { 73 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 74 << " because it has an unsupported MIME type: " 75 << params_.contents_mime_type; 76 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 77 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, 78 NO_CLASSIFY_MAX); 79 return; 80 } 81 82 // Don't run the phishing classifier if the URL came from a private 83 // network, since we don't want to ping back in this case. We also need 84 // to check whether the connection was proxied -- if so, we won't have the 85 // correct remote IP address, and will skip phishing classification. 86 if (params_.was_fetched_via_proxy) { 87 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 88 << " because it was fetched via a proxy."; 89 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 90 NO_CLASSIFY_PROXY_FETCH, 91 NO_CLASSIFY_MAX); 92 return; 93 } 94 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) { 95 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 96 << " because of hosting on private IP: " 97 << params_.socket_address.host(); 98 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 99 NO_CLASSIFY_PRIVATE_IP, 100 NO_CLASSIFY_MAX); 101 return; 102 } 103 104 // Don't run the phishing classifier if the tab is incognito. 105 if (tab_contents_->profile()->IsOffTheRecord()) { 106 VLOG(1) << "Skipping phishing classification for URL: " << params_.url 107 << " because we're browsing incognito."; 108 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 109 NO_CLASSIFY_OFF_THE_RECORD, 110 NO_CLASSIFY_MAX); 111 112 return; 113 } 114 115 // We lookup the csd-whitelist before we lookup the cache because 116 // a URL may have recently been whitelisted. If the URL matches 117 // the csd-whitelist we won't start classification. The 118 // csd-whitelist check has to be done on the IO thread because it 119 // uses the SafeBrowsing service class. 120 BrowserThread::PostTask( 121 BrowserThread::IO, 122 FROM_HERE, 123 NewRunnableMethod(this, 124 &ShouldClassifyUrlRequest::CheckCsdWhitelist, 125 params_.url)); 126 } 127 128 void Cancel() { 129 canceled_ = true; 130 // Just to make sure we don't do anything stupid we reset all these 131 // pointers except for the safebrowsing service class which may be 132 // accessed by CheckCsdWhitelist(). 133 tab_contents_ = NULL; 134 csd_service_ = NULL; 135 host_ = NULL; 136 } 137 138 private: 139 friend class base::RefCountedThreadSafe< 140 ClientSideDetectionHost::ShouldClassifyUrlRequest>; 141 142 // Enum used to keep stats about why the pre-classification check failed. 143 enum PreClassificationCheckFailures { 144 NO_CLASSIFY_PROXY_FETCH, 145 NO_CLASSIFY_PRIVATE_IP, 146 NO_CLASSIFY_OFF_THE_RECORD, 147 NO_CLASSIFY_MATCH_CSD_WHITELIST, 148 NO_CLASSIFY_TOO_MANY_REPORTS, 149 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, 150 151 NO_CLASSIFY_MAX // Always add new values before this one. 152 }; 153 154 // The destructor can be called either from the UI or the IO thread. 155 virtual ~ShouldClassifyUrlRequest() { } 156 157 void CheckCsdWhitelist(const GURL& url) { 158 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 159 if (!sb_service_ || sb_service_->MatchCsdWhitelistUrl(url)) { 160 // We're done. There is no point in going back to the UI thread. 161 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 162 NO_CLASSIFY_MATCH_CSD_WHITELIST, 163 NO_CLASSIFY_MAX); 164 return; 165 } 166 167 BrowserThread::PostTask( 168 BrowserThread::UI, 169 FROM_HERE, 170 NewRunnableMethod(this, 171 &ShouldClassifyUrlRequest::CheckCache)); 172 } 173 174 void CheckCache() { 175 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 176 if (canceled_) { 177 return; 178 } 179 180 // If result is cached, we don't want to run classification again 181 bool is_phishing; 182 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) { 183 VLOG(1) << "Satisfying request for " << params_.url << " from cache"; 184 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); 185 // Since we are already on the UI thread, this is safe. 186 host_->MaybeShowPhishingWarning(params_.url, is_phishing); 187 return; 188 } 189 190 // We want to limit the number of requests, though we will ignore the 191 // limit for urls in the cache. We don't want to start classifying 192 // too many pages as phishing, but for those that we already think are 193 // phishing we want to give ourselves a chance to fix false positives. 194 if (csd_service_->IsInCache(params_.url)) { 195 VLOG(1) << "Reporting limit skipped for " << params_.url 196 << " as it was in the cache."; 197 UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1); 198 } else if (csd_service_->OverReportLimit()) { 199 VLOG(1) << "Too many report phishing requests sent recently, " 200 << "not running classification for " << params_.url; 201 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", 202 NO_CLASSIFY_TOO_MANY_REPORTS, 203 NO_CLASSIFY_MAX); 204 return; 205 } 206 207 // Everything checks out, so start classification. 208 // |tab_contents_| is safe to call as we will be destructed 209 // before it is. 210 RenderViewHost* rvh = tab_contents_->render_view_host(); 211 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection( 212 rvh->routing_id(), params_.url)); 213 } 214 215 // No need to protect |canceled_| with a lock because it is only read and 216 // written by the UI thread. 217 bool canceled_; 218 ViewHostMsg_FrameNavigate_Params params_; 219 TabContents* tab_contents_; 220 ClientSideDetectionService* csd_service_; 221 // We keep a ref pointer here just to make sure the service class stays alive 222 // long enough. 223 scoped_refptr<SafeBrowsingService> sb_service_; 224 ClientSideDetectionHost* host_; 225 226 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest); 227 }; 228 229 // This class is used to display the phishing interstitial. 230 class CsdClient : public SafeBrowsingService::Client { 231 public: 232 CsdClient() {} 233 234 // Method from SafeBrowsingService::Client. This method is called on the 235 // IO thread once the interstitial is going away. This method simply deletes 236 // the CsdClient object. 237 virtual void OnBlockingPageComplete(bool proceed) { 238 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 239 // Delete this on the UI thread since it was created there. 240 BrowserThread::PostTask(BrowserThread::UI, 241 FROM_HERE, 242 new DeleteTask<CsdClient>(this)); 243 } 244 245 private: 246 friend class DeleteTask<CsdClient>; // Calls the private destructor. 247 248 // We're taking care of deleting this object. No-one else should delete 249 // this object. 250 virtual ~CsdClient() {} 251 252 DISALLOW_COPY_AND_ASSIGN(CsdClient); 253 }; 254 255 ClientSideDetectionHost::ClientSideDetectionHost(TabContents* tab) 256 : TabContentsObserver(tab), 257 csd_service_(g_browser_process->safe_browsing_detection_service()), 258 cb_factory_(ALLOW_THIS_IN_INITIALIZER_LIST(this)) { 259 DCHECK(tab); 260 // Note: csd_service_ and sb_service_ might be NULL. 261 ResourceDispatcherHost* resource = 262 g_browser_process->resource_dispatcher_host(); 263 if (resource) { 264 sb_service_ = resource->safe_browsing_service(); 265 } 266 } 267 268 ClientSideDetectionHost::~ClientSideDetectionHost() { 269 // Tell any pending classification request that it is being canceled. 270 if (classification_request_.get()) { 271 classification_request_->Cancel(); 272 } 273 } 274 275 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) { 276 bool handled = true; 277 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message) 278 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_DetectedPhishingSite, 279 OnDetectedPhishingSite) 280 IPC_MESSAGE_UNHANDLED(handled = false) 281 IPC_END_MESSAGE_MAP() 282 return handled; 283 } 284 285 void ClientSideDetectionHost::DidNavigateMainFramePostCommit( 286 const NavigationController::LoadCommittedDetails& details, 287 const ViewHostMsg_FrameNavigate_Params& params) { 288 // TODO(noelutz): move this DCHECK to TabContents and fix all the unit tests 289 // that don't call this method on the UI thread. 290 // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 291 292 if (details.is_in_page) { 293 // If the navigation is within the same page, the user isn't really 294 // navigating away. We don't need to cancel a pending callback or 295 // begin a new classification. 296 return; 297 } 298 299 // If we navigate away and there currently is a pending phishing 300 // report request we have to cancel it to make sure we don't display 301 // an interstitial for the wrong page. Note that this won't cancel 302 // the server ping back but only cancel the showing of the 303 // interstial. 304 cb_factory_.RevokeAll(); 305 306 if (csd_service_) { 307 // Cancel any pending classification request. 308 if (classification_request_.get()) { 309 classification_request_->Cancel(); 310 } 311 312 // Notify the renderer if it should classify this URL. 313 classification_request_ = new ShouldClassifyUrlRequest(params, 314 tab_contents(), 315 csd_service_, 316 sb_service_, 317 this); 318 classification_request_->Start(); 319 } 320 } 321 322 void ClientSideDetectionHost::OnDetectedPhishingSite( 323 const std::string& verdict_str) { 324 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 325 // There is something seriously wrong if there is no service class but 326 // this method is called. The renderer should not start phishing detection 327 // if there isn't any service class in the browser. 328 DCHECK(csd_service_); 329 // We parse the protocol buffer here. If we're unable to parse it we won't 330 // send the verdict further. 331 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest); 332 if (csd_service_ && 333 verdict->ParseFromString(verdict_str) && 334 verdict->IsInitialized()) { 335 // There shouldn't be any pending requests because we revoke them everytime 336 // we navigate away. 337 DCHECK(!cb_factory_.HasPendingCallbacks()); 338 csd_service_->SendClientReportPhishingRequest( 339 verdict.release(), // The service takes ownership of the verdict. 340 cb_factory_.NewCallback( 341 &ClientSideDetectionHost::MaybeShowPhishingWarning)); 342 } 343 } 344 345 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url, 346 bool is_phishing) { 347 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 348 if (is_phishing && 349 CommandLine::ForCurrentProcess()->HasSwitch( 350 switches::kEnableClientSidePhishingInterstitial)) { 351 DCHECK(tab_contents()); 352 // TODO(noelutz): this is not perfect. It's still possible that the 353 // user browses away before the interstitial is shown. Maybe we should 354 // stop all pending navigations? 355 if (sb_service_) { 356 // TODO(noelutz): refactor the SafeBrowsing service class and the 357 // SafeBrowsing blocking page class so that we don't need to depend 358 // on the SafeBrowsingService here and so that we don't need to go 359 // through the IO message loop. 360 std::vector<GURL> redirect_urls; 361 BrowserThread::PostTask( 362 BrowserThread::IO, 363 FROM_HERE, 364 NewRunnableMethod(sb_service_.get(), 365 &SafeBrowsingService::DisplayBlockingPage, 366 phishing_url, phishing_url, 367 redirect_urls, 368 // We only classify the main frame URL. 369 ResourceType::MAIN_FRAME, 370 // TODO(noelutz): create a separate threat type 371 // for client-side phishing detection. 372 SafeBrowsingService::URL_PHISHING, 373 new CsdClient() /* will delete itself */, 374 tab_contents()->GetRenderProcessHost()->id(), 375 tab_contents()->render_view_host()->routing_id())); 376 } 377 } 378 } 379 380 void ClientSideDetectionHost::set_client_side_detection_service( 381 ClientSideDetectionService* service) { 382 csd_service_ = service; 383 } 384 385 void ClientSideDetectionHost::set_safe_browsing_service( 386 SafeBrowsingService* service) { 387 sb_service_ = service; 388 } 389 390 } // namespace safe_browsing 391