Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
      6 
      7 #include <vector>
      8 
      9 #include "base/command_line.h"
     10 #include "base/logging.h"
     11 #include "base/memory/ref_counted.h"
     12 #include "base/memory/scoped_ptr.h"
     13 #include "base/metrics/histogram.h"
     14 #include "base/task.h"
     15 #include "chrome/browser/browser_process.h"
     16 #include "chrome/browser/profiles/profile.h"
     17 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
     18 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
     19 #include "chrome/common/chrome_switches.h"
     20 #include "chrome/common/safe_browsing/csd.pb.h"
     21 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
     22 #include "content/browser/browser_thread.h"
     23 #include "content/browser/renderer_host/render_process_host.h"
     24 #include "content/browser/renderer_host/render_view_host.h"
     25 #include "content/browser/renderer_host/resource_dispatcher_host.h"
     26 #include "content/browser/tab_contents/navigation_controller.h"
     27 #include "content/browser/tab_contents/tab_contents.h"
     28 #include "content/common/notification_service.h"
     29 #include "content/common/notification_type.h"
     30 #include "content/common/view_messages.h"
     31 #include "googleurl/src/gurl.h"
     32 
     33 namespace safe_browsing {
     34 
     35 // This class is instantiated each time a new toplevel URL loads, and
     36 // asynchronously checks whether the phishing classifier should run for this
     37 // URL.  If so, it notifies the renderer with a StartPhishingDetection IPC.
     38 // Objects of this class are ref-counted and will be destroyed once nobody
     39 // uses it anymore.  If |tab_contents|, |csd_service| or |host| go away you need
     40 // to call Cancel().  We keep the |sb_service| alive in a ref pointer for as
     41 // long as it takes.
     42 class ClientSideDetectionHost::ShouldClassifyUrlRequest
     43     : public base::RefCountedThreadSafe<
     44           ClientSideDetectionHost::ShouldClassifyUrlRequest> {
     45  public:
     46   ShouldClassifyUrlRequest(const ViewHostMsg_FrameNavigate_Params& params,
     47                            TabContents* tab_contents,
     48                            ClientSideDetectionService* csd_service,
     49                            SafeBrowsingService* sb_service,
     50                            ClientSideDetectionHost* host)
     51       : canceled_(false),
     52         params_(params),
     53         tab_contents_(tab_contents),
     54         csd_service_(csd_service),
     55         sb_service_(sb_service),
     56         host_(host) {
     57     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
     58     DCHECK(tab_contents_);
     59     DCHECK(csd_service_);
     60     DCHECK(sb_service_);
     61     DCHECK(host_);
     62   }
     63 
     64   void Start() {
     65     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
     66 
     67     // We start by doing some simple checks that can run on the UI thread.
     68     UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);
     69 
     70     // Only classify [X]HTML documents.
     71     if (params_.contents_mime_type != "text/html" &&
     72         params_.contents_mime_type != "application/xhtml+xml") {
     73       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
     74               << " because it has an unsupported MIME type: "
     75               << params_.contents_mime_type;
     76       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
     77                                 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
     78                                 NO_CLASSIFY_MAX);
     79       return;
     80     }
     81 
     82     // Don't run the phishing classifier if the URL came from a private
     83     // network, since we don't want to ping back in this case.  We also need
     84     // to check whether the connection was proxied -- if so, we won't have the
     85     // correct remote IP address, and will skip phishing classification.
     86     if (params_.was_fetched_via_proxy) {
     87       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
     88               << " because it was fetched via a proxy.";
     89       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
     90                                 NO_CLASSIFY_PROXY_FETCH,
     91                                 NO_CLASSIFY_MAX);
     92       return;
     93     }
     94     if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
     95       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
     96               << " because of hosting on private IP: "
     97               << params_.socket_address.host();
     98       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
     99                                 NO_CLASSIFY_PRIVATE_IP,
    100                                 NO_CLASSIFY_MAX);
    101       return;
    102     }
    103 
    104     // Don't run the phishing classifier if the tab is incognito.
    105     if (tab_contents_->profile()->IsOffTheRecord()) {
    106       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
    107               << " because we're browsing incognito.";
    108       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
    109                                 NO_CLASSIFY_OFF_THE_RECORD,
    110                                 NO_CLASSIFY_MAX);
    111 
    112       return;
    113     }
    114 
    115     // We lookup the csd-whitelist before we lookup the cache because
    116     // a URL may have recently been whitelisted.  If the URL matches
    117     // the csd-whitelist we won't start classification.  The
    118     // csd-whitelist check has to be done on the IO thread because it
    119     // uses the SafeBrowsing service class.
    120     BrowserThread::PostTask(
    121         BrowserThread::IO,
    122         FROM_HERE,
    123         NewRunnableMethod(this,
    124                           &ShouldClassifyUrlRequest::CheckCsdWhitelist,
    125                           params_.url));
    126   }
    127 
    128   void Cancel() {
    129     canceled_ = true;
    130     // Just to make sure we don't do anything stupid we reset all these
    131     // pointers except for the safebrowsing service class which may be
    132     // accessed by CheckCsdWhitelist().
    133     tab_contents_ = NULL;
    134     csd_service_ = NULL;
    135     host_ = NULL;
    136   }
    137 
    138  private:
    139   friend class base::RefCountedThreadSafe<
    140       ClientSideDetectionHost::ShouldClassifyUrlRequest>;
    141 
    142   // Enum used to keep stats about why the pre-classification check failed.
    143   enum PreClassificationCheckFailures {
    144     NO_CLASSIFY_PROXY_FETCH,
    145     NO_CLASSIFY_PRIVATE_IP,
    146     NO_CLASSIFY_OFF_THE_RECORD,
    147     NO_CLASSIFY_MATCH_CSD_WHITELIST,
    148     NO_CLASSIFY_TOO_MANY_REPORTS,
    149     NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
    150 
    151     NO_CLASSIFY_MAX  // Always add new values before this one.
    152   };
    153 
    154   // The destructor can be called either from the UI or the IO thread.
    155   virtual ~ShouldClassifyUrlRequest() { }
    156 
    157   void CheckCsdWhitelist(const GURL& url) {
    158     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    159     if (!sb_service_ || sb_service_->MatchCsdWhitelistUrl(url)) {
    160       // We're done.  There is no point in going back to the UI thread.
    161       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
    162                                 NO_CLASSIFY_MATCH_CSD_WHITELIST,
    163                                 NO_CLASSIFY_MAX);
    164       return;
    165     }
    166 
    167     BrowserThread::PostTask(
    168         BrowserThread::UI,
    169         FROM_HERE,
    170         NewRunnableMethod(this,
    171                           &ShouldClassifyUrlRequest::CheckCache));
    172   }
    173 
    174   void CheckCache() {
    175     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    176     if (canceled_) {
    177       return;
    178     }
    179 
    180     // If result is cached, we don't want to run classification again
    181     bool is_phishing;
    182     if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
    183       VLOG(1) << "Satisfying request for " << params_.url << " from cache";
    184       UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
    185       // Since we are already on the UI thread, this is safe.
    186       host_->MaybeShowPhishingWarning(params_.url, is_phishing);
    187       return;
    188     }
    189 
    190     // We want to limit the number of requests, though we will ignore the
    191     // limit for urls in the cache.  We don't want to start classifying
    192     // too many pages as phishing, but for those that we already think are
    193     // phishing we want to give ourselves a chance to fix false positives.
    194     if (csd_service_->IsInCache(params_.url)) {
    195       VLOG(1) << "Reporting limit skipped for " << params_.url
    196               << " as it was in the cache.";
    197       UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1);
    198     } else if (csd_service_->OverReportLimit()) {
    199       VLOG(1) << "Too many report phishing requests sent recently, "
    200               << "not running classification for " << params_.url;
    201       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
    202                                 NO_CLASSIFY_TOO_MANY_REPORTS,
    203                                 NO_CLASSIFY_MAX);
    204       return;
    205     }
    206 
    207     // Everything checks out, so start classification.
    208     // |tab_contents_| is safe to call as we will be destructed
    209     // before it is.
    210     RenderViewHost* rvh = tab_contents_->render_view_host();
    211     rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
    212         rvh->routing_id(), params_.url));
    213   }
    214 
    215   // No need to protect |canceled_| with a lock because it is only read and
    216   // written by the UI thread.
    217   bool canceled_;
    218   ViewHostMsg_FrameNavigate_Params params_;
    219   TabContents* tab_contents_;
    220   ClientSideDetectionService* csd_service_;
    221   // We keep a ref pointer here just to make sure the service class stays alive
    222   // long enough.
    223   scoped_refptr<SafeBrowsingService> sb_service_;
    224   ClientSideDetectionHost* host_;
    225 
    226   DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
    227 };
    228 
    229 // This class is used to display the phishing interstitial.
    230 class CsdClient : public SafeBrowsingService::Client {
    231  public:
    232   CsdClient() {}
    233 
    234   // Method from SafeBrowsingService::Client.  This method is called on the
    235   // IO thread once the interstitial is going away.  This method simply deletes
    236   // the CsdClient object.
    237   virtual void OnBlockingPageComplete(bool proceed) {
    238     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    239     // Delete this on the UI thread since it was created there.
    240     BrowserThread::PostTask(BrowserThread::UI,
    241                             FROM_HERE,
    242                             new DeleteTask<CsdClient>(this));
    243   }
    244 
    245  private:
    246   friend class DeleteTask<CsdClient>;  // Calls the private destructor.
    247 
    248   // We're taking care of deleting this object.  No-one else should delete
    249   // this object.
    250   virtual ~CsdClient() {}
    251 
    252   DISALLOW_COPY_AND_ASSIGN(CsdClient);
    253 };
    254 
    255 ClientSideDetectionHost::ClientSideDetectionHost(TabContents* tab)
    256     : TabContentsObserver(tab),
    257       csd_service_(g_browser_process->safe_browsing_detection_service()),
    258       cb_factory_(ALLOW_THIS_IN_INITIALIZER_LIST(this)) {
    259   DCHECK(tab);
    260   // Note: csd_service_ and sb_service_ might be NULL.
    261   ResourceDispatcherHost* resource =
    262       g_browser_process->resource_dispatcher_host();
    263   if (resource) {
    264     sb_service_ = resource->safe_browsing_service();
    265   }
    266 }
    267 
    268 ClientSideDetectionHost::~ClientSideDetectionHost() {
    269   // Tell any pending classification request that it is being canceled.
    270   if (classification_request_.get()) {
    271     classification_request_->Cancel();
    272   }
    273 }
    274 
    275 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
    276   bool handled = true;
    277   IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
    278     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_DetectedPhishingSite,
    279                         OnDetectedPhishingSite)
    280     IPC_MESSAGE_UNHANDLED(handled = false)
    281   IPC_END_MESSAGE_MAP()
    282   return handled;
    283 }
    284 
    285 void ClientSideDetectionHost::DidNavigateMainFramePostCommit(
    286     const NavigationController::LoadCommittedDetails& details,
    287     const ViewHostMsg_FrameNavigate_Params& params) {
    288   // TODO(noelutz): move this DCHECK to TabContents and fix all the unit tests
    289   // that don't call this method on the UI thread.
    290   // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    291 
    292   if (details.is_in_page) {
    293     // If the navigation is within the same page, the user isn't really
    294     // navigating away.  We don't need to cancel a pending callback or
    295     // begin a new classification.
    296     return;
    297   }
    298 
    299   // If we navigate away and there currently is a pending phishing
    300   // report request we have to cancel it to make sure we don't display
    301   // an interstitial for the wrong page.  Note that this won't cancel
    302   // the server ping back but only cancel the showing of the
    303   // interstial.
    304   cb_factory_.RevokeAll();
    305 
    306   if (csd_service_) {
    307     // Cancel any pending classification request.
    308     if (classification_request_.get()) {
    309       classification_request_->Cancel();
    310     }
    311 
    312     // Notify the renderer if it should classify this URL.
    313     classification_request_ = new ShouldClassifyUrlRequest(params,
    314                                                            tab_contents(),
    315                                                            csd_service_,
    316                                                            sb_service_,
    317                                                            this);
    318     classification_request_->Start();
    319   }
    320 }
    321 
    322 void ClientSideDetectionHost::OnDetectedPhishingSite(
    323     const std::string& verdict_str) {
    324   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    325   // There is something seriously wrong if there is no service class but
    326   // this method is called.  The renderer should not start phishing detection
    327   // if there isn't any service class in the browser.
    328   DCHECK(csd_service_);
    329   // We parse the protocol buffer here.  If we're unable to parse it we won't
    330   // send the verdict further.
    331   scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
    332   if (csd_service_ &&
    333       verdict->ParseFromString(verdict_str) &&
    334       verdict->IsInitialized()) {
    335     // There shouldn't be any pending requests because we revoke them everytime
    336     // we navigate away.
    337     DCHECK(!cb_factory_.HasPendingCallbacks());
    338     csd_service_->SendClientReportPhishingRequest(
    339         verdict.release(),  // The service takes ownership of the verdict.
    340         cb_factory_.NewCallback(
    341             &ClientSideDetectionHost::MaybeShowPhishingWarning));
    342   }
    343 }
    344 
    345 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
    346                                                        bool is_phishing) {
    347   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    348   if (is_phishing &&
    349       CommandLine::ForCurrentProcess()->HasSwitch(
    350           switches::kEnableClientSidePhishingInterstitial)) {
    351     DCHECK(tab_contents());
    352     // TODO(noelutz): this is not perfect.  It's still possible that the
    353     // user browses away before the interstitial is shown.  Maybe we should
    354     // stop all pending navigations?
    355     if (sb_service_) {
    356       // TODO(noelutz): refactor the SafeBrowsing service class and the
    357       // SafeBrowsing blocking page class so that we don't need to depend
    358       // on the SafeBrowsingService here and so that we don't need to go
    359       // through the IO message loop.
    360       std::vector<GURL> redirect_urls;
    361       BrowserThread::PostTask(
    362           BrowserThread::IO,
    363           FROM_HERE,
    364           NewRunnableMethod(sb_service_.get(),
    365                             &SafeBrowsingService::DisplayBlockingPage,
    366                             phishing_url, phishing_url,
    367                             redirect_urls,
    368                             // We only classify the main frame URL.
    369                             ResourceType::MAIN_FRAME,
    370                             // TODO(noelutz): create a separate threat type
    371                             // for client-side phishing detection.
    372                             SafeBrowsingService::URL_PHISHING,
    373                             new CsdClient() /* will delete itself */,
    374                             tab_contents()->GetRenderProcessHost()->id(),
    375                             tab_contents()->render_view_host()->routing_id()));
    376     }
    377   }
    378 }
    379 
    380 void ClientSideDetectionHost::set_client_side_detection_service(
    381     ClientSideDetectionService* service) {
    382   csd_service_ = service;
    383 }
    384 
    385 void ClientSideDetectionHost::set_safe_browsing_service(
    386     SafeBrowsingService* service) {
    387   sb_service_ = service;
    388 }
    389 
    390 }  // namespace safe_browsing
    391