Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
      6 
      7 #include <vector>
      8 
      9 #include "base/logging.h"
     10 #include "base/memory/ref_counted.h"
     11 #include "base/memory/scoped_ptr.h"
     12 #include "base/metrics/histogram.h"
     13 #include "base/prefs/pref_service.h"
     14 #include "base/sequenced_task_runner_helpers.h"
     15 #include "base/strings/utf_string_conversions.h"
     16 #include "chrome/browser/browser_process.h"
     17 #include "chrome/browser/profiles/profile.h"
     18 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
     19 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
     20 #include "chrome/browser/safe_browsing/database_manager.h"
     21 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
     22 #include "chrome/common/pref_names.h"
     23 #include "chrome/common/safe_browsing/csd.pb.h"
     24 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
     25 #include "content/public/browser/browser_thread.h"
     26 #include "content/public/browser/navigation_controller.h"
     27 #include "content/public/browser/navigation_details.h"
     28 #include "content/public/browser/navigation_entry.h"
     29 #include "content/public/browser/notification_details.h"
     30 #include "content/public/browser/notification_source.h"
     31 #include "content/public/browser/notification_types.h"
     32 #include "content/public/browser/render_process_host.h"
     33 #include "content/public/browser/render_view_host.h"
     34 #include "content/public/browser/resource_request_details.h"
     35 #include "content/public/browser/web_contents.h"
     36 #include "content/public/common/frame_navigate_params.h"
     37 #include "content/public/common/url_constants.h"
     38 #include "url/gurl.h"
     39 
     40 using content::BrowserThread;
     41 using content::NavigationEntry;
     42 using content::ResourceRequestDetails;
     43 using content::WebContents;
     44 
     45 namespace safe_browsing {
     46 
     47 const size_t ClientSideDetectionHost::kMaxUrlsPerIP = 20;
     48 const size_t ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
     49 
     50 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";
     51 
     52 typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
     53 
     54 // This class is instantiated each time a new toplevel URL loads, and
     55 // asynchronously checks whether the malware and phishing classifiers should run
     56 // for this URL.  If so, it notifies the host class by calling the provided
     57 // callback form the UI thread.  Objects of this class are ref-counted and will
     58 // be destroyed once nobody uses it anymore.  If |web_contents|, |csd_service|
     59 // or |host| go away you need to call Cancel().  We keep the |database_manager|
     60 // alive in a ref pointer for as long as it takes.
     61 class ClientSideDetectionHost::ShouldClassifyUrlRequest
     62     : public base::RefCountedThreadSafe<
     63           ClientSideDetectionHost::ShouldClassifyUrlRequest> {
     64  public:
     65   ShouldClassifyUrlRequest(
     66       const content::FrameNavigateParams& params,
     67       const ShouldClassifyUrlCallback& start_phishing_classification,
     68       const ShouldClassifyUrlCallback& start_malware_classification,
     69       WebContents* web_contents,
     70       ClientSideDetectionService* csd_service,
     71       SafeBrowsingDatabaseManager* database_manager,
     72       ClientSideDetectionHost* host)
     73       : params_(params),
     74         web_contents_(web_contents),
     75         csd_service_(csd_service),
     76         database_manager_(database_manager),
     77         host_(host),
     78         start_phishing_classification_cb_(start_phishing_classification),
     79         start_malware_classification_cb_(start_malware_classification) {
     80     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
     81     DCHECK(web_contents_);
     82     DCHECK(csd_service_);
     83     DCHECK(database_manager_.get());
     84     DCHECK(host_);
     85   }
     86 
     87   void Start() {
     88     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
     89 
     90     // We start by doing some simple checks that can run on the UI thread.
     91     UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ClassificationStart", 1);
     92     UMA_HISTOGRAM_BOOLEAN("SBClientMalware.ClassificationStart", 1);
     93 
     94     // Only classify [X]HTML documents.
     95     if (params_.contents_mime_type != "text/html" &&
     96         params_.contents_mime_type != "application/xhtml+xml") {
     97       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
     98               << " because it has an unsupported MIME type: "
     99               << params_.contents_mime_type;
    100       DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
    101     }
    102 
    103     if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
    104       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
    105               << " because of hosting on private IP: "
    106               << params_.socket_address.host();
    107       DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
    108       DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);
    109     }
    110 
    111     // For phishing we only classify HTTP pages.
    112     if (!params_.url.SchemeIs(url::kHttpScheme)) {
    113       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
    114               << " because it is not HTTP: "
    115               << params_.socket_address.host();
    116       DontClassifyForPhishing(NO_CLASSIFY_NOT_HTTP_URL);
    117     }
    118 
    119     // Don't run any classifier if the tab is incognito.
    120     if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
    121       VLOG(1) << "Skipping phishing and malware classification for URL: "
    122               << params_.url << " because we're browsing incognito.";
    123       DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
    124       DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
    125     }
    126 
    127     // We lookup the csd-whitelist before we lookup the cache because
    128     // a URL may have recently been whitelisted.  If the URL matches
    129     // the csd-whitelist we won't start phishing classification.  The
    130     // csd-whitelist check has to be done on the IO thread because it
    131     // uses the SafeBrowsing service class.
    132     if (ShouldClassifyForPhishing() || ShouldClassifyForMalware()) {
    133       BrowserThread::PostTask(
    134           BrowserThread::IO,
    135           FROM_HERE,
    136           base::Bind(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase,
    137                      this, params_.url));
    138     }
    139   }
    140 
    141   void Cancel() {
    142     DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
    143     DontClassifyForMalware(NO_CLASSIFY_CANCEL);
    144     // Just to make sure we don't do anything stupid we reset all these
    145     // pointers except for the safebrowsing service class which may be
    146     // accessed by CheckSafeBrowsingDatabase().
    147     web_contents_ = NULL;
    148     csd_service_ = NULL;
    149     host_ = NULL;
    150   }
    151 
    152  private:
    153   friend class base::RefCountedThreadSafe<
    154       ClientSideDetectionHost::ShouldClassifyUrlRequest>;
    155 
    156   // Enum used to keep stats about why the pre-classification check failed.
    157   enum PreClassificationCheckFailures {
    158     OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
    159     NO_CLASSIFY_PRIVATE_IP,
    160     NO_CLASSIFY_OFF_THE_RECORD,
    161     NO_CLASSIFY_MATCH_CSD_WHITELIST,
    162     NO_CLASSIFY_TOO_MANY_REPORTS,
    163     NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
    164     NO_CLASSIFY_NO_DATABASE_MANAGER,
    165     NO_CLASSIFY_KILLSWITCH,
    166     NO_CLASSIFY_CANCEL,
    167     NO_CLASSIFY_RESULT_FROM_CACHE,
    168     NO_CLASSIFY_NOT_HTTP_URL,
    169 
    170     NO_CLASSIFY_MAX  // Always add new values before this one.
    171   };
    172 
    173   // The destructor can be called either from the UI or the IO thread.
    174   virtual ~ShouldClassifyUrlRequest() { }
    175 
    176   bool ShouldClassifyForPhishing() const {
    177     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    178     return !start_phishing_classification_cb_.is_null();
    179   }
    180 
    181   bool ShouldClassifyForMalware() const {
    182     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    183     return !start_malware_classification_cb_.is_null();
    184   }
    185 
    186   void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
    187     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    188     if (ShouldClassifyForPhishing()) {
    189       // Track the first reason why we stopped classifying for phishing.
    190       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
    191                                 reason, NO_CLASSIFY_MAX);
    192       DVLOG(2) << "Failed phishing pre-classification checks.  Reason: "
    193                << reason;
    194       start_phishing_classification_cb_.Run(false);
    195     }
    196     start_phishing_classification_cb_.Reset();
    197   }
    198 
    199   void DontClassifyForMalware(PreClassificationCheckFailures reason) {
    200     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    201     if (ShouldClassifyForMalware()) {
    202       // Track the first reason why we stopped classifying for malware.
    203       UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
    204                                 reason, NO_CLASSIFY_MAX);
    205       DVLOG(2) << "Failed malware pre-classification checks.  Reason: "
    206                << reason;
    207       start_malware_classification_cb_.Run(false);
    208     }
    209     start_malware_classification_cb_.Reset();
    210   }
    211 
    212   void CheckSafeBrowsingDatabase(const GURL& url) {
    213     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    214     // We don't want to call the classification callbacks from the IO
    215     // thread so we simply pass the results of this method to CheckCache()
    216     // which is called on the UI thread;
    217     PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
    218     PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
    219     if (!database_manager_.get()) {
    220       // We cannot check the Safe Browsing whitelists so we stop here
    221       // for safety.
    222       malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
    223     } else {
    224       if (database_manager_->MatchCsdWhitelistUrl(url)) {
    225         VLOG(1) << "Skipping phishing classification for URL: " << url
    226                 << " because it matches the csd whitelist";
    227         phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
    228       }
    229       if (database_manager_->IsMalwareKillSwitchOn()) {
    230         malware_reason = NO_CLASSIFY_KILLSWITCH;
    231       }
    232     }
    233     BrowserThread::PostTask(
    234         BrowserThread::UI,
    235         FROM_HERE,
    236         base::Bind(&ShouldClassifyUrlRequest::CheckCache,
    237                    this,
    238                    phishing_reason,
    239                    malware_reason));
    240   }
    241 
    242   void CheckCache(PreClassificationCheckFailures phishing_reason,
    243                   PreClassificationCheckFailures malware_reason) {
    244     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    245     if (phishing_reason != NO_CLASSIFY_MAX)
    246       DontClassifyForPhishing(phishing_reason);
    247     if (malware_reason != NO_CLASSIFY_MAX)
    248       DontClassifyForMalware(malware_reason);
    249     if (!ShouldClassifyForMalware() && !ShouldClassifyForPhishing()) {
    250       return;  // No point in doing anything else.
    251     }
    252     // If result is cached, we don't want to run classification again.
    253     // In that case we're just trying to show the warning.
    254     bool is_phishing;
    255     if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
    256       VLOG(1) << "Satisfying request for " << params_.url << " from cache";
    257       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.RequestSatisfiedFromCache", 1);
    258       // Since we are already on the UI thread, this is safe.
    259       host_->MaybeShowPhishingWarning(params_.url, is_phishing);
    260       DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
    261     }
    262 
    263     // We want to limit the number of requests, though we will ignore the
    264     // limit for urls in the cache.  We don't want to start classifying
    265     // too many pages as phishing, but for those that we already think are
    266     // phishing we want to send a request to the server to give ourselves
    267     // a chance to fix misclassifications.
    268     if (csd_service_->IsInCache(params_.url)) {
    269       VLOG(1) << "Reporting limit skipped for " << params_.url
    270               << " as it was in the cache.";
    271       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ReportLimitSkipped", 1);
    272     } else if (csd_service_->OverPhishingReportLimit()) {
    273       VLOG(1) << "Too many report phishing requests sent recently, "
    274               << "not running classification for " << params_.url;
    275       DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
    276     }
    277     if (csd_service_->OverMalwareReportLimit()) {
    278       DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
    279     }
    280 
    281     // Everything checks out, so start classification.
    282     // |web_contents_| is safe to call as we will be destructed
    283     // before it is.
    284     if (ShouldClassifyForPhishing()) {
    285       start_phishing_classification_cb_.Run(true);
    286       // Reset the callback to make sure ShouldClassifyForPhishing()
    287       // returns false.
    288       start_phishing_classification_cb_.Reset();
    289     }
    290     if (ShouldClassifyForMalware()) {
    291       start_malware_classification_cb_.Run(true);
    292       // Reset the callback to make sure ShouldClassifyForMalware()
    293       // returns false.
    294       start_malware_classification_cb_.Reset();
    295     }
    296   }
    297 
    298   content::FrameNavigateParams params_;
    299   WebContents* web_contents_;
    300   ClientSideDetectionService* csd_service_;
    301   // We keep a ref pointer here just to make sure the safe browsing
    302   // database manager stays alive long enough.
    303   scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
    304   ClientSideDetectionHost* host_;
    305 
    306   ShouldClassifyUrlCallback start_phishing_classification_cb_;
    307   ShouldClassifyUrlCallback start_malware_classification_cb_;
    308 
    309   DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
    310 };
    311 
    312 // static
    313 ClientSideDetectionHost* ClientSideDetectionHost::Create(
    314     WebContents* tab) {
    315   return new ClientSideDetectionHost(tab);
    316 }
    317 
    318 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
    319     : content::WebContentsObserver(tab),
    320       csd_service_(NULL),
    321       classification_request_(NULL),
    322       should_extract_malware_features_(true),
    323       should_classify_for_malware_(false),
    324       pageload_complete_(false),
    325       weak_factory_(this),
    326       unsafe_unique_page_id_(-1) {
    327   DCHECK(tab);
    328   // Note: csd_service_ and sb_service will be NULL here in testing.
    329   csd_service_ = g_browser_process->safe_browsing_detection_service();
    330   feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));
    331   registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
    332                  content::Source<WebContents>(tab));
    333 
    334   scoped_refptr<SafeBrowsingService> sb_service =
    335       g_browser_process->safe_browsing_service();
    336   if (sb_service.get()) {
    337     ui_manager_ = sb_service->ui_manager();
    338     database_manager_ = sb_service->database_manager();
    339     ui_manager_->AddObserver(this);
    340   }
    341 }
    342 
    343 ClientSideDetectionHost::~ClientSideDetectionHost() {
    344   if (ui_manager_.get())
    345     ui_manager_->RemoveObserver(this);
    346 }
    347 
    348 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
    349   bool handled = true;
    350   IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
    351     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
    352                         OnPhishingDetectionDone)
    353     IPC_MESSAGE_UNHANDLED(handled = false)
    354   IPC_END_MESSAGE_MAP()
    355   return handled;
    356 }
    357 
    358 void ClientSideDetectionHost::DidNavigateMainFrame(
    359     const content::LoadCommittedDetails& details,
    360     const content::FrameNavigateParams& params) {
    361   // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
    362   // that don't call this method on the UI thread.
    363   // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    364   if (details.is_in_page) {
    365     // If the navigation is within the same page, the user isn't really
    366     // navigating away.  We don't need to cancel a pending callback or
    367     // begin a new classification.
    368     return;
    369   }
    370   // Cancel any pending classification request.
    371   if (classification_request_.get()) {
    372     classification_request_->Cancel();
    373   }
    374   // If we navigate away and there currently is a pending phishing
    375   // report request we have to cancel it to make sure we don't display
    376   // an interstitial for the wrong page.  Note that this won't cancel
    377   // the server ping back but only cancel the showing of the
    378   // interstial.
    379   weak_factory_.InvalidateWeakPtrs();
    380 
    381   if (!csd_service_) {
    382     return;
    383   }
    384   browse_info_.reset(new BrowseInfo);
    385 
    386   // Store redirect chain information.
    387   if (params.url.host() != cur_host_) {
    388     cur_host_ = params.url.host();
    389     cur_host_redirects_ = params.redirects;
    390   }
    391   browse_info_->url = params.url;
    392   browse_info_->host_redirects = cur_host_redirects_;
    393   browse_info_->url_redirects = params.redirects;
    394   browse_info_->referrer = params.referrer.url;
    395   browse_info_->http_status_code = details.http_status_code;
    396   browse_info_->page_id = params.page_id;
    397 
    398   should_extract_malware_features_ = true;
    399   should_classify_for_malware_ = false;
    400   pageload_complete_ = false;
    401 
    402   // Check whether we can cassify the current URL for phishing or malware.
    403   classification_request_ = new ShouldClassifyUrlRequest(
    404       params,
    405       base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
    406                  weak_factory_.GetWeakPtr()),
    407       base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
    408                  weak_factory_.GetWeakPtr()),
    409       web_contents(), csd_service_, database_manager_.get(), this);
    410   classification_request_->Start();
    411 }
    412 
    413 void ClientSideDetectionHost::OnSafeBrowsingHit(
    414     const SafeBrowsingUIManager::UnsafeResource& resource) {
    415   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
    416     return;
    417 
    418   // Check that the hit is either malware or phishing.
    419   if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&
    420       resource.threat_type != SB_THREAT_TYPE_URL_MALWARE)
    421     return;
    422 
    423   // Check that this notification is really for us.
    424   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
    425       resource.render_process_host_id, resource.render_view_id);
    426   if (!hit_rvh ||
    427       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
    428     return;
    429 
    430   // Store the unique page ID for later.
    431   unsafe_unique_page_id_ =
    432       web_contents()->GetController().GetActiveEntry()->GetUniqueID();
    433 
    434   // We also keep the resource around in order to be able to send the
    435   // malicious URL to the server.
    436   unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
    437   unsafe_resource_->callback.Reset();  // Don't do anything stupid.
    438 }
    439 
    440 void ClientSideDetectionHost::OnSafeBrowsingMatch(
    441     const SafeBrowsingUIManager::UnsafeResource& resource) {
    442   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
    443     return;
    444 
    445   // Check that this notification is really for us.
    446   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
    447       resource.render_process_host_id, resource.render_view_id);
    448   if (!hit_rvh ||
    449       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
    450     return;
    451 
    452   web_contents()->GetController().GetActiveEntry()->SetExtraData(
    453       kSafeBrowsingMatchKey, base::ASCIIToUTF16("1"));
    454 }
    455 
    456 scoped_refptr<SafeBrowsingDatabaseManager>
    457 ClientSideDetectionHost::database_manager() {
    458   return database_manager_;
    459 }
    460 
    461 bool ClientSideDetectionHost::DidPageReceiveSafeBrowsingMatch() const {
    462   if (!web_contents() || !web_contents()->GetController().GetVisibleEntry())
    463     return false;
    464 
    465   // If an interstitial page is showing, GetVisibleEntry will return the
    466   // transient NavigationEntry for the interstitial. The transient entry
    467   // will not have the flag set, so use the pending entry instead if there
    468   // is one.
    469   NavigationEntry* entry = web_contents()->GetController().GetPendingEntry();
    470   if (!entry) {
    471     entry = web_contents()->GetController().GetVisibleEntry();
    472     if (entry->GetPageType() == content::PAGE_TYPE_INTERSTITIAL)
    473       entry = web_contents()->GetController().GetLastCommittedEntry();
    474     if (!entry)
    475       return false;
    476   }
    477 
    478   base::string16 value;
    479   return entry->GetExtraData(kSafeBrowsingMatchKey, &value);
    480 }
    481 
    482 void ClientSideDetectionHost::WebContentsDestroyed() {
    483   // Tell any pending classification request that it is being canceled.
    484   if (classification_request_.get()) {
    485     classification_request_->Cancel();
    486   }
    487   // Cancel all pending feature extractions.
    488   feature_extractor_.reset();
    489 }
    490 
    491 void ClientSideDetectionHost::OnPhishingPreClassificationDone(
    492     bool should_classify) {
    493   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    494   if (browse_info_.get() && should_classify) {
    495     VLOG(1) << "Instruct renderer to start phishing detection for URL: "
    496             << browse_info_->url;
    497     content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
    498     rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
    499         rvh->GetRoutingID(), browse_info_->url));
    500   }
    501 }
    502 
    503 void ClientSideDetectionHost::OnMalwarePreClassificationDone(
    504     bool should_classify) {
    505   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    506   // If classification checks failed we should stop extracting malware features.
    507   DVLOG(2) << "Malware pre-classification checks done. Should classify: "
    508            << should_classify;
    509   should_extract_malware_features_ = should_classify;
    510   should_classify_for_malware_ = should_classify;
    511   MaybeStartMalwareFeatureExtraction();
    512 }
    513 
    514 void ClientSideDetectionHost::DidStopLoading(content::RenderViewHost* rvh) {
    515   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    516   if (!csd_service_ || !browse_info_.get())
    517     return;
    518   DVLOG(2) << "Page finished loading.";
    519   pageload_complete_ = true;
    520   MaybeStartMalwareFeatureExtraction();
    521 }
    522 
    523 void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
    524   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    525   if (csd_service_ && browse_info_.get() &&
    526       should_classify_for_malware_ &&
    527       pageload_complete_) {
    528     scoped_ptr<ClientMalwareRequest> malware_request(
    529         new ClientMalwareRequest);
    530     // Start browser-side malware feature extraction.  Once we're done it will
    531     // send the malware client verdict request.
    532     malware_request->set_url(browse_info_->url.spec());
    533     const GURL& referrer = browse_info_->referrer;
    534     if (referrer.SchemeIs("http")) {  // Only send http urls.
    535       malware_request->set_referrer_url(referrer.spec());
    536     }
    537     // This function doesn't expect browse_info_ to stay around after this
    538     // function returns.
    539     feature_extractor_->ExtractMalwareFeatures(
    540         browse_info_.get(),
    541         malware_request.release(),
    542         base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
    543                    weak_factory_.GetWeakPtr()));
    544     should_classify_for_malware_ = false;
    545   }
    546 }
    547 
    548 void ClientSideDetectionHost::OnPhishingDetectionDone(
    549     const std::string& verdict_str) {
    550   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    551   // There is something seriously wrong if there is no service class but
    552   // this method is called.  The renderer should not start phishing detection
    553   // if there isn't any service class in the browser.
    554   DCHECK(csd_service_);
    555   DCHECK(browse_info_.get());
    556 
    557   // We parse the protocol buffer here.  If we're unable to parse it we won't
    558   // send the verdict further.
    559   scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
    560   if (csd_service_ &&
    561       browse_info_.get() &&
    562       verdict->ParseFromString(verdict_str) &&
    563       verdict->IsInitialized()) {
    564     // We only send phishing verdict to the server if the verdict is phishing or
    565     // if a SafeBrowsing interstitial was already shown for this site.  E.g., a
    566     // malware or phishing interstitial was shown but the user clicked
    567     // through.
    568     if (verdict->is_phishing() || DidShowSBInterstitial()) {
    569       if (DidShowSBInterstitial()) {
    570         browse_info_->unsafe_resource.reset(unsafe_resource_.release());
    571       }
    572       // Start browser-side feature extraction.  Once we're done it will send
    573       // the client verdict request.
    574       feature_extractor_->ExtractFeatures(
    575           browse_info_.get(),
    576           verdict.release(),
    577           base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
    578                      weak_factory_.GetWeakPtr()));
    579     }
    580   }
    581 }
    582 
    583 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
    584                                                        bool is_phishing) {
    585   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    586   DVLOG(2) << "Received server phishing verdict for URL:" << phishing_url
    587            << " is_phishing:" << is_phishing;
    588   if (is_phishing) {
    589     DCHECK(web_contents());
    590     if (ui_manager_.get()) {
    591       SafeBrowsingUIManager::UnsafeResource resource;
    592       resource.url = phishing_url;
    593       resource.original_url = phishing_url;
    594       resource.is_subresource = false;
    595       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
    596       resource.render_process_host_id =
    597           web_contents()->GetRenderProcessHost()->GetID();
    598       resource.render_view_id =
    599           web_contents()->GetRenderViewHost()->GetRoutingID();
    600       if (!ui_manager_->IsWhitelisted(resource)) {
    601         // We need to stop any pending navigations, otherwise the interstital
    602         // might not get created properly.
    603         web_contents()->GetController().DiscardNonCommittedEntries();
    604       }
    605       ui_manager_->DisplayBlockingPage(resource);
    606     }
    607     // If there is true phishing verdict, invalidate weakptr so that no longer
    608     // consider the malware vedict.
    609     weak_factory_.InvalidateWeakPtrs();
    610   }
    611 }
    612 
    613 void ClientSideDetectionHost::MaybeShowMalwareWarning(GURL original_url,
    614                                                       GURL malware_url,
    615                                                       bool is_malware) {
    616   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    617   DVLOG(2) << "Received server malawre IP verdict for URL:" << malware_url
    618            << " is_malware:" << is_malware;
    619   if (is_malware && malware_url.is_valid() && original_url.is_valid()) {
    620     DCHECK(web_contents());
    621     if (ui_manager_.get()) {
    622       SafeBrowsingUIManager::UnsafeResource resource;
    623       resource.url = malware_url;
    624       resource.original_url = original_url;
    625       resource.is_subresource = (malware_url.host() != original_url.host());
    626       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL;
    627       resource.render_process_host_id =
    628           web_contents()->GetRenderProcessHost()->GetID();
    629       resource.render_view_id =
    630           web_contents()->GetRenderViewHost()->GetRoutingID();
    631       if (!ui_manager_->IsWhitelisted(resource)) {
    632         // We need to stop any pending navigations, otherwise the interstital
    633         // might not get created properly.
    634         web_contents()->GetController().DiscardNonCommittedEntries();
    635       }
    636       ui_manager_->DisplayBlockingPage(resource);
    637     }
    638     // If there is true malware verdict, invalidate weakptr so that no longer
    639     // consider the phishing vedict.
    640     weak_factory_.InvalidateWeakPtrs();
    641   }
    642 }
    643 
    644 void ClientSideDetectionHost::FeatureExtractionDone(
    645     bool success,
    646     ClientPhishingRequest* request) {
    647   DCHECK(request);
    648   DVLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
    649            << request->url() << ". Start sending client phishing request.";
    650   ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
    651   // If the client-side verdict isn't phishing we don't care about the server
    652   // response because we aren't going to display a warning.
    653   if (request->is_phishing()) {
    654     callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
    655                           weak_factory_.GetWeakPtr());
    656   }
    657   // Send ping even if the browser feature extraction failed.
    658   csd_service_->SendClientReportPhishingRequest(
    659       request,  // The service takes ownership of the request object.
    660       callback);
    661 }
    662 
    663 void ClientSideDetectionHost::MalwareFeatureExtractionDone(
    664     bool feature_extraction_success,
    665     scoped_ptr<ClientMalwareRequest> request) {
    666   DCHECK(request.get());
    667   DVLOG(2) << "Malware Feature extraction done for URL: " << request->url()
    668            << ", with badip url count:" << request->bad_ip_url_info_size();
    669 
    670   // Send ping if there is matching features.
    671   if (feature_extraction_success && request->bad_ip_url_info_size() > 0) {
    672     VLOG(1) << "Start sending client malware request.";
    673     ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
    674     callback = base::Bind(&ClientSideDetectionHost::MaybeShowMalwareWarning,
    675                           weak_factory_.GetWeakPtr());
    676     csd_service_->SendClientReportMalwareRequest(request.release(), callback);
    677   }
    678 }
    679 
    680 void ClientSideDetectionHost::UpdateIPUrlMap(
    681     const std::string& ip,
    682     const std::string& url,
    683     const std::string& method,
    684     const std::string& referrer,
    685     const ResourceType::Type resource_type) {
    686   if (ip.empty() || url.empty())
    687     return;
    688 
    689   IPUrlMap::iterator it = browse_info_->ips.find(ip);
    690   if (it == browse_info_->ips.end()) {
    691     if (browse_info_->ips.size() < kMaxIPsPerBrowse) {
    692       std::vector<IPUrlInfo> url_infos;
    693       url_infos.push_back(IPUrlInfo(url, method, referrer, resource_type));
    694       browse_info_->ips.insert(make_pair(ip, url_infos));
    695     }
    696   } else if (it->second.size() < kMaxUrlsPerIP) {
    697     it->second.push_back(IPUrlInfo(url, method, referrer, resource_type));
    698   }
    699 }
    700 
    701 void ClientSideDetectionHost::Observe(
    702     int type,
    703     const content::NotificationSource& source,
    704     const content::NotificationDetails& details) {
    705   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    706   DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
    707   const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
    708       details).ptr();
    709   if (req && browse_info_.get() &&
    710       should_extract_malware_features_ && req->url.is_valid()) {
    711     UpdateIPUrlMap(req->socket_address.host() /* ip */,
    712                    req->url.spec()  /* url */,
    713                    req->method,
    714                    req->referrer,
    715                    req->resource_type);
    716   }
    717 }
    718 
    719 bool ClientSideDetectionHost::DidShowSBInterstitial() const {
    720   if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
    721     return false;
    722   }
    723   const NavigationEntry* nav_entry =
    724       web_contents()->GetController().GetActiveEntry();
    725   return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
    726 }
    727 
    728 void ClientSideDetectionHost::set_client_side_detection_service(
    729     ClientSideDetectionService* service) {
    730   csd_service_ = service;
    731 }
    732 
    733 void ClientSideDetectionHost::set_safe_browsing_managers(
    734     SafeBrowsingUIManager* ui_manager,
    735     SafeBrowsingDatabaseManager* database_manager) {
    736   if (ui_manager_.get())
    737     ui_manager_->RemoveObserver(this);
    738 
    739   ui_manager_ = ui_manager;
    740   if (ui_manager)
    741     ui_manager_->AddObserver(this);
    742 
    743   database_manager_ = database_manager;
    744 }
    745 
    746 }  // namespace safe_browsing
    747