Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
      6 
      7 #include <vector>
      8 
      9 #include "base/logging.h"
     10 #include "base/memory/ref_counted.h"
     11 #include "base/memory/scoped_ptr.h"
     12 #include "base/metrics/histogram.h"
     13 #include "base/prefs/pref_service.h"
     14 #include "base/sequenced_task_runner_helpers.h"
     15 #include "base/strings/utf_string_conversions.h"
     16 #include "chrome/browser/browser_process.h"
     17 #include "chrome/browser/profiles/profile.h"
     18 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
     19 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
     20 #include "chrome/browser/safe_browsing/database_manager.h"
     21 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
     22 #include "chrome/common/pref_names.h"
     23 #include "chrome/common/safe_browsing/csd.pb.h"
     24 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
     25 #include "content/public/browser/browser_thread.h"
     26 #include "content/public/browser/navigation_controller.h"
     27 #include "content/public/browser/navigation_details.h"
     28 #include "content/public/browser/navigation_entry.h"
     29 #include "content/public/browser/notification_details.h"
     30 #include "content/public/browser/notification_source.h"
     31 #include "content/public/browser/notification_types.h"
     32 #include "content/public/browser/render_process_host.h"
     33 #include "content/public/browser/render_view_host.h"
     34 #include "content/public/browser/resource_request_details.h"
     35 #include "content/public/browser/web_contents.h"
     36 #include "content/public/common/frame_navigate_params.h"
     37 #include "content/public/common/url_constants.h"
     38 #include "url/gurl.h"
     39 
     40 using content::BrowserThread;
     41 using content::NavigationEntry;
     42 using content::ResourceRequestDetails;
     43 using content::ResourceType;
     44 using content::WebContents;
     45 
     46 namespace safe_browsing {
     47 
     48 const size_t ClientSideDetectionHost::kMaxUrlsPerIP = 20;
     49 const size_t ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
     50 
     51 const char kSafeBrowsingMatchKey[] = "safe_browsing_match";
     52 
     53 typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
     54 
     55 // This class is instantiated each time a new toplevel URL loads, and
     56 // asynchronously checks whether the malware and phishing classifiers should run
     57 // for this URL.  If so, it notifies the host class by calling the provided
     58 // callback form the UI thread.  Objects of this class are ref-counted and will
     59 // be destroyed once nobody uses it anymore.  If |web_contents|, |csd_service|
     60 // or |host| go away you need to call Cancel().  We keep the |database_manager|
     61 // alive in a ref pointer for as long as it takes.
     62 class ClientSideDetectionHost::ShouldClassifyUrlRequest
     63     : public base::RefCountedThreadSafe<
     64           ClientSideDetectionHost::ShouldClassifyUrlRequest> {
     65  public:
     66   ShouldClassifyUrlRequest(
     67       const content::FrameNavigateParams& params,
     68       const ShouldClassifyUrlCallback& start_phishing_classification,
     69       const ShouldClassifyUrlCallback& start_malware_classification,
     70       WebContents* web_contents,
     71       ClientSideDetectionService* csd_service,
     72       SafeBrowsingDatabaseManager* database_manager,
     73       ClientSideDetectionHost* host)
     74       : params_(params),
     75         web_contents_(web_contents),
     76         csd_service_(csd_service),
     77         database_manager_(database_manager),
     78         host_(host),
     79         start_phishing_classification_cb_(start_phishing_classification),
     80         start_malware_classification_cb_(start_malware_classification) {
     81     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
     82     DCHECK(web_contents_);
     83     DCHECK(csd_service_);
     84     DCHECK(database_manager_.get());
     85     DCHECK(host_);
     86   }
     87 
     88   void Start() {
     89     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
     90 
     91     // We start by doing some simple checks that can run on the UI thread.
     92     UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ClassificationStart", 1);
     93     UMA_HISTOGRAM_BOOLEAN("SBClientMalware.ClassificationStart", 1);
     94 
     95     // Only classify [X]HTML documents.
     96     if (params_.contents_mime_type != "text/html" &&
     97         params_.contents_mime_type != "application/xhtml+xml") {
     98       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
     99               << " because it has an unsupported MIME type: "
    100               << params_.contents_mime_type;
    101       DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
    102     }
    103 
    104     if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
    105       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
    106               << " because of hosting on private IP: "
    107               << params_.socket_address.host();
    108       DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
    109       DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);
    110     }
    111 
    112     // For phishing we only classify HTTP pages.
    113     if (!params_.url.SchemeIs(url::kHttpScheme)) {
    114       VLOG(1) << "Skipping phishing classification for URL: " << params_.url
    115               << " because it is not HTTP: "
    116               << params_.socket_address.host();
    117       DontClassifyForPhishing(NO_CLASSIFY_NOT_HTTP_URL);
    118     }
    119 
    120     // Don't run any classifier if the tab is incognito.
    121     if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
    122       VLOG(1) << "Skipping phishing and malware classification for URL: "
    123               << params_.url << " because we're browsing incognito.";
    124       DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
    125       DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
    126     }
    127 
    128     // We lookup the csd-whitelist before we lookup the cache because
    129     // a URL may have recently been whitelisted.  If the URL matches
    130     // the csd-whitelist we won't start phishing classification.  The
    131     // csd-whitelist check has to be done on the IO thread because it
    132     // uses the SafeBrowsing service class.
    133     if (ShouldClassifyForPhishing() || ShouldClassifyForMalware()) {
    134       BrowserThread::PostTask(
    135           BrowserThread::IO,
    136           FROM_HERE,
    137           base::Bind(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase,
    138                      this, params_.url));
    139     }
    140   }
    141 
    142   void Cancel() {
    143     DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
    144     DontClassifyForMalware(NO_CLASSIFY_CANCEL);
    145     // Just to make sure we don't do anything stupid we reset all these
    146     // pointers except for the safebrowsing service class which may be
    147     // accessed by CheckSafeBrowsingDatabase().
    148     web_contents_ = NULL;
    149     csd_service_ = NULL;
    150     host_ = NULL;
    151   }
    152 
    153  private:
    154   friend class base::RefCountedThreadSafe<
    155       ClientSideDetectionHost::ShouldClassifyUrlRequest>;
    156 
    157   // Enum used to keep stats about why the pre-classification check failed.
    158   enum PreClassificationCheckFailures {
    159     OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
    160     NO_CLASSIFY_PRIVATE_IP,
    161     NO_CLASSIFY_OFF_THE_RECORD,
    162     NO_CLASSIFY_MATCH_CSD_WHITELIST,
    163     NO_CLASSIFY_TOO_MANY_REPORTS,
    164     NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
    165     NO_CLASSIFY_NO_DATABASE_MANAGER,
    166     NO_CLASSIFY_KILLSWITCH,
    167     NO_CLASSIFY_CANCEL,
    168     NO_CLASSIFY_RESULT_FROM_CACHE,
    169     NO_CLASSIFY_NOT_HTTP_URL,
    170 
    171     NO_CLASSIFY_MAX  // Always add new values before this one.
    172   };
    173 
    174   // The destructor can be called either from the UI or the IO thread.
    175   virtual ~ShouldClassifyUrlRequest() { }
    176 
    177   bool ShouldClassifyForPhishing() const {
    178     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    179     return !start_phishing_classification_cb_.is_null();
    180   }
    181 
    182   bool ShouldClassifyForMalware() const {
    183     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    184     return !start_malware_classification_cb_.is_null();
    185   }
    186 
    187   void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
    188     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    189     if (ShouldClassifyForPhishing()) {
    190       // Track the first reason why we stopped classifying for phishing.
    191       UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
    192                                 reason, NO_CLASSIFY_MAX);
    193       DVLOG(2) << "Failed phishing pre-classification checks.  Reason: "
    194                << reason;
    195       start_phishing_classification_cb_.Run(false);
    196     }
    197     start_phishing_classification_cb_.Reset();
    198   }
    199 
    200   void DontClassifyForMalware(PreClassificationCheckFailures reason) {
    201     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    202     if (ShouldClassifyForMalware()) {
    203       // Track the first reason why we stopped classifying for malware.
    204       UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
    205                                 reason, NO_CLASSIFY_MAX);
    206       DVLOG(2) << "Failed malware pre-classification checks.  Reason: "
    207                << reason;
    208       start_malware_classification_cb_.Run(false);
    209     }
    210     start_malware_classification_cb_.Reset();
    211   }
    212 
    213   void CheckSafeBrowsingDatabase(const GURL& url) {
    214     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    215     // We don't want to call the classification callbacks from the IO
    216     // thread so we simply pass the results of this method to CheckCache()
    217     // which is called on the UI thread;
    218     PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
    219     PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
    220     if (!database_manager_.get()) {
    221       // We cannot check the Safe Browsing whitelists so we stop here
    222       // for safety.
    223       malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
    224     } else {
    225       if (database_manager_->MatchCsdWhitelistUrl(url)) {
    226         VLOG(1) << "Skipping phishing classification for URL: " << url
    227                 << " because it matches the csd whitelist";
    228         phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
    229       }
    230       if (database_manager_->IsMalwareKillSwitchOn()) {
    231         malware_reason = NO_CLASSIFY_KILLSWITCH;
    232       }
    233     }
    234     BrowserThread::PostTask(
    235         BrowserThread::UI,
    236         FROM_HERE,
    237         base::Bind(&ShouldClassifyUrlRequest::CheckCache,
    238                    this,
    239                    phishing_reason,
    240                    malware_reason));
    241   }
    242 
    243   void CheckCache(PreClassificationCheckFailures phishing_reason,
    244                   PreClassificationCheckFailures malware_reason) {
    245     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    246     if (phishing_reason != NO_CLASSIFY_MAX)
    247       DontClassifyForPhishing(phishing_reason);
    248     if (malware_reason != NO_CLASSIFY_MAX)
    249       DontClassifyForMalware(malware_reason);
    250     if (!ShouldClassifyForMalware() && !ShouldClassifyForPhishing()) {
    251       return;  // No point in doing anything else.
    252     }
    253     // If result is cached, we don't want to run classification again.
    254     // In that case we're just trying to show the warning.
    255     bool is_phishing;
    256     if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
    257       VLOG(1) << "Satisfying request for " << params_.url << " from cache";
    258       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.RequestSatisfiedFromCache", 1);
    259       // Since we are already on the UI thread, this is safe.
    260       host_->MaybeShowPhishingWarning(params_.url, is_phishing);
    261       DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
    262     }
    263 
    264     // We want to limit the number of requests, though we will ignore the
    265     // limit for urls in the cache.  We don't want to start classifying
    266     // too many pages as phishing, but for those that we already think are
    267     // phishing we want to send a request to the server to give ourselves
    268     // a chance to fix misclassifications.
    269     if (csd_service_->IsInCache(params_.url)) {
    270       VLOG(1) << "Reporting limit skipped for " << params_.url
    271               << " as it was in the cache.";
    272       UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ReportLimitSkipped", 1);
    273     } else if (csd_service_->OverPhishingReportLimit()) {
    274       VLOG(1) << "Too many report phishing requests sent recently, "
    275               << "not running classification for " << params_.url;
    276       DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
    277     }
    278     if (csd_service_->OverMalwareReportLimit()) {
    279       DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
    280     }
    281 
    282     // Everything checks out, so start classification.
    283     // |web_contents_| is safe to call as we will be destructed
    284     // before it is.
    285     if (ShouldClassifyForPhishing()) {
    286       start_phishing_classification_cb_.Run(true);
    287       // Reset the callback to make sure ShouldClassifyForPhishing()
    288       // returns false.
    289       start_phishing_classification_cb_.Reset();
    290     }
    291     if (ShouldClassifyForMalware()) {
    292       start_malware_classification_cb_.Run(true);
    293       // Reset the callback to make sure ShouldClassifyForMalware()
    294       // returns false.
    295       start_malware_classification_cb_.Reset();
    296     }
    297   }
    298 
    299   content::FrameNavigateParams params_;
    300   WebContents* web_contents_;
    301   ClientSideDetectionService* csd_service_;
    302   // We keep a ref pointer here just to make sure the safe browsing
    303   // database manager stays alive long enough.
    304   scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
    305   ClientSideDetectionHost* host_;
    306 
    307   ShouldClassifyUrlCallback start_phishing_classification_cb_;
    308   ShouldClassifyUrlCallback start_malware_classification_cb_;
    309 
    310   DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
    311 };
    312 
    313 // static
    314 ClientSideDetectionHost* ClientSideDetectionHost::Create(
    315     WebContents* tab) {
    316   return new ClientSideDetectionHost(tab);
    317 }
    318 
    319 ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
    320     : content::WebContentsObserver(tab),
    321       csd_service_(NULL),
    322       classification_request_(NULL),
    323       should_extract_malware_features_(true),
    324       should_classify_for_malware_(false),
    325       pageload_complete_(false),
    326       unsafe_unique_page_id_(-1),
    327       weak_factory_(this) {
    328   DCHECK(tab);
    329   // Note: csd_service_ and sb_service will be NULL here in testing.
    330   csd_service_ = g_browser_process->safe_browsing_detection_service();
    331   feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));
    332   registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
    333                  content::Source<WebContents>(tab));
    334 
    335   scoped_refptr<SafeBrowsingService> sb_service =
    336       g_browser_process->safe_browsing_service();
    337   if (sb_service.get()) {
    338     ui_manager_ = sb_service->ui_manager();
    339     database_manager_ = sb_service->database_manager();
    340     ui_manager_->AddObserver(this);
    341   }
    342 }
    343 
    344 ClientSideDetectionHost::~ClientSideDetectionHost() {
    345   if (ui_manager_.get())
    346     ui_manager_->RemoveObserver(this);
    347 }
    348 
    349 bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
    350   bool handled = true;
    351   IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
    352     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
    353                         OnPhishingDetectionDone)
    354     IPC_MESSAGE_UNHANDLED(handled = false)
    355   IPC_END_MESSAGE_MAP()
    356   return handled;
    357 }
    358 
    359 void ClientSideDetectionHost::DidNavigateMainFrame(
    360     const content::LoadCommittedDetails& details,
    361     const content::FrameNavigateParams& params) {
    362   // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
    363   // that don't call this method on the UI thread.
    364   // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    365   if (details.is_in_page) {
    366     // If the navigation is within the same page, the user isn't really
    367     // navigating away.  We don't need to cancel a pending callback or
    368     // begin a new classification.
    369     return;
    370   }
    371   // Cancel any pending classification request.
    372   if (classification_request_.get()) {
    373     classification_request_->Cancel();
    374   }
    375   // If we navigate away and there currently is a pending phishing
    376   // report request we have to cancel it to make sure we don't display
    377   // an interstitial for the wrong page.  Note that this won't cancel
    378   // the server ping back but only cancel the showing of the
    379   // interstial.
    380   weak_factory_.InvalidateWeakPtrs();
    381 
    382   if (!csd_service_) {
    383     return;
    384   }
    385   browse_info_.reset(new BrowseInfo);
    386 
    387   // Store redirect chain information.
    388   if (params.url.host() != cur_host_) {
    389     cur_host_ = params.url.host();
    390     cur_host_redirects_ = params.redirects;
    391   }
    392   browse_info_->url = params.url;
    393   browse_info_->host_redirects = cur_host_redirects_;
    394   browse_info_->url_redirects = params.redirects;
    395   browse_info_->referrer = params.referrer.url;
    396   browse_info_->http_status_code = details.http_status_code;
    397   browse_info_->page_id = params.page_id;
    398 
    399   should_extract_malware_features_ = true;
    400   should_classify_for_malware_ = false;
    401   pageload_complete_ = false;
    402 
    403   // Check whether we can cassify the current URL for phishing or malware.
    404   classification_request_ = new ShouldClassifyUrlRequest(
    405       params,
    406       base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
    407                  weak_factory_.GetWeakPtr()),
    408       base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
    409                  weak_factory_.GetWeakPtr()),
    410       web_contents(), csd_service_, database_manager_.get(), this);
    411   classification_request_->Start();
    412 }
    413 
    414 void ClientSideDetectionHost::OnSafeBrowsingHit(
    415     const SafeBrowsingUIManager::UnsafeResource& resource) {
    416   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
    417     return;
    418 
    419   // Check that the hit is either malware or phishing.
    420   if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&
    421       resource.threat_type != SB_THREAT_TYPE_URL_MALWARE)
    422     return;
    423 
    424   // Check that this notification is really for us.
    425   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
    426       resource.render_process_host_id, resource.render_view_id);
    427   if (!hit_rvh ||
    428       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
    429     return;
    430 
    431   // Store the unique page ID for later.
    432   unsafe_unique_page_id_ =
    433       web_contents()->GetController().GetActiveEntry()->GetUniqueID();
    434 
    435   // We also keep the resource around in order to be able to send the
    436   // malicious URL to the server.
    437   unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
    438   unsafe_resource_->callback.Reset();  // Don't do anything stupid.
    439 }
    440 
    441 void ClientSideDetectionHost::OnSafeBrowsingMatch(
    442     const SafeBrowsingUIManager::UnsafeResource& resource) {
    443   if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
    444     return;
    445 
    446   // Check that this notification is really for us.
    447   content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
    448       resource.render_process_host_id, resource.render_view_id);
    449   if (!hit_rvh ||
    450       web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
    451     return;
    452 
    453   web_contents()->GetController().GetActiveEntry()->SetExtraData(
    454       kSafeBrowsingMatchKey, base::ASCIIToUTF16("1"));
    455 }
    456 
    457 scoped_refptr<SafeBrowsingDatabaseManager>
    458 ClientSideDetectionHost::database_manager() {
    459   return database_manager_;
    460 }
    461 
    462 bool ClientSideDetectionHost::DidPageReceiveSafeBrowsingMatch() const {
    463   if (!web_contents() || !web_contents()->GetController().GetVisibleEntry())
    464     return false;
    465 
    466   // If an interstitial page is showing, GetVisibleEntry will return the
    467   // transient NavigationEntry for the interstitial. The transient entry
    468   // will not have the flag set, so use the pending entry instead if there
    469   // is one.
    470   NavigationEntry* entry = web_contents()->GetController().GetPendingEntry();
    471   if (!entry) {
    472     entry = web_contents()->GetController().GetVisibleEntry();
    473     if (entry->GetPageType() == content::PAGE_TYPE_INTERSTITIAL)
    474       entry = web_contents()->GetController().GetLastCommittedEntry();
    475     if (!entry)
    476       return false;
    477   }
    478 
    479   base::string16 value;
    480   return entry->GetExtraData(kSafeBrowsingMatchKey, &value);
    481 }
    482 
    483 void ClientSideDetectionHost::WebContentsDestroyed() {
    484   // Tell any pending classification request that it is being canceled.
    485   if (classification_request_.get()) {
    486     classification_request_->Cancel();
    487   }
    488   // Cancel all pending feature extractions.
    489   feature_extractor_.reset();
    490 }
    491 
    492 void ClientSideDetectionHost::OnPhishingPreClassificationDone(
    493     bool should_classify) {
    494   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    495   if (browse_info_.get() && should_classify) {
    496     VLOG(1) << "Instruct renderer to start phishing detection for URL: "
    497             << browse_info_->url;
    498     content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
    499     rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
    500         rvh->GetRoutingID(), browse_info_->url));
    501   }
    502 }
    503 
    504 void ClientSideDetectionHost::OnMalwarePreClassificationDone(
    505     bool should_classify) {
    506   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    507   // If classification checks failed we should stop extracting malware features.
    508   DVLOG(2) << "Malware pre-classification checks done. Should classify: "
    509            << should_classify;
    510   should_extract_malware_features_ = should_classify;
    511   should_classify_for_malware_ = should_classify;
    512   MaybeStartMalwareFeatureExtraction();
    513 }
    514 
    515 void ClientSideDetectionHost::DidStopLoading(content::RenderViewHost* rvh) {
    516   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    517   if (!csd_service_ || !browse_info_.get())
    518     return;
    519   DVLOG(2) << "Page finished loading.";
    520   pageload_complete_ = true;
    521   MaybeStartMalwareFeatureExtraction();
    522 }
    523 
    524 void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
    525   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    526   if (csd_service_ && browse_info_.get() &&
    527       should_classify_for_malware_ &&
    528       pageload_complete_) {
    529     scoped_ptr<ClientMalwareRequest> malware_request(
    530         new ClientMalwareRequest);
    531     // Start browser-side malware feature extraction.  Once we're done it will
    532     // send the malware client verdict request.
    533     malware_request->set_url(browse_info_->url.spec());
    534     const GURL& referrer = browse_info_->referrer;
    535     if (referrer.SchemeIs("http")) {  // Only send http urls.
    536       malware_request->set_referrer_url(referrer.spec());
    537     }
    538     // This function doesn't expect browse_info_ to stay around after this
    539     // function returns.
    540     feature_extractor_->ExtractMalwareFeatures(
    541         browse_info_.get(),
    542         malware_request.release(),
    543         base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
    544                    weak_factory_.GetWeakPtr()));
    545     should_classify_for_malware_ = false;
    546   }
    547 }
    548 
    549 void ClientSideDetectionHost::OnPhishingDetectionDone(
    550     const std::string& verdict_str) {
    551   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    552   // There is something seriously wrong if there is no service class but
    553   // this method is called.  The renderer should not start phishing detection
    554   // if there isn't any service class in the browser.
    555   DCHECK(csd_service_);
    556   DCHECK(browse_info_.get());
    557 
    558   // We parse the protocol buffer here.  If we're unable to parse it we won't
    559   // send the verdict further.
    560   scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
    561   if (csd_service_ &&
    562       browse_info_.get() &&
    563       verdict->ParseFromString(verdict_str) &&
    564       verdict->IsInitialized()) {
    565     // We only send phishing verdict to the server if the verdict is phishing or
    566     // if a SafeBrowsing interstitial was already shown for this site.  E.g., a
    567     // malware or phishing interstitial was shown but the user clicked
    568     // through.
    569     if (verdict->is_phishing() || DidShowSBInterstitial()) {
    570       if (DidShowSBInterstitial()) {
    571         browse_info_->unsafe_resource.reset(unsafe_resource_.release());
    572       }
    573       // Start browser-side feature extraction.  Once we're done it will send
    574       // the client verdict request.
    575       feature_extractor_->ExtractFeatures(
    576           browse_info_.get(),
    577           verdict.release(),
    578           base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
    579                      weak_factory_.GetWeakPtr()));
    580     }
    581   }
    582 }
    583 
    584 void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
    585                                                        bool is_phishing) {
    586   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    587   DVLOG(2) << "Received server phishing verdict for URL:" << phishing_url
    588            << " is_phishing:" << is_phishing;
    589   if (is_phishing) {
    590     DCHECK(web_contents());
    591     if (ui_manager_.get()) {
    592       SafeBrowsingUIManager::UnsafeResource resource;
    593       resource.url = phishing_url;
    594       resource.original_url = phishing_url;
    595       resource.is_subresource = false;
    596       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
    597       resource.render_process_host_id =
    598           web_contents()->GetRenderProcessHost()->GetID();
    599       resource.render_view_id =
    600           web_contents()->GetRenderViewHost()->GetRoutingID();
    601       if (!ui_manager_->IsWhitelisted(resource)) {
    602         // We need to stop any pending navigations, otherwise the interstital
    603         // might not get created properly.
    604         web_contents()->GetController().DiscardNonCommittedEntries();
    605       }
    606       ui_manager_->DisplayBlockingPage(resource);
    607     }
    608     // If there is true phishing verdict, invalidate weakptr so that no longer
    609     // consider the malware vedict.
    610     weak_factory_.InvalidateWeakPtrs();
    611   }
    612 }
    613 
    614 void ClientSideDetectionHost::MaybeShowMalwareWarning(GURL original_url,
    615                                                       GURL malware_url,
    616                                                       bool is_malware) {
    617   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    618   DVLOG(2) << "Received server malawre IP verdict for URL:" << malware_url
    619            << " is_malware:" << is_malware;
    620   if (is_malware && malware_url.is_valid() && original_url.is_valid()) {
    621     DCHECK(web_contents());
    622     if (ui_manager_.get()) {
    623       SafeBrowsingUIManager::UnsafeResource resource;
    624       resource.url = malware_url;
    625       resource.original_url = original_url;
    626       resource.is_subresource = (malware_url.host() != original_url.host());
    627       resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL;
    628       resource.render_process_host_id =
    629           web_contents()->GetRenderProcessHost()->GetID();
    630       resource.render_view_id =
    631           web_contents()->GetRenderViewHost()->GetRoutingID();
    632       if (!ui_manager_->IsWhitelisted(resource)) {
    633         // We need to stop any pending navigations, otherwise the interstital
    634         // might not get created properly.
    635         web_contents()->GetController().DiscardNonCommittedEntries();
    636       }
    637       ui_manager_->DisplayBlockingPage(resource);
    638     }
    639     // If there is true malware verdict, invalidate weakptr so that no longer
    640     // consider the phishing vedict.
    641     weak_factory_.InvalidateWeakPtrs();
    642   }
    643 }
    644 
    645 void ClientSideDetectionHost::FeatureExtractionDone(
    646     bool success,
    647     scoped_ptr<ClientPhishingRequest> request) {
    648   DCHECK(request);
    649   DVLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
    650            << request->url() << ". Start sending client phishing request.";
    651   ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
    652   // If the client-side verdict isn't phishing we don't care about the server
    653   // response because we aren't going to display a warning.
    654   if (request->is_phishing()) {
    655     callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
    656                           weak_factory_.GetWeakPtr());
    657   }
    658   // Send ping even if the browser feature extraction failed.
    659   csd_service_->SendClientReportPhishingRequest(
    660       request.release(),  // The service takes ownership of the request object.
    661       callback);
    662 }
    663 
    664 void ClientSideDetectionHost::MalwareFeatureExtractionDone(
    665     bool feature_extraction_success,
    666     scoped_ptr<ClientMalwareRequest> request) {
    667   DCHECK(request.get());
    668   DVLOG(2) << "Malware Feature extraction done for URL: " << request->url()
    669            << ", with badip url count:" << request->bad_ip_url_info_size();
    670 
    671   // Send ping if there is matching features.
    672   if (feature_extraction_success && request->bad_ip_url_info_size() > 0) {
    673     VLOG(1) << "Start sending client malware request.";
    674     ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
    675     callback = base::Bind(&ClientSideDetectionHost::MaybeShowMalwareWarning,
    676                           weak_factory_.GetWeakPtr());
    677     csd_service_->SendClientReportMalwareRequest(request.release(), callback);
    678   }
    679 }
    680 
    681 void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip,
    682                                              const std::string& url,
    683                                              const std::string& method,
    684                                              const std::string& referrer,
    685                                              const ResourceType resource_type) {
    686   if (ip.empty() || url.empty())
    687     return;
    688 
    689   IPUrlMap::iterator it = browse_info_->ips.find(ip);
    690   if (it == browse_info_->ips.end()) {
    691     if (browse_info_->ips.size() < kMaxIPsPerBrowse) {
    692       std::vector<IPUrlInfo> url_infos;
    693       url_infos.push_back(IPUrlInfo(url, method, referrer, resource_type));
    694       browse_info_->ips.insert(make_pair(ip, url_infos));
    695     }
    696   } else if (it->second.size() < kMaxUrlsPerIP) {
    697     it->second.push_back(IPUrlInfo(url, method, referrer, resource_type));
    698   }
    699 }
    700 
    701 void ClientSideDetectionHost::Observe(
    702     int type,
    703     const content::NotificationSource& source,
    704     const content::NotificationDetails& details) {
    705   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    706   DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
    707   const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
    708       details).ptr();
    709   if (req && browse_info_.get() &&
    710       should_extract_malware_features_ && req->url.is_valid()) {
    711     UpdateIPUrlMap(req->socket_address.host() /* ip */,
    712                    req->url.spec()  /* url */,
    713                    req->method,
    714                    req->referrer,
    715                    req->resource_type);
    716   }
    717 }
    718 
    719 bool ClientSideDetectionHost::DidShowSBInterstitial() const {
    720   if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
    721     return false;
    722   }
    723   const NavigationEntry* nav_entry =
    724       web_contents()->GetController().GetActiveEntry();
    725   return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
    726 }
    727 
    728 void ClientSideDetectionHost::set_client_side_detection_service(
    729     ClientSideDetectionService* service) {
    730   csd_service_ = service;
    731 }
    732 
    733 void ClientSideDetectionHost::set_safe_browsing_managers(
    734     SafeBrowsingUIManager* ui_manager,
    735     SafeBrowsingDatabaseManager* database_manager) {
    736   if (ui_manager_.get())
    737     ui_manager_->RemoveObserver(this);
    738 
    739   ui_manager_ = ui_manager;
    740   if (ui_manager)
    741     ui_manager_->AddObserver(this);
    742 
    743   database_manager_ = database_manager;
    744 }
    745 
    746 }  // namespace safe_browsing
    747