Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
      6 
      7 #include <map>
      8 #include <string>
      9 #include <vector>
     10 
     11 #include "base/memory/scoped_ptr.h"
     12 #include "base/message_loop/message_loop.h"
     13 #include "base/strings/stringprintf.h"
     14 #include "base/time/time.h"
     15 #include "chrome/browser/history/history_backend.h"
     16 #include "chrome/browser/history/history_service.h"
     17 #include "chrome/browser/history/history_service_factory.h"
     18 #include "chrome/browser/profiles/profile.h"
     19 #include "chrome/browser/safe_browsing/browser_features.h"
     20 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
     21 #include "chrome/browser/safe_browsing/database_manager.h"
     22 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
     23 #include "chrome/browser/safe_browsing/ui_manager.h"
     24 #include "chrome/common/safe_browsing/csd.pb.h"
     25 #include "chrome/test/base/chrome_render_view_host_test_harness.h"
     26 #include "chrome/test/base/testing_profile.h"
     27 #include "content/public/browser/navigation_controller.h"
     28 #include "content/public/browser/web_contents.h"
     29 #include "content/public/common/page_transition_types.h"
     30 #include "content/public/common/referrer.h"
     31 #include "content/public/test/test_browser_thread.h"
     32 #include "content/public/test/web_contents_tester.h"
     33 #include "testing/gmock/include/gmock/gmock.h"
     34 #include "testing/gtest/include/gtest/gtest.h"
     35 #include "url/gurl.h"
     36 
     37 using content::BrowserThread;
     38 using content::WebContentsTester;
     39 
     40 using testing::DoAll;
     41 using testing::Return;
     42 using testing::StrictMock;
     43 
     44 namespace safe_browsing {
     45 
     46 namespace {
     47 
     48 class MockSafeBrowsingDatabaseManager : public SafeBrowsingDatabaseManager {
     49  public:
     50   explicit MockSafeBrowsingDatabaseManager(
     51       const scoped_refptr<SafeBrowsingService>& service)
     52       : SafeBrowsingDatabaseManager(service) { }
     53 
     54   MOCK_METHOD1(MatchMalwareIP, bool(const std::string& ip_address));
     55 
     56  protected:
     57   virtual ~MockSafeBrowsingDatabaseManager() {}
     58 
     59  private:
     60   DISALLOW_COPY_AND_ASSIGN(MockSafeBrowsingDatabaseManager);
     61 };
     62 
     63 class MockClientSideDetectionHost : public ClientSideDetectionHost {
     64  public:
     65   MockClientSideDetectionHost(
     66       content::WebContents* tab,
     67       SafeBrowsingDatabaseManager* database_manager)
     68       : ClientSideDetectionHost(tab) {
     69     set_safe_browsing_managers(NULL, database_manager);
     70   }
     71 
     72   virtual ~MockClientSideDetectionHost() {}
     73 
     74   MOCK_METHOD1(IsBadIpAddress, bool(const std::string&));
     75 };
     76 }  // namespace
     77 
     78 class BrowserFeatureExtractorTest : public ChromeRenderViewHostTestHarness {
     79  protected:
     80   virtual void SetUp() {
     81     ChromeRenderViewHostTestHarness::SetUp();
     82     ASSERT_TRUE(profile()->CreateHistoryService(
     83         true /* delete_file */, false /* no_db */));
     84 
     85     db_manager_ = new StrictMock<MockSafeBrowsingDatabaseManager>(
     86         SafeBrowsingService::CreateSafeBrowsingService());
     87     host_.reset(new StrictMock<MockClientSideDetectionHost>(
     88         web_contents(), db_manager_.get()));
     89     extractor_.reset(
     90         new BrowserFeatureExtractor(web_contents(), host_.get()));
     91     num_pending_ = 0;
     92     browse_info_.reset(new BrowseInfo);
     93   }
     94 
     95   virtual void TearDown() {
     96     extractor_.reset();
     97     host_.reset();
     98     db_manager_ = NULL;
     99     profile()->DestroyHistoryService();
    100     ChromeRenderViewHostTestHarness::TearDown();
    101     ASSERT_EQ(0, num_pending_);
    102   }
    103 
    104   HistoryService* history_service() {
    105     return HistoryServiceFactory::GetForProfile(profile(),
    106                                                 Profile::EXPLICIT_ACCESS);
    107   }
    108 
    109   void SetRedirectChain(const std::vector<GURL>& redirect_chain,
    110                         bool new_host) {
    111     browse_info_->url_redirects = redirect_chain;
    112     if (new_host) {
    113       browse_info_->host_redirects = redirect_chain;
    114     }
    115   }
    116 
    117   // Wrapper around NavigateAndCommit that also sets the redirect chain to
    118   // a sane value.
    119   void SimpleNavigateAndCommit(const GURL& url) {
    120     std::vector<GURL> redirect_chain;
    121     redirect_chain.push_back(url);
    122     SetRedirectChain(redirect_chain, true);
    123     NavigateAndCommit(url, GURL(), content::PAGE_TRANSITION_LINK);
    124   }
    125 
    126   // This is similar to NavigateAndCommit that is in WebContentsTester, but
    127   // allows us to specify the referrer and page_transition_type.
    128   void NavigateAndCommit(const GURL& url,
    129                          const GURL& referrer,
    130                          content::PageTransition type) {
    131     web_contents()->GetController().LoadURL(
    132         url, content::Referrer(referrer, blink::WebReferrerPolicyDefault),
    133         type, std::string());
    134 
    135     static int page_id = 0;
    136     content::RenderViewHost* rvh =
    137         WebContentsTester::For(web_contents())->GetPendingRenderViewHost();
    138     if (!rvh) {
    139       rvh = web_contents()->GetRenderViewHost();
    140     }
    141     WebContentsTester::For(web_contents())->ProceedWithCrossSiteNavigation();
    142     WebContentsTester::For(web_contents())->TestDidNavigateWithReferrer(
    143         rvh, ++page_id, url,
    144         content::Referrer(referrer, blink::WebReferrerPolicyDefault), type);
    145   }
    146 
    147   bool ExtractFeatures(ClientPhishingRequest* request) {
    148     StartExtractFeatures(request);
    149     base::MessageLoop::current()->Run();
    150     EXPECT_EQ(1U, success_.count(request));
    151     return success_.count(request) ? success_[request] : false;
    152   }
    153 
    154   void StartExtractFeatures(ClientPhishingRequest* request) {
    155     success_.erase(request);
    156     ++num_pending_;
    157     extractor_->ExtractFeatures(
    158         browse_info_.get(),
    159         request,
    160         base::Bind(&BrowserFeatureExtractorTest::ExtractFeaturesDone,
    161                    base::Unretained(this)));
    162   }
    163 
    164   void GetFeatureMap(const ClientPhishingRequest& request,
    165                      std::map<std::string, double>* features) {
    166     for (int i = 0; i < request.non_model_feature_map_size(); ++i) {
    167       const ClientPhishingRequest::Feature& feature =
    168           request.non_model_feature_map(i);
    169       EXPECT_EQ(0U, features->count(feature.name()));
    170       (*features)[feature.name()] = feature.value();
    171     }
    172   }
    173 
    174   void ExtractMalwareFeatures(ClientMalwareRequest* request) {
    175     // Feature extraction takes ownership of the request object
    176     // and passes it along to the done callback in the end.
    177     StartExtractMalwareFeatures(request);
    178     base::MessageLoopForUI::current()->Run();
    179     EXPECT_EQ(1U, success_.count(request));
    180     EXPECT_TRUE(success_[request]);
    181   }
    182 
    183   void StartExtractMalwareFeatures(ClientMalwareRequest* request) {
    184     success_.erase(request);
    185     ++num_pending_;
    186     // We temporarily give up ownership of request to ExtractMalwareFeatures
    187     // but we'll regain ownership of it in ExtractMalwareFeaturesDone.
    188     extractor_->ExtractMalwareFeatures(
    189         browse_info_.get(),
    190         request,
    191         base::Bind(&BrowserFeatureExtractorTest::ExtractMalwareFeaturesDone,
    192                    base::Unretained(this)));
    193   }
    194 
    195   void GetMalwareUrls(
    196       const ClientMalwareRequest& request,
    197       std::map<std::string, std::set<std::string> >* urls) {
    198     for (int i = 0; i < request.bad_ip_url_info_size(); ++i) {
    199       const ClientMalwareRequest::UrlInfo& urlinfo =
    200           request.bad_ip_url_info(i);
    201       (*urls)[urlinfo.ip()].insert(urlinfo.url());
    202     }
    203   }
    204 
    205   int num_pending_;  // Number of pending feature extractions.
    206   scoped_ptr<BrowserFeatureExtractor> extractor_;
    207   std::map<void*, bool> success_;
    208   scoped_ptr<BrowseInfo> browse_info_;
    209   scoped_ptr<StrictMock<MockClientSideDetectionHost> > host_;
    210   scoped_refptr<StrictMock<MockSafeBrowsingDatabaseManager> > db_manager_;
    211 
    212  private:
    213   void ExtractFeaturesDone(bool success, ClientPhishingRequest* request) {
    214     ASSERT_EQ(0U, success_.count(request));
    215     success_[request] = success;
    216     if (--num_pending_ == 0) {
    217       base::MessageLoop::current()->Quit();
    218     }
    219   }
    220 
    221   void ExtractMalwareFeaturesDone(
    222       bool success,
    223       scoped_ptr<ClientMalwareRequest> request) {
    224     EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::UI));
    225     ASSERT_EQ(0U, success_.count(request.get()));
    226     // The pointer doesn't really belong to us.  It belongs to
    227     // the test case which passed it to ExtractMalwareFeatures above.
    228     success_[request.release()] = success;
    229     if (--num_pending_ == 0) {
    230       base::MessageLoopForUI::current()->Quit();
    231     }
    232   }
    233 };
    234 
    235 TEST_F(BrowserFeatureExtractorTest, UrlNotInHistory) {
    236   ClientPhishingRequest request;
    237   SimpleNavigateAndCommit(GURL("http://www.google.com"));
    238   request.set_url("http://www.google.com/");
    239   request.set_client_score(0.5);
    240   EXPECT_FALSE(ExtractFeatures(&request));
    241 }
    242 
    243 TEST_F(BrowserFeatureExtractorTest, RequestNotInitialized) {
    244   ClientPhishingRequest request;
    245   request.set_url("http://www.google.com/");
    246   // Request is missing the score value.
    247   SimpleNavigateAndCommit(GURL("http://www.google.com"));
    248   EXPECT_FALSE(ExtractFeatures(&request));
    249 }
    250 
    251 TEST_F(BrowserFeatureExtractorTest, UrlInHistory) {
    252   history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
    253                              base::Time::Now(),
    254                              history::SOURCE_BROWSED);
    255   history_service()->AddPage(GURL("https://www.foo.com/gaa.html"),
    256                              base::Time::Now(),
    257                              history::SOURCE_BROWSED);  // same host HTTPS.
    258   history_service()->AddPage(GURL("http://www.foo.com/gaa.html"),
    259                              base::Time::Now(),
    260                              history::SOURCE_BROWSED);  // same host HTTP.
    261   history_service()->AddPage(GURL("http://bar.foo.com/gaa.html"),
    262                              base::Time::Now(),
    263                              history::SOURCE_BROWSED);  // different host.
    264   history_service()->AddPage(GURL("http://www.foo.com/bar.html?a=b"),
    265                              base::Time::Now() - base::TimeDelta::FromHours(23),
    266                              NULL, 0, GURL(), history::RedirectList(),
    267                              content::PAGE_TRANSITION_LINK,
    268                              history::SOURCE_BROWSED, false);
    269   history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
    270                              base::Time::Now() - base::TimeDelta::FromHours(25),
    271                              NULL, 0, GURL(), history::RedirectList(),
    272                              content::PAGE_TRANSITION_TYPED,
    273                              history::SOURCE_BROWSED, false);
    274   history_service()->AddPage(GURL("https://www.foo.com/goo.html"),
    275                              base::Time::Now() - base::TimeDelta::FromDays(5),
    276                              NULL, 0, GURL(), history::RedirectList(),
    277                              content::PAGE_TRANSITION_TYPED,
    278                              history::SOURCE_BROWSED, false);
    279 
    280   SimpleNavigateAndCommit(GURL("http://www.foo.com/bar.html"));
    281 
    282   ClientPhishingRequest request;
    283   request.set_url("http://www.foo.com/bar.html");
    284   request.set_client_score(0.5);
    285   EXPECT_TRUE(ExtractFeatures(&request));
    286   std::map<std::string, double> features;
    287   GetFeatureMap(request, &features);
    288 
    289   EXPECT_EQ(12U, features.size());
    290   EXPECT_DOUBLE_EQ(2.0, features[features::kUrlHistoryVisitCount]);
    291   EXPECT_DOUBLE_EQ(1.0,
    292                    features[features::kUrlHistoryVisitCountMoreThan24hAgo]);
    293   EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryTypedCount]);
    294   EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]);
    295   EXPECT_DOUBLE_EQ(4.0, features[features::kHttpHostVisitCount]);
    296   EXPECT_DOUBLE_EQ(2.0, features[features::kHttpsHostVisitCount]);
    297   EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]);
    298   EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpsHostVisitMoreThan24hAgo]);
    299 
    300   request.Clear();
    301   request.set_url("http://bar.foo.com/gaa.html");
    302   request.set_client_score(0.5);
    303   EXPECT_TRUE(ExtractFeatures(&request));
    304   features.clear();
    305   GetFeatureMap(request, &features);
    306   // We have less features because we didn't Navigate to this page, so we don't
    307   // have Referrer, IsFirstNavigation, HasSSLReferrer, etc.
    308   EXPECT_EQ(7U, features.size());
    309   EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryVisitCount]);
    310   EXPECT_DOUBLE_EQ(0.0,
    311                    features[features::kUrlHistoryVisitCountMoreThan24hAgo]);
    312   EXPECT_DOUBLE_EQ(0.0, features[features::kUrlHistoryTypedCount]);
    313   EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]);
    314   EXPECT_DOUBLE_EQ(1.0, features[features::kHttpHostVisitCount]);
    315   EXPECT_DOUBLE_EQ(0.0, features[features::kHttpsHostVisitCount]);
    316   EXPECT_DOUBLE_EQ(0.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]);
    317   EXPECT_FALSE(features.count(features::kFirstHttpsHostVisitMoreThan24hAgo));
    318 }
    319 
    320 TEST_F(BrowserFeatureExtractorTest, MultipleRequestsAtOnce) {
    321   history_service()->AddPage(GURL("http://www.foo.com/bar.html"),
    322                              base::Time::Now(),
    323                              history::SOURCE_BROWSED);
    324   SimpleNavigateAndCommit(GURL("http:/www.foo.com/bar.html"));
    325   ClientPhishingRequest request;
    326   request.set_url("http://www.foo.com/bar.html");
    327   request.set_client_score(0.5);
    328   StartExtractFeatures(&request);
    329 
    330   SimpleNavigateAndCommit(GURL("http://www.foo.com/goo.html"));
    331   ClientPhishingRequest request2;
    332   request2.set_url("http://www.foo.com/goo.html");
    333   request2.set_client_score(1.0);
    334   StartExtractFeatures(&request2);
    335 
    336   base::MessageLoop::current()->Run();
    337   EXPECT_TRUE(success_[&request]);
    338   // Success is false because the second URL is not in the history and we are
    339   // not able to distinguish between a missing URL in the history and an error.
    340   EXPECT_FALSE(success_[&request2]);
    341 }
    342 
    343 TEST_F(BrowserFeatureExtractorTest, BrowseFeatures) {
    344   history_service()->AddPage(GURL("http://www.foo.com/"),
    345                              base::Time::Now(),
    346                              history::SOURCE_BROWSED);
    347   history_service()->AddPage(GURL("http://www.foo.com/page.html"),
    348                              base::Time::Now(),
    349                              history::SOURCE_BROWSED);
    350   history_service()->AddPage(GURL("http://www.bar.com/"),
    351                              base::Time::Now(),
    352                              history::SOURCE_BROWSED);
    353   history_service()->AddPage(GURL("http://www.bar.com/other_page.html"),
    354                              base::Time::Now(),
    355                              history::SOURCE_BROWSED);
    356   history_service()->AddPage(GURL("http://www.baz.com/"),
    357                              base::Time::Now(),
    358                              history::SOURCE_BROWSED);
    359 
    360   ClientPhishingRequest request;
    361   request.set_url("http://www.foo.com/");
    362   request.set_client_score(0.5);
    363   std::vector<GURL> redirect_chain;
    364   redirect_chain.push_back(GURL("http://somerandomwebsite.com/"));
    365   redirect_chain.push_back(GURL("http://www.foo.com/"));
    366   SetRedirectChain(redirect_chain, true);
    367   browse_info_->http_status_code = 200;
    368   NavigateAndCommit(GURL("http://www.foo.com/"),
    369                     GURL("http://google.com/"),
    370                     content::PageTransitionFromInt(
    371                         content::PAGE_TRANSITION_AUTO_BOOKMARK |
    372                         content::PAGE_TRANSITION_FORWARD_BACK));
    373 
    374   EXPECT_TRUE(ExtractFeatures(&request));
    375   std::map<std::string, double> features;
    376   GetFeatureMap(request, &features);
    377 
    378   EXPECT_EQ(1.0,
    379             features[base::StringPrintf("%s=%s",
    380                                         features::kReferrer,
    381                                         "http://google.com/")]);
    382   EXPECT_EQ(1.0,
    383             features[base::StringPrintf("%s[0]=%s",
    384                                         features::kRedirect,
    385                                         "http://somerandomwebsite.com/")]);
    386   // We shouldn't have a feature for the last redirect in the chain, since it
    387   // should always be the URL that we navigated to.
    388   EXPECT_EQ(0.0,
    389             features[base::StringPrintf("%s[1]=%s",
    390                                         features::kRedirect,
    391                                         "http://foo.com/")]);
    392   EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
    393   EXPECT_EQ(2.0, features[features::kPageTransitionType]);
    394   EXPECT_EQ(1.0, features[features::kIsFirstNavigation]);
    395   EXPECT_EQ(200.0, features[features::kHttpStatusCode]);
    396 
    397   request.Clear();
    398   request.set_url("http://www.foo.com/page.html");
    399   request.set_client_score(0.5);
    400   redirect_chain.clear();
    401   redirect_chain.push_back(GURL("http://www.foo.com/redirect"));
    402   redirect_chain.push_back(GURL("http://www.foo.com/second_redirect"));
    403   redirect_chain.push_back(GURL("http://www.foo.com/page.html"));
    404   SetRedirectChain(redirect_chain, false);
    405   browse_info_->http_status_code = 404;
    406   NavigateAndCommit(GURL("http://www.foo.com/page.html"),
    407                     GURL("http://www.foo.com"),
    408                     content::PageTransitionFromInt(
    409                         content::PAGE_TRANSITION_TYPED |
    410                         content::PAGE_TRANSITION_CHAIN_START |
    411                         content::PAGE_TRANSITION_CLIENT_REDIRECT));
    412 
    413   EXPECT_TRUE(ExtractFeatures(&request));
    414   features.clear();
    415   GetFeatureMap(request, &features);
    416 
    417   EXPECT_EQ(1,
    418             features[base::StringPrintf("%s=%s",
    419                                         features::kReferrer,
    420                                         "http://www.foo.com/")]);
    421   EXPECT_EQ(1.0,
    422             features[base::StringPrintf("%s[0]=%s",
    423                                         features::kRedirect,
    424                                         "http://www.foo.com/redirect")]);
    425   EXPECT_EQ(1.0,
    426             features[base::StringPrintf("%s[1]=%s",
    427                                         features::kRedirect,
    428                                         "http://www.foo.com/second_redirect")]);
    429   EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
    430   EXPECT_EQ(1.0, features[features::kPageTransitionType]);
    431   EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
    432   EXPECT_EQ(1.0,
    433             features[base::StringPrintf("%s%s=%s",
    434                                         features::kHostPrefix,
    435                                         features::kReferrer,
    436                                         "http://google.com/")]);
    437   EXPECT_EQ(1.0,
    438             features[base::StringPrintf("%s%s[0]=%s",
    439                                         features::kHostPrefix,
    440                                         features::kRedirect,
    441                                         "http://somerandomwebsite.com/")]);
    442   EXPECT_EQ(2.0,
    443             features[base::StringPrintf("%s%s",
    444                                         features::kHostPrefix,
    445                                         features::kPageTransitionType)]);
    446   EXPECT_EQ(1.0,
    447             features[base::StringPrintf("%s%s",
    448                                         features::kHostPrefix,
    449                                         features::kIsFirstNavigation)]);
    450   EXPECT_EQ(404.0, features[features::kHttpStatusCode]);
    451 
    452   request.Clear();
    453   request.set_url("http://www.bar.com/");
    454   request.set_client_score(0.5);
    455   redirect_chain.clear();
    456   redirect_chain.push_back(GURL("http://www.foo.com/page.html"));
    457   redirect_chain.push_back(GURL("http://www.bar.com/"));
    458   SetRedirectChain(redirect_chain, true);
    459   NavigateAndCommit(GURL("http://www.bar.com/"),
    460                     GURL("http://www.foo.com/page.html"),
    461                     content::PageTransitionFromInt(
    462                         content::PAGE_TRANSITION_LINK |
    463                         content::PAGE_TRANSITION_CHAIN_END |
    464                         content::PAGE_TRANSITION_CLIENT_REDIRECT));
    465 
    466   EXPECT_TRUE(ExtractFeatures(&request));
    467   features.clear();
    468   GetFeatureMap(request, &features);
    469 
    470   EXPECT_EQ(1.0,
    471             features[base::StringPrintf("%s=%s",
    472                                         features::kReferrer,
    473                                         "http://www.foo.com/page.html")]);
    474   EXPECT_EQ(1.0,
    475             features[base::StringPrintf("%s[0]=%s",
    476                                         features::kRedirect,
    477                                         "http://www.foo.com/page.html")]);
    478   EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
    479   EXPECT_EQ(0.0, features[features::kPageTransitionType]);
    480   EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
    481 
    482   // Should not have host features.
    483   EXPECT_EQ(0U,
    484             features.count(base::StringPrintf("%s%s",
    485                                               features::kHostPrefix,
    486                                               features::kPageTransitionType)));
    487   EXPECT_EQ(0U,
    488             features.count(base::StringPrintf("%s%s",
    489                                               features::kHostPrefix,
    490                                               features::kIsFirstNavigation)));
    491 
    492   request.Clear();
    493   request.set_url("http://www.bar.com/other_page.html");
    494   request.set_client_score(0.5);
    495   redirect_chain.clear();
    496   redirect_chain.push_back(GURL("http://www.bar.com/other_page.html"));
    497   SetRedirectChain(redirect_chain, false);
    498   NavigateAndCommit(GURL("http://www.bar.com/other_page.html"),
    499                     GURL("http://www.bar.com/"),
    500                     content::PAGE_TRANSITION_LINK);
    501 
    502   EXPECT_TRUE(ExtractFeatures(&request));
    503   features.clear();
    504   GetFeatureMap(request, &features);
    505 
    506   EXPECT_EQ(1.0,
    507             features[base::StringPrintf("%s=%s",
    508                                         features::kReferrer,
    509                                         "http://www.bar.com/")]);
    510   EXPECT_EQ(0.0, features[features::kHasSSLReferrer]);
    511   EXPECT_EQ(0.0, features[features::kPageTransitionType]);
    512   EXPECT_EQ(0.0, features[features::kIsFirstNavigation]);
    513   EXPECT_EQ(1.0,
    514             features[base::StringPrintf("%s%s=%s",
    515                                         features::kHostPrefix,
    516                                         features::kReferrer,
    517                                         "http://www.foo.com/page.html")]);
    518   EXPECT_EQ(1.0,
    519             features[base::StringPrintf("%s%s[0]=%s",
    520                                         features::kHostPrefix,
    521                                         features::kRedirect,
    522                                         "http://www.foo.com/page.html")]);
    523   EXPECT_EQ(0.0,
    524             features[base::StringPrintf("%s%s",
    525                                         features::kHostPrefix,
    526                                         features::kPageTransitionType)]);
    527   EXPECT_EQ(0.0,
    528             features[base::StringPrintf("%s%s",
    529                                         features::kHostPrefix,
    530                                         features::kIsFirstNavigation)]);
    531   request.Clear();
    532   request.set_url("http://www.baz.com/");
    533   request.set_client_score(0.5);
    534   redirect_chain.clear();
    535   redirect_chain.push_back(GURL("https://bankofamerica.com"));
    536   redirect_chain.push_back(GURL("http://www.baz.com/"));
    537   SetRedirectChain(redirect_chain, true);
    538   NavigateAndCommit(GURL("http://www.baz.com"),
    539                     GURL("https://bankofamerica.com"),
    540                     content::PAGE_TRANSITION_GENERATED);
    541 
    542   EXPECT_TRUE(ExtractFeatures(&request));
    543   features.clear();
    544   GetFeatureMap(request, &features);
    545 
    546   EXPECT_EQ(1.0,
    547             features[base::StringPrintf("%s[0]=%s",
    548                                         features::kRedirect,
    549                                         features::kSecureRedirectValue)]);
    550   EXPECT_EQ(1.0, features[features::kHasSSLReferrer]);
    551   EXPECT_EQ(5.0, features[features::kPageTransitionType]);
    552   // Should not have redirect or host features.
    553   EXPECT_EQ(0U,
    554             features.count(base::StringPrintf("%s%s",
    555                                               features::kHostPrefix,
    556                                               features::kPageTransitionType)));
    557   EXPECT_EQ(0U,
    558             features.count(base::StringPrintf("%s%s",
    559                                               features::kHostPrefix,
    560                                               features::kIsFirstNavigation)));
    561   EXPECT_EQ(5.0, features[features::kPageTransitionType]);
    562 }
    563 
    564 TEST_F(BrowserFeatureExtractorTest, SafeBrowsingFeatures) {
    565   SimpleNavigateAndCommit(GURL("http://www.foo.com/malware.html"));
    566   ClientPhishingRequest request;
    567   request.set_url("http://www.foo.com/malware.html");
    568   request.set_client_score(0.5);
    569 
    570   browse_info_->unsafe_resource.reset(
    571       new SafeBrowsingUIManager::UnsafeResource);
    572   browse_info_->unsafe_resource->url = GURL("http://www.malware.com/");
    573   browse_info_->unsafe_resource->original_url = GURL("http://www.good.com/");
    574   browse_info_->unsafe_resource->is_subresource = true;
    575   browse_info_->unsafe_resource->threat_type = SB_THREAT_TYPE_URL_MALWARE;
    576 
    577   ExtractFeatures(&request);
    578   std::map<std::string, double> features;
    579   GetFeatureMap(request, &features);
    580   EXPECT_TRUE(features.count(base::StringPrintf(
    581       "%s%s",
    582       features::kSafeBrowsingMaliciousUrl,
    583       "http://www.malware.com/")));
    584   EXPECT_TRUE(features.count(base::StringPrintf(
    585       "%s%s",
    586        features::kSafeBrowsingOriginalUrl,
    587         "http://www.good.com/")));
    588   EXPECT_DOUBLE_EQ(1.0, features[features::kSafeBrowsingIsSubresource]);
    589   EXPECT_DOUBLE_EQ(2.0, features[features::kSafeBrowsingThreatType]);
    590 }
    591 
    592 TEST_F(BrowserFeatureExtractorTest, MalwareFeatures) {
    593   ClientMalwareRequest request;
    594   request.set_url("http://www.foo.com/");
    595 
    596   std::vector<IPUrlInfo> bad_urls;
    597   bad_urls.push_back(IPUrlInfo("http://bad.com", "GET", "",
    598                                ResourceType::SCRIPT));
    599   bad_urls.push_back(IPUrlInfo("http://evil.com", "GET", "",
    600                                ResourceType::SCRIPT));
    601   browse_info_->ips.insert(std::make_pair("193.5.163.8", bad_urls));
    602   browse_info_->ips.insert(std::make_pair("92.92.92.92", bad_urls));
    603   std::vector<IPUrlInfo> good_urls;
    604   good_urls.push_back(IPUrlInfo("http://ok.com", "GET", "",
    605                                 ResourceType::SCRIPT));
    606   browse_info_->ips.insert(std::make_pair("23.94.78.1", good_urls));
    607   EXPECT_CALL(*db_manager_, MatchMalwareIP("193.5.163.8"))
    608       .WillOnce(Return(true));
    609   EXPECT_CALL(*db_manager_, MatchMalwareIP("92.92.92.92"))
    610       .WillOnce(Return(true));
    611   EXPECT_CALL(*db_manager_, MatchMalwareIP("23.94.78.1"))
    612       .WillOnce(Return(false));
    613 
    614   ExtractMalwareFeatures(&request);
    615   EXPECT_EQ(4, request.bad_ip_url_info_size());
    616   std::map<std::string, std::set<std::string> > result_urls;
    617   GetMalwareUrls(request, &result_urls);
    618 
    619   EXPECT_EQ(2U, result_urls.size());
    620   EXPECT_TRUE(result_urls.count("193.5.163.8"));
    621   std::set<std::string> urls = result_urls["193.5.163.8"];
    622   EXPECT_EQ(2U, urls.size());
    623   EXPECT_TRUE(urls.find("http://bad.com") != urls.end());
    624   EXPECT_TRUE(urls.find("http://evil.com") != urls.end());
    625   EXPECT_TRUE(result_urls.count("92.92.92.92"));
    626   urls = result_urls["92.92.92.92"];
    627   EXPECT_EQ(2U, urls.size());
    628   EXPECT_TRUE(urls.find("http://bad.com") != urls.end());
    629   EXPECT_TRUE(urls.find("http://evil.com") != urls.end());
    630 }
    631 
    632 TEST_F(BrowserFeatureExtractorTest, MalwareFeatures_ExceedLimit) {
    633   ClientMalwareRequest request;
    634   request.set_url("http://www.foo.com/");
    635 
    636   std::vector<IPUrlInfo> bad_urls;
    637   bad_urls.push_back(IPUrlInfo("http://bad.com", "GET", "",
    638                                ResourceType::SCRIPT));
    639   std::vector<std::string> ips;
    640   for (int i = 0; i < 7; ++i) {  // Add 7 ips
    641     std::string ip = base::StringPrintf("%d.%d.%d.%d", i, i, i, i);
    642     ips.push_back(ip);
    643     browse_info_->ips.insert(std::make_pair(ip, bad_urls));
    644 
    645     // First ip is good but all the others are bad.
    646     EXPECT_CALL(*db_manager_, MatchMalwareIP(ip)).WillOnce(Return(i > 0));
    647   }
    648 
    649   ExtractMalwareFeatures(&request);
    650   // The number of IP matched url we store is capped at 5 IPs per request.
    651   EXPECT_EQ(5, request.bad_ip_url_info_size());
    652 }
    653 
    654 }  // namespace safe_browsing
    655