1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" 6 7 #include <map> 8 #include <string> 9 #include <vector> 10 11 #include "base/memory/scoped_ptr.h" 12 #include "base/message_loop/message_loop.h" 13 #include "base/strings/stringprintf.h" 14 #include "base/time/time.h" 15 #include "chrome/browser/history/history_backend.h" 16 #include "chrome/browser/history/history_service.h" 17 #include "chrome/browser/history/history_service_factory.h" 18 #include "chrome/browser/profiles/profile.h" 19 #include "chrome/browser/safe_browsing/browser_features.h" 20 #include "chrome/browser/safe_browsing/client_side_detection_host.h" 21 #include "chrome/browser/safe_browsing/database_manager.h" 22 #include "chrome/browser/safe_browsing/safe_browsing_service.h" 23 #include "chrome/browser/safe_browsing/ui_manager.h" 24 #include "chrome/common/safe_browsing/csd.pb.h" 25 #include "chrome/test/base/chrome_render_view_host_test_harness.h" 26 #include "chrome/test/base/testing_profile.h" 27 #include "content/public/browser/navigation_controller.h" 28 #include "content/public/browser/web_contents.h" 29 #include "content/public/common/page_transition_types.h" 30 #include "content/public/common/referrer.h" 31 #include "content/public/test/test_browser_thread.h" 32 #include "content/public/test/web_contents_tester.h" 33 #include "testing/gmock/include/gmock/gmock.h" 34 #include "testing/gtest/include/gtest/gtest.h" 35 #include "url/gurl.h" 36 37 using content::BrowserThread; 38 using content::WebContentsTester; 39 40 using testing::DoAll; 41 using testing::Return; 42 using testing::StrictMock; 43 44 namespace safe_browsing { 45 46 namespace { 47 48 class MockSafeBrowsingDatabaseManager : public SafeBrowsingDatabaseManager { 49 public: 50 explicit MockSafeBrowsingDatabaseManager( 51 const scoped_refptr<SafeBrowsingService>& service) 52 : SafeBrowsingDatabaseManager(service) { } 53 54 MOCK_METHOD1(MatchMalwareIP, bool(const std::string& ip_address)); 55 56 protected: 57 virtual ~MockSafeBrowsingDatabaseManager() {} 58 59 private: 60 DISALLOW_COPY_AND_ASSIGN(MockSafeBrowsingDatabaseManager); 61 }; 62 63 class MockClientSideDetectionHost : public ClientSideDetectionHost { 64 public: 65 MockClientSideDetectionHost( 66 content::WebContents* tab, 67 SafeBrowsingDatabaseManager* database_manager) 68 : ClientSideDetectionHost(tab) { 69 set_safe_browsing_managers(NULL, database_manager); 70 } 71 72 virtual ~MockClientSideDetectionHost() {}; 73 74 MOCK_METHOD1(IsBadIpAddress, bool(const std::string&)); 75 }; 76 } // namespace 77 78 class BrowserFeatureExtractorTest : public ChromeRenderViewHostTestHarness { 79 protected: 80 virtual void SetUp() { 81 ChromeRenderViewHostTestHarness::SetUp(); 82 ASSERT_TRUE(profile()->CreateHistoryService( 83 true /* delete_file */, false /* no_db */)); 84 85 db_manager_ = new StrictMock<MockSafeBrowsingDatabaseManager>( 86 SafeBrowsingService::CreateSafeBrowsingService()); 87 host_.reset(new StrictMock<MockClientSideDetectionHost>( 88 web_contents(), db_manager_.get())); 89 extractor_.reset( 90 new BrowserFeatureExtractor(web_contents(), host_.get())); 91 num_pending_ = 0; 92 browse_info_.reset(new BrowseInfo); 93 } 94 95 virtual void TearDown() { 96 extractor_.reset(); 97 host_.reset(); 98 db_manager_ = NULL; 99 profile()->DestroyHistoryService(); 100 ChromeRenderViewHostTestHarness::TearDown(); 101 ASSERT_EQ(0, num_pending_); 102 } 103 104 HistoryService* history_service() { 105 return HistoryServiceFactory::GetForProfile(profile(), 106 Profile::EXPLICIT_ACCESS); 107 } 108 109 void SetRedirectChain(const std::vector<GURL>& redirect_chain, 110 bool new_host) { 111 browse_info_->url_redirects = redirect_chain; 112 if (new_host) { 113 browse_info_->host_redirects = redirect_chain; 114 } 115 } 116 117 // Wrapper around NavigateAndCommit that also sets the redirect chain to 118 // a sane value. 119 void SimpleNavigateAndCommit(const GURL& url) { 120 std::vector<GURL> redirect_chain; 121 redirect_chain.push_back(url); 122 SetRedirectChain(redirect_chain, true); 123 NavigateAndCommit(url, GURL(), content::PAGE_TRANSITION_LINK); 124 } 125 126 // This is similar to NavigateAndCommit that is in WebContentsTester, but 127 // allows us to specify the referrer and page_transition_type. 128 void NavigateAndCommit(const GURL& url, 129 const GURL& referrer, 130 content::PageTransition type) { 131 web_contents()->GetController().LoadURL( 132 url, content::Referrer(referrer, blink::WebReferrerPolicyDefault), 133 type, std::string()); 134 135 static int page_id = 0; 136 content::RenderViewHost* rvh = 137 WebContentsTester::For(web_contents())->GetPendingRenderViewHost(); 138 if (!rvh) { 139 rvh = web_contents()->GetRenderViewHost(); 140 } 141 WebContentsTester::For(web_contents())->ProceedWithCrossSiteNavigation(); 142 WebContentsTester::For(web_contents())->TestDidNavigateWithReferrer( 143 rvh, ++page_id, url, 144 content::Referrer(referrer, blink::WebReferrerPolicyDefault), type); 145 } 146 147 bool ExtractFeatures(ClientPhishingRequest* request) { 148 StartExtractFeatures(request); 149 base::MessageLoop::current()->Run(); 150 EXPECT_EQ(1U, success_.count(request)); 151 return success_.count(request) ? success_[request] : false; 152 } 153 154 void StartExtractFeatures(ClientPhishingRequest* request) { 155 success_.erase(request); 156 ++num_pending_; 157 extractor_->ExtractFeatures( 158 browse_info_.get(), 159 request, 160 base::Bind(&BrowserFeatureExtractorTest::ExtractFeaturesDone, 161 base::Unretained(this))); 162 } 163 164 void GetFeatureMap(const ClientPhishingRequest& request, 165 std::map<std::string, double>* features) { 166 for (int i = 0; i < request.non_model_feature_map_size(); ++i) { 167 const ClientPhishingRequest::Feature& feature = 168 request.non_model_feature_map(i); 169 EXPECT_EQ(0U, features->count(feature.name())); 170 (*features)[feature.name()] = feature.value(); 171 } 172 } 173 174 void ExtractMalwareFeatures(ClientMalwareRequest* request) { 175 // Feature extraction takes ownership of the request object 176 // and passes it along to the done callback in the end. 177 StartExtractMalwareFeatures(request); 178 base::MessageLoopForUI::current()->Run(); 179 EXPECT_EQ(1U, success_.count(request)); 180 EXPECT_TRUE(success_[request]); 181 } 182 183 void StartExtractMalwareFeatures(ClientMalwareRequest* request) { 184 success_.erase(request); 185 ++num_pending_; 186 // We temporarily give up ownership of request to ExtractMalwareFeatures 187 // but we'll regain ownership of it in ExtractMalwareFeaturesDone. 188 extractor_->ExtractMalwareFeatures( 189 browse_info_.get(), 190 request, 191 base::Bind(&BrowserFeatureExtractorTest::ExtractMalwareFeaturesDone, 192 base::Unretained(this))); 193 } 194 195 void GetMalwareUrls( 196 const ClientMalwareRequest& request, 197 std::map<std::string, std::set<std::string> >* urls) { 198 for (int i = 0; i < request.bad_ip_url_info_size(); ++i) { 199 const ClientMalwareRequest::UrlInfo& urlinfo = 200 request.bad_ip_url_info(i); 201 (*urls)[urlinfo.ip()].insert(urlinfo.url()); 202 } 203 } 204 205 int num_pending_; // Number of pending feature extractions. 206 scoped_ptr<BrowserFeatureExtractor> extractor_; 207 std::map<void*, bool> success_; 208 scoped_ptr<BrowseInfo> browse_info_; 209 scoped_ptr<StrictMock<MockClientSideDetectionHost> > host_; 210 scoped_refptr<StrictMock<MockSafeBrowsingDatabaseManager> > db_manager_; 211 212 private: 213 void ExtractFeaturesDone(bool success, ClientPhishingRequest* request) { 214 ASSERT_EQ(0U, success_.count(request)); 215 success_[request] = success; 216 if (--num_pending_ == 0) { 217 base::MessageLoop::current()->Quit(); 218 } 219 } 220 221 void ExtractMalwareFeaturesDone( 222 bool success, 223 scoped_ptr<ClientMalwareRequest> request) { 224 EXPECT_TRUE(BrowserThread::CurrentlyOn(BrowserThread::UI)); 225 ASSERT_EQ(0U, success_.count(request.get())); 226 // The pointer doesn't really belong to us. It belongs to 227 // the test case which passed it to ExtractMalwareFeatures above. 228 success_[request.release()] = success; 229 if (--num_pending_ == 0) { 230 base::MessageLoopForUI::current()->Quit(); 231 } 232 } 233 }; 234 235 TEST_F(BrowserFeatureExtractorTest, UrlNotInHistory) { 236 ClientPhishingRequest request; 237 SimpleNavigateAndCommit(GURL("http://www.google.com")); 238 request.set_url("http://www.google.com/"); 239 request.set_client_score(0.5); 240 EXPECT_FALSE(ExtractFeatures(&request)); 241 } 242 243 TEST_F(BrowserFeatureExtractorTest, RequestNotInitialized) { 244 ClientPhishingRequest request; 245 request.set_url("http://www.google.com/"); 246 // Request is missing the score value. 247 SimpleNavigateAndCommit(GURL("http://www.google.com")); 248 EXPECT_FALSE(ExtractFeatures(&request)); 249 } 250 251 TEST_F(BrowserFeatureExtractorTest, UrlInHistory) { 252 history_service()->AddPage(GURL("http://www.foo.com/bar.html"), 253 base::Time::Now(), 254 history::SOURCE_BROWSED); 255 history_service()->AddPage(GURL("https://www.foo.com/gaa.html"), 256 base::Time::Now(), 257 history::SOURCE_BROWSED); // same host HTTPS. 258 history_service()->AddPage(GURL("http://www.foo.com/gaa.html"), 259 base::Time::Now(), 260 history::SOURCE_BROWSED); // same host HTTP. 261 history_service()->AddPage(GURL("http://bar.foo.com/gaa.html"), 262 base::Time::Now(), 263 history::SOURCE_BROWSED); // different host. 264 history_service()->AddPage(GURL("http://www.foo.com/bar.html?a=b"), 265 base::Time::Now() - base::TimeDelta::FromHours(23), 266 NULL, 0, GURL(), history::RedirectList(), 267 content::PAGE_TRANSITION_LINK, 268 history::SOURCE_BROWSED, false); 269 history_service()->AddPage(GURL("http://www.foo.com/bar.html"), 270 base::Time::Now() - base::TimeDelta::FromHours(25), 271 NULL, 0, GURL(), history::RedirectList(), 272 content::PAGE_TRANSITION_TYPED, 273 history::SOURCE_BROWSED, false); 274 history_service()->AddPage(GURL("https://www.foo.com/goo.html"), 275 base::Time::Now() - base::TimeDelta::FromDays(5), 276 NULL, 0, GURL(), history::RedirectList(), 277 content::PAGE_TRANSITION_TYPED, 278 history::SOURCE_BROWSED, false); 279 280 SimpleNavigateAndCommit(GURL("http://www.foo.com/bar.html")); 281 282 ClientPhishingRequest request; 283 request.set_url("http://www.foo.com/bar.html"); 284 request.set_client_score(0.5); 285 EXPECT_TRUE(ExtractFeatures(&request)); 286 std::map<std::string, double> features; 287 GetFeatureMap(request, &features); 288 289 EXPECT_EQ(12U, features.size()); 290 EXPECT_DOUBLE_EQ(2.0, features[features::kUrlHistoryVisitCount]); 291 EXPECT_DOUBLE_EQ(1.0, 292 features[features::kUrlHistoryVisitCountMoreThan24hAgo]); 293 EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryTypedCount]); 294 EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]); 295 EXPECT_DOUBLE_EQ(4.0, features[features::kHttpHostVisitCount]); 296 EXPECT_DOUBLE_EQ(2.0, features[features::kHttpsHostVisitCount]); 297 EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]); 298 EXPECT_DOUBLE_EQ(1.0, features[features::kFirstHttpsHostVisitMoreThan24hAgo]); 299 300 request.Clear(); 301 request.set_url("http://bar.foo.com/gaa.html"); 302 request.set_client_score(0.5); 303 EXPECT_TRUE(ExtractFeatures(&request)); 304 features.clear(); 305 GetFeatureMap(request, &features); 306 // We have less features because we didn't Navigate to this page, so we don't 307 // have Referrer, IsFirstNavigation, HasSSLReferrer, etc. 308 EXPECT_EQ(7U, features.size()); 309 EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryVisitCount]); 310 EXPECT_DOUBLE_EQ(0.0, 311 features[features::kUrlHistoryVisitCountMoreThan24hAgo]); 312 EXPECT_DOUBLE_EQ(0.0, features[features::kUrlHistoryTypedCount]); 313 EXPECT_DOUBLE_EQ(1.0, features[features::kUrlHistoryLinkCount]); 314 EXPECT_DOUBLE_EQ(1.0, features[features::kHttpHostVisitCount]); 315 EXPECT_DOUBLE_EQ(0.0, features[features::kHttpsHostVisitCount]); 316 EXPECT_DOUBLE_EQ(0.0, features[features::kFirstHttpHostVisitMoreThan24hAgo]); 317 EXPECT_FALSE(features.count(features::kFirstHttpsHostVisitMoreThan24hAgo)); 318 } 319 320 TEST_F(BrowserFeatureExtractorTest, MultipleRequestsAtOnce) { 321 history_service()->AddPage(GURL("http://www.foo.com/bar.html"), 322 base::Time::Now(), 323 history::SOURCE_BROWSED); 324 SimpleNavigateAndCommit(GURL("http:/www.foo.com/bar.html")); 325 ClientPhishingRequest request; 326 request.set_url("http://www.foo.com/bar.html"); 327 request.set_client_score(0.5); 328 StartExtractFeatures(&request); 329 330 SimpleNavigateAndCommit(GURL("http://www.foo.com/goo.html")); 331 ClientPhishingRequest request2; 332 request2.set_url("http://www.foo.com/goo.html"); 333 request2.set_client_score(1.0); 334 StartExtractFeatures(&request2); 335 336 base::MessageLoop::current()->Run(); 337 EXPECT_TRUE(success_[&request]); 338 // Success is false because the second URL is not in the history and we are 339 // not able to distinguish between a missing URL in the history and an error. 340 EXPECT_FALSE(success_[&request2]); 341 } 342 343 TEST_F(BrowserFeatureExtractorTest, BrowseFeatures) { 344 history_service()->AddPage(GURL("http://www.foo.com/"), 345 base::Time::Now(), 346 history::SOURCE_BROWSED); 347 history_service()->AddPage(GURL("http://www.foo.com/page.html"), 348 base::Time::Now(), 349 history::SOURCE_BROWSED); 350 history_service()->AddPage(GURL("http://www.bar.com/"), 351 base::Time::Now(), 352 history::SOURCE_BROWSED); 353 history_service()->AddPage(GURL("http://www.bar.com/other_page.html"), 354 base::Time::Now(), 355 history::SOURCE_BROWSED); 356 history_service()->AddPage(GURL("http://www.baz.com/"), 357 base::Time::Now(), 358 history::SOURCE_BROWSED); 359 360 ClientPhishingRequest request; 361 request.set_url("http://www.foo.com/"); 362 request.set_client_score(0.5); 363 std::vector<GURL> redirect_chain; 364 redirect_chain.push_back(GURL("http://somerandomwebsite.com/")); 365 redirect_chain.push_back(GURL("http://www.foo.com/")); 366 SetRedirectChain(redirect_chain, true); 367 browse_info_->http_status_code = 200; 368 NavigateAndCommit(GURL("http://www.foo.com/"), 369 GURL("http://google.com/"), 370 content::PageTransitionFromInt( 371 content::PAGE_TRANSITION_AUTO_BOOKMARK | 372 content::PAGE_TRANSITION_FORWARD_BACK)); 373 374 EXPECT_TRUE(ExtractFeatures(&request)); 375 std::map<std::string, double> features; 376 GetFeatureMap(request, &features); 377 378 EXPECT_EQ(1.0, 379 features[base::StringPrintf("%s=%s", 380 features::kReferrer, 381 "http://google.com/")]); 382 EXPECT_EQ(1.0, 383 features[base::StringPrintf("%s[0]=%s", 384 features::kRedirect, 385 "http://somerandomwebsite.com/")]); 386 // We shouldn't have a feature for the last redirect in the chain, since it 387 // should always be the URL that we navigated to. 388 EXPECT_EQ(0.0, 389 features[base::StringPrintf("%s[1]=%s", 390 features::kRedirect, 391 "http://foo.com/")]); 392 EXPECT_EQ(0.0, features[features::kHasSSLReferrer]); 393 EXPECT_EQ(2.0, features[features::kPageTransitionType]); 394 EXPECT_EQ(1.0, features[features::kIsFirstNavigation]); 395 EXPECT_EQ(200.0, features[features::kHttpStatusCode]); 396 397 request.Clear(); 398 request.set_url("http://www.foo.com/page.html"); 399 request.set_client_score(0.5); 400 redirect_chain.clear(); 401 redirect_chain.push_back(GURL("http://www.foo.com/redirect")); 402 redirect_chain.push_back(GURL("http://www.foo.com/second_redirect")); 403 redirect_chain.push_back(GURL("http://www.foo.com/page.html")); 404 SetRedirectChain(redirect_chain, false); 405 browse_info_->http_status_code = 404; 406 NavigateAndCommit(GURL("http://www.foo.com/page.html"), 407 GURL("http://www.foo.com"), 408 content::PageTransitionFromInt( 409 content::PAGE_TRANSITION_TYPED | 410 content::PAGE_TRANSITION_CHAIN_START | 411 content::PAGE_TRANSITION_CLIENT_REDIRECT)); 412 413 EXPECT_TRUE(ExtractFeatures(&request)); 414 features.clear(); 415 GetFeatureMap(request, &features); 416 417 EXPECT_EQ(1, 418 features[base::StringPrintf("%s=%s", 419 features::kReferrer, 420 "http://www.foo.com/")]); 421 EXPECT_EQ(1.0, 422 features[base::StringPrintf("%s[0]=%s", 423 features::kRedirect, 424 "http://www.foo.com/redirect")]); 425 EXPECT_EQ(1.0, 426 features[base::StringPrintf("%s[1]=%s", 427 features::kRedirect, 428 "http://www.foo.com/second_redirect")]); 429 EXPECT_EQ(0.0, features[features::kHasSSLReferrer]); 430 EXPECT_EQ(1.0, features[features::kPageTransitionType]); 431 EXPECT_EQ(0.0, features[features::kIsFirstNavigation]); 432 EXPECT_EQ(1.0, 433 features[base::StringPrintf("%s%s=%s", 434 features::kHostPrefix, 435 features::kReferrer, 436 "http://google.com/")]); 437 EXPECT_EQ(1.0, 438 features[base::StringPrintf("%s%s[0]=%s", 439 features::kHostPrefix, 440 features::kRedirect, 441 "http://somerandomwebsite.com/")]); 442 EXPECT_EQ(2.0, 443 features[base::StringPrintf("%s%s", 444 features::kHostPrefix, 445 features::kPageTransitionType)]); 446 EXPECT_EQ(1.0, 447 features[base::StringPrintf("%s%s", 448 features::kHostPrefix, 449 features::kIsFirstNavigation)]); 450 EXPECT_EQ(404.0, features[features::kHttpStatusCode]); 451 452 request.Clear(); 453 request.set_url("http://www.bar.com/"); 454 request.set_client_score(0.5); 455 redirect_chain.clear(); 456 redirect_chain.push_back(GURL("http://www.foo.com/page.html")); 457 redirect_chain.push_back(GURL("http://www.bar.com/")); 458 SetRedirectChain(redirect_chain, true); 459 NavigateAndCommit(GURL("http://www.bar.com/"), 460 GURL("http://www.foo.com/page.html"), 461 content::PageTransitionFromInt( 462 content::PAGE_TRANSITION_LINK | 463 content::PAGE_TRANSITION_CHAIN_END | 464 content::PAGE_TRANSITION_CLIENT_REDIRECT)); 465 466 EXPECT_TRUE(ExtractFeatures(&request)); 467 features.clear(); 468 GetFeatureMap(request, &features); 469 470 EXPECT_EQ(1.0, 471 features[base::StringPrintf("%s=%s", 472 features::kReferrer, 473 "http://www.foo.com/page.html")]); 474 EXPECT_EQ(1.0, 475 features[base::StringPrintf("%s[0]=%s", 476 features::kRedirect, 477 "http://www.foo.com/page.html")]); 478 EXPECT_EQ(0.0, features[features::kHasSSLReferrer]); 479 EXPECT_EQ(0.0, features[features::kPageTransitionType]); 480 EXPECT_EQ(0.0, features[features::kIsFirstNavigation]); 481 482 // Should not have host features. 483 EXPECT_EQ(0U, 484 features.count(base::StringPrintf("%s%s", 485 features::kHostPrefix, 486 features::kPageTransitionType))); 487 EXPECT_EQ(0U, 488 features.count(base::StringPrintf("%s%s", 489 features::kHostPrefix, 490 features::kIsFirstNavigation))); 491 492 request.Clear(); 493 request.set_url("http://www.bar.com/other_page.html"); 494 request.set_client_score(0.5); 495 redirect_chain.clear(); 496 redirect_chain.push_back(GURL("http://www.bar.com/other_page.html")); 497 SetRedirectChain(redirect_chain, false); 498 NavigateAndCommit(GURL("http://www.bar.com/other_page.html"), 499 GURL("http://www.bar.com/"), 500 content::PAGE_TRANSITION_LINK); 501 502 EXPECT_TRUE(ExtractFeatures(&request)); 503 features.clear(); 504 GetFeatureMap(request, &features); 505 506 EXPECT_EQ(1.0, 507 features[base::StringPrintf("%s=%s", 508 features::kReferrer, 509 "http://www.bar.com/")]); 510 EXPECT_EQ(0.0, features[features::kHasSSLReferrer]); 511 EXPECT_EQ(0.0, features[features::kPageTransitionType]); 512 EXPECT_EQ(0.0, features[features::kIsFirstNavigation]); 513 EXPECT_EQ(1.0, 514 features[base::StringPrintf("%s%s=%s", 515 features::kHostPrefix, 516 features::kReferrer, 517 "http://www.foo.com/page.html")]); 518 EXPECT_EQ(1.0, 519 features[base::StringPrintf("%s%s[0]=%s", 520 features::kHostPrefix, 521 features::kRedirect, 522 "http://www.foo.com/page.html")]); 523 EXPECT_EQ(0.0, 524 features[base::StringPrintf("%s%s", 525 features::kHostPrefix, 526 features::kPageTransitionType)]); 527 EXPECT_EQ(0.0, 528 features[base::StringPrintf("%s%s", 529 features::kHostPrefix, 530 features::kIsFirstNavigation)]); 531 request.Clear(); 532 request.set_url("http://www.baz.com/"); 533 request.set_client_score(0.5); 534 redirect_chain.clear(); 535 redirect_chain.push_back(GURL("https://bankofamerica.com")); 536 redirect_chain.push_back(GURL("http://www.baz.com/")); 537 SetRedirectChain(redirect_chain, true); 538 NavigateAndCommit(GURL("http://www.baz.com"), 539 GURL("https://bankofamerica.com"), 540 content::PAGE_TRANSITION_GENERATED); 541 542 EXPECT_TRUE(ExtractFeatures(&request)); 543 features.clear(); 544 GetFeatureMap(request, &features); 545 546 EXPECT_EQ(1.0, 547 features[base::StringPrintf("%s[0]=%s", 548 features::kRedirect, 549 features::kSecureRedirectValue)]); 550 EXPECT_EQ(1.0, features[features::kHasSSLReferrer]); 551 EXPECT_EQ(5.0, features[features::kPageTransitionType]); 552 // Should not have redirect or host features. 553 EXPECT_EQ(0U, 554 features.count(base::StringPrintf("%s%s", 555 features::kHostPrefix, 556 features::kPageTransitionType))); 557 EXPECT_EQ(0U, 558 features.count(base::StringPrintf("%s%s", 559 features::kHostPrefix, 560 features::kIsFirstNavigation))); 561 EXPECT_EQ(5.0, features[features::kPageTransitionType]); 562 } 563 564 TEST_F(BrowserFeatureExtractorTest, SafeBrowsingFeatures) { 565 SimpleNavigateAndCommit(GURL("http://www.foo.com/malware.html")); 566 ClientPhishingRequest request; 567 request.set_url("http://www.foo.com/malware.html"); 568 request.set_client_score(0.5); 569 570 browse_info_->unsafe_resource.reset( 571 new SafeBrowsingUIManager::UnsafeResource); 572 browse_info_->unsafe_resource->url = GURL("http://www.malware.com/"); 573 browse_info_->unsafe_resource->original_url = GURL("http://www.good.com/"); 574 browse_info_->unsafe_resource->is_subresource = true; 575 browse_info_->unsafe_resource->threat_type = SB_THREAT_TYPE_URL_MALWARE; 576 577 ExtractFeatures(&request); 578 std::map<std::string, double> features; 579 GetFeatureMap(request, &features); 580 EXPECT_TRUE(features.count(base::StringPrintf( 581 "%s%s", 582 features::kSafeBrowsingMaliciousUrl, 583 "http://www.malware.com/"))); 584 EXPECT_TRUE(features.count(base::StringPrintf( 585 "%s%s", 586 features::kSafeBrowsingOriginalUrl, 587 "http://www.good.com/"))); 588 EXPECT_DOUBLE_EQ(1.0, features[features::kSafeBrowsingIsSubresource]); 589 EXPECT_DOUBLE_EQ(2.0, features[features::kSafeBrowsingThreatType]); 590 } 591 592 TEST_F(BrowserFeatureExtractorTest, MalwareFeatures) { 593 ClientMalwareRequest request; 594 request.set_url("http://www.foo.com/"); 595 596 std::vector<IPUrlInfo> bad_urls; 597 bad_urls.push_back(IPUrlInfo("http://bad.com", "GET", "", 598 ResourceType::SCRIPT)); 599 bad_urls.push_back(IPUrlInfo("http://evil.com", "GET", "", 600 ResourceType::SCRIPT)); 601 browse_info_->ips.insert(std::make_pair("193.5.163.8", bad_urls)); 602 browse_info_->ips.insert(std::make_pair("92.92.92.92", bad_urls)); 603 std::vector<IPUrlInfo> good_urls; 604 good_urls.push_back(IPUrlInfo("http://ok.com", "GET", "", 605 ResourceType::SCRIPT)); 606 browse_info_->ips.insert(std::make_pair("23.94.78.1", good_urls)); 607 EXPECT_CALL(*db_manager_, MatchMalwareIP("193.5.163.8")) 608 .WillOnce(Return(true)); 609 EXPECT_CALL(*db_manager_, MatchMalwareIP("92.92.92.92")) 610 .WillOnce(Return(true)); 611 EXPECT_CALL(*db_manager_, MatchMalwareIP("23.94.78.1")) 612 .WillOnce(Return(false)); 613 614 ExtractMalwareFeatures(&request); 615 EXPECT_EQ(4, request.bad_ip_url_info_size()); 616 std::map<std::string, std::set<std::string> > result_urls; 617 GetMalwareUrls(request, &result_urls); 618 619 EXPECT_EQ(2U, result_urls.size()); 620 EXPECT_TRUE(result_urls.count("193.5.163.8")); 621 std::set<std::string> urls = result_urls["193.5.163.8"]; 622 EXPECT_EQ(2U, urls.size()); 623 EXPECT_TRUE(urls.find("http://bad.com") != urls.end()); 624 EXPECT_TRUE(urls.find("http://evil.com") != urls.end()); 625 EXPECT_TRUE(result_urls.count("92.92.92.92")); 626 urls = result_urls["92.92.92.92"]; 627 EXPECT_EQ(2U, urls.size()); 628 EXPECT_TRUE(urls.find("http://bad.com") != urls.end()); 629 EXPECT_TRUE(urls.find("http://evil.com") != urls.end()); 630 } 631 632 TEST_F(BrowserFeatureExtractorTest, MalwareFeatures_ExceedLimit) { 633 ClientMalwareRequest request; 634 request.set_url("http://www.foo.com/"); 635 636 std::vector<IPUrlInfo> bad_urls; 637 bad_urls.push_back(IPUrlInfo("http://bad.com", "GET", "", 638 ResourceType::SCRIPT)); 639 std::vector<std::string> ips; 640 for (int i = 0; i < 7; ++i) { // Add 7 ips 641 std::string ip = base::StringPrintf("%d.%d.%d.%d", i, i, i, i); 642 ips.push_back(ip); 643 browse_info_->ips.insert(std::make_pair(ip, bad_urls)); 644 645 // First ip is good but all the others are bad. 646 EXPECT_CALL(*db_manager_, MatchMalwareIP(ip)).WillOnce(Return(i > 0)); 647 } 648 649 ExtractMalwareFeatures(&request); 650 // The number of IP matched url we store is capped at 5 IPs per request. 651 EXPECT_EQ(5, request.bad_ip_url_info_size()); 652 } 653 654 } // namespace safe_browsing 655