1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/renderer/safe_browsing/phishing_classifier.h" 6 7 #include <string> 8 9 #include "base/bind.h" 10 #include "base/command_line.h" 11 #include "base/memory/scoped_ptr.h" 12 #include "base/strings/string16.h" 13 #include "base/strings/utf_string_conversions.h" 14 #include "chrome/common/chrome_switches.h" 15 #include "chrome/common/safe_browsing/client_model.pb.h" 16 #include "chrome/common/safe_browsing/csd.pb.h" 17 #include "chrome/renderer/safe_browsing/features.h" 18 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" 19 #include "chrome/renderer/safe_browsing/murmurhash3_util.h" 20 #include "chrome/renderer/safe_browsing/scorer.h" 21 #include "chrome/test/base/in_process_browser_test.h" 22 #include "chrome/test/base/ui_test_utils.h" 23 #include "content/public/renderer/render_view.h" 24 #include "crypto/sha2.h" 25 #include "net/dns/mock_host_resolver.h" 26 #include "net/test/embedded_test_server/embedded_test_server.h" 27 #include "net/test/embedded_test_server/http_response.h" 28 #include "testing/gmock/include/gmock/gmock.h" 29 #include "url/gurl.h" 30 31 using ::testing::AllOf; 32 using ::testing::Contains; 33 using ::testing::Not; 34 using ::testing::Pair; 35 36 namespace { 37 38 // The first RenderFrame is routing ID 1, and the first RenderView is 2. 39 const int kRenderViewRoutingId = 2; 40 41 } 42 43 namespace safe_browsing { 44 45 class PhishingClassifierTest : public InProcessBrowserTest { 46 protected: 47 PhishingClassifierTest() 48 : url_tld_token_net_(features::kUrlTldToken + std::string("net")), 49 page_link_domain_phishing_(features::kPageLinkDomain + 50 std::string("phishing.com")), 51 page_term_login_(features::kPageTerm + std::string("login")) { 52 } 53 54 virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE { 55 command_line->AppendSwitch(switches::kSingleProcess); 56 #if defined(OS_WIN) 57 // Don't want to try to create a GPU process. 58 command_line->AppendSwitch(switches::kDisableGpu); 59 #endif 60 } 61 62 virtual void SetUpOnMainThread() OVERRIDE { 63 // Construct a model to test with. We include one feature from each of 64 // the feature extractors, which allows us to verify that they all ran. 65 ClientSideModel model; 66 67 model.add_hashes(crypto::SHA256HashString(url_tld_token_net_)); 68 model.add_hashes(crypto::SHA256HashString(page_link_domain_phishing_)); 69 model.add_hashes(crypto::SHA256HashString(page_term_login_)); 70 model.add_hashes(crypto::SHA256HashString("login")); 71 model.add_hashes(crypto::SHA256HashString(features::kUrlTldToken + 72 std::string("net"))); 73 model.add_hashes(crypto::SHA256HashString(features::kPageLinkDomain + 74 std::string("phishing.com"))); 75 model.add_hashes(crypto::SHA256HashString(features::kPageTerm + 76 std::string("login"))); 77 model.add_hashes(crypto::SHA256HashString("login")); 78 79 // Add a default rule with a non-phishy weight. 80 ClientSideModel::Rule* rule = model.add_rule(); 81 rule->set_weight(-1.0); 82 83 // To give a phishy score, the total weight needs to be >= 0 84 // (0.5 when converted to a probability). This will only happen 85 // if all of the listed features are present. 86 rule = model.add_rule(); 87 rule->add_feature(0); 88 rule->add_feature(1); 89 rule->add_feature(2); 90 rule->set_weight(1.0); 91 92 model.add_page_term(3); 93 model.set_murmur_hash_seed(2777808611U); 94 model.add_page_word(MurmurHash3String("login", model.murmur_hash_seed())); 95 model.set_max_words_per_term(1); 96 model.set_max_shingles_per_page(100); 97 model.set_shingle_size(3); 98 99 clock_ = new MockFeatureExtractorClock; 100 scorer_.reset(Scorer::Create(model.SerializeAsString())); 101 ASSERT_TRUE(scorer_.get()); 102 103 classifier_.reset(new PhishingClassifier( 104 content::RenderView::FromRoutingID(kRenderViewRoutingId), 105 clock_)); 106 } 107 108 virtual void TearDownOnMainThread() OVERRIDE { 109 content::RunAllPendingInMessageLoop(); 110 } 111 112 // Helper method to start phishing classification and wait for it to 113 // complete. Returns the true if the page is classified as phishy and 114 // false otherwise. 115 bool RunPhishingClassifier(const base::string16* page_text, 116 float* phishy_score, 117 FeatureMap* features) { 118 ClientPhishingRequest verdict; 119 // The classifier accesses the RenderView and must run in the RenderThread. 120 PostTaskToInProcessRendererAndWait( 121 base::Bind(&PhishingClassifierTest::DoRunPhishingClassifier, 122 base::Unretained(this), 123 page_text, phishy_score, features, &verdict)); 124 return verdict.is_phishing(); 125 } 126 127 void DoRunPhishingClassifier(const base::string16* page_text, 128 float* phishy_score, 129 FeatureMap* features, 130 ClientPhishingRequest* verdict) { 131 *phishy_score = PhishingClassifier::kInvalidScore; 132 features->Clear(); 133 134 // Force synchronous behavior for ease of unittesting. 135 base::RunLoop run_loop; 136 classifier_->BeginClassification( 137 page_text, 138 base::Bind(&PhishingClassifierTest::ClassificationFinished, 139 base::Unretained(this), &run_loop, verdict)); 140 content::RunThisRunLoop(&run_loop); 141 142 *phishy_score = verdict->client_score(); 143 for (int i = 0; i < verdict->feature_map_size(); ++i) { 144 features->AddRealFeature(verdict->feature_map(i).name(), 145 verdict->feature_map(i).value()); 146 } 147 } 148 149 // Completion callback for classification. 150 void ClassificationFinished(base::RunLoop* run_loop, 151 ClientPhishingRequest* verdict_out, 152 const ClientPhishingRequest& verdict) { 153 *verdict_out = verdict; // Copy the verdict. 154 run_loop->Quit(); 155 } 156 157 scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_; 158 net::test_server::EmbeddedTestServer* embedded_test_server() { 159 // TODO(ajwong): Merge this into BrowserTestBase. 160 if (!embedded_test_server_) { 161 embedded_test_server_.reset(new net::test_server::EmbeddedTestServer()); 162 embedded_test_server_->RegisterRequestHandler( 163 base::Bind(&PhishingClassifierTest::HandleRequest, 164 base::Unretained(this))); 165 CHECK(embedded_test_server_->InitializeAndWaitUntilReady()); 166 } 167 return embedded_test_server_.get(); 168 } 169 170 void LoadHtml(const std::string& host, const std::string& content) { 171 GURL::Replacements replace_host; 172 replace_host.SetHostStr(host); 173 response_content_ = content; 174 ui_test_utils::NavigateToURL( 175 browser(), 176 embedded_test_server()->base_url().ReplaceComponents(replace_host)); 177 } 178 179 void LoadHtmlPost(const std::string& host, const std::string& content) { 180 GURL::Replacements replace_host; 181 replace_host.SetHostStr(host); 182 response_content_ = content; 183 ui_test_utils::NavigateToURLWithPost( 184 browser(), 185 embedded_test_server()->base_url().ReplaceComponents(replace_host)); 186 } 187 188 scoped_ptr<net::test_server::HttpResponse> 189 HandleRequest(const net::test_server::HttpRequest& request) { 190 scoped_ptr<net::test_server::BasicHttpResponse> http_response( 191 new net::test_server::BasicHttpResponse()); 192 http_response->set_code(net::HTTP_OK); 193 http_response->set_content_type("text/html"); 194 http_response->set_content(response_content_); 195 return http_response.PassAs<net::test_server::HttpResponse>(); 196 } 197 198 std::string response_content_; 199 scoped_ptr<Scorer> scorer_; 200 scoped_ptr<PhishingClassifier> classifier_; 201 MockFeatureExtractorClock* clock_; // Owned by classifier_. 202 203 // Features that are in the model. 204 const std::string url_tld_token_net_; 205 const std::string page_link_domain_phishing_; 206 const std::string page_term_login_; 207 }; 208 209 // This test flakes on Mac with force compositing mode. 210 // http://crbug.com/316709 211 #if defined(OS_MACOSX) 212 #define MAYBE_TestClassification DISABLED_TestClassification 213 #else 214 #define MAYBE_TestClassification TestClassification 215 #endif 216 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_TestClassification) { 217 host_resolver()->AddRule("*", "127.0.0.1"); 218 219 // No scorer yet, so the classifier is not ready. 220 ASSERT_FALSE(classifier_->is_ready()); 221 222 // Now set the scorer. 223 classifier_->set_phishing_scorer(scorer_.get()); 224 ASSERT_TRUE(classifier_->is_ready()); 225 226 // This test doesn't exercise the extraction timing. 227 EXPECT_CALL(*clock_, Now()) 228 .WillRepeatedly(::testing::Return(base::TimeTicks::Now())); 229 230 base::string16 page_text = base::ASCIIToUTF16("login"); 231 float phishy_score; 232 FeatureMap features; 233 234 LoadHtml("host.net", 235 "<html><body><a href=\"http://phishing.com/\">login</a></body></html>"); 236 EXPECT_TRUE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 237 // Note: features.features() might contain other features that simply aren't 238 // in the model. 239 EXPECT_THAT(features.features(), 240 AllOf(Contains(Pair(url_tld_token_net_, 1.0)), 241 Contains(Pair(page_link_domain_phishing_, 1.0)), 242 Contains(Pair(page_term_login_, 1.0)))); 243 EXPECT_FLOAT_EQ(0.5, phishy_score); 244 245 // Change the link domain to something non-phishy. 246 LoadHtml("host.net", 247 "<html><body><a href=\"http://safe.com/\">login</a></body></html>"); 248 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 249 EXPECT_THAT(features.features(), 250 AllOf(Contains(Pair(url_tld_token_net_, 1.0)), 251 Contains(Pair(page_term_login_, 1.0)))); 252 EXPECT_THAT(features.features(), 253 Not(Contains(Pair(page_link_domain_phishing_, 1.0)))); 254 EXPECT_GE(phishy_score, 0.0); 255 EXPECT_LT(phishy_score, 0.5); 256 257 // Extraction should fail for this case since there is no TLD. 258 LoadHtml("localhost", "<html><body>content</body></html>"); 259 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 260 EXPECT_EQ(0U, features.features().size()); 261 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 262 263 // Extraction should also fail for this case because the URL is not http. 264 net::SpawnedTestServer https_server( 265 net::SpawnedTestServer::TYPE_HTTPS, 266 net::SpawnedTestServer::kLocalhost, 267 base::FilePath(FILE_PATH_LITERAL("chrome/test/data"))); 268 ASSERT_TRUE(https_server.Start()); 269 std::string host_str("host.net"); // Must outlive replace_host. 270 GURL::Replacements replace_host; 271 replace_host.SetHostStr(host_str); 272 GURL test_url = https_server.GetURL("/files/title1.html"); 273 ui_test_utils::NavigateToURL(browser(), 274 test_url.ReplaceComponents(replace_host)); 275 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 276 EXPECT_EQ(0U, features.features().size()); 277 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 278 279 // Extraction should fail for this case because the URL is a POST request. 280 LoadHtmlPost("host.net", "<html><body>content</body></html>"); 281 EXPECT_FALSE(RunPhishingClassifier(&page_text, &phishy_score, &features)); 282 EXPECT_EQ(0U, features.features().size()); 283 EXPECT_EQ(PhishingClassifier::kInvalidScore, phishy_score); 284 } 285 286 // Test flakes with LSAN enabled. See http://crbug.com/373155. 287 #if defined(LEAK_SANITIZER) 288 #define MAYBE_DisableDetection DISABLED_DisableDetection 289 #else 290 #define MAYBE_DisableDetection DisableDetection 291 #endif 292 IN_PROC_BROWSER_TEST_F(PhishingClassifierTest, MAYBE_DisableDetection) { 293 // No scorer yet, so the classifier is not ready. 294 EXPECT_FALSE(classifier_->is_ready()); 295 296 // Now set the scorer. 297 classifier_->set_phishing_scorer(scorer_.get()); 298 EXPECT_TRUE(classifier_->is_ready()); 299 300 // Set a NULL scorer, which turns detection back off. 301 classifier_->set_phishing_scorer(NULL); 302 EXPECT_FALSE(classifier_->is_ready()); 303 } 304 305 } // namespace safe_browsing 306