1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Note that although this is not a "browser" test, it runs as part of 6 // browser_tests. This is because WebKit does not work properly if it is 7 // shutdown and re-initialized. Since browser_tests runs each test in a 8 // new process, this avoids the problem. 9 10 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" 11 12 #include "base/bind.h" 13 #include "base/callback.h" 14 #include "base/compiler_specific.h" 15 #include "base/memory/weak_ptr.h" 16 #include "base/message_loop/message_loop.h" 17 #include "base/time/time.h" 18 #include "chrome/renderer/safe_browsing/features.h" 19 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" 20 #include "chrome/renderer/safe_browsing/test_utils.h" 21 #include "content/public/test/render_view_fake_resources_test.h" 22 #include "testing/gmock/include/gmock/gmock.h" 23 #include "third_party/WebKit/public/platform/WebString.h" 24 #include "third_party/WebKit/public/web/WebFrame.h" 25 #include "third_party/WebKit/public/web/WebScriptSource.h" 26 27 using ::testing::DoAll; 28 using ::testing::Invoke; 29 using ::testing::Return; 30 31 namespace safe_browsing { 32 33 class PhishingDOMFeatureExtractorTest 34 : public content::RenderViewFakeResourcesTest { 35 public: 36 // Helper for the SubframeRemoval test that posts a message to remove 37 // the iframe "frame1" from the document. 38 void ScheduleRemoveIframe() { 39 message_loop_.PostTask( 40 FROM_HERE, 41 base::Bind(&PhishingDOMFeatureExtractorTest::RemoveIframe, 42 weak_factory_.GetWeakPtr())); 43 } 44 45 protected: 46 PhishingDOMFeatureExtractorTest() 47 : content::RenderViewFakeResourcesTest(), 48 weak_factory_(this) {} 49 50 virtual ~PhishingDOMFeatureExtractorTest() {} 51 52 virtual void SetUp() { 53 // Set up WebKit and the RenderView. 54 content::RenderViewFakeResourcesTest::SetUp(); 55 extractor_.reset(new PhishingDOMFeatureExtractor(view(), &clock_)); 56 } 57 58 virtual void TearDown() { 59 content::RenderViewFakeResourcesTest::TearDown(); 60 } 61 62 // Runs the DOMFeatureExtractor on the RenderView, waiting for the 63 // completion callback. Returns the success boolean from the callback. 64 bool ExtractFeatures(FeatureMap* features) { 65 success_ = false; 66 extractor_->ExtractFeatures( 67 features, 68 base::Bind(&PhishingDOMFeatureExtractorTest::ExtractionDone, 69 base::Unretained(this))); 70 message_loop_.Run(); 71 return success_; 72 } 73 74 // Completion callback for feature extraction. 75 void ExtractionDone(bool success) { 76 success_ = success; 77 message_loop_.Quit(); 78 } 79 80 // Does the actual work of removing the iframe "frame1" from the document. 81 void RemoveIframe() { 82 WebKit::WebFrame* main_frame = GetMainFrame(); 83 ASSERT_TRUE(main_frame); 84 main_frame->executeScript( 85 WebKit::WebString( 86 "document.body.removeChild(document.getElementById('frame1'));")); 87 } 88 89 MockFeatureExtractorClock clock_; 90 scoped_ptr<PhishingDOMFeatureExtractor> extractor_; 91 bool success_; // holds the success value from ExtractFeatures 92 base::WeakPtrFactory<PhishingDOMFeatureExtractorTest> weak_factory_; 93 }; 94 95 TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) { 96 // This test doesn't exercise the extraction timing. 97 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); 98 responses_["http://host.com/"] = 99 "<html><head><body>" 100 "<form action=\"query\"><input type=text><input type=checkbox></form>" 101 "<form action=\"http://cgi.host.com/submit\"></form>" 102 "<form action=\"http://other.com/\"></form>" 103 "<form action=\"query\"></form>" 104 "<form></form></body></html>"; 105 106 FeatureMap expected_features; 107 expected_features.AddBooleanFeature(features::kPageHasForms); 108 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25); 109 expected_features.AddBooleanFeature(features::kPageHasTextInputs); 110 expected_features.AddBooleanFeature(features::kPageHasCheckInputs); 111 112 FeatureMap features; 113 LoadURL("http://host.com/"); 114 ASSERT_TRUE(ExtractFeatures(&features)); 115 ExpectFeatureMapsAreEqual(features, expected_features); 116 117 responses_["http://host.com/"] = 118 "<html><head><body>" 119 "<input type=\"radio\"><input type=password></body></html>"; 120 121 expected_features.Clear(); 122 expected_features.AddBooleanFeature(features::kPageHasRadioInputs); 123 expected_features.AddBooleanFeature(features::kPageHasPswdInputs); 124 125 features.Clear(); 126 LoadURL("http://host.com/"); 127 ASSERT_TRUE(ExtractFeatures(&features)); 128 ExpectFeatureMapsAreEqual(features, expected_features); 129 130 responses_["http://host.com/"] = 131 "<html><head><body><input></body></html>"; 132 133 expected_features.Clear(); 134 expected_features.AddBooleanFeature(features::kPageHasTextInputs); 135 136 features.Clear(); 137 LoadURL("http://host.com/"); 138 ASSERT_TRUE(ExtractFeatures(&features)); 139 ExpectFeatureMapsAreEqual(features, expected_features); 140 141 responses_["http://host.com/"] = 142 "<html><head><body><input type=\"invalid\"></body></html>"; 143 144 expected_features.Clear(); 145 expected_features.AddBooleanFeature(features::kPageHasTextInputs); 146 147 features.Clear(); 148 LoadURL("http://host.com/"); 149 ASSERT_TRUE(ExtractFeatures(&features)); 150 ExpectFeatureMapsAreEqual(features, expected_features); 151 } 152 153 TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) { 154 // This test doesn't exercise the extraction timing. 155 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); 156 responses_["http://www.host.com/"] = 157 "<html><head><body>" 158 "<a href=\"http://www2.host.com/abc\">link</a>" 159 "<a name=page_anchor></a>" 160 "<a href=\"http://www.chromium.org/\">chromium</a>" 161 "</body></html"; 162 163 FeatureMap expected_features; 164 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5); 165 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0); 166 expected_features.AddBooleanFeature(features::kPageLinkDomain + 167 std::string("chromium.org")); 168 169 FeatureMap features; 170 LoadURL("http://www.host.com/"); 171 ASSERT_TRUE(ExtractFeatures(&features)); 172 ExpectFeatureMapsAreEqual(features, expected_features); 173 174 responses_.clear(); 175 responses_["https://www.host.com/"] = 176 "<html><head><body>" 177 "<a href=\"login\">this is secure</a>" 178 "<a href=\"http://host.com\">not secure</a>" 179 "<a href=\"https://www2.host.com/login\">also secure</a>" 180 "<a href=\"http://chromium.org/\">also not secure</a>" 181 "</body></html>"; 182 183 expected_features.Clear(); 184 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25); 185 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5); 186 expected_features.AddBooleanFeature(features::kPageLinkDomain + 187 std::string("chromium.org")); 188 189 features.Clear(); 190 LoadURL("https://www.host.com/"); 191 ASSERT_TRUE(ExtractFeatures(&features)); 192 ExpectFeatureMapsAreEqual(features, expected_features); 193 } 194 195 TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) { 196 // This test doesn't exercise the extraction timing. 197 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); 198 responses_["http://host.com/"] = 199 "<html><head><script></script><script></script></head></html>"; 200 201 FeatureMap expected_features; 202 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne); 203 204 FeatureMap features; 205 LoadURL("http://host.com/"); 206 ASSERT_TRUE(ExtractFeatures(&features)); 207 ExpectFeatureMapsAreEqual(features, expected_features); 208 209 responses_["http://host.com/"] = 210 "<html><head><script></script><script></script><script></script>" 211 "<script></script><script></script><script></script><script></script>" 212 "</head><body><img src=\"blah.gif\">" 213 "<img src=\"http://host2.com/blah.gif\"></body></html>"; 214 215 expected_features.Clear(); 216 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne); 217 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTSix); 218 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5); 219 220 features.Clear(); 221 LoadURL("http://host.com/"); 222 ASSERT_TRUE(ExtractFeatures(&features)); 223 ExpectFeatureMapsAreEqual(features, expected_features); 224 } 225 226 TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) { 227 // This test doesn't exercise the extraction timing. 228 EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); 229 230 // Test that features are aggregated across all frames. 231 responses_["http://host.com/"] = 232 "<html><body><input type=text><a href=\"info.html\">link</a>" 233 "<iframe src=\"http://host2.com/\"></iframe>" 234 "<iframe src=\"http://host3.com/\"></iframe>" 235 "</body></html>"; 236 237 responses_["http://host2.com/"] = 238 "<html><head><script></script><body>" 239 "<form action=\"http://host4.com/\"><input type=checkbox></form>" 240 "<form action=\"http://host2.com/submit\"></form>" 241 "<a href=\"http://www.host2.com/home\">link</a>" 242 "<iframe src=\"nested.html\"></iframe>" 243 "<body></html>"; 244 245 responses_["http://host2.com/nested.html"] = 246 "<html><body><input type=password>" 247 "<a href=\"https://host4.com/\">link</a>" 248 "<a href=\"relative\">another</a>" 249 "</body></html>"; 250 251 responses_["http://host3.com/"] = 252 "<html><head><script></script><body>" 253 "<img src=\"http://host.com/123.png\">" 254 "</body></html>"; 255 256 FeatureMap expected_features; 257 expected_features.AddBooleanFeature(features::kPageHasForms); 258 // Form action domains are compared to the URL of the document they're in, 259 // not the URL of the toplevel page. So http://host2.com/ has two form 260 // actions, one of which is external. 261 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5); 262 expected_features.AddBooleanFeature(features::kPageHasTextInputs); 263 expected_features.AddBooleanFeature(features::kPageHasPswdInputs); 264 expected_features.AddBooleanFeature(features::kPageHasCheckInputs); 265 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25); 266 expected_features.AddBooleanFeature(features::kPageLinkDomain + 267 std::string("host4.com")); 268 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25); 269 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne); 270 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0); 271 272 FeatureMap features; 273 LoadURL("http://host.com/"); 274 ASSERT_TRUE(ExtractFeatures(&features)); 275 ExpectFeatureMapsAreEqual(features, expected_features); 276 } 277 278 TEST_F(PhishingDOMFeatureExtractorTest, Continuation) { 279 // For this test, we'll cause the feature extraction to run multiple 280 // iterations by incrementing the clock. 281 282 // This page has a total of 50 elements. For the external forms feature to 283 // be computed correctly, the extractor has to examine the whole document. 284 // Note: the empty HEAD is important -- WebKit will synthesize a HEAD if 285 // there isn't one present, which can be confusing for the element counts. 286 std::string response = "<html><head></head><body>" 287 "<form action=\"ondomain\"></form>"; 288 for (int i = 0; i < 45; ++i) { 289 response.append("<p>"); 290 } 291 response.append("<form action=\"http://host2.com/\"></form></body></html>"); 292 responses_["http://host.com/"] = response; 293 294 // Advance the clock 6 ms every 10 elements processed, 10 ms between chunks. 295 // Note that this assumes kClockCheckGranularity = 10 and 296 // kMaxTimePerChunkMs = 10. 297 base::TimeTicks now = base::TimeTicks::Now(); 298 EXPECT_CALL(clock_, Now()) 299 // Time check at the start of extraction. 300 .WillOnce(Return(now)) 301 // Time check at the start of the first chunk of work. 302 .WillOnce(Return(now)) 303 // Time check after the first 10 elements. 304 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(6))) 305 // Time check after the next 10 elements. This is over the chunk 306 // time limit, so a continuation task will be posted. 307 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(12))) 308 // Time check at the start of the second chunk of work. 309 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(22))) 310 // Time check after resuming iteration for the second chunk. 311 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(24))) 312 // Time check after the next 10 elements. 313 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30))) 314 // Time check after the next 10 elements. This will trigger another 315 // continuation task. 316 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(36))) 317 // Time check at the start of the third chunk of work. 318 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(46))) 319 // Time check after resuming iteration for the third chunk. 320 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(48))) 321 // Time check after the last 10 elements. 322 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(54))) 323 // A final time check for the histograms. 324 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(56))); 325 326 FeatureMap expected_features; 327 expected_features.AddBooleanFeature(features::kPageHasForms); 328 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5); 329 330 FeatureMap features; 331 LoadURL("http://host.com/"); 332 ASSERT_TRUE(ExtractFeatures(&features)); 333 ExpectFeatureMapsAreEqual(features, expected_features); 334 // Make sure none of the mock expectations carry over to the next test. 335 ::testing::Mock::VerifyAndClearExpectations(&clock_); 336 337 // Now repeat the test with the same page, but advance the clock faster so 338 // that the extraction time exceeds the maximum total time for the feature 339 // extractor. Extraction should fail. Note that this assumes 340 // kMaxTotalTimeMs = 500. 341 EXPECT_CALL(clock_, Now()) 342 // Time check at the start of extraction. 343 .WillOnce(Return(now)) 344 // Time check at the start of the first chunk of work. 345 .WillOnce(Return(now)) 346 // Time check after the first 10 elements. 347 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(300))) 348 // Time check at the start of the second chunk of work. 349 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(350))) 350 // Time check after resuming iteration for the second chunk. 351 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(360))) 352 // Time check after the next 10 elements. This is over the limit. 353 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(600))) 354 // A final time check for the histograms. 355 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(620))); 356 357 features.Clear(); 358 EXPECT_FALSE(ExtractFeatures(&features)); 359 } 360 361 TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) { 362 // In this test, we'll advance the feature extractor so that it is positioned 363 // inside an iframe, and have it pause due to exceeding the chunk time limit. 364 // Then, prior to continuation, the iframe is removed from the document. 365 // As currently implemented, this should finish extraction from the removed 366 // iframe document. 367 responses_["http://host.com/"] = 368 "<html><head></head><body>" 369 "<iframe src=\"frame.html\" id=\"frame1\"></iframe>" 370 "<form></form></body></html>"; 371 responses_["http://host.com/frame.html"] = 372 "<html><body><p><p><p><input type=password></body></html>"; 373 374 base::TimeTicks now = base::TimeTicks::Now(); 375 EXPECT_CALL(clock_, Now()) 376 // Time check at the start of extraction. 377 .WillOnce(Return(now)) 378 // Time check at the start of the first chunk of work. 379 .WillOnce(Return(now)) 380 // Time check after the first 10 elements. Enough time has passed 381 // to stop extraction. Schedule the iframe removal to happen as soon as 382 // the feature extractor returns control to the message loop. 383 .WillOnce(DoAll( 384 Invoke(this, &PhishingDOMFeatureExtractorTest::ScheduleRemoveIframe), 385 Return(now + base::TimeDelta::FromMilliseconds(21)))) 386 // Time check at the start of the second chunk of work. 387 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(25))) 388 // Time check after resuming iteration for the second chunk. 389 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(27))) 390 // A final time check for the histograms. 391 .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(33))); 392 393 FeatureMap expected_features; 394 expected_features.AddBooleanFeature(features::kPageHasForms); 395 expected_features.AddBooleanFeature(features::kPageHasPswdInputs); 396 397 FeatureMap features; 398 LoadURL("http://host.com/"); 399 ASSERT_TRUE(ExtractFeatures(&features)); 400 ExpectFeatureMapsAreEqual(features, expected_features); 401 } 402 403 } // namespace safe_browsing 404