Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Note that although this is not a "browser" test, it runs as part of
      6 // browser_tests.  This is because WebKit does not work properly if it is
// shut down and re-initialized.  Since browser_tests runs each test in a
      8 // new process, this avoids the problem.
      9 
     10 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
     11 
     12 #include "base/bind.h"
     13 #include "base/callback.h"
     14 #include "base/compiler_specific.h"
     15 #include "base/memory/weak_ptr.h"
     16 #include "base/message_loop/message_loop.h"
     17 #include "base/time/time.h"
     18 #include "chrome/renderer/safe_browsing/features.h"
     19 #include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
     20 #include "chrome/renderer/safe_browsing/test_utils.h"
     21 #include "content/public/test/render_view_fake_resources_test.h"
     22 #include "testing/gmock/include/gmock/gmock.h"
     23 #include "third_party/WebKit/public/platform/WebString.h"
     24 #include "third_party/WebKit/public/web/WebFrame.h"
     25 #include "third_party/WebKit/public/web/WebScriptSource.h"
     26 
     27 using ::testing::DoAll;
     28 using ::testing::Invoke;
     29 using ::testing::Return;
     30 
     31 namespace safe_browsing {
     32 
     33 class PhishingDOMFeatureExtractorTest
     34     : public content::RenderViewFakeResourcesTest {
     35  public:
     36   // Helper for the SubframeRemoval test that posts a message to remove
     37   // the iframe "frame1" from the document.
     38   void ScheduleRemoveIframe() {
     39     message_loop_.PostTask(
     40         FROM_HERE,
     41         base::Bind(&PhishingDOMFeatureExtractorTest::RemoveIframe,
     42                    weak_factory_.GetWeakPtr()));
     43   }
     44 
     45  protected:
     46   PhishingDOMFeatureExtractorTest()
     47       : content::RenderViewFakeResourcesTest(),
     48         weak_factory_(this) {}
     49 
     50   virtual ~PhishingDOMFeatureExtractorTest() {}
     51 
     52   virtual void SetUp() {
     53     // Set up WebKit and the RenderView.
     54     content::RenderViewFakeResourcesTest::SetUp();
     55     extractor_.reset(new PhishingDOMFeatureExtractor(view(), &clock_));
     56   }
     57 
     58   virtual void TearDown() {
     59     content::RenderViewFakeResourcesTest::TearDown();
     60   }
     61 
     62   // Runs the DOMFeatureExtractor on the RenderView, waiting for the
     63   // completion callback.  Returns the success boolean from the callback.
     64   bool ExtractFeatures(FeatureMap* features) {
     65     success_ = false;
     66     extractor_->ExtractFeatures(
     67         features,
     68         base::Bind(&PhishingDOMFeatureExtractorTest::ExtractionDone,
     69                    base::Unretained(this)));
     70     message_loop_.Run();
     71     return success_;
     72   }
     73 
     74   // Completion callback for feature extraction.
     75   void ExtractionDone(bool success) {
     76     success_ = success;
     77     message_loop_.Quit();
     78   }
     79 
     80   // Does the actual work of removing the iframe "frame1" from the document.
     81   void RemoveIframe() {
     82     WebKit::WebFrame* main_frame = GetMainFrame();
     83     ASSERT_TRUE(main_frame);
     84     main_frame->executeScript(
     85         WebKit::WebString(
     86             "document.body.removeChild(document.getElementById('frame1'));"));
     87   }
     88 
     89   MockFeatureExtractorClock clock_;
     90   scoped_ptr<PhishingDOMFeatureExtractor> extractor_;
     91   bool success_;  // holds the success value from ExtractFeatures
     92   base::WeakPtrFactory<PhishingDOMFeatureExtractorTest> weak_factory_;
     93 };
     94 
     95 TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
     96   // This test doesn't exercise the extraction timing.
     97   EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
     98   responses_["http://host.com/"] =
     99       "<html><head><body>"
    100       "<form action=\"query\"><input type=text><input type=checkbox></form>"
    101       "<form action=\"http://cgi.host.com/submit\"></form>"
    102       "<form action=\"http://other.com/\"></form>"
    103       "<form action=\"query\"></form>"
    104       "<form></form></body></html>";
    105 
    106   FeatureMap expected_features;
    107   expected_features.AddBooleanFeature(features::kPageHasForms);
    108   expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25);
    109   expected_features.AddBooleanFeature(features::kPageHasTextInputs);
    110   expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
    111 
    112   FeatureMap features;
    113   LoadURL("http://host.com/");
    114   ASSERT_TRUE(ExtractFeatures(&features));
    115   ExpectFeatureMapsAreEqual(features, expected_features);
    116 
    117   responses_["http://host.com/"] =
    118       "<html><head><body>"
    119       "<input type=\"radio\"><input type=password></body></html>";
    120 
    121   expected_features.Clear();
    122   expected_features.AddBooleanFeature(features::kPageHasRadioInputs);
    123   expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
    124 
    125   features.Clear();
    126   LoadURL("http://host.com/");
    127   ASSERT_TRUE(ExtractFeatures(&features));
    128   ExpectFeatureMapsAreEqual(features, expected_features);
    129 
    130   responses_["http://host.com/"] =
    131       "<html><head><body><input></body></html>";
    132 
    133   expected_features.Clear();
    134   expected_features.AddBooleanFeature(features::kPageHasTextInputs);
    135 
    136   features.Clear();
    137   LoadURL("http://host.com/");
    138   ASSERT_TRUE(ExtractFeatures(&features));
    139   ExpectFeatureMapsAreEqual(features, expected_features);
    140 
    141   responses_["http://host.com/"] =
    142       "<html><head><body><input type=\"invalid\"></body></html>";
    143 
    144   expected_features.Clear();
    145   expected_features.AddBooleanFeature(features::kPageHasTextInputs);
    146 
    147   features.Clear();
    148   LoadURL("http://host.com/");
    149   ASSERT_TRUE(ExtractFeatures(&features));
    150   ExpectFeatureMapsAreEqual(features, expected_features);
    151 }
    152 
    153 TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
    154   // This test doesn't exercise the extraction timing.
    155   EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
    156   responses_["http://www.host.com/"] =
    157       "<html><head><body>"
    158       "<a href=\"http://www2.host.com/abc\">link</a>"
    159       "<a name=page_anchor></a>"
    160       "<a href=\"http://www.chromium.org/\">chromium</a>"
    161       "</body></html";
    162 
    163   FeatureMap expected_features;
    164   expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5);
    165   expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0);
    166   expected_features.AddBooleanFeature(features::kPageLinkDomain +
    167                                       std::string("chromium.org"));
    168 
    169   FeatureMap features;
    170   LoadURL("http://www.host.com/");
    171   ASSERT_TRUE(ExtractFeatures(&features));
    172   ExpectFeatureMapsAreEqual(features, expected_features);
    173 
    174   responses_.clear();
    175   responses_["https://www.host.com/"] =
    176       "<html><head><body>"
    177       "<a href=\"login\">this is secure</a>"
    178       "<a href=\"http://host.com\">not secure</a>"
    179       "<a href=\"https://www2.host.com/login\">also secure</a>"
    180       "<a href=\"http://chromium.org/\">also not secure</a>"
    181       "</body></html>";
    182 
    183   expected_features.Clear();
    184   expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
    185   expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5);
    186   expected_features.AddBooleanFeature(features::kPageLinkDomain +
    187                                       std::string("chromium.org"));
    188 
    189   features.Clear();
    190   LoadURL("https://www.host.com/");
    191   ASSERT_TRUE(ExtractFeatures(&features));
    192   ExpectFeatureMapsAreEqual(features, expected_features);
    193 }
    194 
    195 TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) {
    196   // This test doesn't exercise the extraction timing.
    197   EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
    198   responses_["http://host.com/"] =
    199       "<html><head><script></script><script></script></head></html>";
    200 
    201   FeatureMap expected_features;
    202   expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
    203 
    204   FeatureMap features;
    205   LoadURL("http://host.com/");
    206   ASSERT_TRUE(ExtractFeatures(&features));
    207   ExpectFeatureMapsAreEqual(features, expected_features);
    208 
    209   responses_["http://host.com/"] =
    210       "<html><head><script></script><script></script><script></script>"
    211       "<script></script><script></script><script></script><script></script>"
    212       "</head><body><img src=\"blah.gif\">"
    213       "<img src=\"http://host2.com/blah.gif\"></body></html>";
    214 
    215   expected_features.Clear();
    216   expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
    217   expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTSix);
    218   expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5);
    219 
    220   features.Clear();
    221   LoadURL("http://host.com/");
    222   ASSERT_TRUE(ExtractFeatures(&features));
    223   ExpectFeatureMapsAreEqual(features, expected_features);
    224 }
    225 
    226 TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
    227   // This test doesn't exercise the extraction timing.
    228   EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
    229 
    230   // Test that features are aggregated across all frames.
    231   responses_["http://host.com/"] =
    232       "<html><body><input type=text><a href=\"info.html\">link</a>"
    233       "<iframe src=\"http://host2.com/\"></iframe>"
    234       "<iframe src=\"http://host3.com/\"></iframe>"
    235       "</body></html>";
    236 
    237   responses_["http://host2.com/"] =
    238       "<html><head><script></script><body>"
    239       "<form action=\"http://host4.com/\"><input type=checkbox></form>"
    240       "<form action=\"http://host2.com/submit\"></form>"
    241       "<a href=\"http://www.host2.com/home\">link</a>"
    242       "<iframe src=\"nested.html\"></iframe>"
    243       "<body></html>";
    244 
    245   responses_["http://host2.com/nested.html"] =
    246       "<html><body><input type=password>"
    247       "<a href=\"https://host4.com/\">link</a>"
    248       "<a href=\"relative\">another</a>"
    249       "</body></html>";
    250 
    251   responses_["http://host3.com/"] =
    252       "<html><head><script></script><body>"
    253       "<img src=\"http://host.com/123.png\">"
    254       "</body></html>";
    255 
    256   FeatureMap expected_features;
    257   expected_features.AddBooleanFeature(features::kPageHasForms);
    258   // Form action domains are compared to the URL of the document they're in,
    259   // not the URL of the toplevel page.  So http://host2.com/ has two form
    260   // actions, one of which is external.
    261   expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
    262   expected_features.AddBooleanFeature(features::kPageHasTextInputs);
    263   expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
    264   expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
    265   expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
    266   expected_features.AddBooleanFeature(features::kPageLinkDomain +
    267                                       std::string("host4.com"));
    268   expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25);
    269   expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
    270   expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0);
    271 
    272   FeatureMap features;
    273   LoadURL("http://host.com/");
    274   ASSERT_TRUE(ExtractFeatures(&features));
    275   ExpectFeatureMapsAreEqual(features, expected_features);
    276 }
    277 
    278 TEST_F(PhishingDOMFeatureExtractorTest, Continuation) {
    279   // For this test, we'll cause the feature extraction to run multiple
    280   // iterations by incrementing the clock.
    281 
    282   // This page has a total of 50 elements.  For the external forms feature to
    283   // be computed correctly, the extractor has to examine the whole document.
    284   // Note: the empty HEAD is important -- WebKit will synthesize a HEAD if
    285   // there isn't one present, which can be confusing for the element counts.
    286   std::string response = "<html><head></head><body>"
    287       "<form action=\"ondomain\"></form>";
    288   for (int i = 0; i < 45; ++i) {
    289     response.append("<p>");
    290   }
    291   response.append("<form action=\"http://host2.com/\"></form></body></html>");
    292   responses_["http://host.com/"] = response;
    293 
    294   // Advance the clock 6 ms every 10 elements processed, 10 ms between chunks.
    295   // Note that this assumes kClockCheckGranularity = 10 and
    296   // kMaxTimePerChunkMs = 10.
    297   base::TimeTicks now = base::TimeTicks::Now();
    298   EXPECT_CALL(clock_, Now())
    299       // Time check at the start of extraction.
    300       .WillOnce(Return(now))
    301       // Time check at the start of the first chunk of work.
    302       .WillOnce(Return(now))
    303       // Time check after the first 10 elements.
    304       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(6)))
    305       // Time check after the next 10 elements.  This is over the chunk
    306       // time limit, so a continuation task will be posted.
    307       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(12)))
    308       // Time check at the start of the second chunk of work.
    309       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(22)))
    310       // Time check after resuming iteration for the second chunk.
    311       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(24)))
    312       // Time check after the next 10 elements.
    313       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30)))
    314       // Time check after the next 10 elements.  This will trigger another
    315       // continuation task.
    316       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(36)))
    317       // Time check at the start of the third chunk of work.
    318       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(46)))
    319       // Time check after resuming iteration for the third chunk.
    320       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(48)))
    321       // Time check after the last 10 elements.
    322       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(54)))
    323       // A final time check for the histograms.
    324       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(56)));
    325 
    326   FeatureMap expected_features;
    327   expected_features.AddBooleanFeature(features::kPageHasForms);
    328   expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
    329 
    330   FeatureMap features;
    331   LoadURL("http://host.com/");
    332   ASSERT_TRUE(ExtractFeatures(&features));
    333   ExpectFeatureMapsAreEqual(features, expected_features);
    334   // Make sure none of the mock expectations carry over to the next test.
    335   ::testing::Mock::VerifyAndClearExpectations(&clock_);
    336 
    337   // Now repeat the test with the same page, but advance the clock faster so
    338   // that the extraction time exceeds the maximum total time for the feature
    339   // extractor.  Extraction should fail.  Note that this assumes
    340   // kMaxTotalTimeMs = 500.
    341   EXPECT_CALL(clock_, Now())
    342       // Time check at the start of extraction.
    343       .WillOnce(Return(now))
    344       // Time check at the start of the first chunk of work.
    345       .WillOnce(Return(now))
    346       // Time check after the first 10 elements.
    347       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(300)))
    348       // Time check at the start of the second chunk of work.
    349       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(350)))
    350       // Time check after resuming iteration for the second chunk.
    351       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(360)))
    352       // Time check after the next 10 elements.  This is over the limit.
    353       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(600)))
    354       // A final time check for the histograms.
    355       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(620)));
    356 
    357   features.Clear();
    358   EXPECT_FALSE(ExtractFeatures(&features));
    359 }
    360 
    361 TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) {
    362   // In this test, we'll advance the feature extractor so that it is positioned
    363   // inside an iframe, and have it pause due to exceeding the chunk time limit.
    364   // Then, prior to continuation, the iframe is removed from the document.
    365   // As currently implemented, this should finish extraction from the removed
    366   // iframe document.
    367   responses_["http://host.com/"] =
    368       "<html><head></head><body>"
    369       "<iframe src=\"frame.html\" id=\"frame1\"></iframe>"
    370       "<form></form></body></html>";
    371   responses_["http://host.com/frame.html"] =
    372       "<html><body><p><p><p><input type=password></body></html>";
    373 
    374   base::TimeTicks now = base::TimeTicks::Now();
    375   EXPECT_CALL(clock_, Now())
    376       // Time check at the start of extraction.
    377       .WillOnce(Return(now))
    378       // Time check at the start of the first chunk of work.
    379       .WillOnce(Return(now))
    380       // Time check after the first 10 elements.  Enough time has passed
    381       // to stop extraction.  Schedule the iframe removal to happen as soon as
    382       // the feature extractor returns control to the message loop.
    383       .WillOnce(DoAll(
    384           Invoke(this, &PhishingDOMFeatureExtractorTest::ScheduleRemoveIframe),
    385           Return(now + base::TimeDelta::FromMilliseconds(21))))
    386       // Time check at the start of the second chunk of work.
    387       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(25)))
    388       // Time check after resuming iteration for the second chunk.
    389       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(27)))
    390       // A final time check for the histograms.
    391       .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(33)));
    392 
    393   FeatureMap expected_features;
    394   expected_features.AddBooleanFeature(features::kPageHasForms);
    395   expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
    396 
    397   FeatureMap features;
    398   LoadURL("http://host.com/");
    399   ASSERT_TRUE(ExtractFeatures(&features));
    400   ExpectFeatureMapsAreEqual(features, expected_features);
    401 }
    402 
    403 }  // namespace safe_browsing
    404