Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/renderer/safe_browsing/scorer.h"
      6 
      7 #include "base/containers/hash_tables.h"
      8 #include "base/files/file_path.h"
      9 #include "base/files/scoped_temp_dir.h"
     10 #include "base/format_macros.h"
     11 #include "base/memory/scoped_ptr.h"
     12 #include "base/message_loop/message_loop.h"
     13 #include "base/threading/thread.h"
     14 #include "chrome/common/safe_browsing/client_model.pb.h"
     15 #include "chrome/renderer/safe_browsing/features.h"
     16 #include "testing/gmock/include/gmock/gmock.h"
     17 #include "testing/gtest/include/gtest/gtest.h"
     18 
     19 namespace safe_browsing {
     20 
     21 class PhishingScorerTest : public ::testing::Test {
          // Fixture shared by the tests below.  SetUp() fills |model_| with five
          // hash strings, three rules (weights 0.5, 2.0 and 3.0), two page terms,
          // three page words, max_words_per_term = 2 and murmur_hash_seed = 12345.
     22  protected:
     23   virtual void SetUp() {
     24     // Set up a simple model.  Note that the scorer does not care about
     25     // how features are encoded so we use readable strings here to make
     26     // the test simpler to follow.
     27     model_.Clear();
                // The numeric arguments to add_feature() and add_page_term() below
                // are indices into this list, in insertion order (0 = "feature1",
                // ..., 4 = "token two").  "feature3" is not referenced by any rule
                // or page term in this fixture.
     28     model_.add_hashes("feature1");
     29     model_.add_hashes("feature2");
     30     model_.add_hashes("feature3");
     31     model_.add_hashes("token one");
     32     model_.add_hashes("token two");
     33 
     34     ClientSideModel::Rule* rule;
                // Rule with no features (the "empty rule" in ComputeScore).
     35     rule = model_.add_rule();
     36     rule->set_weight(0.5);
     37 
                // Rule on feature1 only.
     38     rule = model_.add_rule();
     39     rule->add_feature(0);  // feature1
     40     rule->set_weight(2.0);
     41 
                // Rule on feature1 and feature2 together.
     42     rule = model_.add_rule();
     43     rule->add_feature(0);  // feature1
     44     rule->add_feature(1);  // feature2
     45     rule->set_weight(3.0);
     46 
     47     model_.add_page_term(3);  // token one
     48     model_.add_page_term(4);  // token two
     49 
     50     // These will be murmur3 hashes, but for this test it's not necessary
     51     // that the hashes correspond to actual words.
     52     model_.add_page_word(1000U);
     53     model_.add_page_word(2000U);
     54     model_.add_page_word(3000U);
     55 
     56     model_.set_max_words_per_term(2);
     57     model_.set_murmur_hash_seed(12345U);
     58   }
     59 
          // Model under construction; each test serializes it and hands the
          // resulting string to Scorer::Create().
     60   ClientSideModel model_;
     61 };
     62 
     63 TEST_F(PhishingScorerTest, HasValidModel) {
          // Scorer::Create() must return a scorer for the fixture model and NULL
          // for an unparseable string or a model with a required field cleared.
     64   scoped_ptr<Scorer> scorer;
     65   scorer.reset(Scorer::Create(model_.SerializeAsString()));
     66   EXPECT_TRUE(scorer.get() != NULL);
     67 
     68   // Invalid model string.
     69   scorer.reset(Scorer::Create("bogus string"));
     70   EXPECT_FALSE(scorer.get());
     71 
     72   // Model is missing a required field.
          // NOTE(review): SerializePartialAsString() is presumably used because
          // the message is no longer fully initialized once the field is cleared
          // -- confirm against the protobuf documentation.
     73   model_.clear_max_words_per_term();
     74   scorer.reset(Scorer::Create(model_.SerializePartialAsString()));
     75   EXPECT_FALSE(scorer.get());
     76 }
     77 
     78 TEST_F(PhishingScorerTest, PageTerms) {
          // The scorer must expose exactly the two page terms configured in
          // SetUp(), as strings.
     79   scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
     80   ASSERT_TRUE(scorer.get());
     81   base::hash_set<std::string> expected_page_terms;
     82   expected_page_terms.insert("token one");
     83   expected_page_terms.insert("token two");
     84   EXPECT_THAT(scorer->page_terms(),
     85               ::testing::ContainerEq(expected_page_terms));
     86 }
     87 
     88 TEST_F(PhishingScorerTest, PageWords) {
          // The scorer must expose exactly the three page-word hashes configured
          // in SetUp(), plus the max_words_per_term and murmur hash seed values.
     89   scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
     90   ASSERT_TRUE(scorer.get());
     91   base::hash_set<uint32> expected_page_words;
     92   expected_page_words.insert(1000U);
     93   expected_page_words.insert(2000U);
     94   expected_page_words.insert(3000U);
     95   EXPECT_THAT(scorer->page_words(),
     96               ::testing::ContainerEq(expected_page_words));
     97   EXPECT_EQ(2U, scorer->max_words_per_term());
     98   EXPECT_EQ(12345U, scorer->murmurhash3_seed());
     99 }
    100 
    101 TEST_F(PhishingScorerTest, ComputeScore) {
          // Checks ComputeScore() against hand-computed probabilities, using
          // p = exp(logodds) / (exp(logodds) + 1).  The same FeatureMap is reused
          // throughout, so each step adds to the features of the previous one.
    102   scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
    103   ASSERT_TRUE(scorer.get());
    104 
    105   // An empty feature map should match the empty rule.
    106   FeatureMap features;
    107   // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1)
    108   // => 0.62245933120185459
    109   EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
    110   // Same if the feature does not match any rule.
    111   EXPECT_TRUE(features.AddBooleanFeature("not existing feature"));
    112   EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
    113 
    114   // Feature 1 matches which means that the logodds will be:
    115   //   0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8
    116   //   => p = 0.6899744811276125
    117   EXPECT_TRUE(features.AddRealFeature("feature1", 0.15));
    118   EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features));
    119 
    120   // Now, both feature 1 and feature 2 match.  Expected logodds:
    121   //   0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) +
    122   //   3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2 weight)
    123   //   = 0.5 + 0.3 + 0.45 = 1.25  => p = 0.77729986117469119
    124   EXPECT_TRUE(features.AddBooleanFeature("feature2"));
    125   EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features));
    126 }
    127 }  // namespace safe_browsing
    128