Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/renderer/safe_browsing/scorer.h"
      6 
      7 #include "base/containers/hash_tables.h"
      8 #include "base/files/file_path.h"
      9 #include "base/files/scoped_temp_dir.h"
     10 #include "base/format_macros.h"
     11 #include "base/memory/scoped_ptr.h"
     12 #include "base/message_loop/message_loop.h"
     13 #include "base/threading/thread.h"
     14 #include "chrome/common/safe_browsing/client_model.pb.h"
     15 #include "chrome/renderer/safe_browsing/features.h"
     16 #include "testing/gmock/include/gmock/gmock.h"
     17 #include "testing/gtest/include/gtest/gtest.h"
     18 
     19 namespace safe_browsing {
     20 
     21 class PhishingScorerTest : public ::testing::Test {
     22  protected:
     23   virtual void SetUp() {
     24     // Setup a simple model.  Note that the scorer does not care about
     25     // how features are encoded so we use readable strings here to make
     26     // the test simpler to follow.
     27     model_.Clear();
     28     model_.add_hashes("feature1");
     29     model_.add_hashes("feature2");
     30     model_.add_hashes("feature3");
     31     model_.add_hashes("token one");
     32     model_.add_hashes("token two");
     33 
     34     ClientSideModel::Rule* rule;
     35     rule = model_.add_rule();
     36     rule->set_weight(0.5);
     37 
     38     rule = model_.add_rule();
     39     rule->add_feature(0);  // feature1
     40     rule->set_weight(2.0);
     41 
     42     rule = model_.add_rule();
     43     rule->add_feature(0);  // feature1
     44     rule->add_feature(1);  // feature2
     45     rule->set_weight(3.0);
     46 
     47     model_.add_page_term(3);  // token one
     48     model_.add_page_term(4);  // token two
     49 
     50     // These will be murmur3 hashes, but for this test it's not necessary
     51     // that the hashes correspond to actual words.
     52     model_.add_page_word(1000U);
     53     model_.add_page_word(2000U);
     54     model_.add_page_word(3000U);
     55 
     56     model_.set_max_words_per_term(2);
     57     model_.set_murmur_hash_seed(12345U);
     58     model_.set_max_shingles_per_page(10);
     59     model_.set_shingle_size(3);
     60   }
     61 
     62   ClientSideModel model_;
     63 };
     64 
     65 TEST_F(PhishingScorerTest, HasValidModel) {
     66   scoped_ptr<Scorer> scorer;
     67   scorer.reset(Scorer::Create(model_.SerializeAsString()));
     68   EXPECT_TRUE(scorer.get() != NULL);
     69 
     70   // Invalid model string.
     71   scorer.reset(Scorer::Create("bogus string"));
     72   EXPECT_FALSE(scorer.get());
     73 
     74   // Mode is missing a required field.
     75   model_.clear_max_words_per_term();
     76   scorer.reset(Scorer::Create(model_.SerializePartialAsString()));
     77   EXPECT_FALSE(scorer.get());
     78 }
     79 
     80 TEST_F(PhishingScorerTest, PageTerms) {
     81   scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
     82   ASSERT_TRUE(scorer.get());
     83 
     84   // Use std::vector instead of base::hash_set for comparison.
     85   // On Android, EXPECT_THAT(..., ContainerEq(...)) doesn't support
     86   // std::hash_set, but std::vector works fine.
     87   std::vector<std::string> expected_page_terms;
     88   expected_page_terms.push_back("token one");
     89   expected_page_terms.push_back("token two");
     90   std::sort(expected_page_terms.begin(), expected_page_terms.end());
     91 
     92   base::hash_set<std::string> page_terms = scorer->page_terms();
     93   std::vector<std::string> page_terms_v(page_terms.begin(), page_terms.end());
     94   std::sort(page_terms_v.begin(), page_terms_v.end());
     95 
     96   EXPECT_THAT(page_terms_v, ::testing::ContainerEq(expected_page_terms));
     97 }
     98 
     99 TEST_F(PhishingScorerTest, PageWords) {
    100   scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
    101   ASSERT_TRUE(scorer.get());
    102   std::vector<uint32> expected_page_words;
    103   expected_page_words.push_back(1000U);
    104   expected_page_words.push_back(2000U);
    105   expected_page_words.push_back(3000U);
    106   std::sort(expected_page_words.begin(), expected_page_words.end());
    107 
    108   base::hash_set<uint32> page_words = scorer->page_words();
    109   std::vector<uint32> page_words_v(page_words.begin(), page_words.end());
    110   std::sort(page_words_v.begin(), page_words_v.end());
    111 
    112   EXPECT_THAT(page_words_v, ::testing::ContainerEq(expected_page_words));
    113 
    114   EXPECT_EQ(2U, scorer->max_words_per_term());
    115   EXPECT_EQ(12345U, scorer->murmurhash3_seed());
    116   EXPECT_EQ(10U, scorer->max_shingles_per_page());
    117   EXPECT_EQ(3U, scorer->shingle_size());
    118 }
    119 
    120 TEST_F(PhishingScorerTest, ComputeScore) {
    121   scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
    122   ASSERT_TRUE(scorer.get());
    123 
    124   // An empty feature map should match the empty rule.
    125   FeatureMap features;
    126   // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1)
    127   // => 0.62245933120185459
    128   EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
    129   // Same if the feature does not match any rule.
    130   EXPECT_TRUE(features.AddBooleanFeature("not existing feature"));
    131   EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features));
    132 
    133   // Feature 1 matches which means that the logodds will be:
    134   //   0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8
    135   //   => p = 0.6899744811276125
    136   EXPECT_TRUE(features.AddRealFeature("feature1", 0.15));
    137   EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features));
    138 
    139   // Now, both feature 1 and feature 2 match.  Expected logodds:
    140   //   0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) +
    141   //   3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2) weight = 9.8
    142   //   => p = 0.99999627336071584
    143   EXPECT_TRUE(features.AddBooleanFeature("feature2"));
    144   EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features));
    145 }
    146 }  // namespace safe_browsing
    147