1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/renderer/safe_browsing/scorer.h" 6 7 #include "base/containers/hash_tables.h" 8 #include "base/files/file_path.h" 9 #include "base/files/scoped_temp_dir.h" 10 #include "base/format_macros.h" 11 #include "base/memory/scoped_ptr.h" 12 #include "base/message_loop/message_loop.h" 13 #include "base/threading/thread.h" 14 #include "chrome/common/safe_browsing/client_model.pb.h" 15 #include "chrome/renderer/safe_browsing/features.h" 16 #include "testing/gmock/include/gmock/gmock.h" 17 #include "testing/gtest/include/gtest/gtest.h" 18 19 namespace safe_browsing { 20 21 class PhishingScorerTest : public ::testing::Test { 22 protected: 23 virtual void SetUp() { 24 // Setup a simple model. Note that the scorer does not care about 25 // how features are encoded so we use readable strings here to make 26 // the test simpler to follow. 27 model_.Clear(); 28 model_.add_hashes("feature1"); 29 model_.add_hashes("feature2"); 30 model_.add_hashes("feature3"); 31 model_.add_hashes("token one"); 32 model_.add_hashes("token two"); 33 34 ClientSideModel::Rule* rule; 35 rule = model_.add_rule(); 36 rule->set_weight(0.5); 37 38 rule = model_.add_rule(); 39 rule->add_feature(0); // feature1 40 rule->set_weight(2.0); 41 42 rule = model_.add_rule(); 43 rule->add_feature(0); // feature1 44 rule->add_feature(1); // feature2 45 rule->set_weight(3.0); 46 47 model_.add_page_term(3); // token one 48 model_.add_page_term(4); // token two 49 50 // These will be murmur3 hashes, but for this test it's not necessary 51 // that the hashes correspond to actual words. 52 model_.add_page_word(1000U); 53 model_.add_page_word(2000U); 54 model_.add_page_word(3000U); 55 56 model_.set_max_words_per_term(2); 57 model_.set_murmur_hash_seed(12345U); 58 } 59 60 ClientSideModel model_; 61 }; 62 63 TEST_F(PhishingScorerTest, HasValidModel) { 64 scoped_ptr<Scorer> scorer; 65 scorer.reset(Scorer::Create(model_.SerializeAsString())); 66 EXPECT_TRUE(scorer.get() != NULL); 67 68 // Invalid model string. 69 scorer.reset(Scorer::Create("bogus string")); 70 EXPECT_FALSE(scorer.get()); 71 72 // Mode is missing a required field. 73 model_.clear_max_words_per_term(); 74 scorer.reset(Scorer::Create(model_.SerializePartialAsString())); 75 EXPECT_FALSE(scorer.get()); 76 } 77 78 TEST_F(PhishingScorerTest, PageTerms) { 79 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); 80 ASSERT_TRUE(scorer.get()); 81 base::hash_set<std::string> expected_page_terms; 82 expected_page_terms.insert("token one"); 83 expected_page_terms.insert("token two"); 84 EXPECT_THAT(scorer->page_terms(), 85 ::testing::ContainerEq(expected_page_terms)); 86 } 87 88 TEST_F(PhishingScorerTest, PageWords) { 89 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); 90 ASSERT_TRUE(scorer.get()); 91 base::hash_set<uint32> expected_page_words; 92 expected_page_words.insert(1000U); 93 expected_page_words.insert(2000U); 94 expected_page_words.insert(3000U); 95 EXPECT_THAT(scorer->page_words(), 96 ::testing::ContainerEq(expected_page_words)); 97 EXPECT_EQ(2U, scorer->max_words_per_term()); 98 EXPECT_EQ(12345U, scorer->murmurhash3_seed()); 99 } 100 101 TEST_F(PhishingScorerTest, ComputeScore) { 102 scoped_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); 103 ASSERT_TRUE(scorer.get()); 104 105 // An empty feature map should match the empty rule. 106 FeatureMap features; 107 // The expected logodds is 0.5 (empty rule) => p = exp(0.5) / (exp(0.5) + 1) 108 // => 0.62245933120185459 109 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); 110 // Same if the feature does not match any rule. 111 EXPECT_TRUE(features.AddBooleanFeature("not existing feature")); 112 EXPECT_DOUBLE_EQ(0.62245933120185459, scorer->ComputeScore(features)); 113 114 // Feature 1 matches which means that the logodds will be: 115 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) = 0.8 116 // => p = 0.6899744811276125 117 EXPECT_TRUE(features.AddRealFeature("feature1", 0.15)); 118 EXPECT_DOUBLE_EQ(0.6899744811276125, scorer->ComputeScore(features)); 119 120 // Now, both feature 1 and feature 2 match. Expected logodds: 121 // 0.5 (empty rule) + 2.0 (rule weight) * 0.15 (feature weight) + 122 // 3.0 (rule weight) * 0.15 (feature1 weight) * 1.0 (feature2) weight = 9.8 123 // => p = 0.99999627336071584 124 EXPECT_TRUE(features.AddBooleanFeature("feature2")); 125 EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features)); 126 } 127 } // namespace safe_browsing 128