1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/renderer/safe_browsing/features.h" 6 7 #include "base/logging.h" 8 #include "base/metrics/histogram.h" 9 10 namespace safe_browsing { 11 12 const size_t FeatureMap::kMaxFeatureMapSize = 10000; 13 14 FeatureMap::FeatureMap() {} 15 FeatureMap::~FeatureMap() {} 16 17 bool FeatureMap::AddBooleanFeature(const std::string& name) { 18 return AddRealFeature(name, 1.0); 19 } 20 21 bool FeatureMap::AddRealFeature(const std::string& name, double value) { 22 if (features_.size() >= kMaxFeatureMapSize) { 23 // If we hit this case, it indicates that either kMaxFeatureMapSize is 24 // too small, or there is a bug causing too many features to be added. 25 // In this case, we'll log to a histogram so we can see that this is 26 // happening, and make phishing classification fail silently. 27 LOG(ERROR) << "Not adding feature: " << name << " because the " 28 << "feature map is too large."; 29 UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1); 30 return false; 31 } 32 // We only expect features in the range [0.0, 1.0], so fail if the feature is 33 // outside this range. 34 if (value < 0.0 || value > 1.0) { 35 LOG(ERROR) << "Not adding feature: " << name << " because the value " 36 << value << " is not in the range [0.0, 1.0]."; 37 UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1); 38 return false; 39 } 40 41 features_[name] = value; 42 return true; 43 } 44 45 void FeatureMap::Clear() { 46 features_.clear(); 47 } 48 49 namespace features { 50 // URL host features 51 const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress"; 52 const char kUrlTldToken[] = "UrlTld="; 53 const char kUrlDomainToken[] = "UrlDomain="; 54 const char kUrlOtherHostToken[] = "UrlOtherHostToken="; 55 56 // URL host aggregate features 57 const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1"; 58 const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3"; 59 60 // URL path features 61 const char kUrlPathToken[] = "UrlPathToken="; 62 63 // DOM HTML form features 64 const char kPageHasForms[] = "PageHasForms"; 65 const char kPageActionOtherDomainFreq[] = "PageActionOtherDomainFreq"; 66 const char kPageHasTextInputs[] = "PageHasTextInputs"; 67 const char kPageHasPswdInputs[] = "PageHasPswdInputs"; 68 const char kPageHasRadioInputs[] = "PageHasRadioInputs"; 69 const char kPageHasCheckInputs[] = "PageHasCheckInputs"; 70 71 // DOM HTML link features 72 const char kPageExternalLinksFreq[] = "PageExternalLinksFreq"; 73 const char kPageLinkDomain[] = "PageLinkDomain="; 74 const char kPageSecureLinksFreq[] = "PageSecureLinksFreq"; 75 76 // DOM HTML script features 77 const char kPageNumScriptTagsGTOne[] = "PageNumScriptTags>1"; 78 const char kPageNumScriptTagsGTSix[] = "PageNumScriptTags>6"; 79 80 // Other DOM HTML features 81 const char kPageImgOtherDomainFreq[] = "PageImgOtherDomainFreq"; 82 83 // Page term features 84 const char kPageTerm[] = "PageTerm="; 85 86 } // namespace features 87 } // namespace safe_browsing 88