Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/renderer/safe_browsing/features.h"
      6 
      7 #include "base/logging.h"
      8 #include "base/metrics/histogram.h"
      9 
     10 namespace safe_browsing {
     11 
// Upper bound on the number of entries in a FeatureMap.  AddRealFeature()
// refuses to add anything once the map has reached this size.
const size_t FeatureMap::kMaxFeatureMapSize = 10000;
     13 
// The constructor and destructor have empty bodies: they do nothing beyond
// the implicit construction and destruction of the class members.
FeatureMap::FeatureMap() {}
FeatureMap::~FeatureMap() {}
     16 
     17 bool FeatureMap::AddBooleanFeature(const std::string& name) {
     18   return AddRealFeature(name, 1.0);
     19 }
     20 
     21 bool FeatureMap::AddRealFeature(const std::string& name, double value) {
     22   if (features_.size() >= kMaxFeatureMapSize) {
     23     // If we hit this case, it indicates that either kMaxFeatureMapSize is
     24     // too small, or there is a bug causing too many features to be added.
     25     // In this case, we'll log to a histogram so we can see that this is
     26     // happening, and make phishing classification fail silently.
     27     LOG(ERROR) << "Not adding feature: " << name << " because the "
     28                << "feature map is too large.";
     29     UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1);
     30     return false;
     31   }
     32   // We only expect features in the range [0.0, 1.0], so fail if the feature is
     33   // outside this range.
     34   if (value < 0.0 || value > 1.0) {
     35     LOG(ERROR) << "Not adding feature: " << name << " because the value "
     36                << value << " is not in the range [0.0, 1.0].";
     37     UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1);
     38     return false;
     39   }
     40 
     41   features_[name] = value;
     42   return true;
     43 }
     44 
// Removes every feature from the map by clearing the underlying container.
void FeatureMap::Clear() {
  features_.clear();
}
     48 
// Feature name strings, grouped by the part of the URL or page they describe.
// NOTE(review): the names ending in '=' look like prefixes to which a token
// (a TLD, a domain, a page term, ...) is appended by the code that extracts
// the feature -- confirm against the feature extractors, which are not part
// of this file.
namespace features {
// URL host features
const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress";
const char kUrlTldToken[] = "UrlTld=";
const char kUrlDomainToken[] = "UrlDomain=";
const char kUrlOtherHostToken[] = "UrlOtherHostToken=";

// URL host aggregate features
const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1";
const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3";

// URL path features
const char kUrlPathToken[] = "UrlPathToken=";

// DOM HTML form features
const char kPageHasForms[] = "PageHasForms";
const char kPageActionOtherDomainFreq[] = "PageActionOtherDomainFreq";
const char kPageHasTextInputs[] = "PageHasTextInputs";
const char kPageHasPswdInputs[] = "PageHasPswdInputs";
const char kPageHasRadioInputs[] = "PageHasRadioInputs";
const char kPageHasCheckInputs[] = "PageHasCheckInputs";

// DOM HTML link features
const char kPageExternalLinksFreq[] = "PageExternalLinksFreq";
const char kPageLinkDomain[] = "PageLinkDomain=";
const char kPageSecureLinksFreq[] = "PageSecureLinksFreq";

// DOM HTML script features
const char kPageNumScriptTagsGTOne[] = "PageNumScriptTags>1";
const char kPageNumScriptTagsGTSix[] = "PageNumScriptTags>6";

// Other DOM HTML features
const char kPageImgOtherDomainFreq[] = "PageImgOtherDomainFreq";

// Page term features
const char kPageTerm[] = "PageTerm=";

}  // namespace features
     87 }  // namespace safe_browsing
     88