Home | History | Annotate | Download | only in managed_mode
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/managed_mode/managed_mode_url_filter.h"
      6 
      7 #include "base/containers/hash_tables.h"
      8 #include "base/files/file_path.h"
      9 #include "base/json/json_file_value_serializer.h"
     10 #include "base/metrics/histogram.h"
     11 #include "base/sha1.h"
     12 #include "base/strings/string_number_conversions.h"
     13 #include "base/strings/string_util.h"
     14 #include "base/task_runner_util.h"
     15 #include "base/threading/sequenced_worker_pool.h"
     16 #include "content/public/browser/browser_thread.h"
     17 #include "extensions/common/matcher/url_matcher.h"
     18 #include "url/gurl.h"
     19 
     20 using content::BrowserThread;
     21 using extensions::URLMatcher;
     22 using extensions::URLMatcherConditionSet;
     23 
// Bundles the data structures that are consulted when matching a URL against
// the whitelisted sites.
struct ManagedModeURLFilter::Contents {
  // Matches URLs against the registered URL patterns.
  URLMatcher url_matcher;
  // Maps the ID of a condition set in |url_matcher| to the ID of the site the
  // pattern was added for.
  std::map<URLMatcherConditionSet::ID, int> matcher_site_map;
  // Maps an upper-cased hostname hash to the IDs of the sites it was added
  // for.
  base::hash_multimap<std::string, int> hash_site_map;
  // All known sites. Site IDs are used as indices into this vector.
  std::vector<ManagedModeSiteList::Site> sites;
};
     30 
     31 namespace {
     32 
     33 const char* kStandardSchemes[] = {
     34   "http",
     35   "https",
     36   "file",
     37   "ftp",
     38   "gopher",
     39   "ws",
     40   "wss"
     41 };
     42 
     43 
// This class encapsulates all the state that is required during construction of
// a new ManagedModeURLFilter::Contents. Patterns, hostname hashes and whole
// site lists are added first; Build() then hands over the finished contents.
class FilterBuilder {
 public:
  FilterBuilder();
  // DCHECKs that the contents have been handed over, i.e. that Build() has
  // been called.
  ~FilterBuilder();

  // Adds a single URL pattern for the site identified by |site_id|. Returns
  // false if the pattern is invalid.
  bool AddPattern(const std::string& pattern, int site_id);

  // Adds a single hostname SHA1 hash for the site identified by |site_id|.
  // The hash is stored in upper case.
  void AddHostnameHash(const std::string& hash, int site_id);

  // Adds all the sites in |site_list|, with URL patterns and hostname hashes.
  void AddSiteList(ManagedModeSiteList* site_list);

  // Finalizes construction of the ManagedModeURLFilter::Contents and returns
  // them. This method should be called before this object is destroyed.
  scoped_ptr<ManagedModeURLFilter::Contents> Build();

 private:
  // The contents under construction; null once Build() has passed them on.
  scoped_ptr<ManagedModeURLFilter::Contents> contents_;
  // Condition sets collected by AddPattern(); added to the matcher in Build().
  URLMatcherConditionSet::Vector all_conditions_;
  // The ID most recently assigned to a condition set.
  URLMatcherConditionSet::ID matcher_id_;
};
     69 
// Starts out with freshly allocated, empty contents. |matcher_id_| starts at 0
// and is pre-incremented by AddPattern(), so the first condition set gets ID 1.
FilterBuilder::FilterBuilder()
    : contents_(new ManagedModeURLFilter::Contents()),
      matcher_id_(0) {}
     73 
FilterBuilder::~FilterBuilder() {
  // Build() passes ownership of |contents_| to the caller, so a non-null
  // pointer here means the builder was destroyed without Build() being called.
  DCHECK(!contents_.get());
}
     77 
     78 bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) {
     79   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
     80 #if defined(ENABLE_CONFIGURATION_POLICY)
     81   std::string scheme;
     82   std::string host;
     83   uint16 port;
     84   std::string path;
     85   bool match_subdomains = true;
     86   if (!policy::URLBlacklist::FilterToComponents(
     87           pattern, &scheme, &host, &match_subdomains, &port, &path)) {
     88     LOG(ERROR) << "Invalid pattern " << pattern;
     89     return false;
     90   }
     91 
     92   scoped_refptr<extensions::URLMatcherConditionSet> condition_set =
     93       policy::URLBlacklist::CreateConditionSet(
     94           &contents_->url_matcher, ++matcher_id_,
     95           scheme, host, match_subdomains, port, path);
     96   all_conditions_.push_back(condition_set);
     97   contents_->matcher_site_map[matcher_id_] = site_id;
     98   return true;
     99 #else
    100   NOTREACHED();
    101   return false;
    102 #endif
    103 }
    104 
    105 void FilterBuilder::AddHostnameHash(const std::string& hash, int site_id) {
    106   contents_->hash_site_map.insert(std::make_pair(StringToUpperASCII(hash),
    107                                                  site_id));
    108 }
    109 
    110 void FilterBuilder::AddSiteList(ManagedModeSiteList* site_list) {
    111   std::vector<ManagedModeSiteList::Site> sites;
    112   site_list->GetSites(&sites);
    113   int site_id = contents_->sites.size();
    114   for (std::vector<ManagedModeSiteList::Site>::const_iterator it =
    115            sites.begin(); it != sites.end(); ++it) {
    116     const ManagedModeSiteList::Site& site = *it;
    117     contents_->sites.push_back(site);
    118 
    119     for (std::vector<std::string>::const_iterator pattern_it =
    120              site.patterns.begin();
    121          pattern_it != site.patterns.end(); ++pattern_it) {
    122       AddPattern(*pattern_it, site_id);
    123     }
    124 
    125     for (std::vector<std::string>::const_iterator hash_it =
    126              site.hostname_hashes.begin();
    127          hash_it != site.hostname_hashes.end(); ++hash_it) {
    128       AddHostnameHash(*hash_it, site_id);
    129     }
    130 
    131     site_id++;
    132   }
    133 }
    134 
    135 scoped_ptr<ManagedModeURLFilter::Contents> FilterBuilder::Build() {
    136   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
    137   contents_->url_matcher.AddConditionSets(all_conditions_);
    138   return contents_.Pass();
    139 }
    140 
    141 scoped_ptr<ManagedModeURLFilter::Contents> CreateWhitelistFromPatterns(
    142     const std::vector<std::string>& patterns) {
    143   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
    144 
    145   FilterBuilder builder;
    146   for (std::vector<std::string>::const_iterator it = patterns.begin();
    147        it != patterns.end(); ++it) {
    148     // TODO(bauerb): We should create a fake site for the whitelist.
    149     builder.AddPattern(*it, -1);
    150   }
    151 
    152   return builder.Build();
    153 }
    154 
    155 scoped_ptr<ManagedModeURLFilter::Contents> LoadWhitelistsOnBlockingPoolThread(
    156     ScopedVector<ManagedModeSiteList> site_lists) {
    157   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
    158 
    159   FilterBuilder builder;
    160   for (ScopedVector<ManagedModeSiteList>::iterator it = site_lists.begin();
    161        it != site_lists.end(); ++it) {
    162     builder.AddSiteList(*it);
    163   }
    164 
    165   return builder.Build();
    166 }
    167 
    168 }  // namespace
    169 
// Creates a filter that allows everything by default and has empty contents.
ManagedModeURLFilter::ManagedModeURLFilter()
    : default_behavior_(ALLOW),
      contents_(new Contents()) {
  // Detach from the current thread so we can be constructed on a different
  // thread than the one where we're used.
  DetachFromThread();
}
    177 
ManagedModeURLFilter::~ManagedModeURLFilter() {
  // The filter is expected to be destroyed on the thread it is used on.
  DCHECK(CalledOnValidThread());
}
    181 
    182 // static
    183 ManagedModeURLFilter::FilteringBehavior
    184 ManagedModeURLFilter::BehaviorFromInt(int behavior_value) {
    185   DCHECK_GE(behavior_value, ALLOW);
    186   DCHECK_LE(behavior_value, BLOCK);
    187   return static_cast<FilteringBehavior>(behavior_value);
    188 }
    189 
    190 // static
    191 GURL ManagedModeURLFilter::Normalize(const GURL& url) {
    192   GURL normalized_url = url;
    193   GURL::Replacements replacements;
    194   // Strip username, password, query, and ref.
    195   replacements.ClearUsername();
    196   replacements.ClearPassword();
    197   replacements.ClearQuery();
    198   replacements.ClearRef();
    199   return url.ReplaceComponents(replacements);
    200 }
    201 
    202 // static
    203 bool ManagedModeURLFilter::HasStandardScheme(const GURL& url) {
    204   for (size_t i = 0; i < arraysize(kStandardSchemes); ++i) {
    205       if (url.scheme() == kStandardSchemes[i])
    206         return true;
    207     }
    208   return false;
    209 }
    210 
    211 ManagedModeURLFilter::FilteringBehavior
    212 ManagedModeURLFilter::GetFilteringBehaviorForURL(const GURL& url) const {
    213   DCHECK(CalledOnValidThread());
    214 
    215   // URLs with a non-standard scheme (e.g. chrome://) are always allowed.
    216   if (!HasStandardScheme(url))
    217     return ALLOW;
    218 
    219   // Check manual overrides for the exact URL.
    220   std::map<GURL, bool>::const_iterator url_it = url_map_.find(Normalize(url));
    221   if (url_it != url_map_.end())
    222     return url_it->second ? ALLOW : BLOCK;
    223 
    224   // Check manual overrides for the hostname.
    225   std::map<std::string, bool>::const_iterator host_it =
    226       host_map_.find(url.host());
    227   if (host_it != host_map_.end())
    228     return host_it->second ? ALLOW : BLOCK;
    229 
    230   // If the default behavior is to allow, we don't need to check anything else.
    231   if (default_behavior_ == ALLOW)
    232     return ALLOW;
    233 
    234   // Check the list of URL patterns.
    235   std::set<URLMatcherConditionSet::ID> matching_ids =
    236       contents_->url_matcher.MatchURL(url);
    237   if (!matching_ids.empty())
    238     return ALLOW;
    239 
    240   // Check the list of hostname hashes.
    241   std::string hash = base::SHA1HashString(url.host());
    242   std::string hash_hex = base::HexEncode(hash.data(), hash.length());
    243   if (contents_->hash_site_map.count(hash_hex))
    244     return ALLOW;
    245 
    246   // Fall back to the default behavior.
    247   return default_behavior_;
    248 }
    249 
    250 void ManagedModeURLFilter::GetSites(
    251     const GURL& url,
    252     std::vector<ManagedModeSiteList::Site*>* sites) const {
    253   std::set<URLMatcherConditionSet::ID> matching_ids =
    254       contents_->url_matcher.MatchURL(url);
    255   for (std::set<URLMatcherConditionSet::ID>::const_iterator it =
    256            matching_ids.begin(); it != matching_ids.end(); ++it) {
    257     std::map<URLMatcherConditionSet::ID, int>::const_iterator entry =
    258         contents_->matcher_site_map.find(*it);
    259     if (entry == contents_->matcher_site_map.end()) {
    260       NOTREACHED();
    261       continue;
    262     }
    263     sites->push_back(&contents_->sites[entry->second]);
    264   }
    265 
    266   typedef base::hash_map<std::string, int>::const_iterator
    267       hash_site_map_iterator;
    268   std::pair<hash_site_map_iterator, hash_site_map_iterator> bounds =
    269       contents_->hash_site_map.equal_range(url.host());
    270   for (hash_site_map_iterator hash_it = bounds.first;
    271        hash_it != bounds.second; hash_it++) {
    272     sites->push_back(&contents_->sites[hash_it->second]);
    273   }
    274 }
    275 
// Sets the behavior that GetFilteringBehaviorForURL() falls back to for URLs
// that are not covered by a manual override or a whitelist entry.
void ManagedModeURLFilter::SetDefaultFilteringBehavior(
    FilteringBehavior behavior) {
  DCHECK(CalledOnValidThread());
  default_behavior_ = behavior;
}
    281 
    282 void ManagedModeURLFilter::LoadWhitelists(
    283     ScopedVector<ManagedModeSiteList> site_lists) {
    284   DCHECK(CalledOnValidThread());
    285 
    286   base::PostTaskAndReplyWithResult(
    287       BrowserThread::GetBlockingPool(),
    288       FROM_HERE,
    289       base::Bind(&LoadWhitelistsOnBlockingPoolThread,
    290                  base::Passed(&site_lists)),
    291       base::Bind(&ManagedModeURLFilter::SetContents, this));
    292 }
    293 
    294 void ManagedModeURLFilter::SetFromPatterns(
    295     const std::vector<std::string>& patterns) {
    296   DCHECK(CalledOnValidThread());
    297 
    298   base::PostTaskAndReplyWithResult(
    299       BrowserThread::GetBlockingPool(),
    300       FROM_HERE,
    301       base::Bind(&CreateWhitelistFromPatterns, patterns),
    302       base::Bind(&ManagedModeURLFilter::SetContents, this));
    303 }
    304 
    305 void ManagedModeURLFilter::SetManualHosts(
    306     const std::map<std::string, bool>* host_map) {
    307   DCHECK(CalledOnValidThread());
    308   host_map_ = *host_map;
    309   UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualHostsEntries",
    310                               host_map->size(), 1, 1000, 50);
    311 }
    312 
    313 void ManagedModeURLFilter::SetManualURLs(
    314     const std::map<GURL, bool>* url_map) {
    315   DCHECK(CalledOnValidThread());
    316   url_map_ = *url_map;
    317   UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualURLsEntries",
    318                               url_map->size(), 1, 1000, 50);
    319 }
    320 
// Registers |observer|; observers are notified through OnSiteListUpdated()
// whenever SetContents() installs new contents.
void ManagedModeURLFilter::AddObserver(Observer* observer) {
  observers_.AddObserver(observer);
}
    324 
// Unregisters |observer| so that it no longer receives notifications.
void ManagedModeURLFilter::RemoveObserver(Observer* observer) {
  observers_.RemoveObserver(observer);
}
    328 
// Installs newly built |contents|, replacing the previous ones, and tells all
// observers that the site list has changed. Used as the reply of the
// asynchronous build tasks posted by LoadWhitelists() and SetFromPatterns().
void ManagedModeURLFilter::SetContents(scoped_ptr<Contents> contents) {
  DCHECK(CalledOnValidThread());
  // The previous contents are released by this assignment, before the
  // observers run.
  contents_ = contents.Pass();
  FOR_EACH_OBSERVER(Observer, observers_, OnSiteListUpdated());
}
    334