Home | History | Annotate | Download | only in supervised_user
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/supervised_user/supervised_user_url_filter.h"
      6 
      7 #include "base/containers/hash_tables.h"
      8 #include "base/files/file_path.h"
      9 #include "base/json/json_file_value_serializer.h"
     10 #include "base/metrics/histogram.h"
     11 #include "base/sha1.h"
     12 #include "base/strings/string_number_conversions.h"
     13 #include "base/strings/string_util.h"
     14 #include "base/task_runner_util.h"
     15 #include "base/threading/sequenced_worker_pool.h"
     16 #include "components/policy/core/browser/url_blacklist_manager.h"
     17 #include "components/url_fixer/url_fixer.h"
     18 #include "components/url_matcher/url_matcher.h"
     19 #include "content/public/browser/browser_thread.h"
     20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
     21 #include "url/gurl.h"
     22 
     23 using content::BrowserThread;
     24 using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES;
     25 using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES;
     26 using net::registry_controlled_domains::GetRegistryLength;
     27 using policy::URLBlacklist;
     28 using url_matcher::URLMatcher;
     29 using url_matcher::URLMatcherConditionSet;
     30 
     31 struct SupervisedUserURLFilter::Contents {
     32   URLMatcher url_matcher;
     33   std::map<URLMatcherConditionSet::ID, int> matcher_site_map;
     34   base::hash_multimap<std::string, int> hash_site_map;
     35   std::vector<SupervisedUserSiteList::Site> sites;
     36 };
     37 
     38 namespace {
     39 
     40 // URL schemes not in this list (e.g., file:// and chrome://) will always be
     41 // allowed.
     42 const char* kFilteredSchemes[] = {
     43   "http",
     44   "https",
     45   "ftp",
     46   "gopher",
     47   "ws",
     48   "wss"
     49 };
     50 
     51 
     52 // This class encapsulates all the state that is required during construction of
     53 // a new SupervisedUserURLFilter::Contents.
     54 class FilterBuilder {
     55  public:
     56   FilterBuilder();
     57   ~FilterBuilder();
     58 
     59   // Adds a single URL pattern for the site identified by |site_id|.
     60   bool AddPattern(const std::string& pattern, int site_id);
     61 
     62   // Adds a single hostname SHA1 hash for the site identified by |site_id|.
     63   void AddHostnameHash(const std::string& hash, int site_id);
     64 
     65   // Adds all the sites in |site_list|, with URL patterns and hostname hashes.
     66   void AddSiteList(SupervisedUserSiteList* site_list);
     67 
     68   // Finalizes construction of the SupervisedUserURLFilter::Contents and returns
     69   // them. This method should be called before this object is destroyed.
     70   scoped_ptr<SupervisedUserURLFilter::Contents> Build();
     71 
     72  private:
     73   scoped_ptr<SupervisedUserURLFilter::Contents> contents_;
     74   URLMatcherConditionSet::Vector all_conditions_;
     75   URLMatcherConditionSet::ID matcher_id_;
     76 };
     77 
     78 FilterBuilder::FilterBuilder()
     79     : contents_(new SupervisedUserURLFilter::Contents()),
     80       matcher_id_(0) {}
     81 
     82 FilterBuilder::~FilterBuilder() {
     83   DCHECK(!contents_.get());
     84 }
     85 
     86 bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) {
     87   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
     88   std::string scheme;
     89   std::string host;
     90   uint16 port;
     91   std::string path;
     92   std::string query;
     93   bool match_subdomains = true;
     94   URLBlacklist::SegmentURLCallback callback =
     95       static_cast<URLBlacklist::SegmentURLCallback>(url_fixer::SegmentURL);
     96   if (!URLBlacklist::FilterToComponents(
     97           callback, pattern,
     98           &scheme, &host, &match_subdomains, &port, &path, &query)) {
     99     LOG(ERROR) << "Invalid pattern " << pattern;
    100     return false;
    101   }
    102 
    103   scoped_refptr<URLMatcherConditionSet> condition_set =
    104       URLBlacklist::CreateConditionSet(
    105           &contents_->url_matcher, ++matcher_id_,
    106           scheme, host, match_subdomains, port, path, query, true);
    107   all_conditions_.push_back(condition_set);
    108   contents_->matcher_site_map[matcher_id_] = site_id;
    109   return true;
    110 }
    111 
    112 void FilterBuilder::AddHostnameHash(const std::string& hash, int site_id) {
    113   contents_->hash_site_map.insert(std::make_pair(StringToUpperASCII(hash),
    114                                                  site_id));
    115 }
    116 
    117 void FilterBuilder::AddSiteList(SupervisedUserSiteList* site_list) {
    118   std::vector<SupervisedUserSiteList::Site> sites;
    119   site_list->GetSites(&sites);
    120   int site_id = contents_->sites.size();
    121   for (std::vector<SupervisedUserSiteList::Site>::const_iterator it =
    122            sites.begin(); it != sites.end(); ++it) {
    123     const SupervisedUserSiteList::Site& site = *it;
    124     contents_->sites.push_back(site);
    125 
    126     for (std::vector<std::string>::const_iterator pattern_it =
    127              site.patterns.begin();
    128          pattern_it != site.patterns.end(); ++pattern_it) {
    129       AddPattern(*pattern_it, site_id);
    130     }
    131 
    132     for (std::vector<std::string>::const_iterator hash_it =
    133              site.hostname_hashes.begin();
    134          hash_it != site.hostname_hashes.end(); ++hash_it) {
    135       AddHostnameHash(*hash_it, site_id);
    136     }
    137 
    138     site_id++;
    139   }
    140 }
    141 
    142 scoped_ptr<SupervisedUserURLFilter::Contents> FilterBuilder::Build() {
    143   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
    144   contents_->url_matcher.AddConditionSets(all_conditions_);
    145   return contents_.Pass();
    146 }
    147 
    148 scoped_ptr<SupervisedUserURLFilter::Contents> CreateWhitelistFromPatterns(
    149     const std::vector<std::string>& patterns) {
    150   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
    151 
    152   FilterBuilder builder;
    153   for (std::vector<std::string>::const_iterator it = patterns.begin();
    154        it != patterns.end(); ++it) {
    155     // TODO(bauerb): We should create a fake site for the whitelist.
    156     builder.AddPattern(*it, -1);
    157   }
    158 
    159   return builder.Build();
    160 }
    161 
    162 scoped_ptr<SupervisedUserURLFilter::Contents>
    163 LoadWhitelistsOnBlockingPoolThread(
    164     ScopedVector<SupervisedUserSiteList> site_lists) {
    165   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
    166 
    167   FilterBuilder builder;
    168   for (ScopedVector<SupervisedUserSiteList>::iterator it = site_lists.begin();
    169        it != site_lists.end(); ++it) {
    170     builder.AddSiteList(*it);
    171   }
    172 
    173   return builder.Build();
    174 }
    175 
    176 }  // namespace
    177 
    178 SupervisedUserURLFilter::SupervisedUserURLFilter()
    179     : default_behavior_(ALLOW),
    180       contents_(new Contents()) {
    181   // Detach from the current thread so we can be constructed on a different
    182   // thread than the one where we're used.
    183   DetachFromThread();
    184 }
    185 
    186 SupervisedUserURLFilter::~SupervisedUserURLFilter() {
    187   DCHECK(CalledOnValidThread());
    188 }
    189 
    190 // static
    191 SupervisedUserURLFilter::FilteringBehavior
    192 SupervisedUserURLFilter::BehaviorFromInt(int behavior_value) {
    193   DCHECK_GE(behavior_value, ALLOW);
    194   DCHECK_LE(behavior_value, BLOCK);
    195   return static_cast<FilteringBehavior>(behavior_value);
    196 }
    197 
    198 // static
    199 GURL SupervisedUserURLFilter::Normalize(const GURL& url) {
    200   GURL normalized_url = url;
    201   GURL::Replacements replacements;
    202   // Strip username, password, query, and ref.
    203   replacements.ClearUsername();
    204   replacements.ClearPassword();
    205   replacements.ClearQuery();
    206   replacements.ClearRef();
    207   return url.ReplaceComponents(replacements);
    208 }
    209 
    210 // static
    211 bool SupervisedUserURLFilter::HasFilteredScheme(const GURL& url) {
    212   for (size_t i = 0; i < arraysize(kFilteredSchemes); ++i) {
    213       if (url.scheme() == kFilteredSchemes[i])
    214         return true;
    215     }
    216   return false;
    217 }
    218 
    219 std::string GetHostnameHash(const GURL& url) {
    220   std::string hash = base::SHA1HashString(url.host());
    221   return base::HexEncode(hash.data(), hash.length());
    222 }
    223 
    224 // static
    225 bool SupervisedUserURLFilter::HostMatchesPattern(const std::string& host,
    226                                                  const std::string& pattern) {
    227   std::string trimmed_pattern = pattern;
    228   std::string trimmed_host = host;
    229   if (EndsWith(pattern, ".*", true)) {
    230     size_t registry_length = GetRegistryLength(
    231         trimmed_host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
    232     // A host without a known registry part does not match.
    233     if (registry_length == 0)
    234       return false;
    235 
    236     trimmed_pattern.erase(trimmed_pattern.length() - 2);
    237     trimmed_host.erase(trimmed_host.length() - (registry_length + 1));
    238   }
    239 
    240   if (StartsWithASCII(trimmed_pattern, "*.", true)) {
    241     trimmed_pattern.erase(0, 2);
    242 
    243     // The remaining pattern should be non-empty, and it should not contain
    244     // further stars. Also the trimmed host needs to end with the trimmed
    245     // pattern.
    246     if (trimmed_pattern.empty() ||
    247         trimmed_pattern.find('*') != std::string::npos ||
    248         !EndsWith(trimmed_host, trimmed_pattern, true)) {
    249       return false;
    250     }
    251 
    252     // The trimmed host needs to have a dot separating the subdomain from the
    253     // matched pattern piece, unless there is no subdomain.
    254     int pos = trimmed_host.length() - trimmed_pattern.length();
    255     DCHECK_GE(pos, 0);
    256     return (pos == 0) || (trimmed_host[pos - 1] == '.');
    257   }
    258 
    259   return trimmed_host == trimmed_pattern;
    260 }
    261 
    262 SupervisedUserURLFilter::FilteringBehavior
    263 SupervisedUserURLFilter::GetFilteringBehaviorForURL(const GURL& url) const {
    264   DCHECK(CalledOnValidThread());
    265 
    266   // URLs with a non-standard scheme (e.g. chrome://) are always allowed.
    267   if (!HasFilteredScheme(url))
    268     return ALLOW;
    269 
    270   // Check manual overrides for the exact URL.
    271   std::map<GURL, bool>::const_iterator url_it = url_map_.find(Normalize(url));
    272   if (url_it != url_map_.end())
    273     return url_it->second ? ALLOW : BLOCK;
    274 
    275   // Check manual overrides for the hostname.
    276   std::string host = url.host();
    277   std::map<std::string, bool>::const_iterator host_it = host_map_.find(host);
    278   if (host_it != host_map_.end())
    279     return host_it->second ? ALLOW : BLOCK;
    280 
    281   // Look for patterns matching the hostname, with a value that is different
    282   // from the default (a value of true in the map meaning allowed).
    283   for (std::map<std::string, bool>::const_iterator host_it =
    284       host_map_.begin(); host_it != host_map_.end(); ++host_it) {
    285     if ((host_it->second == (default_behavior_ == BLOCK)) &&
    286         HostMatchesPattern(host, host_it->first)) {
    287       return host_it->second ? ALLOW : BLOCK;
    288     }
    289   }
    290 
    291   // If the default behavior is to allow, we don't need to check anything else.
    292   if (default_behavior_ == ALLOW)
    293     return ALLOW;
    294 
    295   // Check the list of URL patterns.
    296   std::set<URLMatcherConditionSet::ID> matching_ids =
    297       contents_->url_matcher.MatchURL(url);
    298   if (!matching_ids.empty())
    299     return ALLOW;
    300 
    301   // Check the list of hostname hashes.
    302   if (contents_->hash_site_map.count(GetHostnameHash(url)))
    303     return ALLOW;
    304 
    305   // Fall back to the default behavior.
    306   return default_behavior_;
    307 }
    308 
    309 void SupervisedUserURLFilter::GetSites(
    310     const GURL& url,
    311     std::vector<SupervisedUserSiteList::Site*>* sites) const {
    312   std::set<URLMatcherConditionSet::ID> matching_ids =
    313       contents_->url_matcher.MatchURL(url);
    314   for (std::set<URLMatcherConditionSet::ID>::const_iterator it =
    315            matching_ids.begin(); it != matching_ids.end(); ++it) {
    316     std::map<URLMatcherConditionSet::ID, int>::const_iterator entry =
    317         contents_->matcher_site_map.find(*it);
    318     if (entry == contents_->matcher_site_map.end()) {
    319       NOTREACHED();
    320       continue;
    321     }
    322     sites->push_back(&contents_->sites[entry->second]);
    323   }
    324 
    325   typedef base::hash_multimap<std::string, int>::const_iterator
    326       hash_site_map_iterator;
    327   std::pair<hash_site_map_iterator, hash_site_map_iterator> bounds =
    328       contents_->hash_site_map.equal_range(GetHostnameHash(url));
    329   for (hash_site_map_iterator hash_it = bounds.first;
    330        hash_it != bounds.second; hash_it++) {
    331     sites->push_back(&contents_->sites[hash_it->second]);
    332   }
    333 }
    334 
    335 void SupervisedUserURLFilter::SetDefaultFilteringBehavior(
    336     FilteringBehavior behavior) {
    337   DCHECK(CalledOnValidThread());
    338   default_behavior_ = behavior;
    339 }
    340 
    341 void SupervisedUserURLFilter::LoadWhitelists(
    342     ScopedVector<SupervisedUserSiteList> site_lists) {
    343   DCHECK(CalledOnValidThread());
    344 
    345   base::PostTaskAndReplyWithResult(
    346       BrowserThread::GetBlockingPool(),
    347       FROM_HERE,
    348       base::Bind(&LoadWhitelistsOnBlockingPoolThread,
    349                  base::Passed(&site_lists)),
    350       base::Bind(&SupervisedUserURLFilter::SetContents, this));
    351 }
    352 
    353 void SupervisedUserURLFilter::SetFromPatterns(
    354     const std::vector<std::string>& patterns) {
    355   DCHECK(CalledOnValidThread());
    356 
    357   base::PostTaskAndReplyWithResult(
    358       BrowserThread::GetBlockingPool(),
    359       FROM_HERE,
    360       base::Bind(&CreateWhitelistFromPatterns, patterns),
    361       base::Bind(&SupervisedUserURLFilter::SetContents, this));
    362 }
    363 
    364 void SupervisedUserURLFilter::SetManualHosts(
    365     const std::map<std::string, bool>* host_map) {
    366   DCHECK(CalledOnValidThread());
    367   host_map_ = *host_map;
    368   UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualHostsEntries",
    369                               host_map->size(), 1, 1000, 50);
    370 }
    371 
    372 void SupervisedUserURLFilter::SetManualURLs(
    373     const std::map<GURL, bool>* url_map) {
    374   DCHECK(CalledOnValidThread());
    375   url_map_ = *url_map;
    376   UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualURLsEntries",
    377                               url_map->size(), 1, 1000, 50);
    378 }
    379 
    380 void SupervisedUserURLFilter::AddObserver(Observer* observer) {
    381   observers_.AddObserver(observer);
    382 }
    383 
    384 void SupervisedUserURLFilter::RemoveObserver(Observer* observer) {
    385   observers_.RemoveObserver(observer);
    386 }
    387 
    388 void SupervisedUserURLFilter::SetContents(scoped_ptr<Contents> contents) {
    389   DCHECK(CalledOnValidThread());
    390   contents_ = contents.Pass();
    391   FOR_EACH_OBSERVER(Observer, observers_, OnSiteListUpdated());
    392 }
    393