Home | History | Annotate | Download | only in supervised_user
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/supervised_user/supervised_user_url_filter.h"
      6 
      7 #include "base/containers/hash_tables.h"
      8 #include "base/files/file_path.h"
      9 #include "base/json/json_file_value_serializer.h"
     10 #include "base/metrics/histogram.h"
     11 #include "base/sha1.h"
     12 #include "base/strings/string_number_conversions.h"
     13 #include "base/strings/string_util.h"
     14 #include "base/task_runner_util.h"
     15 #include "base/threading/sequenced_worker_pool.h"
     16 #include "chrome/browser/supervised_user/experimental/supervised_user_blacklist.h"
     17 #include "components/policy/core/browser/url_blacklist_manager.h"
     18 #include "components/url_fixer/url_fixer.h"
     19 #include "components/url_matcher/url_matcher.h"
     20 #include "content/public/browser/browser_thread.h"
     21 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
     22 #include "url/gurl.h"
     23 
     24 using content::BrowserThread;
     25 using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES;
     26 using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES;
     27 using net::registry_controlled_domains::GetRegistryLength;
     28 using policy::URLBlacklist;
     29 using url_matcher::URLMatcher;
     30 using url_matcher::URLMatcherConditionSet;
     31 
     32 struct SupervisedUserURLFilter::Contents {
     33   URLMatcher url_matcher;
     34   std::map<URLMatcherConditionSet::ID, int> matcher_site_map;
     35   base::hash_multimap<std::string, int> hash_site_map;
     36   std::vector<SupervisedUserSiteList::Site> sites;
     37 };
     38 
     39 namespace {
     40 
     41 // URL schemes not in this list (e.g., file:// and chrome://) will always be
     42 // allowed.
     43 const char* kFilteredSchemes[] = {
     44   "http",
     45   "https",
     46   "ftp",
     47   "gopher",
     48   "ws",
     49   "wss"
     50 };
     51 
     52 
     53 // This class encapsulates all the state that is required during construction of
     54 // a new SupervisedUserURLFilter::Contents.
     55 class FilterBuilder {
     56  public:
     57   FilterBuilder();
     58   ~FilterBuilder();
     59 
     60   // Adds a single URL pattern for the site identified by |site_id|.
     61   bool AddPattern(const std::string& pattern, int site_id);
     62 
     63   // Adds a single hostname SHA1 hash for the site identified by |site_id|.
     64   void AddHostnameHash(const std::string& hash, int site_id);
     65 
     66   // Adds all the sites in |site_list|, with URL patterns and hostname hashes.
     67   void AddSiteList(SupervisedUserSiteList* site_list);
     68 
     69   // Finalizes construction of the SupervisedUserURLFilter::Contents and returns
     70   // them. This method should be called before this object is destroyed.
     71   scoped_ptr<SupervisedUserURLFilter::Contents> Build();
     72 
     73  private:
     74   scoped_ptr<SupervisedUserURLFilter::Contents> contents_;
     75   URLMatcherConditionSet::Vector all_conditions_;
     76   URLMatcherConditionSet::ID matcher_id_;
     77 };
     78 
     79 FilterBuilder::FilterBuilder()
     80     : contents_(new SupervisedUserURLFilter::Contents()),
     81       matcher_id_(0) {}
     82 
     83 FilterBuilder::~FilterBuilder() {
     84   DCHECK(!contents_.get());
     85 }
     86 
     87 bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) {
     88   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
     89   std::string scheme;
     90   std::string host;
     91   uint16 port;
     92   std::string path;
     93   std::string query;
     94   bool match_subdomains = true;
     95   URLBlacklist::SegmentURLCallback callback =
     96       static_cast<URLBlacklist::SegmentURLCallback>(url_fixer::SegmentURL);
     97   if (!URLBlacklist::FilterToComponents(
     98           callback, pattern,
     99           &scheme, &host, &match_subdomains, &port, &path, &query)) {
    100     LOG(ERROR) << "Invalid pattern " << pattern;
    101     return false;
    102   }
    103 
    104   scoped_refptr<URLMatcherConditionSet> condition_set =
    105       URLBlacklist::CreateConditionSet(
    106           &contents_->url_matcher, ++matcher_id_,
    107           scheme, host, match_subdomains, port, path, query, true);
    108   all_conditions_.push_back(condition_set);
    109   contents_->matcher_site_map[matcher_id_] = site_id;
    110   return true;
    111 }
    112 
    113 void FilterBuilder::AddHostnameHash(const std::string& hash, int site_id) {
    114   contents_->hash_site_map.insert(std::make_pair(StringToUpperASCII(hash),
    115                                                  site_id));
    116 }
    117 
    118 void FilterBuilder::AddSiteList(SupervisedUserSiteList* site_list) {
    119   std::vector<SupervisedUserSiteList::Site> sites;
    120   site_list->GetSites(&sites);
    121   int site_id = contents_->sites.size();
    122   for (std::vector<SupervisedUserSiteList::Site>::const_iterator it =
    123            sites.begin(); it != sites.end(); ++it) {
    124     const SupervisedUserSiteList::Site& site = *it;
    125     contents_->sites.push_back(site);
    126 
    127     for (std::vector<std::string>::const_iterator pattern_it =
    128              site.patterns.begin();
    129          pattern_it != site.patterns.end(); ++pattern_it) {
    130       AddPattern(*pattern_it, site_id);
    131     }
    132 
    133     for (std::vector<std::string>::const_iterator hash_it =
    134              site.hostname_hashes.begin();
    135          hash_it != site.hostname_hashes.end(); ++hash_it) {
    136       AddHostnameHash(*hash_it, site_id);
    137     }
    138 
    139     site_id++;
    140   }
    141 }
    142 
    143 scoped_ptr<SupervisedUserURLFilter::Contents> FilterBuilder::Build() {
    144   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
    145   contents_->url_matcher.AddConditionSets(all_conditions_);
    146   return contents_.Pass();
    147 }
    148 
    149 scoped_ptr<SupervisedUserURLFilter::Contents> CreateWhitelistFromPatterns(
    150     const std::vector<std::string>& patterns) {
    151   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
    152 
    153   FilterBuilder builder;
    154   for (std::vector<std::string>::const_iterator it = patterns.begin();
    155        it != patterns.end(); ++it) {
    156     // TODO(bauerb): We should create a fake site for the whitelist.
    157     builder.AddPattern(*it, -1);
    158   }
    159 
    160   return builder.Build();
    161 }
    162 
    163 scoped_ptr<SupervisedUserURLFilter::Contents>
    164 LoadWhitelistsOnBlockingPoolThread(
    165     ScopedVector<SupervisedUserSiteList> site_lists) {
    166   DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread());
    167 
    168   FilterBuilder builder;
    169   for (ScopedVector<SupervisedUserSiteList>::iterator it = site_lists.begin();
    170        it != site_lists.end(); ++it) {
    171     builder.AddSiteList(*it);
    172   }
    173 
    174   return builder.Build();
    175 }
    176 
    177 }  // namespace
    178 
    179 SupervisedUserURLFilter::SupervisedUserURLFilter()
    180     : default_behavior_(ALLOW),
    181       contents_(new Contents()),
    182       blacklist_(NULL) {
    183   // Detach from the current thread so we can be constructed on a different
    184   // thread than the one where we're used.
    185   DetachFromThread();
    186 }
    187 
    188 SupervisedUserURLFilter::~SupervisedUserURLFilter() {
    189   DCHECK(CalledOnValidThread());
    190 }
    191 
    192 // static
    193 SupervisedUserURLFilter::FilteringBehavior
    194 SupervisedUserURLFilter::BehaviorFromInt(int behavior_value) {
    195   DCHECK_GE(behavior_value, ALLOW);
    196   DCHECK_LE(behavior_value, BLOCK);
    197   return static_cast<FilteringBehavior>(behavior_value);
    198 }
    199 
    200 // static
    201 GURL SupervisedUserURLFilter::Normalize(const GURL& url) {
    202   GURL normalized_url = url;
    203   GURL::Replacements replacements;
    204   // Strip username, password, query, and ref.
    205   replacements.ClearUsername();
    206   replacements.ClearPassword();
    207   replacements.ClearQuery();
    208   replacements.ClearRef();
    209   return url.ReplaceComponents(replacements);
    210 }
    211 
    212 // static
    213 bool SupervisedUserURLFilter::HasFilteredScheme(const GURL& url) {
    214   for (size_t i = 0; i < arraysize(kFilteredSchemes); ++i) {
    215     if (url.scheme() == kFilteredSchemes[i])
    216       return true;
    217   }
    218   return false;
    219 }
    220 
    221 std::string GetHostnameHash(const GURL& url) {
    222   std::string hash = base::SHA1HashString(url.host());
    223   return base::HexEncode(hash.data(), hash.length());
    224 }
    225 
    226 // static
    227 bool SupervisedUserURLFilter::HostMatchesPattern(const std::string& host,
    228                                                  const std::string& pattern) {
    229   std::string trimmed_pattern = pattern;
    230   std::string trimmed_host = host;
    231   if (EndsWith(pattern, ".*", true)) {
    232     size_t registry_length = GetRegistryLength(
    233         trimmed_host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
    234     // A host without a known registry part does not match.
    235     if (registry_length == 0)
    236       return false;
    237 
    238     trimmed_pattern.erase(trimmed_pattern.length() - 2);
    239     trimmed_host.erase(trimmed_host.length() - (registry_length + 1));
    240   }
    241 
    242   if (StartsWithASCII(trimmed_pattern, "*.", true)) {
    243     trimmed_pattern.erase(0, 2);
    244 
    245     // The remaining pattern should be non-empty, and it should not contain
    246     // further stars. Also the trimmed host needs to end with the trimmed
    247     // pattern.
    248     if (trimmed_pattern.empty() ||
    249         trimmed_pattern.find('*') != std::string::npos ||
    250         !EndsWith(trimmed_host, trimmed_pattern, true)) {
    251       return false;
    252     }
    253 
    254     // The trimmed host needs to have a dot separating the subdomain from the
    255     // matched pattern piece, unless there is no subdomain.
    256     int pos = trimmed_host.length() - trimmed_pattern.length();
    257     DCHECK_GE(pos, 0);
    258     return (pos == 0) || (trimmed_host[pos - 1] == '.');
    259   }
    260 
    261   return trimmed_host == trimmed_pattern;
    262 }
    263 
    264 SupervisedUserURLFilter::FilteringBehavior
    265 SupervisedUserURLFilter::GetFilteringBehaviorForURL(const GURL& url) const {
    266   DCHECK(CalledOnValidThread());
    267 
    268   // URLs with a non-standard scheme (e.g. chrome://) are always allowed.
    269   if (!HasFilteredScheme(url))
    270     return ALLOW;
    271 
    272   // Check manual overrides for the exact URL.
    273   std::map<GURL, bool>::const_iterator url_it = url_map_.find(Normalize(url));
    274   if (url_it != url_map_.end())
    275     return url_it->second ? ALLOW : BLOCK;
    276 
    277   // Check manual overrides for the hostname.
    278   std::string host = url.host();
    279   std::map<std::string, bool>::const_iterator host_it = host_map_.find(host);
    280   if (host_it != host_map_.end())
    281     return host_it->second ? ALLOW : BLOCK;
    282 
    283   // Look for patterns matching the hostname, with a value that is different
    284   // from the default (a value of true in the map meaning allowed).
    285   for (std::map<std::string, bool>::const_iterator host_it =
    286       host_map_.begin(); host_it != host_map_.end(); ++host_it) {
    287     if ((host_it->second == (default_behavior_ == BLOCK)) &&
    288         HostMatchesPattern(host, host_it->first)) {
    289       return host_it->second ? ALLOW : BLOCK;
    290     }
    291   }
    292 
    293   // If there's no blacklist and the default behavior is to allow, we don't need
    294   // to check anything else.
    295   if (!blacklist_ && default_behavior_ == ALLOW)
    296     return ALLOW;
    297 
    298   // Check the list of URL patterns.
    299   std::set<URLMatcherConditionSet::ID> matching_ids =
    300       contents_->url_matcher.MatchURL(url);
    301   if (!matching_ids.empty())
    302     return ALLOW;
    303 
    304   // Check the list of hostname hashes.
    305   if (contents_->hash_site_map.count(GetHostnameHash(url)))
    306     return ALLOW;
    307 
    308   // Check the static blacklist.
    309   if (blacklist_ && blacklist_->HasURL(url))
    310     return BLOCK;
    311 
    312   // Fall back to the default behavior.
    313   return default_behavior_;
    314 }
    315 
    316 void SupervisedUserURLFilter::GetSites(
    317     const GURL& url,
    318     std::vector<SupervisedUserSiteList::Site*>* sites) const {
    319   std::set<URLMatcherConditionSet::ID> matching_ids =
    320       contents_->url_matcher.MatchURL(url);
    321   for (std::set<URLMatcherConditionSet::ID>::const_iterator it =
    322            matching_ids.begin(); it != matching_ids.end(); ++it) {
    323     std::map<URLMatcherConditionSet::ID, int>::const_iterator entry =
    324         contents_->matcher_site_map.find(*it);
    325     if (entry == contents_->matcher_site_map.end()) {
    326       NOTREACHED();
    327       continue;
    328     }
    329     sites->push_back(&contents_->sites[entry->second]);
    330   }
    331 
    332   typedef base::hash_multimap<std::string, int>::const_iterator
    333       hash_site_map_iterator;
    334   std::pair<hash_site_map_iterator, hash_site_map_iterator> bounds =
    335       contents_->hash_site_map.equal_range(GetHostnameHash(url));
    336   for (hash_site_map_iterator hash_it = bounds.first;
    337        hash_it != bounds.second; hash_it++) {
    338     sites->push_back(&contents_->sites[hash_it->second]);
    339   }
    340 }
    341 
    342 void SupervisedUserURLFilter::SetDefaultFilteringBehavior(
    343     FilteringBehavior behavior) {
    344   DCHECK(CalledOnValidThread());
    345   default_behavior_ = behavior;
    346 }
    347 
    348 void SupervisedUserURLFilter::LoadWhitelists(
    349     ScopedVector<SupervisedUserSiteList> site_lists) {
    350   DCHECK(CalledOnValidThread());
    351 
    352   base::PostTaskAndReplyWithResult(
    353       BrowserThread::GetBlockingPool(),
    354       FROM_HERE,
    355       base::Bind(&LoadWhitelistsOnBlockingPoolThread,
    356                  base::Passed(&site_lists)),
    357       base::Bind(&SupervisedUserURLFilter::SetContents, this));
    358 }
    359 
    360 void SupervisedUserURLFilter::SetBlacklist(SupervisedUserBlacklist* blacklist) {
    361   blacklist_ = blacklist;
    362 }
    363 
    364 void SupervisedUserURLFilter::SetFromPatterns(
    365     const std::vector<std::string>& patterns) {
    366   DCHECK(CalledOnValidThread());
    367 
    368   base::PostTaskAndReplyWithResult(
    369       BrowserThread::GetBlockingPool(),
    370       FROM_HERE,
    371       base::Bind(&CreateWhitelistFromPatterns, patterns),
    372       base::Bind(&SupervisedUserURLFilter::SetContents, this));
    373 }
    374 
    375 void SupervisedUserURLFilter::SetManualHosts(
    376     const std::map<std::string, bool>* host_map) {
    377   DCHECK(CalledOnValidThread());
    378   host_map_ = *host_map;
    379   UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualHostsEntries",
    380                               host_map->size(), 1, 1000, 50);
    381 }
    382 
    383 void SupervisedUserURLFilter::SetManualURLs(
    384     const std::map<GURL, bool>* url_map) {
    385   DCHECK(CalledOnValidThread());
    386   url_map_ = *url_map;
    387   UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualURLsEntries",
    388                               url_map->size(), 1, 1000, 50);
    389 }
    390 
    391 void SupervisedUserURLFilter::AddObserver(Observer* observer) {
    392   observers_.AddObserver(observer);
    393 }
    394 
    395 void SupervisedUserURLFilter::RemoveObserver(Observer* observer) {
    396   observers_.RemoveObserver(observer);
    397 }
    398 
    399 void SupervisedUserURLFilter::SetContents(scoped_ptr<Contents> contents) {
    400   DCHECK(CalledOnValidThread());
    401   contents_ = contents.Pass();
    402   FOR_EACH_OBSERVER(Observer, observers_, OnSiteListUpdated());
    403 }
    404