Home | History | Annotate | Download | only in browser
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/policy/core/browser/url_blacklist_manager.h"
      6 
      7 #include "base/bind.h"
      8 #include "base/files/file_path.h"
      9 #include "base/location.h"
     10 #include "base/message_loop/message_loop_proxy.h"
     11 #include "base/prefs/pref_service.h"
     12 #include "base/sequenced_task_runner.h"
     13 #include "base/stl_util.h"
     14 #include "base/strings/string_number_conversions.h"
     15 #include "base/task_runner_util.h"
     16 #include "base/values.h"
     17 #include "components/policy/core/common/policy_pref_names.h"
     18 #include "components/pref_registry/pref_registry_syncable.h"
     19 #include "net/base/filename_util.h"
     20 #include "net/base/load_flags.h"
     21 #include "net/base/net_errors.h"
     22 #include "net/url_request/url_request.h"
     23 #include "url/url_parse.h"
     24 
     25 using url_matcher::URLMatcher;
     26 using url_matcher::URLMatcherCondition;
     27 using url_matcher::URLMatcherConditionFactory;
     28 using url_matcher::URLMatcherConditionSet;
     29 using url_matcher::URLMatcherPortFilter;
     30 using url_matcher::URLMatcherSchemeFilter;
     31 using url_matcher::URLQueryElementMatcherCondition;
     32 
     33 namespace policy {
     34 
     35 namespace {
     36 
     37 const char kFileScheme[] = "file";
     38 
     39 // Maximum filters per policy. Filters over this index are ignored.
     40 const size_t kMaxFiltersPerPolicy = 1000;
     41 
     42 // A task that builds the blacklist on a background thread.
     43 scoped_ptr<URLBlacklist> BuildBlacklist(
     44     scoped_ptr<base::ListValue> block,
     45     scoped_ptr<base::ListValue> allow,
     46     URLBlacklist::SegmentURLCallback segment_url) {
     47   scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url));
     48   blacklist->Block(block.get());
     49   blacklist->Allow(allow.get());
     50   return blacklist.Pass();
     51 }
     52 
     53 // Tokenise the parameter |query| and add appropriate query element matcher
     54 // conditions to the |query_conditions|.
     55 void ProcessQueryToConditions(
     56     url_matcher::URLMatcherConditionFactory* condition_factory,
     57     const std::string& query,
     58     bool allow,
     59     std::set<URLQueryElementMatcherCondition>* query_conditions) {
     60   url::Component query_left = url::MakeRange(0, query.length());
     61   url::Component key;
     62   url::Component value;
     63   // Depending on the filter type being black-list or white-list, the matcher
     64   // choose any or every match. The idea is a URL should be black-listed if
     65   // there is any occurrence of the key value pair. It should be white-listed
     66   // only if every occurrence of the key is followed by the value. This avoids
     67   // situations such as a user appending a white-listed video parameter in the
     68   // end of the query and watching a video of his choice (the last parameter is
     69   // ignored by some web servers like youtube's).
     70   URLQueryElementMatcherCondition::Type match_type =
     71       allow ? URLQueryElementMatcherCondition::MATCH_ALL
     72             : URLQueryElementMatcherCondition::MATCH_ANY;
     73 
     74   while (ExtractQueryKeyValue(query.data(), &query_left, &key, &value)) {
     75     URLQueryElementMatcherCondition::QueryElementType query_element_type =
     76         value.len ? URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY_VALUE
     77                   : URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY;
     78     URLQueryElementMatcherCondition::QueryValueMatchType query_value_match_type;
     79     if (!value.len && key.len && query[key.end() - 1] == '*') {
     80       --key.len;
     81       query_value_match_type =
     82           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX;
     83     } else if (value.len && query[value.end() - 1] == '*') {
     84       --value.len;
     85       query_value_match_type =
     86           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX;
     87     } else {
     88       query_value_match_type =
     89           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_EXACT;
     90     }
     91     query_conditions->insert(
     92         URLQueryElementMatcherCondition(query.substr(key.begin, key.len),
     93                                         query.substr(value.begin, value.len),
     94                                         query_value_match_type,
     95                                         query_element_type,
     96                                         match_type,
     97                                         condition_factory));
     98   }
     99 }
    100 
    101 }  // namespace
    102 
    103 struct URLBlacklist::FilterComponents {
    104   FilterComponents() : port(0), match_subdomains(true), allow(true) {}
    105   ~FilterComponents() {}
    106 
    107   std::string scheme;
    108   std::string host;
    109   uint16 port;
    110   std::string path;
    111   std::string query;
    112   int number_of_key_value_pairs;
    113   bool match_subdomains;
    114   bool allow;
    115 };
    116 
    117 URLBlacklist::URLBlacklist(SegmentURLCallback segment_url)
    118     : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {}
    119 
    120 URLBlacklist::~URLBlacklist() {}
    121 
    122 void URLBlacklist::AddFilters(bool allow,
    123                               const base::ListValue* list) {
    124   URLMatcherConditionSet::Vector all_conditions;
    125   size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize());
    126   for (size_t i = 0; i < size; ++i) {
    127     std::string pattern;
    128     bool success = list->GetString(i, &pattern);
    129     DCHECK(success);
    130     FilterComponents components;
    131     components.allow = allow;
    132     if (!FilterToComponents(segment_url_,
    133                             pattern,
    134                             &components.scheme,
    135                             &components.host,
    136                             &components.match_subdomains,
    137                             &components.port,
    138                             &components.path,
    139                             &components.query)) {
    140       LOG(ERROR) << "Invalid pattern " << pattern;
    141       continue;
    142     }
    143 
    144     scoped_refptr<URLMatcherConditionSet> condition_set =
    145         CreateConditionSet(url_matcher_.get(),
    146                            ++id_,
    147                            components.scheme,
    148                            components.host,
    149                            components.match_subdomains,
    150                            components.port,
    151                            components.path,
    152                            components.query,
    153                            allow);
    154     components.number_of_key_value_pairs =
    155         condition_set->query_conditions().size();
    156     all_conditions.push_back(condition_set);
    157     filters_[id_] = components;
    158   }
    159   url_matcher_->AddConditionSets(all_conditions);
    160 }
    161 
    162 void URLBlacklist::Block(const base::ListValue* filters) {
    163   AddFilters(false, filters);
    164 }
    165 
    166 void URLBlacklist::Allow(const base::ListValue* filters) {
    167   AddFilters(true, filters);
    168 }
    169 
    170 bool URLBlacklist::IsURLBlocked(const GURL& url) const {
    171   std::set<URLMatcherConditionSet::ID> matching_ids =
    172       url_matcher_->MatchURL(url);
    173 
    174   const FilterComponents* max = NULL;
    175   for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin();
    176        id != matching_ids.end(); ++id) {
    177     std::map<int, FilterComponents>::const_iterator it = filters_.find(*id);
    178     DCHECK(it != filters_.end());
    179     const FilterComponents& filter = it->second;
    180     if (!max || FilterTakesPrecedence(filter, *max))
    181       max = &filter;
    182   }
    183 
    184   // Default to allow.
    185   if (!max)
    186     return false;
    187 
    188   return !max->allow;
    189 }
    190 
    191 size_t URLBlacklist::Size() const {
    192   return filters_.size();
    193 }
    194 
    195 // static
    196 bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url,
    197                                       const std::string& filter,
    198                                       std::string* scheme,
    199                                       std::string* host,
    200                                       bool* match_subdomains,
    201                                       uint16* port,
    202                                       std::string* path,
    203                                       std::string* query) {
    204   url::Parsed parsed;
    205 
    206   if (segment_url(filter, &parsed) == kFileScheme) {
    207     base::FilePath file_path;
    208     if (!net::FileURLToFilePath(GURL(filter), &file_path))
    209       return false;
    210 
    211     *scheme = kFileScheme;
    212     host->clear();
    213     *match_subdomains = true;
    214     *port = 0;
    215     // Special path when the |filter| is 'file://*'.
    216     *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe();
    217 #if defined(FILE_PATH_USES_WIN_SEPARATORS)
    218     // Separators have to be canonicalized on Windows.
    219     std::replace(path->begin(), path->end(), '\\', '/');
    220     *path = "/" + *path;
    221 #endif
    222     return true;
    223   }
    224 
    225   if (!parsed.host.is_nonempty())
    226     return false;
    227 
    228   if (parsed.scheme.is_nonempty())
    229     scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len);
    230   else
    231     scheme->clear();
    232 
    233   host->assign(filter, parsed.host.begin, parsed.host.len);
    234   // Special '*' host, matches all hosts.
    235   if (*host == "*") {
    236     host->clear();
    237     *match_subdomains = true;
    238   } else if ((*host)[0] == '.') {
    239     // A leading dot in the pattern syntax means that we don't want to match
    240     // subdomains.
    241     host->erase(0, 1);
    242     *match_subdomains = false;
    243   } else {
    244     url::RawCanonOutputT<char> output;
    245     url::CanonHostInfo host_info;
    246     url::CanonicalizeHostVerbose(filter.c_str(), parsed.host, &output,
    247                                  &host_info);
    248     if (host_info.family == url::CanonHostInfo::NEUTRAL) {
    249       // We want to match subdomains. Add a dot in front to make sure we only
    250       // match at domain component boundaries.
    251       *host = "." + *host;
    252       *match_subdomains = true;
    253     } else {
    254       *match_subdomains = false;
    255     }
    256   }
    257 
    258   if (parsed.port.is_nonempty()) {
    259     int int_port;
    260     if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len),
    261                            &int_port)) {
    262       return false;
    263     }
    264     if (int_port <= 0 || int_port > kuint16max)
    265       return false;
    266     *port = int_port;
    267   } else {
    268     // Match any port.
    269     *port = 0;
    270   }
    271 
    272   if (parsed.path.is_nonempty())
    273     path->assign(filter, parsed.path.begin, parsed.path.len);
    274   else
    275     path->clear();
    276 
    277   if (query) {
    278     if (parsed.query.is_nonempty())
    279       query->assign(filter, parsed.query.begin, parsed.query.len);
    280     else
    281       query->clear();
    282   }
    283 
    284   return true;
    285 }
    286 
    287 // static
    288 scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet(
    289     URLMatcher* url_matcher,
    290     int id,
    291     const std::string& scheme,
    292     const std::string& host,
    293     bool match_subdomains,
    294     uint16 port,
    295     const std::string& path,
    296     const std::string& query,
    297     bool allow) {
    298   URLMatcherConditionFactory* condition_factory =
    299       url_matcher->condition_factory();
    300   std::set<URLMatcherCondition> conditions;
    301   conditions.insert(match_subdomains ?
    302       condition_factory->CreateHostSuffixPathPrefixCondition(host, path) :
    303       condition_factory->CreateHostEqualsPathPrefixCondition(host, path));
    304 
    305   std::set<URLQueryElementMatcherCondition> query_conditions;
    306   if (!query.empty()) {
    307     ProcessQueryToConditions(
    308         condition_factory, query, allow, &query_conditions);
    309   }
    310 
    311   scoped_ptr<URLMatcherSchemeFilter> scheme_filter;
    312   if (!scheme.empty())
    313     scheme_filter.reset(new URLMatcherSchemeFilter(scheme));
    314 
    315   scoped_ptr<URLMatcherPortFilter> port_filter;
    316   if (port != 0) {
    317     std::vector<URLMatcherPortFilter::Range> ranges;
    318     ranges.push_back(URLMatcherPortFilter::CreateRange(port));
    319     port_filter.reset(new URLMatcherPortFilter(ranges));
    320   }
    321 
    322   return new URLMatcherConditionSet(id,
    323                                     conditions,
    324                                     query_conditions,
    325                                     scheme_filter.Pass(),
    326                                     port_filter.Pass());
    327 }
    328 
    329 // static
    330 bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs,
    331                                          const FilterComponents& rhs) {
    332   if (lhs.match_subdomains && !rhs.match_subdomains)
    333     return false;
    334   if (!lhs.match_subdomains && rhs.match_subdomains)
    335     return true;
    336 
    337   size_t host_length = lhs.host.length();
    338   size_t other_host_length = rhs.host.length();
    339   if (host_length != other_host_length)
    340     return host_length > other_host_length;
    341 
    342   size_t path_length = lhs.path.length();
    343   size_t other_path_length = rhs.path.length();
    344   if (path_length != other_path_length)
    345     return path_length > other_path_length;
    346 
    347   if (lhs.number_of_key_value_pairs != rhs.number_of_key_value_pairs)
    348     return lhs.number_of_key_value_pairs > rhs.number_of_key_value_pairs;
    349 
    350   if (lhs.allow && !rhs.allow)
    351     return true;
    352 
    353   return false;
    354 }
    355 
    356 URLBlacklistManager::URLBlacklistManager(
    357     PrefService* pref_service,
    358     const scoped_refptr<base::SequencedTaskRunner>& background_task_runner,
    359     const scoped_refptr<base::SequencedTaskRunner>& io_task_runner,
    360     URLBlacklist::SegmentURLCallback segment_url,
    361     OverrideBlacklistCallback override_blacklist)
    362     : ui_weak_ptr_factory_(this),
    363       pref_service_(pref_service),
    364       background_task_runner_(background_task_runner),
    365       io_task_runner_(io_task_runner),
    366       segment_url_(segment_url),
    367       override_blacklist_(override_blacklist),
    368       io_weak_ptr_factory_(this),
    369       ui_task_runner_(base::MessageLoopProxy::current()),
    370       blacklist_(new URLBlacklist(segment_url)) {
    371   pref_change_registrar_.Init(pref_service_);
    372   base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate,
    373                                       base::Unretained(this));
    374   pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback);
    375   pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback);
    376 
    377   // Start enforcing the policies without a delay when they are present at
    378   // startup.
    379   if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist))
    380     Update();
    381 }
    382 
    383 void URLBlacklistManager::ShutdownOnUIThread() {
    384   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
    385   // Cancel any pending updates, and stop listening for pref change updates.
    386   ui_weak_ptr_factory_.InvalidateWeakPtrs();
    387   pref_change_registrar_.RemoveAll();
    388 }
    389 
    390 URLBlacklistManager::~URLBlacklistManager() {
    391 }
    392 
    393 void URLBlacklistManager::ScheduleUpdate() {
    394   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
    395   // Cancel pending updates, if any. This can happen if two preferences that
    396   // change the blacklist are updated in one message loop cycle. In those cases,
    397   // only rebuild the blacklist after all the preference updates are processed.
    398   ui_weak_ptr_factory_.InvalidateWeakPtrs();
    399   ui_task_runner_->PostTask(
    400       FROM_HERE,
    401       base::Bind(&URLBlacklistManager::Update,
    402                  ui_weak_ptr_factory_.GetWeakPtr()));
    403 }
    404 
    405 void URLBlacklistManager::Update() {
    406   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
    407 
    408   // The preferences can only be read on the UI thread.
    409   scoped_ptr<base::ListValue> block(
    410       pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy());
    411   scoped_ptr<base::ListValue> allow(
    412       pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy());
    413 
    414   // Go through the IO thread to grab a WeakPtr to |this|. This is safe from
    415   // here, since this task will always execute before a potential deletion of
    416   // ProfileIOData on IO.
    417   io_task_runner_->PostTask(FROM_HERE,
    418                             base::Bind(&URLBlacklistManager::UpdateOnIO,
    419                                        base::Unretained(this),
    420                                        base::Passed(&block),
    421                                        base::Passed(&allow)));
    422 }
    423 
    424 void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block,
    425                                      scoped_ptr<base::ListValue> allow) {
    426   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
    427   // The URLBlacklist is built on a worker thread. Once it's ready, it is passed
    428   // to the URLBlacklistManager on IO.
    429   base::PostTaskAndReplyWithResult(
    430       background_task_runner_,
    431       FROM_HERE,
    432       base::Bind(&BuildBlacklist,
    433                  base::Passed(&block),
    434                  base::Passed(&allow),
    435                  segment_url_),
    436       base::Bind(&URLBlacklistManager::SetBlacklist,
    437                  io_weak_ptr_factory_.GetWeakPtr()));
    438 }
    439 
    440 void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) {
    441   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
    442   blacklist_ = blacklist.Pass();
    443 }
    444 
    445 bool URLBlacklistManager::IsURLBlocked(const GURL& url) const {
    446   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
    447   return blacklist_->IsURLBlocked(url);
    448 }
    449 
    450 bool URLBlacklistManager::IsRequestBlocked(
    451     const net::URLRequest& request, int* reason) const {
    452   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
    453 #if !defined(OS_IOS)
    454   // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283
    455   int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME;
    456   if ((request.load_flags() & filter_flags) == 0)
    457     return false;
    458 #endif
    459 
    460   bool block = false;
    461   if (override_blacklist_.Run(request.url(), &block, reason))
    462     return block;
    463 
    464   *reason = net::ERR_BLOCKED_BY_ADMINISTRATOR;
    465   return IsURLBlocked(request.url());
    466 }
    467 
    468 // static
    469 void URLBlacklistManager::RegisterProfilePrefs(
    470     user_prefs::PrefRegistrySyncable* registry) {
    471   registry->RegisterListPref(policy_prefs::kUrlBlacklist,
    472                              user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
    473   registry->RegisterListPref(policy_prefs::kUrlWhitelist,
    474                              user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
    475 }
    476 
    477 }  // namespace policy
    478