Home | History | Annotate | Download | only in browser
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/policy/core/browser/url_blacklist_manager.h"
      6 
      7 #include "base/bind.h"
      8 #include "base/files/file_path.h"
      9 #include "base/location.h"
     10 #include "base/message_loop/message_loop_proxy.h"
     11 #include "base/prefs/pref_service.h"
     12 #include "base/sequenced_task_runner.h"
     13 #include "base/stl_util.h"
     14 #include "base/strings/string_number_conversions.h"
     15 #include "base/task_runner_util.h"
     16 #include "base/values.h"
     17 #include "components/policy/core/common/policy_pref_names.h"
     18 #include "components/pref_registry/pref_registry_syncable.h"
     19 #include "net/base/filename_util.h"
     20 #include "net/base/load_flags.h"
     21 #include "net/base/net_errors.h"
     22 #include "net/url_request/url_request.h"
     23 #include "url/url_constants.h"
     24 #include "url/url_parse.h"
     25 
     26 using url_matcher::URLMatcher;
     27 using url_matcher::URLMatcherCondition;
     28 using url_matcher::URLMatcherConditionFactory;
     29 using url_matcher::URLMatcherConditionSet;
     30 using url_matcher::URLMatcherPortFilter;
     31 using url_matcher::URLMatcherSchemeFilter;
     32 using url_matcher::URLQueryElementMatcherCondition;
     33 
     34 namespace policy {
     35 
     36 namespace {
     37 
     38 // List of schemes of URLs that should not be blocked by the "*" wildcard in
     39 // the blacklist. Note that URLs with these schemes can still be blocked with
     40 // a more specific filter e.g. "chrome-extension://*".
     41 // The schemes are hardcoded here to avoid dependencies on //extensions and
     42 // //chrome.
     43 const char* kBypassBlacklistWildcardForSchemes[] = {
     44   // For internal extension URLs e.g. the Bookmark Manager and the File
     45   // Manager on Chrome OS.
     46   "chrome-extension",
     47 
     48   // NTP on Android.
     49   "chrome-native",
     50 
     51   // NTP on other platforms.
     52   "chrome-search",
     53 };
     54 
     55 // Maximum filters per policy. Filters over this index are ignored.
     56 const size_t kMaxFiltersPerPolicy = 1000;
     57 
     58 // A task that builds the blacklist on a background thread.
     59 scoped_ptr<URLBlacklist> BuildBlacklist(
     60     scoped_ptr<base::ListValue> block,
     61     scoped_ptr<base::ListValue> allow,
     62     URLBlacklist::SegmentURLCallback segment_url) {
     63   scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url));
     64   blacklist->Block(block.get());
     65   blacklist->Allow(allow.get());
     66   return blacklist.Pass();
     67 }
     68 
     69 // Tokenise the parameter |query| and add appropriate query element matcher
     70 // conditions to the |query_conditions|.
     71 void ProcessQueryToConditions(
     72     url_matcher::URLMatcherConditionFactory* condition_factory,
     73     const std::string& query,
     74     bool allow,
     75     std::set<URLQueryElementMatcherCondition>* query_conditions) {
     76   url::Component query_left = url::MakeRange(0, query.length());
     77   url::Component key;
     78   url::Component value;
     79   // Depending on the filter type being black-list or white-list, the matcher
     80   // choose any or every match. The idea is a URL should be black-listed if
     81   // there is any occurrence of the key value pair. It should be white-listed
     82   // only if every occurrence of the key is followed by the value. This avoids
     83   // situations such as a user appending a white-listed video parameter in the
     84   // end of the query and watching a video of his choice (the last parameter is
     85   // ignored by some web servers like youtube's).
     86   URLQueryElementMatcherCondition::Type match_type =
     87       allow ? URLQueryElementMatcherCondition::MATCH_ALL
     88             : URLQueryElementMatcherCondition::MATCH_ANY;
     89 
     90   while (ExtractQueryKeyValue(query.data(), &query_left, &key, &value)) {
     91     URLQueryElementMatcherCondition::QueryElementType query_element_type =
     92         value.len ? URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY_VALUE
     93                   : URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY;
     94     URLQueryElementMatcherCondition::QueryValueMatchType query_value_match_type;
     95     if (!value.len && key.len && query[key.end() - 1] == '*') {
     96       --key.len;
     97       query_value_match_type =
     98           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX;
     99     } else if (value.len && query[value.end() - 1] == '*') {
    100       --value.len;
    101       query_value_match_type =
    102           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX;
    103     } else {
    104       query_value_match_type =
    105           URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_EXACT;
    106     }
    107     query_conditions->insert(
    108         URLQueryElementMatcherCondition(query.substr(key.begin, key.len),
    109                                         query.substr(value.begin, value.len),
    110                                         query_value_match_type,
    111                                         query_element_type,
    112                                         match_type,
    113                                         condition_factory));
    114   }
    115 }
    116 
    117 bool BypassBlacklistWildcardForURL(const GURL& url) {
    118   const std::string& scheme = url.scheme();
    119   for (size_t i = 0; i < arraysize(kBypassBlacklistWildcardForSchemes); ++i) {
    120     if (scheme == kBypassBlacklistWildcardForSchemes[i])
    121       return true;
    122   }
    123   return false;
    124 }
    125 
    126 }  // namespace
    127 
    128 struct URLBlacklist::FilterComponents {
    129   FilterComponents() : port(0), match_subdomains(true), allow(true) {}
    130   ~FilterComponents() {}
    131 
    132   // Returns true if |this| represents the "*" filter in the blacklist.
    133   bool IsBlacklistWildcard() const {
    134     return !allow && host.empty() && scheme.empty() && path.empty() &&
    135            query.empty() && port == 0 && number_of_key_value_pairs == 0 &&
    136            match_subdomains;
    137   }
    138 
    139   std::string scheme;
    140   std::string host;
    141   uint16 port;
    142   std::string path;
    143   std::string query;
    144   int number_of_key_value_pairs;
    145   bool match_subdomains;
    146   bool allow;
    147 };
    148 
    149 URLBlacklist::URLBlacklist(SegmentURLCallback segment_url)
    150     : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {}
    151 
    152 URLBlacklist::~URLBlacklist() {}
    153 
    154 void URLBlacklist::AddFilters(bool allow,
    155                               const base::ListValue* list) {
    156   URLMatcherConditionSet::Vector all_conditions;
    157   size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize());
    158   for (size_t i = 0; i < size; ++i) {
    159     std::string pattern;
    160     bool success = list->GetString(i, &pattern);
    161     DCHECK(success);
    162     FilterComponents components;
    163     components.allow = allow;
    164     if (!FilterToComponents(segment_url_,
    165                             pattern,
    166                             &components.scheme,
    167                             &components.host,
    168                             &components.match_subdomains,
    169                             &components.port,
    170                             &components.path,
    171                             &components.query)) {
    172       LOG(ERROR) << "Invalid pattern " << pattern;
    173       continue;
    174     }
    175 
    176     scoped_refptr<URLMatcherConditionSet> condition_set =
    177         CreateConditionSet(url_matcher_.get(),
    178                            ++id_,
    179                            components.scheme,
    180                            components.host,
    181                            components.match_subdomains,
    182                            components.port,
    183                            components.path,
    184                            components.query,
    185                            allow);
    186     components.number_of_key_value_pairs =
    187         condition_set->query_conditions().size();
    188     all_conditions.push_back(condition_set);
    189     filters_[id_] = components;
    190   }
    191   url_matcher_->AddConditionSets(all_conditions);
    192 }
    193 
    194 void URLBlacklist::Block(const base::ListValue* filters) {
    195   AddFilters(false, filters);
    196 }
    197 
    198 void URLBlacklist::Allow(const base::ListValue* filters) {
    199   AddFilters(true, filters);
    200 }
    201 
    202 bool URLBlacklist::IsURLBlocked(const GURL& url) const {
    203   std::set<URLMatcherConditionSet::ID> matching_ids =
    204       url_matcher_->MatchURL(url);
    205 
    206   const FilterComponents* max = NULL;
    207   for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin();
    208        id != matching_ids.end(); ++id) {
    209     std::map<int, FilterComponents>::const_iterator it = filters_.find(*id);
    210     DCHECK(it != filters_.end());
    211     const FilterComponents& filter = it->second;
    212     if (!max || FilterTakesPrecedence(filter, *max))
    213       max = &filter;
    214   }
    215 
    216   // Default to allow.
    217   if (!max)
    218     return false;
    219 
    220   // Some of the internal Chrome URLs are not affected by the "*" in the
    221   // blacklist. Note that the "*" is the lowest priority filter possible, so
    222   // any higher priority filter will be applied first.
    223   if (max->IsBlacklistWildcard() && BypassBlacklistWildcardForURL(url))
    224     return false;
    225 
    226   return !max->allow;
    227 }
    228 
    229 size_t URLBlacklist::Size() const {
    230   return filters_.size();
    231 }
    232 
    233 // static
    234 bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url,
    235                                       const std::string& filter,
    236                                       std::string* scheme,
    237                                       std::string* host,
    238                                       bool* match_subdomains,
    239                                       uint16* port,
    240                                       std::string* path,
    241                                       std::string* query) {
    242   url::Parsed parsed;
    243 
    244   if (segment_url(filter, &parsed) == url::kFileScheme) {
    245     base::FilePath file_path;
    246     if (!net::FileURLToFilePath(GURL(filter), &file_path))
    247       return false;
    248 
    249     *scheme = url::kFileScheme;
    250     host->clear();
    251     *match_subdomains = true;
    252     *port = 0;
    253     // Special path when the |filter| is 'file://*'.
    254     *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe();
    255 #if defined(FILE_PATH_USES_WIN_SEPARATORS)
    256     // Separators have to be canonicalized on Windows.
    257     std::replace(path->begin(), path->end(), '\\', '/');
    258     *path = "/" + *path;
    259 #endif
    260     return true;
    261   }
    262 
    263   if (!parsed.host.is_nonempty())
    264     return false;
    265 
    266   if (parsed.scheme.is_nonempty())
    267     scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len);
    268   else
    269     scheme->clear();
    270 
    271   host->assign(filter, parsed.host.begin, parsed.host.len);
    272   // Special '*' host, matches all hosts.
    273   if (*host == "*") {
    274     host->clear();
    275     *match_subdomains = true;
    276   } else if ((*host)[0] == '.') {
    277     // A leading dot in the pattern syntax means that we don't want to match
    278     // subdomains.
    279     host->erase(0, 1);
    280     *match_subdomains = false;
    281   } else {
    282     url::RawCanonOutputT<char> output;
    283     url::CanonHostInfo host_info;
    284     url::CanonicalizeHostVerbose(filter.c_str(), parsed.host, &output,
    285                                  &host_info);
    286     if (host_info.family == url::CanonHostInfo::NEUTRAL) {
    287       // We want to match subdomains. Add a dot in front to make sure we only
    288       // match at domain component boundaries.
    289       *host = "." + *host;
    290       *match_subdomains = true;
    291     } else {
    292       *match_subdomains = false;
    293     }
    294   }
    295 
    296   if (parsed.port.is_nonempty()) {
    297     int int_port;
    298     if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len),
    299                            &int_port)) {
    300       return false;
    301     }
    302     if (int_port <= 0 || int_port > kuint16max)
    303       return false;
    304     *port = int_port;
    305   } else {
    306     // Match any port.
    307     *port = 0;
    308   }
    309 
    310   if (parsed.path.is_nonempty())
    311     path->assign(filter, parsed.path.begin, parsed.path.len);
    312   else
    313     path->clear();
    314 
    315   if (query) {
    316     if (parsed.query.is_nonempty())
    317       query->assign(filter, parsed.query.begin, parsed.query.len);
    318     else
    319       query->clear();
    320   }
    321 
    322   return true;
    323 }
    324 
    325 // static
    326 scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet(
    327     URLMatcher* url_matcher,
    328     int id,
    329     const std::string& scheme,
    330     const std::string& host,
    331     bool match_subdomains,
    332     uint16 port,
    333     const std::string& path,
    334     const std::string& query,
    335     bool allow) {
    336   URLMatcherConditionFactory* condition_factory =
    337       url_matcher->condition_factory();
    338   std::set<URLMatcherCondition> conditions;
    339   conditions.insert(match_subdomains ?
    340       condition_factory->CreateHostSuffixPathPrefixCondition(host, path) :
    341       condition_factory->CreateHostEqualsPathPrefixCondition(host, path));
    342 
    343   std::set<URLQueryElementMatcherCondition> query_conditions;
    344   if (!query.empty()) {
    345     ProcessQueryToConditions(
    346         condition_factory, query, allow, &query_conditions);
    347   }
    348 
    349   scoped_ptr<URLMatcherSchemeFilter> scheme_filter;
    350   if (!scheme.empty())
    351     scheme_filter.reset(new URLMatcherSchemeFilter(scheme));
    352 
    353   scoped_ptr<URLMatcherPortFilter> port_filter;
    354   if (port != 0) {
    355     std::vector<URLMatcherPortFilter::Range> ranges;
    356     ranges.push_back(URLMatcherPortFilter::CreateRange(port));
    357     port_filter.reset(new URLMatcherPortFilter(ranges));
    358   }
    359 
    360   return new URLMatcherConditionSet(id,
    361                                     conditions,
    362                                     query_conditions,
    363                                     scheme_filter.Pass(),
    364                                     port_filter.Pass());
    365 }
    366 
    367 // static
    368 bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs,
    369                                          const FilterComponents& rhs) {
    370   // The "*" wildcard is the lowest priority filter.
    371   if (rhs.IsBlacklistWildcard())
    372     return true;
    373 
    374   if (lhs.match_subdomains && !rhs.match_subdomains)
    375     return false;
    376   if (!lhs.match_subdomains && rhs.match_subdomains)
    377     return true;
    378 
    379   size_t host_length = lhs.host.length();
    380   size_t other_host_length = rhs.host.length();
    381   if (host_length != other_host_length)
    382     return host_length > other_host_length;
    383 
    384   size_t path_length = lhs.path.length();
    385   size_t other_path_length = rhs.path.length();
    386   if (path_length != other_path_length)
    387     return path_length > other_path_length;
    388 
    389   if (lhs.number_of_key_value_pairs != rhs.number_of_key_value_pairs)
    390     return lhs.number_of_key_value_pairs > rhs.number_of_key_value_pairs;
    391 
    392   if (lhs.allow && !rhs.allow)
    393     return true;
    394 
    395   return false;
    396 }
    397 
    398 URLBlacklistManager::URLBlacklistManager(
    399     PrefService* pref_service,
    400     const scoped_refptr<base::SequencedTaskRunner>& background_task_runner,
    401     const scoped_refptr<base::SequencedTaskRunner>& io_task_runner,
    402     URLBlacklist::SegmentURLCallback segment_url,
    403     OverrideBlacklistCallback override_blacklist)
    404     : pref_service_(pref_service),
    405       background_task_runner_(background_task_runner),
    406       io_task_runner_(io_task_runner),
    407       segment_url_(segment_url),
    408       override_blacklist_(override_blacklist),
    409       ui_task_runner_(base::MessageLoopProxy::current()),
    410       blacklist_(new URLBlacklist(segment_url)),
    411       ui_weak_ptr_factory_(this),
    412       io_weak_ptr_factory_(this) {
    413   pref_change_registrar_.Init(pref_service_);
    414   base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate,
    415                                       base::Unretained(this));
    416   pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback);
    417   pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback);
    418 
    419   // Start enforcing the policies without a delay when they are present at
    420   // startup.
    421   if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist))
    422     Update();
    423 }
    424 
    425 void URLBlacklistManager::ShutdownOnUIThread() {
    426   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
    427   // Cancel any pending updates, and stop listening for pref change updates.
    428   ui_weak_ptr_factory_.InvalidateWeakPtrs();
    429   pref_change_registrar_.RemoveAll();
    430 }
    431 
    432 URLBlacklistManager::~URLBlacklistManager() {
    433 }
    434 
    435 void URLBlacklistManager::ScheduleUpdate() {
    436   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
    437   // Cancel pending updates, if any. This can happen if two preferences that
    438   // change the blacklist are updated in one message loop cycle. In those cases,
    439   // only rebuild the blacklist after all the preference updates are processed.
    440   ui_weak_ptr_factory_.InvalidateWeakPtrs();
    441   ui_task_runner_->PostTask(
    442       FROM_HERE,
    443       base::Bind(&URLBlacklistManager::Update,
    444                  ui_weak_ptr_factory_.GetWeakPtr()));
    445 }
    446 
    447 void URLBlacklistManager::Update() {
    448   DCHECK(ui_task_runner_->RunsTasksOnCurrentThread());
    449 
    450   // The preferences can only be read on the UI thread.
    451   scoped_ptr<base::ListValue> block(
    452       pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy());
    453   scoped_ptr<base::ListValue> allow(
    454       pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy());
    455 
    456   // Go through the IO thread to grab a WeakPtr to |this|. This is safe from
    457   // here, since this task will always execute before a potential deletion of
    458   // ProfileIOData on IO.
    459   io_task_runner_->PostTask(FROM_HERE,
    460                             base::Bind(&URLBlacklistManager::UpdateOnIO,
    461                                        base::Unretained(this),
    462                                        base::Passed(&block),
    463                                        base::Passed(&allow)));
    464 }
    465 
    466 void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block,
    467                                      scoped_ptr<base::ListValue> allow) {
    468   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
    469   // The URLBlacklist is built on a worker thread. Once it's ready, it is passed
    470   // to the URLBlacklistManager on IO.
    471   base::PostTaskAndReplyWithResult(
    472       background_task_runner_.get(),
    473       FROM_HERE,
    474       base::Bind(&BuildBlacklist,
    475                  base::Passed(&block),
    476                  base::Passed(&allow),
    477                  segment_url_),
    478       base::Bind(&URLBlacklistManager::SetBlacklist,
    479                  io_weak_ptr_factory_.GetWeakPtr()));
    480 }
    481 
    482 void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) {
    483   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
    484   blacklist_ = blacklist.Pass();
    485 }
    486 
    487 bool URLBlacklistManager::IsURLBlocked(const GURL& url) const {
    488   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
    489   return blacklist_->IsURLBlocked(url);
    490 }
    491 
    492 bool URLBlacklistManager::IsRequestBlocked(
    493     const net::URLRequest& request, int* reason) const {
    494   DCHECK(io_task_runner_->RunsTasksOnCurrentThread());
    495 #if !defined(OS_IOS)
    496   // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283
    497   int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME;
    498   if ((request.load_flags() & filter_flags) == 0)
    499     return false;
    500 #endif
    501 
    502   bool block = false;
    503   if (override_blacklist_.Run(request.url(), &block, reason))
    504     return block;
    505 
    506   *reason = net::ERR_BLOCKED_BY_ADMINISTRATOR;
    507   return IsURLBlocked(request.url());
    508 }
    509 
    510 // static
    511 void URLBlacklistManager::RegisterProfilePrefs(
    512     user_prefs::PrefRegistrySyncable* registry) {
    513   registry->RegisterListPref(policy_prefs::kUrlBlacklist,
    514                              user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
    515   registry->RegisterListPref(policy_prefs::kUrlWhitelist,
    516                              user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
    517 }
    518 
    519 }  // namespace policy
    520