1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "components/policy/core/browser/url_blacklist_manager.h" 6 7 #include "base/bind.h" 8 #include "base/files/file_path.h" 9 #include "base/location.h" 10 #include "base/message_loop/message_loop_proxy.h" 11 #include "base/prefs/pref_service.h" 12 #include "base/sequenced_task_runner.h" 13 #include "base/stl_util.h" 14 #include "base/strings/string_number_conversions.h" 15 #include "base/task_runner_util.h" 16 #include "base/values.h" 17 #include "components/policy/core/common/policy_pref_names.h" 18 #include "components/pref_registry/pref_registry_syncable.h" 19 #include "net/base/filename_util.h" 20 #include "net/base/load_flags.h" 21 #include "net/base/net_errors.h" 22 #include "net/url_request/url_request.h" 23 #include "url/url_parse.h" 24 25 using url_matcher::URLMatcher; 26 using url_matcher::URLMatcherCondition; 27 using url_matcher::URLMatcherConditionFactory; 28 using url_matcher::URLMatcherConditionSet; 29 using url_matcher::URLMatcherPortFilter; 30 using url_matcher::URLMatcherSchemeFilter; 31 using url_matcher::URLQueryElementMatcherCondition; 32 33 namespace policy { 34 35 namespace { 36 37 const char kFileScheme[] = "file"; 38 39 // Maximum filters per policy. Filters over this index are ignored. 40 const size_t kMaxFiltersPerPolicy = 1000; 41 42 // A task that builds the blacklist on a background thread. 43 scoped_ptr<URLBlacklist> BuildBlacklist( 44 scoped_ptr<base::ListValue> block, 45 scoped_ptr<base::ListValue> allow, 46 URLBlacklist::SegmentURLCallback segment_url) { 47 scoped_ptr<URLBlacklist> blacklist(new URLBlacklist(segment_url)); 48 blacklist->Block(block.get()); 49 blacklist->Allow(allow.get()); 50 return blacklist.Pass(); 51 } 52 53 // Tokenise the parameter |query| and add appropriate query element matcher 54 // conditions to the |query_conditions|. 55 void ProcessQueryToConditions( 56 url_matcher::URLMatcherConditionFactory* condition_factory, 57 const std::string& query, 58 bool allow, 59 std::set<URLQueryElementMatcherCondition>* query_conditions) { 60 url::Component query_left = url::MakeRange(0, query.length()); 61 url::Component key; 62 url::Component value; 63 // Depending on the filter type being black-list or white-list, the matcher 64 // choose any or every match. The idea is a URL should be black-listed if 65 // there is any occurrence of the key value pair. It should be white-listed 66 // only if every occurrence of the key is followed by the value. This avoids 67 // situations such as a user appending a white-listed video parameter in the 68 // end of the query and watching a video of his choice (the last parameter is 69 // ignored by some web servers like youtube's). 70 URLQueryElementMatcherCondition::Type match_type = 71 allow ? URLQueryElementMatcherCondition::MATCH_ALL 72 : URLQueryElementMatcherCondition::MATCH_ANY; 73 74 while (ExtractQueryKeyValue(query.data(), &query_left, &key, &value)) { 75 URLQueryElementMatcherCondition::QueryElementType query_element_type = 76 value.len ? URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY_VALUE 77 : URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY; 78 URLQueryElementMatcherCondition::QueryValueMatchType query_value_match_type; 79 if (!value.len && key.len && query[key.end() - 1] == '*') { 80 --key.len; 81 query_value_match_type = 82 URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX; 83 } else if (value.len && query[value.end() - 1] == '*') { 84 --value.len; 85 query_value_match_type = 86 URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX; 87 } else { 88 query_value_match_type = 89 URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_EXACT; 90 } 91 query_conditions->insert( 92 URLQueryElementMatcherCondition(query.substr(key.begin, key.len), 93 query.substr(value.begin, value.len), 94 query_value_match_type, 95 query_element_type, 96 match_type, 97 condition_factory)); 98 } 99 } 100 101 } // namespace 102 103 struct URLBlacklist::FilterComponents { 104 FilterComponents() : port(0), match_subdomains(true), allow(true) {} 105 ~FilterComponents() {} 106 107 std::string scheme; 108 std::string host; 109 uint16 port; 110 std::string path; 111 std::string query; 112 int number_of_key_value_pairs; 113 bool match_subdomains; 114 bool allow; 115 }; 116 117 URLBlacklist::URLBlacklist(SegmentURLCallback segment_url) 118 : segment_url_(segment_url), id_(0), url_matcher_(new URLMatcher) {} 119 120 URLBlacklist::~URLBlacklist() {} 121 122 void URLBlacklist::AddFilters(bool allow, 123 const base::ListValue* list) { 124 URLMatcherConditionSet::Vector all_conditions; 125 size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize()); 126 for (size_t i = 0; i < size; ++i) { 127 std::string pattern; 128 bool success = list->GetString(i, &pattern); 129 DCHECK(success); 130 FilterComponents components; 131 components.allow = allow; 132 if (!FilterToComponents(segment_url_, 133 pattern, 134 &components.scheme, 135 &components.host, 136 &components.match_subdomains, 137 &components.port, 138 &components.path, 139 &components.query)) { 140 LOG(ERROR) << "Invalid pattern " << pattern; 141 continue; 142 } 143 144 scoped_refptr<URLMatcherConditionSet> condition_set = 145 CreateConditionSet(url_matcher_.get(), 146 ++id_, 147 components.scheme, 148 components.host, 149 components.match_subdomains, 150 components.port, 151 components.path, 152 components.query, 153 allow); 154 components.number_of_key_value_pairs = 155 condition_set->query_conditions().size(); 156 all_conditions.push_back(condition_set); 157 filters_[id_] = components; 158 } 159 url_matcher_->AddConditionSets(all_conditions); 160 } 161 162 void URLBlacklist::Block(const base::ListValue* filters) { 163 AddFilters(false, filters); 164 } 165 166 void URLBlacklist::Allow(const base::ListValue* filters) { 167 AddFilters(true, filters); 168 } 169 170 bool URLBlacklist::IsURLBlocked(const GURL& url) const { 171 std::set<URLMatcherConditionSet::ID> matching_ids = 172 url_matcher_->MatchURL(url); 173 174 const FilterComponents* max = NULL; 175 for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin(); 176 id != matching_ids.end(); ++id) { 177 std::map<int, FilterComponents>::const_iterator it = filters_.find(*id); 178 DCHECK(it != filters_.end()); 179 const FilterComponents& filter = it->second; 180 if (!max || FilterTakesPrecedence(filter, *max)) 181 max = &filter; 182 } 183 184 // Default to allow. 185 if (!max) 186 return false; 187 188 return !max->allow; 189 } 190 191 size_t URLBlacklist::Size() const { 192 return filters_.size(); 193 } 194 195 // static 196 bool URLBlacklist::FilterToComponents(SegmentURLCallback segment_url, 197 const std::string& filter, 198 std::string* scheme, 199 std::string* host, 200 bool* match_subdomains, 201 uint16* port, 202 std::string* path, 203 std::string* query) { 204 url::Parsed parsed; 205 206 if (segment_url(filter, &parsed) == kFileScheme) { 207 base::FilePath file_path; 208 if (!net::FileURLToFilePath(GURL(filter), &file_path)) 209 return false; 210 211 *scheme = kFileScheme; 212 host->clear(); 213 *match_subdomains = true; 214 *port = 0; 215 // Special path when the |filter| is 'file://*'. 216 *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe(); 217 #if defined(FILE_PATH_USES_WIN_SEPARATORS) 218 // Separators have to be canonicalized on Windows. 219 std::replace(path->begin(), path->end(), '\\', '/'); 220 *path = "/" + *path; 221 #endif 222 return true; 223 } 224 225 if (!parsed.host.is_nonempty()) 226 return false; 227 228 if (parsed.scheme.is_nonempty()) 229 scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len); 230 else 231 scheme->clear(); 232 233 host->assign(filter, parsed.host.begin, parsed.host.len); 234 // Special '*' host, matches all hosts. 235 if (*host == "*") { 236 host->clear(); 237 *match_subdomains = true; 238 } else if ((*host)[0] == '.') { 239 // A leading dot in the pattern syntax means that we don't want to match 240 // subdomains. 241 host->erase(0, 1); 242 *match_subdomains = false; 243 } else { 244 url::RawCanonOutputT<char> output; 245 url::CanonHostInfo host_info; 246 url::CanonicalizeHostVerbose(filter.c_str(), parsed.host, &output, 247 &host_info); 248 if (host_info.family == url::CanonHostInfo::NEUTRAL) { 249 // We want to match subdomains. Add a dot in front to make sure we only 250 // match at domain component boundaries. 251 *host = "." + *host; 252 *match_subdomains = true; 253 } else { 254 *match_subdomains = false; 255 } 256 } 257 258 if (parsed.port.is_nonempty()) { 259 int int_port; 260 if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len), 261 &int_port)) { 262 return false; 263 } 264 if (int_port <= 0 || int_port > kuint16max) 265 return false; 266 *port = int_port; 267 } else { 268 // Match any port. 269 *port = 0; 270 } 271 272 if (parsed.path.is_nonempty()) 273 path->assign(filter, parsed.path.begin, parsed.path.len); 274 else 275 path->clear(); 276 277 if (query) { 278 if (parsed.query.is_nonempty()) 279 query->assign(filter, parsed.query.begin, parsed.query.len); 280 else 281 query->clear(); 282 } 283 284 return true; 285 } 286 287 // static 288 scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet( 289 URLMatcher* url_matcher, 290 int id, 291 const std::string& scheme, 292 const std::string& host, 293 bool match_subdomains, 294 uint16 port, 295 const std::string& path, 296 const std::string& query, 297 bool allow) { 298 URLMatcherConditionFactory* condition_factory = 299 url_matcher->condition_factory(); 300 std::set<URLMatcherCondition> conditions; 301 conditions.insert(match_subdomains ? 302 condition_factory->CreateHostSuffixPathPrefixCondition(host, path) : 303 condition_factory->CreateHostEqualsPathPrefixCondition(host, path)); 304 305 std::set<URLQueryElementMatcherCondition> query_conditions; 306 if (!query.empty()) { 307 ProcessQueryToConditions( 308 condition_factory, query, allow, &query_conditions); 309 } 310 311 scoped_ptr<URLMatcherSchemeFilter> scheme_filter; 312 if (!scheme.empty()) 313 scheme_filter.reset(new URLMatcherSchemeFilter(scheme)); 314 315 scoped_ptr<URLMatcherPortFilter> port_filter; 316 if (port != 0) { 317 std::vector<URLMatcherPortFilter::Range> ranges; 318 ranges.push_back(URLMatcherPortFilter::CreateRange(port)); 319 port_filter.reset(new URLMatcherPortFilter(ranges)); 320 } 321 322 return new URLMatcherConditionSet(id, 323 conditions, 324 query_conditions, 325 scheme_filter.Pass(), 326 port_filter.Pass()); 327 } 328 329 // static 330 bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs, 331 const FilterComponents& rhs) { 332 if (lhs.match_subdomains && !rhs.match_subdomains) 333 return false; 334 if (!lhs.match_subdomains && rhs.match_subdomains) 335 return true; 336 337 size_t host_length = lhs.host.length(); 338 size_t other_host_length = rhs.host.length(); 339 if (host_length != other_host_length) 340 return host_length > other_host_length; 341 342 size_t path_length = lhs.path.length(); 343 size_t other_path_length = rhs.path.length(); 344 if (path_length != other_path_length) 345 return path_length > other_path_length; 346 347 if (lhs.number_of_key_value_pairs != rhs.number_of_key_value_pairs) 348 return lhs.number_of_key_value_pairs > rhs.number_of_key_value_pairs; 349 350 if (lhs.allow && !rhs.allow) 351 return true; 352 353 return false; 354 } 355 356 URLBlacklistManager::URLBlacklistManager( 357 PrefService* pref_service, 358 const scoped_refptr<base::SequencedTaskRunner>& background_task_runner, 359 const scoped_refptr<base::SequencedTaskRunner>& io_task_runner, 360 URLBlacklist::SegmentURLCallback segment_url, 361 OverrideBlacklistCallback override_blacklist) 362 : ui_weak_ptr_factory_(this), 363 pref_service_(pref_service), 364 background_task_runner_(background_task_runner), 365 io_task_runner_(io_task_runner), 366 segment_url_(segment_url), 367 override_blacklist_(override_blacklist), 368 io_weak_ptr_factory_(this), 369 ui_task_runner_(base::MessageLoopProxy::current()), 370 blacklist_(new URLBlacklist(segment_url)) { 371 pref_change_registrar_.Init(pref_service_); 372 base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate, 373 base::Unretained(this)); 374 pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback); 375 pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback); 376 377 // Start enforcing the policies without a delay when they are present at 378 // startup. 379 if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist)) 380 Update(); 381 } 382 383 void URLBlacklistManager::ShutdownOnUIThread() { 384 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 385 // Cancel any pending updates, and stop listening for pref change updates. 386 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 387 pref_change_registrar_.RemoveAll(); 388 } 389 390 URLBlacklistManager::~URLBlacklistManager() { 391 } 392 393 void URLBlacklistManager::ScheduleUpdate() { 394 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 395 // Cancel pending updates, if any. This can happen if two preferences that 396 // change the blacklist are updated in one message loop cycle. In those cases, 397 // only rebuild the blacklist after all the preference updates are processed. 398 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 399 ui_task_runner_->PostTask( 400 FROM_HERE, 401 base::Bind(&URLBlacklistManager::Update, 402 ui_weak_ptr_factory_.GetWeakPtr())); 403 } 404 405 void URLBlacklistManager::Update() { 406 DCHECK(ui_task_runner_->RunsTasksOnCurrentThread()); 407 408 // The preferences can only be read on the UI thread. 409 scoped_ptr<base::ListValue> block( 410 pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy()); 411 scoped_ptr<base::ListValue> allow( 412 pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy()); 413 414 // Go through the IO thread to grab a WeakPtr to |this|. This is safe from 415 // here, since this task will always execute before a potential deletion of 416 // ProfileIOData on IO. 417 io_task_runner_->PostTask(FROM_HERE, 418 base::Bind(&URLBlacklistManager::UpdateOnIO, 419 base::Unretained(this), 420 base::Passed(&block), 421 base::Passed(&allow))); 422 } 423 424 void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block, 425 scoped_ptr<base::ListValue> allow) { 426 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 427 // The URLBlacklist is built on a worker thread. Once it's ready, it is passed 428 // to the URLBlacklistManager on IO. 429 base::PostTaskAndReplyWithResult( 430 background_task_runner_, 431 FROM_HERE, 432 base::Bind(&BuildBlacklist, 433 base::Passed(&block), 434 base::Passed(&allow), 435 segment_url_), 436 base::Bind(&URLBlacklistManager::SetBlacklist, 437 io_weak_ptr_factory_.GetWeakPtr())); 438 } 439 440 void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) { 441 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 442 blacklist_ = blacklist.Pass(); 443 } 444 445 bool URLBlacklistManager::IsURLBlocked(const GURL& url) const { 446 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 447 return blacklist_->IsURLBlocked(url); 448 } 449 450 bool URLBlacklistManager::IsRequestBlocked( 451 const net::URLRequest& request, int* reason) const { 452 DCHECK(io_task_runner_->RunsTasksOnCurrentThread()); 453 #if !defined(OS_IOS) 454 // TODO(joaodasilva): iOS doesn't set these flags. http://crbug.com/338283 455 int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME; 456 if ((request.load_flags() & filter_flags) == 0) 457 return false; 458 #endif 459 460 bool block = false; 461 if (override_blacklist_.Run(request.url(), &block, reason)) 462 return block; 463 464 *reason = net::ERR_BLOCKED_BY_ADMINISTRATOR; 465 return IsURLBlocked(request.url()); 466 } 467 468 // static 469 void URLBlacklistManager::RegisterProfilePrefs( 470 user_prefs::PrefRegistrySyncable* registry) { 471 registry->RegisterListPref(policy_prefs::kUrlBlacklist, 472 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 473 registry->RegisterListPref(policy_prefs::kUrlWhitelist, 474 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 475 } 476 477 } // namespace policy 478