1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/supervised_user/supervised_user_url_filter.h" 6 7 #include "base/containers/hash_tables.h" 8 #include "base/files/file_path.h" 9 #include "base/json/json_file_value_serializer.h" 10 #include "base/metrics/histogram.h" 11 #include "base/sha1.h" 12 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_util.h" 14 #include "base/task_runner_util.h" 15 #include "base/threading/sequenced_worker_pool.h" 16 #include "chrome/browser/supervised_user/experimental/supervised_user_blacklist.h" 17 #include "components/policy/core/browser/url_blacklist_manager.h" 18 #include "components/url_fixer/url_fixer.h" 19 #include "components/url_matcher/url_matcher.h" 20 #include "content/public/browser/browser_thread.h" 21 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 22 #include "url/gurl.h" 23 24 using content::BrowserThread; 25 using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES; 26 using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES; 27 using net::registry_controlled_domains::GetRegistryLength; 28 using policy::URLBlacklist; 29 using url_matcher::URLMatcher; 30 using url_matcher::URLMatcherConditionSet; 31 32 struct SupervisedUserURLFilter::Contents { 33 URLMatcher url_matcher; 34 std::map<URLMatcherConditionSet::ID, int> matcher_site_map; 35 base::hash_multimap<std::string, int> hash_site_map; 36 std::vector<SupervisedUserSiteList::Site> sites; 37 }; 38 39 namespace { 40 41 // URL schemes not in this list (e.g., file:// and chrome://) will always be 42 // allowed. 43 const char* kFilteredSchemes[] = { 44 "http", 45 "https", 46 "ftp", 47 "gopher", 48 "ws", 49 "wss" 50 }; 51 52 53 // This class encapsulates all the state that is required during construction of 54 // a new SupervisedUserURLFilter::Contents. 55 class FilterBuilder { 56 public: 57 FilterBuilder(); 58 ~FilterBuilder(); 59 60 // Adds a single URL pattern for the site identified by |site_id|. 61 bool AddPattern(const std::string& pattern, int site_id); 62 63 // Adds a single hostname SHA1 hash for the site identified by |site_id|. 64 void AddHostnameHash(const std::string& hash, int site_id); 65 66 // Adds all the sites in |site_list|, with URL patterns and hostname hashes. 67 void AddSiteList(SupervisedUserSiteList* site_list); 68 69 // Finalizes construction of the SupervisedUserURLFilter::Contents and returns 70 // them. This method should be called before this object is destroyed. 71 scoped_ptr<SupervisedUserURLFilter::Contents> Build(); 72 73 private: 74 scoped_ptr<SupervisedUserURLFilter::Contents> contents_; 75 URLMatcherConditionSet::Vector all_conditions_; 76 URLMatcherConditionSet::ID matcher_id_; 77 }; 78 79 FilterBuilder::FilterBuilder() 80 : contents_(new SupervisedUserURLFilter::Contents()), 81 matcher_id_(0) {} 82 83 FilterBuilder::~FilterBuilder() { 84 DCHECK(!contents_.get()); 85 } 86 87 bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) { 88 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 89 std::string scheme; 90 std::string host; 91 uint16 port; 92 std::string path; 93 std::string query; 94 bool match_subdomains = true; 95 URLBlacklist::SegmentURLCallback callback = 96 static_cast<URLBlacklist::SegmentURLCallback>(url_fixer::SegmentURL); 97 if (!URLBlacklist::FilterToComponents( 98 callback, pattern, 99 &scheme, &host, &match_subdomains, &port, &path, &query)) { 100 LOG(ERROR) << "Invalid pattern " << pattern; 101 return false; 102 } 103 104 scoped_refptr<URLMatcherConditionSet> condition_set = 105 URLBlacklist::CreateConditionSet( 106 &contents_->url_matcher, ++matcher_id_, 107 scheme, host, match_subdomains, port, path, query, true); 108 all_conditions_.push_back(condition_set); 109 contents_->matcher_site_map[matcher_id_] = site_id; 110 return true; 111 } 112 113 void FilterBuilder::AddHostnameHash(const std::string& hash, int site_id) { 114 contents_->hash_site_map.insert(std::make_pair(StringToUpperASCII(hash), 115 site_id)); 116 } 117 118 void FilterBuilder::AddSiteList(SupervisedUserSiteList* site_list) { 119 std::vector<SupervisedUserSiteList::Site> sites; 120 site_list->GetSites(&sites); 121 int site_id = contents_->sites.size(); 122 for (std::vector<SupervisedUserSiteList::Site>::const_iterator it = 123 sites.begin(); it != sites.end(); ++it) { 124 const SupervisedUserSiteList::Site& site = *it; 125 contents_->sites.push_back(site); 126 127 for (std::vector<std::string>::const_iterator pattern_it = 128 site.patterns.begin(); 129 pattern_it != site.patterns.end(); ++pattern_it) { 130 AddPattern(*pattern_it, site_id); 131 } 132 133 for (std::vector<std::string>::const_iterator hash_it = 134 site.hostname_hashes.begin(); 135 hash_it != site.hostname_hashes.end(); ++hash_it) { 136 AddHostnameHash(*hash_it, site_id); 137 } 138 139 site_id++; 140 } 141 } 142 143 scoped_ptr<SupervisedUserURLFilter::Contents> FilterBuilder::Build() { 144 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 145 contents_->url_matcher.AddConditionSets(all_conditions_); 146 return contents_.Pass(); 147 } 148 149 scoped_ptr<SupervisedUserURLFilter::Contents> CreateWhitelistFromPatterns( 150 const std::vector<std::string>& patterns) { 151 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 152 153 FilterBuilder builder; 154 for (std::vector<std::string>::const_iterator it = patterns.begin(); 155 it != patterns.end(); ++it) { 156 // TODO(bauerb): We should create a fake site for the whitelist. 157 builder.AddPattern(*it, -1); 158 } 159 160 return builder.Build(); 161 } 162 163 scoped_ptr<SupervisedUserURLFilter::Contents> 164 LoadWhitelistsOnBlockingPoolThread( 165 ScopedVector<SupervisedUserSiteList> site_lists) { 166 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 167 168 FilterBuilder builder; 169 for (ScopedVector<SupervisedUserSiteList>::iterator it = site_lists.begin(); 170 it != site_lists.end(); ++it) { 171 builder.AddSiteList(*it); 172 } 173 174 return builder.Build(); 175 } 176 177 } // namespace 178 179 SupervisedUserURLFilter::SupervisedUserURLFilter() 180 : default_behavior_(ALLOW), 181 contents_(new Contents()), 182 blacklist_(NULL) { 183 // Detach from the current thread so we can be constructed on a different 184 // thread than the one where we're used. 185 DetachFromThread(); 186 } 187 188 SupervisedUserURLFilter::~SupervisedUserURLFilter() { 189 DCHECK(CalledOnValidThread()); 190 } 191 192 // static 193 SupervisedUserURLFilter::FilteringBehavior 194 SupervisedUserURLFilter::BehaviorFromInt(int behavior_value) { 195 DCHECK_GE(behavior_value, ALLOW); 196 DCHECK_LE(behavior_value, BLOCK); 197 return static_cast<FilteringBehavior>(behavior_value); 198 } 199 200 // static 201 GURL SupervisedUserURLFilter::Normalize(const GURL& url) { 202 GURL normalized_url = url; 203 GURL::Replacements replacements; 204 // Strip username, password, query, and ref. 205 replacements.ClearUsername(); 206 replacements.ClearPassword(); 207 replacements.ClearQuery(); 208 replacements.ClearRef(); 209 return url.ReplaceComponents(replacements); 210 } 211 212 // static 213 bool SupervisedUserURLFilter::HasFilteredScheme(const GURL& url) { 214 for (size_t i = 0; i < arraysize(kFilteredSchemes); ++i) { 215 if (url.scheme() == kFilteredSchemes[i]) 216 return true; 217 } 218 return false; 219 } 220 221 std::string GetHostnameHash(const GURL& url) { 222 std::string hash = base::SHA1HashString(url.host()); 223 return base::HexEncode(hash.data(), hash.length()); 224 } 225 226 // static 227 bool SupervisedUserURLFilter::HostMatchesPattern(const std::string& host, 228 const std::string& pattern) { 229 std::string trimmed_pattern = pattern; 230 std::string trimmed_host = host; 231 if (EndsWith(pattern, ".*", true)) { 232 size_t registry_length = GetRegistryLength( 233 trimmed_host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES); 234 // A host without a known registry part does not match. 235 if (registry_length == 0) 236 return false; 237 238 trimmed_pattern.erase(trimmed_pattern.length() - 2); 239 trimmed_host.erase(trimmed_host.length() - (registry_length + 1)); 240 } 241 242 if (StartsWithASCII(trimmed_pattern, "*.", true)) { 243 trimmed_pattern.erase(0, 2); 244 245 // The remaining pattern should be non-empty, and it should not contain 246 // further stars. Also the trimmed host needs to end with the trimmed 247 // pattern. 248 if (trimmed_pattern.empty() || 249 trimmed_pattern.find('*') != std::string::npos || 250 !EndsWith(trimmed_host, trimmed_pattern, true)) { 251 return false; 252 } 253 254 // The trimmed host needs to have a dot separating the subdomain from the 255 // matched pattern piece, unless there is no subdomain. 256 int pos = trimmed_host.length() - trimmed_pattern.length(); 257 DCHECK_GE(pos, 0); 258 return (pos == 0) || (trimmed_host[pos - 1] == '.'); 259 } 260 261 return trimmed_host == trimmed_pattern; 262 } 263 264 SupervisedUserURLFilter::FilteringBehavior 265 SupervisedUserURLFilter::GetFilteringBehaviorForURL(const GURL& url) const { 266 DCHECK(CalledOnValidThread()); 267 268 // URLs with a non-standard scheme (e.g. chrome://) are always allowed. 269 if (!HasFilteredScheme(url)) 270 return ALLOW; 271 272 // Check manual overrides for the exact URL. 273 std::map<GURL, bool>::const_iterator url_it = url_map_.find(Normalize(url)); 274 if (url_it != url_map_.end()) 275 return url_it->second ? ALLOW : BLOCK; 276 277 // Check manual overrides for the hostname. 278 std::string host = url.host(); 279 std::map<std::string, bool>::const_iterator host_it = host_map_.find(host); 280 if (host_it != host_map_.end()) 281 return host_it->second ? ALLOW : BLOCK; 282 283 // Look for patterns matching the hostname, with a value that is different 284 // from the default (a value of true in the map meaning allowed). 285 for (std::map<std::string, bool>::const_iterator host_it = 286 host_map_.begin(); host_it != host_map_.end(); ++host_it) { 287 if ((host_it->second == (default_behavior_ == BLOCK)) && 288 HostMatchesPattern(host, host_it->first)) { 289 return host_it->second ? ALLOW : BLOCK; 290 } 291 } 292 293 // If there's no blacklist and the default behavior is to allow, we don't need 294 // to check anything else. 295 if (!blacklist_ && default_behavior_ == ALLOW) 296 return ALLOW; 297 298 // Check the list of URL patterns. 299 std::set<URLMatcherConditionSet::ID> matching_ids = 300 contents_->url_matcher.MatchURL(url); 301 if (!matching_ids.empty()) 302 return ALLOW; 303 304 // Check the list of hostname hashes. 305 if (contents_->hash_site_map.count(GetHostnameHash(url))) 306 return ALLOW; 307 308 // Check the static blacklist. 309 if (blacklist_ && blacklist_->HasURL(url)) 310 return BLOCK; 311 312 // Fall back to the default behavior. 313 return default_behavior_; 314 } 315 316 void SupervisedUserURLFilter::GetSites( 317 const GURL& url, 318 std::vector<SupervisedUserSiteList::Site*>* sites) const { 319 std::set<URLMatcherConditionSet::ID> matching_ids = 320 contents_->url_matcher.MatchURL(url); 321 for (std::set<URLMatcherConditionSet::ID>::const_iterator it = 322 matching_ids.begin(); it != matching_ids.end(); ++it) { 323 std::map<URLMatcherConditionSet::ID, int>::const_iterator entry = 324 contents_->matcher_site_map.find(*it); 325 if (entry == contents_->matcher_site_map.end()) { 326 NOTREACHED(); 327 continue; 328 } 329 sites->push_back(&contents_->sites[entry->second]); 330 } 331 332 typedef base::hash_multimap<std::string, int>::const_iterator 333 hash_site_map_iterator; 334 std::pair<hash_site_map_iterator, hash_site_map_iterator> bounds = 335 contents_->hash_site_map.equal_range(GetHostnameHash(url)); 336 for (hash_site_map_iterator hash_it = bounds.first; 337 hash_it != bounds.second; hash_it++) { 338 sites->push_back(&contents_->sites[hash_it->second]); 339 } 340 } 341 342 void SupervisedUserURLFilter::SetDefaultFilteringBehavior( 343 FilteringBehavior behavior) { 344 DCHECK(CalledOnValidThread()); 345 default_behavior_ = behavior; 346 } 347 348 void SupervisedUserURLFilter::LoadWhitelists( 349 ScopedVector<SupervisedUserSiteList> site_lists) { 350 DCHECK(CalledOnValidThread()); 351 352 base::PostTaskAndReplyWithResult( 353 BrowserThread::GetBlockingPool(), 354 FROM_HERE, 355 base::Bind(&LoadWhitelistsOnBlockingPoolThread, 356 base::Passed(&site_lists)), 357 base::Bind(&SupervisedUserURLFilter::SetContents, this)); 358 } 359 360 void SupervisedUserURLFilter::SetBlacklist(SupervisedUserBlacklist* blacklist) { 361 blacklist_ = blacklist; 362 } 363 364 void SupervisedUserURLFilter::SetFromPatterns( 365 const std::vector<std::string>& patterns) { 366 DCHECK(CalledOnValidThread()); 367 368 base::PostTaskAndReplyWithResult( 369 BrowserThread::GetBlockingPool(), 370 FROM_HERE, 371 base::Bind(&CreateWhitelistFromPatterns, patterns), 372 base::Bind(&SupervisedUserURLFilter::SetContents, this)); 373 } 374 375 void SupervisedUserURLFilter::SetManualHosts( 376 const std::map<std::string, bool>* host_map) { 377 DCHECK(CalledOnValidThread()); 378 host_map_ = *host_map; 379 UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualHostsEntries", 380 host_map->size(), 1, 1000, 50); 381 } 382 383 void SupervisedUserURLFilter::SetManualURLs( 384 const std::map<GURL, bool>* url_map) { 385 DCHECK(CalledOnValidThread()); 386 url_map_ = *url_map; 387 UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualURLsEntries", 388 url_map->size(), 1, 1000, 50); 389 } 390 391 void SupervisedUserURLFilter::AddObserver(Observer* observer) { 392 observers_.AddObserver(observer); 393 } 394 395 void SupervisedUserURLFilter::RemoveObserver(Observer* observer) { 396 observers_.RemoveObserver(observer); 397 } 398 399 void SupervisedUserURLFilter::SetContents(scoped_ptr<Contents> contents) { 400 DCHECK(CalledOnValidThread()); 401 contents_ = contents.Pass(); 402 FOR_EACH_OBSERVER(Observer, observers_, OnSiteListUpdated()); 403 } 404