1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/supervised_user/supervised_user_url_filter.h" 6 7 #include "base/containers/hash_tables.h" 8 #include "base/files/file_path.h" 9 #include "base/json/json_file_value_serializer.h" 10 #include "base/metrics/histogram.h" 11 #include "base/sha1.h" 12 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_util.h" 14 #include "base/task_runner_util.h" 15 #include "base/threading/sequenced_worker_pool.h" 16 #include "components/policy/core/browser/url_blacklist_manager.h" 17 #include "components/url_fixer/url_fixer.h" 18 #include "components/url_matcher/url_matcher.h" 19 #include "content/public/browser/browser_thread.h" 20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 21 #include "url/gurl.h" 22 23 using content::BrowserThread; 24 using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES; 25 using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES; 26 using net::registry_controlled_domains::GetRegistryLength; 27 using policy::URLBlacklist; 28 using url_matcher::URLMatcher; 29 using url_matcher::URLMatcherConditionSet; 30 31 struct SupervisedUserURLFilter::Contents { 32 URLMatcher url_matcher; 33 std::map<URLMatcherConditionSet::ID, int> matcher_site_map; 34 base::hash_multimap<std::string, int> hash_site_map; 35 std::vector<SupervisedUserSiteList::Site> sites; 36 }; 37 38 namespace { 39 40 // URL schemes not in this list (e.g., file:// and chrome://) will always be 41 // allowed. 42 const char* kFilteredSchemes[] = { 43 "http", 44 "https", 45 "ftp", 46 "gopher", 47 "ws", 48 "wss" 49 }; 50 51 52 // This class encapsulates all the state that is required during construction of 53 // a new SupervisedUserURLFilter::Contents. 54 class FilterBuilder { 55 public: 56 FilterBuilder(); 57 ~FilterBuilder(); 58 59 // Adds a single URL pattern for the site identified by |site_id|. 60 bool AddPattern(const std::string& pattern, int site_id); 61 62 // Adds a single hostname SHA1 hash for the site identified by |site_id|. 63 void AddHostnameHash(const std::string& hash, int site_id); 64 65 // Adds all the sites in |site_list|, with URL patterns and hostname hashes. 66 void AddSiteList(SupervisedUserSiteList* site_list); 67 68 // Finalizes construction of the SupervisedUserURLFilter::Contents and returns 69 // them. This method should be called before this object is destroyed. 70 scoped_ptr<SupervisedUserURLFilter::Contents> Build(); 71 72 private: 73 scoped_ptr<SupervisedUserURLFilter::Contents> contents_; 74 URLMatcherConditionSet::Vector all_conditions_; 75 URLMatcherConditionSet::ID matcher_id_; 76 }; 77 78 FilterBuilder::FilterBuilder() 79 : contents_(new SupervisedUserURLFilter::Contents()), 80 matcher_id_(0) {} 81 82 FilterBuilder::~FilterBuilder() { 83 DCHECK(!contents_.get()); 84 } 85 86 bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) { 87 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 88 std::string scheme; 89 std::string host; 90 uint16 port; 91 std::string path; 92 std::string query; 93 bool match_subdomains = true; 94 URLBlacklist::SegmentURLCallback callback = 95 static_cast<URLBlacklist::SegmentURLCallback>(url_fixer::SegmentURL); 96 if (!URLBlacklist::FilterToComponents( 97 callback, pattern, 98 &scheme, &host, &match_subdomains, &port, &path, &query)) { 99 LOG(ERROR) << "Invalid pattern " << pattern; 100 return false; 101 } 102 103 scoped_refptr<URLMatcherConditionSet> condition_set = 104 URLBlacklist::CreateConditionSet( 105 &contents_->url_matcher, ++matcher_id_, 106 scheme, host, match_subdomains, port, path, query, true); 107 all_conditions_.push_back(condition_set); 108 contents_->matcher_site_map[matcher_id_] = site_id; 109 return true; 110 } 111 112 void FilterBuilder::AddHostnameHash(const std::string& hash, int site_id) { 113 contents_->hash_site_map.insert(std::make_pair(StringToUpperASCII(hash), 114 site_id)); 115 } 116 117 void FilterBuilder::AddSiteList(SupervisedUserSiteList* site_list) { 118 std::vector<SupervisedUserSiteList::Site> sites; 119 site_list->GetSites(&sites); 120 int site_id = contents_->sites.size(); 121 for (std::vector<SupervisedUserSiteList::Site>::const_iterator it = 122 sites.begin(); it != sites.end(); ++it) { 123 const SupervisedUserSiteList::Site& site = *it; 124 contents_->sites.push_back(site); 125 126 for (std::vector<std::string>::const_iterator pattern_it = 127 site.patterns.begin(); 128 pattern_it != site.patterns.end(); ++pattern_it) { 129 AddPattern(*pattern_it, site_id); 130 } 131 132 for (std::vector<std::string>::const_iterator hash_it = 133 site.hostname_hashes.begin(); 134 hash_it != site.hostname_hashes.end(); ++hash_it) { 135 AddHostnameHash(*hash_it, site_id); 136 } 137 138 site_id++; 139 } 140 } 141 142 scoped_ptr<SupervisedUserURLFilter::Contents> FilterBuilder::Build() { 143 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 144 contents_->url_matcher.AddConditionSets(all_conditions_); 145 return contents_.Pass(); 146 } 147 148 scoped_ptr<SupervisedUserURLFilter::Contents> CreateWhitelistFromPatterns( 149 const std::vector<std::string>& patterns) { 150 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 151 152 FilterBuilder builder; 153 for (std::vector<std::string>::const_iterator it = patterns.begin(); 154 it != patterns.end(); ++it) { 155 // TODO(bauerb): We should create a fake site for the whitelist. 156 builder.AddPattern(*it, -1); 157 } 158 159 return builder.Build(); 160 } 161 162 scoped_ptr<SupervisedUserURLFilter::Contents> 163 LoadWhitelistsOnBlockingPoolThread( 164 ScopedVector<SupervisedUserSiteList> site_lists) { 165 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 166 167 FilterBuilder builder; 168 for (ScopedVector<SupervisedUserSiteList>::iterator it = site_lists.begin(); 169 it != site_lists.end(); ++it) { 170 builder.AddSiteList(*it); 171 } 172 173 return builder.Build(); 174 } 175 176 } // namespace 177 178 SupervisedUserURLFilter::SupervisedUserURLFilter() 179 : default_behavior_(ALLOW), 180 contents_(new Contents()) { 181 // Detach from the current thread so we can be constructed on a different 182 // thread than the one where we're used. 183 DetachFromThread(); 184 } 185 186 SupervisedUserURLFilter::~SupervisedUserURLFilter() { 187 DCHECK(CalledOnValidThread()); 188 } 189 190 // static 191 SupervisedUserURLFilter::FilteringBehavior 192 SupervisedUserURLFilter::BehaviorFromInt(int behavior_value) { 193 DCHECK_GE(behavior_value, ALLOW); 194 DCHECK_LE(behavior_value, BLOCK); 195 return static_cast<FilteringBehavior>(behavior_value); 196 } 197 198 // static 199 GURL SupervisedUserURLFilter::Normalize(const GURL& url) { 200 GURL normalized_url = url; 201 GURL::Replacements replacements; 202 // Strip username, password, query, and ref. 203 replacements.ClearUsername(); 204 replacements.ClearPassword(); 205 replacements.ClearQuery(); 206 replacements.ClearRef(); 207 return url.ReplaceComponents(replacements); 208 } 209 210 // static 211 bool SupervisedUserURLFilter::HasFilteredScheme(const GURL& url) { 212 for (size_t i = 0; i < arraysize(kFilteredSchemes); ++i) { 213 if (url.scheme() == kFilteredSchemes[i]) 214 return true; 215 } 216 return false; 217 } 218 219 std::string GetHostnameHash(const GURL& url) { 220 std::string hash = base::SHA1HashString(url.host()); 221 return base::HexEncode(hash.data(), hash.length()); 222 } 223 224 // static 225 bool SupervisedUserURLFilter::HostMatchesPattern(const std::string& host, 226 const std::string& pattern) { 227 std::string trimmed_pattern = pattern; 228 std::string trimmed_host = host; 229 if (EndsWith(pattern, ".*", true)) { 230 size_t registry_length = GetRegistryLength( 231 trimmed_host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES); 232 // A host without a known registry part does not match. 233 if (registry_length == 0) 234 return false; 235 236 trimmed_pattern.erase(trimmed_pattern.length() - 2); 237 trimmed_host.erase(trimmed_host.length() - (registry_length + 1)); 238 } 239 240 if (StartsWithASCII(trimmed_pattern, "*.", true)) { 241 trimmed_pattern.erase(0, 2); 242 243 // The remaining pattern should be non-empty, and it should not contain 244 // further stars. Also the trimmed host needs to end with the trimmed 245 // pattern. 246 if (trimmed_pattern.empty() || 247 trimmed_pattern.find('*') != std::string::npos || 248 !EndsWith(trimmed_host, trimmed_pattern, true)) { 249 return false; 250 } 251 252 // The trimmed host needs to have a dot separating the subdomain from the 253 // matched pattern piece, unless there is no subdomain. 254 int pos = trimmed_host.length() - trimmed_pattern.length(); 255 DCHECK_GE(pos, 0); 256 return (pos == 0) || (trimmed_host[pos - 1] == '.'); 257 } 258 259 return trimmed_host == trimmed_pattern; 260 } 261 262 SupervisedUserURLFilter::FilteringBehavior 263 SupervisedUserURLFilter::GetFilteringBehaviorForURL(const GURL& url) const { 264 DCHECK(CalledOnValidThread()); 265 266 // URLs with a non-standard scheme (e.g. chrome://) are always allowed. 267 if (!HasFilteredScheme(url)) 268 return ALLOW; 269 270 // Check manual overrides for the exact URL. 271 std::map<GURL, bool>::const_iterator url_it = url_map_.find(Normalize(url)); 272 if (url_it != url_map_.end()) 273 return url_it->second ? ALLOW : BLOCK; 274 275 // Check manual overrides for the hostname. 276 std::string host = url.host(); 277 std::map<std::string, bool>::const_iterator host_it = host_map_.find(host); 278 if (host_it != host_map_.end()) 279 return host_it->second ? ALLOW : BLOCK; 280 281 // Look for patterns matching the hostname, with a value that is different 282 // from the default (a value of true in the map meaning allowed). 283 for (std::map<std::string, bool>::const_iterator host_it = 284 host_map_.begin(); host_it != host_map_.end(); ++host_it) { 285 if ((host_it->second == (default_behavior_ == BLOCK)) && 286 HostMatchesPattern(host, host_it->first)) { 287 return host_it->second ? ALLOW : BLOCK; 288 } 289 } 290 291 // If the default behavior is to allow, we don't need to check anything else. 292 if (default_behavior_ == ALLOW) 293 return ALLOW; 294 295 // Check the list of URL patterns. 296 std::set<URLMatcherConditionSet::ID> matching_ids = 297 contents_->url_matcher.MatchURL(url); 298 if (!matching_ids.empty()) 299 return ALLOW; 300 301 // Check the list of hostname hashes. 302 if (contents_->hash_site_map.count(GetHostnameHash(url))) 303 return ALLOW; 304 305 // Fall back to the default behavior. 306 return default_behavior_; 307 } 308 309 void SupervisedUserURLFilter::GetSites( 310 const GURL& url, 311 std::vector<SupervisedUserSiteList::Site*>* sites) const { 312 std::set<URLMatcherConditionSet::ID> matching_ids = 313 contents_->url_matcher.MatchURL(url); 314 for (std::set<URLMatcherConditionSet::ID>::const_iterator it = 315 matching_ids.begin(); it != matching_ids.end(); ++it) { 316 std::map<URLMatcherConditionSet::ID, int>::const_iterator entry = 317 contents_->matcher_site_map.find(*it); 318 if (entry == contents_->matcher_site_map.end()) { 319 NOTREACHED(); 320 continue; 321 } 322 sites->push_back(&contents_->sites[entry->second]); 323 } 324 325 typedef base::hash_multimap<std::string, int>::const_iterator 326 hash_site_map_iterator; 327 std::pair<hash_site_map_iterator, hash_site_map_iterator> bounds = 328 contents_->hash_site_map.equal_range(GetHostnameHash(url)); 329 for (hash_site_map_iterator hash_it = bounds.first; 330 hash_it != bounds.second; hash_it++) { 331 sites->push_back(&contents_->sites[hash_it->second]); 332 } 333 } 334 335 void SupervisedUserURLFilter::SetDefaultFilteringBehavior( 336 FilteringBehavior behavior) { 337 DCHECK(CalledOnValidThread()); 338 default_behavior_ = behavior; 339 } 340 341 void SupervisedUserURLFilter::LoadWhitelists( 342 ScopedVector<SupervisedUserSiteList> site_lists) { 343 DCHECK(CalledOnValidThread()); 344 345 base::PostTaskAndReplyWithResult( 346 BrowserThread::GetBlockingPool(), 347 FROM_HERE, 348 base::Bind(&LoadWhitelistsOnBlockingPoolThread, 349 base::Passed(&site_lists)), 350 base::Bind(&SupervisedUserURLFilter::SetContents, this)); 351 } 352 353 void SupervisedUserURLFilter::SetFromPatterns( 354 const std::vector<std::string>& patterns) { 355 DCHECK(CalledOnValidThread()); 356 357 base::PostTaskAndReplyWithResult( 358 BrowserThread::GetBlockingPool(), 359 FROM_HERE, 360 base::Bind(&CreateWhitelistFromPatterns, patterns), 361 base::Bind(&SupervisedUserURLFilter::SetContents, this)); 362 } 363 364 void SupervisedUserURLFilter::SetManualHosts( 365 const std::map<std::string, bool>* host_map) { 366 DCHECK(CalledOnValidThread()); 367 host_map_ = *host_map; 368 UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualHostsEntries", 369 host_map->size(), 1, 1000, 50); 370 } 371 372 void SupervisedUserURLFilter::SetManualURLs( 373 const std::map<GURL, bool>* url_map) { 374 DCHECK(CalledOnValidThread()); 375 url_map_ = *url_map; 376 UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualURLsEntries", 377 url_map->size(), 1, 1000, 50); 378 } 379 380 void SupervisedUserURLFilter::AddObserver(Observer* observer) { 381 observers_.AddObserver(observer); 382 } 383 384 void SupervisedUserURLFilter::RemoveObserver(Observer* observer) { 385 observers_.RemoveObserver(observer); 386 } 387 388 void SupervisedUserURLFilter::SetContents(scoped_ptr<Contents> contents) { 389 DCHECK(CalledOnValidThread()); 390 contents_ = contents.Pass(); 391 FOR_EACH_OBSERVER(Observer, observers_, OnSiteListUpdated()); 392 } 393