1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/managed_mode/managed_mode_url_filter.h" 6 7 #include "base/containers/hash_tables.h" 8 #include "base/files/file_path.h" 9 #include "base/json/json_file_value_serializer.h" 10 #include "base/metrics/histogram.h" 11 #include "base/sha1.h" 12 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_util.h" 14 #include "base/task_runner_util.h" 15 #include "base/threading/sequenced_worker_pool.h" 16 #include "content/public/browser/browser_thread.h" 17 #include "extensions/common/matcher/url_matcher.h" 18 #include "url/gurl.h" 19 20 using content::BrowserThread; 21 using extensions::URLMatcher; 22 using extensions::URLMatcherConditionSet; 23 24 struct ManagedModeURLFilter::Contents { 25 URLMatcher url_matcher; 26 std::map<URLMatcherConditionSet::ID, int> matcher_site_map; 27 base::hash_multimap<std::string, int> hash_site_map; 28 std::vector<ManagedModeSiteList::Site> sites; 29 }; 30 31 namespace { 32 33 const char* kStandardSchemes[] = { 34 "http", 35 "https", 36 "file", 37 "ftp", 38 "gopher", 39 "ws", 40 "wss" 41 }; 42 43 44 // This class encapsulates all the state that is required during construction of 45 // a new ManagedModeURLFilter::Contents. 46 class FilterBuilder { 47 public: 48 FilterBuilder(); 49 ~FilterBuilder(); 50 51 // Adds a single URL pattern for the site identified by |site_id|. 52 bool AddPattern(const std::string& pattern, int site_id); 53 54 // Adds a single hostname SHA1 hash for the site identified by |site_id|. 55 void AddHostnameHash(const std::string& hash, int site_id); 56 57 // Adds all the sites in |site_list|, with URL patterns and hostname hashes. 58 void AddSiteList(ManagedModeSiteList* site_list); 59 60 // Finalizes construction of the ManagedModeURLFilter::Contents and returns 61 // them. This method should be called before this object is destroyed. 62 scoped_ptr<ManagedModeURLFilter::Contents> Build(); 63 64 private: 65 scoped_ptr<ManagedModeURLFilter::Contents> contents_; 66 URLMatcherConditionSet::Vector all_conditions_; 67 URLMatcherConditionSet::ID matcher_id_; 68 }; 69 70 FilterBuilder::FilterBuilder() 71 : contents_(new ManagedModeURLFilter::Contents()), 72 matcher_id_(0) {} 73 74 FilterBuilder::~FilterBuilder() { 75 DCHECK(!contents_.get()); 76 } 77 78 bool FilterBuilder::AddPattern(const std::string& pattern, int site_id) { 79 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 80 #if defined(ENABLE_CONFIGURATION_POLICY) 81 std::string scheme; 82 std::string host; 83 uint16 port; 84 std::string path; 85 bool match_subdomains = true; 86 if (!policy::URLBlacklist::FilterToComponents( 87 pattern, &scheme, &host, &match_subdomains, &port, &path)) { 88 LOG(ERROR) << "Invalid pattern " << pattern; 89 return false; 90 } 91 92 scoped_refptr<extensions::URLMatcherConditionSet> condition_set = 93 policy::URLBlacklist::CreateConditionSet( 94 &contents_->url_matcher, ++matcher_id_, 95 scheme, host, match_subdomains, port, path); 96 all_conditions_.push_back(condition_set); 97 contents_->matcher_site_map[matcher_id_] = site_id; 98 return true; 99 #else 100 NOTREACHED(); 101 return false; 102 #endif 103 } 104 105 void FilterBuilder::AddHostnameHash(const std::string& hash, int site_id) { 106 contents_->hash_site_map.insert(std::make_pair(StringToUpperASCII(hash), 107 site_id)); 108 } 109 110 void FilterBuilder::AddSiteList(ManagedModeSiteList* site_list) { 111 std::vector<ManagedModeSiteList::Site> sites; 112 site_list->GetSites(&sites); 113 int site_id = contents_->sites.size(); 114 for (std::vector<ManagedModeSiteList::Site>::const_iterator it = 115 sites.begin(); it != sites.end(); ++it) { 116 const ManagedModeSiteList::Site& site = *it; 117 contents_->sites.push_back(site); 118 119 for (std::vector<std::string>::const_iterator pattern_it = 120 site.patterns.begin(); 121 pattern_it != site.patterns.end(); ++pattern_it) { 122 AddPattern(*pattern_it, site_id); 123 } 124 125 for (std::vector<std::string>::const_iterator hash_it = 126 site.hostname_hashes.begin(); 127 hash_it != site.hostname_hashes.end(); ++hash_it) { 128 AddHostnameHash(*hash_it, site_id); 129 } 130 131 site_id++; 132 } 133 } 134 135 scoped_ptr<ManagedModeURLFilter::Contents> FilterBuilder::Build() { 136 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 137 contents_->url_matcher.AddConditionSets(all_conditions_); 138 return contents_.Pass(); 139 } 140 141 scoped_ptr<ManagedModeURLFilter::Contents> CreateWhitelistFromPatterns( 142 const std::vector<std::string>& patterns) { 143 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 144 145 FilterBuilder builder; 146 for (std::vector<std::string>::const_iterator it = patterns.begin(); 147 it != patterns.end(); ++it) { 148 // TODO(bauerb): We should create a fake site for the whitelist. 149 builder.AddPattern(*it, -1); 150 } 151 152 return builder.Build(); 153 } 154 155 scoped_ptr<ManagedModeURLFilter::Contents> LoadWhitelistsOnBlockingPoolThread( 156 ScopedVector<ManagedModeSiteList> site_lists) { 157 DCHECK(BrowserThread::GetBlockingPool()->RunsTasksOnCurrentThread()); 158 159 FilterBuilder builder; 160 for (ScopedVector<ManagedModeSiteList>::iterator it = site_lists.begin(); 161 it != site_lists.end(); ++it) { 162 builder.AddSiteList(*it); 163 } 164 165 return builder.Build(); 166 } 167 168 } // namespace 169 170 ManagedModeURLFilter::ManagedModeURLFilter() 171 : default_behavior_(ALLOW), 172 contents_(new Contents()) { 173 // Detach from the current thread so we can be constructed on a different 174 // thread than the one where we're used. 175 DetachFromThread(); 176 } 177 178 ManagedModeURLFilter::~ManagedModeURLFilter() { 179 DCHECK(CalledOnValidThread()); 180 } 181 182 // static 183 ManagedModeURLFilter::FilteringBehavior 184 ManagedModeURLFilter::BehaviorFromInt(int behavior_value) { 185 DCHECK_GE(behavior_value, ALLOW); 186 DCHECK_LE(behavior_value, BLOCK); 187 return static_cast<FilteringBehavior>(behavior_value); 188 } 189 190 // static 191 GURL ManagedModeURLFilter::Normalize(const GURL& url) { 192 GURL normalized_url = url; 193 GURL::Replacements replacements; 194 // Strip username, password, query, and ref. 195 replacements.ClearUsername(); 196 replacements.ClearPassword(); 197 replacements.ClearQuery(); 198 replacements.ClearRef(); 199 return url.ReplaceComponents(replacements); 200 } 201 202 // static 203 bool ManagedModeURLFilter::HasStandardScheme(const GURL& url) { 204 for (size_t i = 0; i < arraysize(kStandardSchemes); ++i) { 205 if (url.scheme() == kStandardSchemes[i]) 206 return true; 207 } 208 return false; 209 } 210 211 ManagedModeURLFilter::FilteringBehavior 212 ManagedModeURLFilter::GetFilteringBehaviorForURL(const GURL& url) const { 213 DCHECK(CalledOnValidThread()); 214 215 // URLs with a non-standard scheme (e.g. chrome://) are always allowed. 216 if (!HasStandardScheme(url)) 217 return ALLOW; 218 219 // Check manual overrides for the exact URL. 220 std::map<GURL, bool>::const_iterator url_it = url_map_.find(Normalize(url)); 221 if (url_it != url_map_.end()) 222 return url_it->second ? ALLOW : BLOCK; 223 224 // Check manual overrides for the hostname. 225 std::map<std::string, bool>::const_iterator host_it = 226 host_map_.find(url.host()); 227 if (host_it != host_map_.end()) 228 return host_it->second ? ALLOW : BLOCK; 229 230 // If the default behavior is to allow, we don't need to check anything else. 231 if (default_behavior_ == ALLOW) 232 return ALLOW; 233 234 // Check the list of URL patterns. 235 std::set<URLMatcherConditionSet::ID> matching_ids = 236 contents_->url_matcher.MatchURL(url); 237 if (!matching_ids.empty()) 238 return ALLOW; 239 240 // Check the list of hostname hashes. 241 std::string hash = base::SHA1HashString(url.host()); 242 std::string hash_hex = base::HexEncode(hash.data(), hash.length()); 243 if (contents_->hash_site_map.count(hash_hex)) 244 return ALLOW; 245 246 // Fall back to the default behavior. 247 return default_behavior_; 248 } 249 250 void ManagedModeURLFilter::GetSites( 251 const GURL& url, 252 std::vector<ManagedModeSiteList::Site*>* sites) const { 253 std::set<URLMatcherConditionSet::ID> matching_ids = 254 contents_->url_matcher.MatchURL(url); 255 for (std::set<URLMatcherConditionSet::ID>::const_iterator it = 256 matching_ids.begin(); it != matching_ids.end(); ++it) { 257 std::map<URLMatcherConditionSet::ID, int>::const_iterator entry = 258 contents_->matcher_site_map.find(*it); 259 if (entry == contents_->matcher_site_map.end()) { 260 NOTREACHED(); 261 continue; 262 } 263 sites->push_back(&contents_->sites[entry->second]); 264 } 265 266 typedef base::hash_map<std::string, int>::const_iterator 267 hash_site_map_iterator; 268 std::pair<hash_site_map_iterator, hash_site_map_iterator> bounds = 269 contents_->hash_site_map.equal_range(url.host()); 270 for (hash_site_map_iterator hash_it = bounds.first; 271 hash_it != bounds.second; hash_it++) { 272 sites->push_back(&contents_->sites[hash_it->second]); 273 } 274 } 275 276 void ManagedModeURLFilter::SetDefaultFilteringBehavior( 277 FilteringBehavior behavior) { 278 DCHECK(CalledOnValidThread()); 279 default_behavior_ = behavior; 280 } 281 282 void ManagedModeURLFilter::LoadWhitelists( 283 ScopedVector<ManagedModeSiteList> site_lists) { 284 DCHECK(CalledOnValidThread()); 285 286 base::PostTaskAndReplyWithResult( 287 BrowserThread::GetBlockingPool(), 288 FROM_HERE, 289 base::Bind(&LoadWhitelistsOnBlockingPoolThread, 290 base::Passed(&site_lists)), 291 base::Bind(&ManagedModeURLFilter::SetContents, this)); 292 } 293 294 void ManagedModeURLFilter::SetFromPatterns( 295 const std::vector<std::string>& patterns) { 296 DCHECK(CalledOnValidThread()); 297 298 base::PostTaskAndReplyWithResult( 299 BrowserThread::GetBlockingPool(), 300 FROM_HERE, 301 base::Bind(&CreateWhitelistFromPatterns, patterns), 302 base::Bind(&ManagedModeURLFilter::SetContents, this)); 303 } 304 305 void ManagedModeURLFilter::SetManualHosts( 306 const std::map<std::string, bool>* host_map) { 307 DCHECK(CalledOnValidThread()); 308 host_map_ = *host_map; 309 UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualHostsEntries", 310 host_map->size(), 1, 1000, 50); 311 } 312 313 void ManagedModeURLFilter::SetManualURLs( 314 const std::map<GURL, bool>* url_map) { 315 DCHECK(CalledOnValidThread()); 316 url_map_ = *url_map; 317 UMA_HISTOGRAM_CUSTOM_COUNTS("ManagedMode.ManualURLsEntries", 318 url_map->size(), 1, 1000, 50); 319 } 320 321 void ManagedModeURLFilter::AddObserver(Observer* observer) { 322 observers_.AddObserver(observer); 323 } 324 325 void ManagedModeURLFilter::RemoveObserver(Observer* observer) { 326 observers_.RemoveObserver(observer); 327 } 328 329 void ManagedModeURLFilter::SetContents(scoped_ptr<Contents> contents) { 330 DCHECK(CalledOnValidThread()); 331 contents_ = contents.Pass(); 332 FOR_EACH_OBSERVER(Observer, observers_, OnSiteListUpdated()); 333 } 334