1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/policy/url_blacklist_manager.h" 6 7 #include "base/bind.h" 8 #include "base/files/file_path.h" 9 #include "base/message_loop/message_loop.h" 10 #include "base/prefs/pref_service.h" 11 #include "base/stl_util.h" 12 #include "base/strings/string_number_conversions.h" 13 #include "base/values.h" 14 #include "chrome/browser/chrome_notification_types.h" 15 #include "chrome/common/net/url_fixer_upper.h" 16 #include "chrome/common/pref_names.h" 17 #include "components/user_prefs/pref_registry_syncable.h" 18 #include "content/public/browser/browser_thread.h" 19 #include "content/public/browser/notification_details.h" 20 #include "content/public/browser/notification_source.h" 21 #include "content/public/common/url_constants.h" 22 #include "google_apis/gaia/gaia_urls.h" 23 #include "net/base/load_flags.h" 24 #include "net/base/net_util.h" 25 #include "net/url_request/url_request.h" 26 #include "url/gurl.h" 27 28 #if !defined(OS_CHROMEOS) 29 #include "chrome/browser/signin/signin_manager.h" 30 #endif 31 32 using content::BrowserThread; 33 using extensions::URLMatcher; 34 using extensions::URLMatcherCondition; 35 using extensions::URLMatcherConditionFactory; 36 using extensions::URLMatcherConditionSet; 37 using extensions::URLMatcherPortFilter; 38 using extensions::URLMatcherSchemeFilter; 39 40 namespace policy { 41 42 namespace { 43 44 // Maximum filters per policy. Filters over this index are ignored. 45 const size_t kMaxFiltersPerPolicy = 1000; 46 47 const char kServiceLoginAuth[] = "/ServiceLoginAuth"; 48 49 #if !defined(OS_CHROMEOS) 50 51 bool IsSigninFlowURL(const GURL& url) { 52 // Whitelist all the signin flow URLs flagged by the SigninManager. 53 if (SigninManager::IsWebBasedSigninFlowURL(url)) 54 return true; 55 56 // Additionally whitelist /ServiceLoginAuth. 57 if (url.GetOrigin() != GaiaUrls::GetInstance()->gaia_url().GetOrigin()) 58 return false; 59 return url.path() == kServiceLoginAuth; 60 } 61 62 #endif // !defined(OS_CHROMEOS) 63 64 // A task that builds the blacklist on the FILE thread. 65 scoped_ptr<URLBlacklist> BuildBlacklist(scoped_ptr<base::ListValue> block, 66 scoped_ptr<base::ListValue> allow) { 67 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 68 69 scoped_ptr<URLBlacklist> blacklist(new URLBlacklist); 70 blacklist->Block(block.get()); 71 blacklist->Allow(allow.get()); 72 return blacklist.Pass(); 73 } 74 75 } // namespace 76 77 struct URLBlacklist::FilterComponents { 78 FilterComponents() : port(0), match_subdomains(true), allow(true) {} 79 ~FilterComponents() {} 80 81 std::string scheme; 82 std::string host; 83 uint16 port; 84 std::string path; 85 bool match_subdomains; 86 bool allow; 87 }; 88 89 URLBlacklist::URLBlacklist() : id_(0), 90 url_matcher_(new URLMatcher) { 91 } 92 93 URLBlacklist::~URLBlacklist() { 94 } 95 96 void URLBlacklist::AddFilters(bool allow, 97 const base::ListValue* list) { 98 URLMatcherConditionSet::Vector all_conditions; 99 size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize()); 100 for (size_t i = 0; i < size; ++i) { 101 std::string pattern; 102 bool success = list->GetString(i, &pattern); 103 DCHECK(success); 104 FilterComponents components; 105 components.allow = allow; 106 if (!FilterToComponents(pattern, &components.scheme, &components.host, 107 &components.match_subdomains, &components.port, 108 &components.path)) { 109 LOG(ERROR) << "Invalid pattern " << pattern; 110 continue; 111 } 112 113 all_conditions.push_back( 114 CreateConditionSet(url_matcher_.get(), ++id_, components.scheme, 115 components.host, components.match_subdomains, 116 components.port, components.path)); 117 filters_[id_] = components; 118 } 119 url_matcher_->AddConditionSets(all_conditions); 120 } 121 122 void URLBlacklist::Block(const base::ListValue* filters) { 123 AddFilters(false, filters); 124 } 125 126 void URLBlacklist::Allow(const base::ListValue* filters) { 127 AddFilters(true, filters); 128 } 129 130 bool URLBlacklist::IsURLBlocked(const GURL& url) const { 131 std::set<URLMatcherConditionSet::ID> matching_ids = 132 url_matcher_->MatchURL(url); 133 134 const FilterComponents* max = NULL; 135 for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin(); 136 id != matching_ids.end(); ++id) { 137 std::map<int, FilterComponents>::const_iterator it = filters_.find(*id); 138 DCHECK(it != filters_.end()); 139 const FilterComponents& filter = it->second; 140 if (!max || FilterTakesPrecedence(filter, *max)) 141 max = &filter; 142 } 143 144 // Default to allow. 145 if (!max) 146 return false; 147 148 return !max->allow; 149 } 150 151 size_t URLBlacklist::Size() const { 152 return filters_.size(); 153 } 154 155 // static 156 bool URLBlacklist::FilterToComponents(const std::string& filter, 157 std::string* scheme, 158 std::string* host, 159 bool* match_subdomains, 160 uint16* port, 161 std::string* path) { 162 url_parse::Parsed parsed; 163 164 if (URLFixerUpper::SegmentURL(filter, &parsed) == chrome::kFileScheme) { 165 base::FilePath file_path; 166 if (!net::FileURLToFilePath(GURL(filter), &file_path)) 167 return false; 168 169 *scheme = chrome::kFileScheme; 170 host->clear(); 171 *match_subdomains = true; 172 *port = 0; 173 // Special path when the |filter| is 'file://*'. 174 *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe(); 175 #if defined(FILE_PATH_USES_WIN_SEPARATORS) 176 // Separators have to be canonicalized on Windows. 177 std::replace(path->begin(), path->end(), '\\', '/'); 178 *path = "/" + *path; 179 #endif 180 return true; 181 } 182 183 if (!parsed.host.is_nonempty()) 184 return false; 185 186 if (parsed.scheme.is_nonempty()) 187 scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len); 188 else 189 scheme->clear(); 190 191 host->assign(filter, parsed.host.begin, parsed.host.len); 192 // Special '*' host, matches all hosts. 193 if (*host == "*") { 194 host->clear(); 195 *match_subdomains = true; 196 } else if ((*host)[0] == '.') { 197 // A leading dot in the pattern syntax means that we don't want to match 198 // subdomains. 199 host->erase(0, 1); 200 *match_subdomains = false; 201 } else { 202 url_canon::RawCanonOutputT<char> output; 203 url_canon::CanonHostInfo host_info; 204 url_canon::CanonicalizeHostVerbose(filter.c_str(), parsed.host, 205 &output, &host_info); 206 if (host_info.family == url_canon::CanonHostInfo::NEUTRAL) { 207 // We want to match subdomains. Add a dot in front to make sure we only 208 // match at domain component boundaries. 209 *host = "." + *host; 210 *match_subdomains = true; 211 } else { 212 *match_subdomains = false; 213 } 214 } 215 216 if (parsed.port.is_nonempty()) { 217 int int_port; 218 if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len), 219 &int_port)) { 220 return false; 221 } 222 if (int_port <= 0 || int_port > kuint16max) 223 return false; 224 *port = int_port; 225 } else { 226 // Match any port. 227 *port = 0; 228 } 229 230 if (parsed.path.is_nonempty()) 231 path->assign(filter, parsed.path.begin, parsed.path.len); 232 else 233 path->clear(); 234 235 return true; 236 } 237 238 // static 239 scoped_refptr<extensions::URLMatcherConditionSet> 240 URLBlacklist::CreateConditionSet( 241 extensions::URLMatcher* url_matcher, 242 int id, 243 const std::string& scheme, 244 const std::string& host, 245 bool match_subdomains, 246 uint16 port, 247 const std::string& path) { 248 URLMatcherConditionFactory* condition_factory = 249 url_matcher->condition_factory(); 250 std::set<URLMatcherCondition> conditions; 251 conditions.insert(match_subdomains ? 252 condition_factory->CreateHostSuffixPathPrefixCondition(host, path) : 253 condition_factory->CreateHostEqualsPathPrefixCondition(host, path)); 254 255 scoped_ptr<URLMatcherSchemeFilter> scheme_filter; 256 if (!scheme.empty()) 257 scheme_filter.reset(new URLMatcherSchemeFilter(scheme)); 258 259 scoped_ptr<URLMatcherPortFilter> port_filter; 260 if (port != 0) { 261 std::vector<URLMatcherPortFilter::Range> ranges; 262 ranges.push_back(URLMatcherPortFilter::CreateRange(port)); 263 port_filter.reset(new URLMatcherPortFilter(ranges)); 264 } 265 266 return new URLMatcherConditionSet(id, conditions, 267 scheme_filter.Pass(), port_filter.Pass()); 268 } 269 270 // static 271 bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs, 272 const FilterComponents& rhs) { 273 if (lhs.match_subdomains && !rhs.match_subdomains) 274 return false; 275 if (!lhs.match_subdomains && rhs.match_subdomains) 276 return true; 277 278 size_t host_length = lhs.host.length(); 279 size_t other_host_length = rhs.host.length(); 280 if (host_length != other_host_length) 281 return host_length > other_host_length; 282 283 size_t path_length = lhs.path.length(); 284 size_t other_path_length = rhs.path.length(); 285 if (path_length != other_path_length) 286 return path_length > other_path_length; 287 288 if (lhs.allow && !rhs.allow) 289 return true; 290 291 return false; 292 } 293 294 URLBlacklistManager::URLBlacklistManager(PrefService* pref_service) 295 : ui_weak_ptr_factory_(this), 296 pref_service_(pref_service), 297 io_weak_ptr_factory_(this), 298 blacklist_(new URLBlacklist) { 299 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 300 301 pref_change_registrar_.Init(pref_service_); 302 base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate, 303 base::Unretained(this)); 304 pref_change_registrar_.Add(prefs::kUrlBlacklist, callback); 305 pref_change_registrar_.Add(prefs::kUrlWhitelist, callback); 306 307 // Start enforcing the policies without a delay when they are present at 308 // startup. 309 if (pref_service_->HasPrefPath(prefs::kUrlBlacklist)) 310 Update(); 311 } 312 313 void URLBlacklistManager::ShutdownOnUIThread() { 314 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 315 // Cancel any pending updates, and stop listening for pref change updates. 316 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 317 pref_change_registrar_.RemoveAll(); 318 } 319 320 URLBlacklistManager::~URLBlacklistManager() { 321 } 322 323 void URLBlacklistManager::ScheduleUpdate() { 324 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 325 // Cancel pending updates, if any. This can happen if two preferences that 326 // change the blacklist are updated in one message loop cycle. In those cases, 327 // only rebuild the blacklist after all the preference updates are processed. 328 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 329 base::MessageLoop::current()->PostTask( 330 FROM_HERE, 331 base::Bind(&URLBlacklistManager::Update, 332 ui_weak_ptr_factory_.GetWeakPtr())); 333 } 334 335 void URLBlacklistManager::Update() { 336 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 337 338 // The preferences can only be read on the UI thread. 339 scoped_ptr<base::ListValue> block( 340 pref_service_->GetList(prefs::kUrlBlacklist)->DeepCopy()); 341 scoped_ptr<base::ListValue> allow( 342 pref_service_->GetList(prefs::kUrlWhitelist)->DeepCopy()); 343 344 // Go through the IO thread to grab a WeakPtr to |this|. This is safe from 345 // here, since this task will always execute before a potential deletion of 346 // ProfileIOData on IO. 347 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 348 base::Bind(&URLBlacklistManager::UpdateOnIO, 349 base::Unretained(this), 350 base::Passed(&block), 351 base::Passed(&allow))); 352 } 353 354 void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block, 355 scoped_ptr<base::ListValue> allow) { 356 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 357 // The URLBlacklist is built on the FILE thread. Once it's ready, it is passed 358 // to the URLBlacklistManager on IO. 359 BrowserThread::PostTaskAndReplyWithResult( 360 BrowserThread::FILE, FROM_HERE, 361 base::Bind(&BuildBlacklist, 362 base::Passed(&block), 363 base::Passed(&allow)), 364 base::Bind(&URLBlacklistManager::SetBlacklist, 365 io_weak_ptr_factory_.GetWeakPtr())); 366 } 367 368 void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) { 369 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 370 blacklist_ = blacklist.Pass(); 371 } 372 373 bool URLBlacklistManager::IsURLBlocked(const GURL& url) const { 374 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 375 return blacklist_->IsURLBlocked(url); 376 } 377 378 bool URLBlacklistManager::IsRequestBlocked( 379 const net::URLRequest& request) const { 380 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 381 int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME; 382 if ((request.load_flags() & filter_flags) == 0) 383 return false; 384 385 #if !defined(OS_CHROMEOS) 386 if (IsSigninFlowURL(request.url())) 387 return false; 388 #endif 389 390 return IsURLBlocked(request.url()); 391 } 392 393 // static 394 void URLBlacklistManager::RegisterProfilePrefs( 395 user_prefs::PrefRegistrySyncable* registry) { 396 registry->RegisterListPref(prefs::kUrlBlacklist, 397 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 398 registry->RegisterListPref(prefs::kUrlWhitelist, 399 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 400 } 401 402 } // namespace policy 403