1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/policy/url_blacklist_manager.h" 6 7 #include "base/bind.h" 8 #include "base/files/file_path.h" 9 #include "base/message_loop/message_loop.h" 10 #include "base/prefs/pref_service.h" 11 #include "base/stl_util.h" 12 #include "base/strings/string_number_conversions.h" 13 #include "base/values.h" 14 #include "chrome/browser/chrome_notification_types.h" 15 #include "chrome/common/net/url_fixer_upper.h" 16 #include "chrome/common/pref_names.h" 17 #include "components/user_prefs/pref_registry_syncable.h" 18 #include "content/public/browser/browser_thread.h" 19 #include "content/public/browser/notification_details.h" 20 #include "content/public/browser/notification_source.h" 21 #include "content/public/common/url_constants.h" 22 #include "google_apis/gaia/gaia_urls.h" 23 #include "net/base/load_flags.h" 24 #include "net/base/net_util.h" 25 #include "net/url_request/url_request.h" 26 #include "url/gurl.h" 27 28 #if !defined(OS_CHROMEOS) 29 #include "chrome/browser/signin/signin_manager.h" 30 #endif 31 32 using content::BrowserThread; 33 using url_matcher::URLMatcher; 34 using url_matcher::URLMatcherCondition; 35 using url_matcher::URLMatcherConditionFactory; 36 using url_matcher::URLMatcherConditionSet; 37 using url_matcher::URLMatcherPortFilter; 38 using url_matcher::URLMatcherSchemeFilter; 39 40 namespace policy { 41 42 namespace { 43 44 // Maximum filters per policy. Filters over this index are ignored. 45 const size_t kMaxFiltersPerPolicy = 1000; 46 47 #if !defined(OS_CHROMEOS) 48 49 const char kServiceLoginAuth[] = "/ServiceLoginAuth"; 50 51 bool IsSigninFlowURL(const GURL& url) { 52 // Whitelist all the signin flow URLs flagged by the SigninManager. 53 if (SigninManager::IsWebBasedSigninFlowURL(url)) 54 return true; 55 56 // Additionally whitelist /ServiceLoginAuth. 57 if (url.GetOrigin() != GaiaUrls::GetInstance()->gaia_url().GetOrigin()) 58 return false; 59 return url.path() == kServiceLoginAuth; 60 } 61 62 #endif // !defined(OS_CHROMEOS) 63 64 // A task that builds the blacklist on the FILE thread. 65 scoped_ptr<URLBlacklist> BuildBlacklist(scoped_ptr<base::ListValue> block, 66 scoped_ptr<base::ListValue> allow) { 67 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 68 69 scoped_ptr<URLBlacklist> blacklist(new URLBlacklist); 70 blacklist->Block(block.get()); 71 blacklist->Allow(allow.get()); 72 return blacklist.Pass(); 73 } 74 75 } // namespace 76 77 struct URLBlacklist::FilterComponents { 78 FilterComponents() : port(0), match_subdomains(true), allow(true) {} 79 ~FilterComponents() {} 80 81 std::string scheme; 82 std::string host; 83 uint16 port; 84 std::string path; 85 bool match_subdomains; 86 bool allow; 87 }; 88 89 URLBlacklist::URLBlacklist() : id_(0), 90 url_matcher_(new URLMatcher) { 91 } 92 93 URLBlacklist::~URLBlacklist() { 94 } 95 96 void URLBlacklist::AddFilters(bool allow, 97 const base::ListValue* list) { 98 URLMatcherConditionSet::Vector all_conditions; 99 size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize()); 100 for (size_t i = 0; i < size; ++i) { 101 std::string pattern; 102 bool success = list->GetString(i, &pattern); 103 DCHECK(success); 104 FilterComponents components; 105 components.allow = allow; 106 if (!FilterToComponents(pattern, &components.scheme, &components.host, 107 &components.match_subdomains, &components.port, 108 &components.path)) { 109 LOG(ERROR) << "Invalid pattern " << pattern; 110 continue; 111 } 112 113 all_conditions.push_back( 114 CreateConditionSet(url_matcher_.get(), ++id_, components.scheme, 115 components.host, components.match_subdomains, 116 components.port, components.path)); 117 filters_[id_] = components; 118 } 119 url_matcher_->AddConditionSets(all_conditions); 120 } 121 122 void URLBlacklist::Block(const base::ListValue* filters) { 123 AddFilters(false, filters); 124 } 125 126 void URLBlacklist::Allow(const base::ListValue* filters) { 127 AddFilters(true, filters); 128 } 129 130 bool URLBlacklist::IsURLBlocked(const GURL& url) const { 131 std::set<URLMatcherConditionSet::ID> matching_ids = 132 url_matcher_->MatchURL(url); 133 134 const FilterComponents* max = NULL; 135 for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin(); 136 id != matching_ids.end(); ++id) { 137 std::map<int, FilterComponents>::const_iterator it = filters_.find(*id); 138 DCHECK(it != filters_.end()); 139 const FilterComponents& filter = it->second; 140 if (!max || FilterTakesPrecedence(filter, *max)) 141 max = &filter; 142 } 143 144 // Default to allow. 145 if (!max) 146 return false; 147 148 return !max->allow; 149 } 150 151 size_t URLBlacklist::Size() const { 152 return filters_.size(); 153 } 154 155 // static 156 bool URLBlacklist::FilterToComponents(const std::string& filter, 157 std::string* scheme, 158 std::string* host, 159 bool* match_subdomains, 160 uint16* port, 161 std::string* path) { 162 url_parse::Parsed parsed; 163 164 if (URLFixerUpper::SegmentURL(filter, &parsed) == chrome::kFileScheme) { 165 base::FilePath file_path; 166 if (!net::FileURLToFilePath(GURL(filter), &file_path)) 167 return false; 168 169 *scheme = chrome::kFileScheme; 170 host->clear(); 171 *match_subdomains = true; 172 *port = 0; 173 // Special path when the |filter| is 'file://*'. 174 *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe(); 175 #if defined(FILE_PATH_USES_WIN_SEPARATORS) 176 // Separators have to be canonicalized on Windows. 177 std::replace(path->begin(), path->end(), '\\', '/'); 178 *path = "/" + *path; 179 #endif 180 return true; 181 } 182 183 if (!parsed.host.is_nonempty()) 184 return false; 185 186 if (parsed.scheme.is_nonempty()) 187 scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len); 188 else 189 scheme->clear(); 190 191 host->assign(filter, parsed.host.begin, parsed.host.len); 192 // Special '*' host, matches all hosts. 193 if (*host == "*") { 194 host->clear(); 195 *match_subdomains = true; 196 } else if ((*host)[0] == '.') { 197 // A leading dot in the pattern syntax means that we don't want to match 198 // subdomains. 199 host->erase(0, 1); 200 *match_subdomains = false; 201 } else { 202 url_canon::RawCanonOutputT<char> output; 203 url_canon::CanonHostInfo host_info; 204 url_canon::CanonicalizeHostVerbose(filter.c_str(), parsed.host, 205 &output, &host_info); 206 if (host_info.family == url_canon::CanonHostInfo::NEUTRAL) { 207 // We want to match subdomains. Add a dot in front to make sure we only 208 // match at domain component boundaries. 209 *host = "." + *host; 210 *match_subdomains = true; 211 } else { 212 *match_subdomains = false; 213 } 214 } 215 216 if (parsed.port.is_nonempty()) { 217 int int_port; 218 if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len), 219 &int_port)) { 220 return false; 221 } 222 if (int_port <= 0 || int_port > kuint16max) 223 return false; 224 *port = int_port; 225 } else { 226 // Match any port. 227 *port = 0; 228 } 229 230 if (parsed.path.is_nonempty()) 231 path->assign(filter, parsed.path.begin, parsed.path.len); 232 else 233 path->clear(); 234 235 return true; 236 } 237 238 // static 239 scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet( 240 URLMatcher* url_matcher, 241 int id, 242 const std::string& scheme, 243 const std::string& host, 244 bool match_subdomains, 245 uint16 port, 246 const std::string& path) { 247 URLMatcherConditionFactory* condition_factory = 248 url_matcher->condition_factory(); 249 std::set<URLMatcherCondition> conditions; 250 conditions.insert(match_subdomains ? 251 condition_factory->CreateHostSuffixPathPrefixCondition(host, path) : 252 condition_factory->CreateHostEqualsPathPrefixCondition(host, path)); 253 254 scoped_ptr<URLMatcherSchemeFilter> scheme_filter; 255 if (!scheme.empty()) 256 scheme_filter.reset(new URLMatcherSchemeFilter(scheme)); 257 258 scoped_ptr<URLMatcherPortFilter> port_filter; 259 if (port != 0) { 260 std::vector<URLMatcherPortFilter::Range> ranges; 261 ranges.push_back(URLMatcherPortFilter::CreateRange(port)); 262 port_filter.reset(new URLMatcherPortFilter(ranges)); 263 } 264 265 return new URLMatcherConditionSet(id, conditions, 266 scheme_filter.Pass(), port_filter.Pass()); 267 } 268 269 // static 270 bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs, 271 const FilterComponents& rhs) { 272 if (lhs.match_subdomains && !rhs.match_subdomains) 273 return false; 274 if (!lhs.match_subdomains && rhs.match_subdomains) 275 return true; 276 277 size_t host_length = lhs.host.length(); 278 size_t other_host_length = rhs.host.length(); 279 if (host_length != other_host_length) 280 return host_length > other_host_length; 281 282 size_t path_length = lhs.path.length(); 283 size_t other_path_length = rhs.path.length(); 284 if (path_length != other_path_length) 285 return path_length > other_path_length; 286 287 if (lhs.allow && !rhs.allow) 288 return true; 289 290 return false; 291 } 292 293 URLBlacklistManager::URLBlacklistManager(PrefService* pref_service) 294 : ui_weak_ptr_factory_(this), 295 pref_service_(pref_service), 296 io_weak_ptr_factory_(this), 297 blacklist_(new URLBlacklist) { 298 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 299 300 pref_change_registrar_.Init(pref_service_); 301 base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate, 302 base::Unretained(this)); 303 pref_change_registrar_.Add(prefs::kUrlBlacklist, callback); 304 pref_change_registrar_.Add(prefs::kUrlWhitelist, callback); 305 306 // Start enforcing the policies without a delay when they are present at 307 // startup. 308 if (pref_service_->HasPrefPath(prefs::kUrlBlacklist)) 309 Update(); 310 } 311 312 void URLBlacklistManager::ShutdownOnUIThread() { 313 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 314 // Cancel any pending updates, and stop listening for pref change updates. 315 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 316 pref_change_registrar_.RemoveAll(); 317 } 318 319 URLBlacklistManager::~URLBlacklistManager() { 320 } 321 322 void URLBlacklistManager::ScheduleUpdate() { 323 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 324 // Cancel pending updates, if any. This can happen if two preferences that 325 // change the blacklist are updated in one message loop cycle. In those cases, 326 // only rebuild the blacklist after all the preference updates are processed. 327 ui_weak_ptr_factory_.InvalidateWeakPtrs(); 328 base::MessageLoop::current()->PostTask( 329 FROM_HERE, 330 base::Bind(&URLBlacklistManager::Update, 331 ui_weak_ptr_factory_.GetWeakPtr())); 332 } 333 334 void URLBlacklistManager::Update() { 335 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 336 337 // The preferences can only be read on the UI thread. 338 scoped_ptr<base::ListValue> block( 339 pref_service_->GetList(prefs::kUrlBlacklist)->DeepCopy()); 340 scoped_ptr<base::ListValue> allow( 341 pref_service_->GetList(prefs::kUrlWhitelist)->DeepCopy()); 342 343 // Go through the IO thread to grab a WeakPtr to |this|. This is safe from 344 // here, since this task will always execute before a potential deletion of 345 // ProfileIOData on IO. 346 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 347 base::Bind(&URLBlacklistManager::UpdateOnIO, 348 base::Unretained(this), 349 base::Passed(&block), 350 base::Passed(&allow))); 351 } 352 353 void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block, 354 scoped_ptr<base::ListValue> allow) { 355 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 356 // The URLBlacklist is built on the FILE thread. Once it's ready, it is passed 357 // to the URLBlacklistManager on IO. 358 BrowserThread::PostTaskAndReplyWithResult( 359 BrowserThread::FILE, FROM_HERE, 360 base::Bind(&BuildBlacklist, 361 base::Passed(&block), 362 base::Passed(&allow)), 363 base::Bind(&URLBlacklistManager::SetBlacklist, 364 io_weak_ptr_factory_.GetWeakPtr())); 365 } 366 367 void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) { 368 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 369 blacklist_ = blacklist.Pass(); 370 } 371 372 bool URLBlacklistManager::IsURLBlocked(const GURL& url) const { 373 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 374 return blacklist_->IsURLBlocked(url); 375 } 376 377 bool URLBlacklistManager::IsRequestBlocked( 378 const net::URLRequest& request) const { 379 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 380 int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME; 381 if ((request.load_flags() & filter_flags) == 0) 382 return false; 383 384 #if !defined(OS_CHROMEOS) 385 if (IsSigninFlowURL(request.url())) 386 return false; 387 #endif 388 389 return IsURLBlocked(request.url()); 390 } 391 392 // static 393 void URLBlacklistManager::RegisterProfilePrefs( 394 user_prefs::PrefRegistrySyncable* registry) { 395 registry->RegisterListPref(prefs::kUrlBlacklist, 396 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 397 registry->RegisterListPref(prefs::kUrlWhitelist, 398 user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF); 399 } 400 401 } // namespace policy 402