1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/extensions/activity_log/uma_policy.h" 6 7 #include "base/metrics/histogram.h" 8 #include "base/strings/stringprintf.h" 9 #include "chrome/browser/browser_process.h" 10 #include "chrome/browser/extensions/active_script_controller.h" 11 #include "chrome/browser/extensions/activity_log/activity_action_constants.h" 12 #include "chrome/browser/extensions/activity_log/ad_network_database.h" 13 #include "chrome/browser/sessions/session_id.h" 14 #include "chrome/browser/ui/browser.h" 15 #include "chrome/browser/ui/browser_list.h" 16 #include "chrome/browser/ui/tabs/tab_strip_model.h" 17 #include "chrome/common/url_constants.h" 18 #include "content/public/browser/web_contents.h" 19 #include "content/public/common/url_constants.h" 20 #include "extensions/browser/extension_registry.h" 21 #include "extensions/common/dom_action_types.h" 22 #include "extensions/common/extension.h" 23 #include "extensions/common/manifest.h" 24 25 namespace extensions { 26 27 namespace { 28 29 // For convenience. 30 const int kNoStatus = UmaPolicy::NONE; 31 const int kContentScript = 1 << UmaPolicy::CONTENT_SCRIPT; 32 const int kReadDom = 1 << UmaPolicy::READ_DOM; 33 const int kModifiedDom = 1 << UmaPolicy::MODIFIED_DOM; 34 const int kDomMethod = 1 << UmaPolicy::DOM_METHOD; 35 const int kDocumentWrite = 1 << UmaPolicy::DOCUMENT_WRITE; 36 const int kInnerHtml = 1 << UmaPolicy::INNER_HTML; 37 const int kCreatedScript = 1 << UmaPolicy::CREATED_SCRIPT; 38 const int kCreatedIframe = 1 << UmaPolicy::CREATED_IFRAME; 39 const int kCreatedDiv = 1 << UmaPolicy::CREATED_DIV; 40 const int kCreatedLink = 1 << UmaPolicy::CREATED_LINK; 41 const int kCreatedInput = 1 << UmaPolicy::CREATED_INPUT; 42 const int kCreatedEmbed = 1 << UmaPolicy::CREATED_EMBED; 43 const int kCreatedObject = 1 << UmaPolicy::CREATED_OBJECT; 44 const int kAdInjected = 1 << UmaPolicy::AD_INJECTED; 45 const int kAdRemoved = 1 << UmaPolicy::AD_REMOVED; 46 const int kAdReplaced = 1 << UmaPolicy::AD_REPLACED; 47 const int kAdLikelyInjected = 1 << UmaPolicy::AD_LIKELY_INJECTED; 48 const int kAdLikelyReplaced = 1 << UmaPolicy::AD_LIKELY_REPLACED; 49 50 // A mask of all the ad injection flags. 51 const int kAnyAdActivity = kAdInjected | 52 kAdRemoved | 53 kAdReplaced | 54 kAdLikelyInjected | 55 kAdLikelyReplaced; 56 57 } // namespace 58 59 // Class constants, also used in testing. -------------------------------------- 60 61 const char UmaPolicy::kNumberOfTabs[] = "num_tabs"; 62 const size_t UmaPolicy::kMaxTabsTracked = 50; 63 64 // Setup and shutdown. --------------------------------------------------------- 65 66 UmaPolicy::UmaPolicy(Profile* profile) 67 : ActivityLogPolicy(profile), profile_(profile) { 68 DCHECK(!profile->IsOffTheRecord()); 69 BrowserList::AddObserver(this); 70 } 71 72 UmaPolicy::~UmaPolicy() { 73 BrowserList::RemoveObserver(this); 74 } 75 76 // Unlike the other policies, UmaPolicy can commit suicide directly because it 77 // doesn't have a dependency on a database. 78 void UmaPolicy::Close() { 79 delete this; 80 } 81 82 // Process actions. ------------------------------------------------------------ 83 84 void UmaPolicy::ProcessAction(scoped_refptr<Action> action) { 85 if (!action->page_url().is_valid() && !action->arg_url().is_valid()) 86 return; 87 if (action->page_incognito() || action->arg_incognito()) 88 return; 89 std::string url; 90 int status = MatchActionToStatus(action); 91 if (action->page_url().is_valid()) { 92 url = CleanURL(action->page_url()); 93 } else if (status & kContentScript) { 94 // This is for the tabs.executeScript case. 95 url = CleanURL(action->arg_url()); 96 } 97 if (url.empty()) 98 return; 99 100 SiteMap::iterator site_lookup = url_status_.find(url); 101 if (site_lookup != url_status_.end()) 102 site_lookup->second[action->extension_id()] |= status; 103 } 104 105 int UmaPolicy::MatchActionToStatus(scoped_refptr<Action> action) { 106 if (action->action_type() == Action::ACTION_CONTENT_SCRIPT) { 107 return kContentScript; 108 } else if (action->action_type() == Action::ACTION_API_CALL && 109 action->api_name() == "tabs.executeScript") { 110 return kContentScript; 111 } else if (action->action_type() != Action::ACTION_DOM_ACCESS) { 112 return kNoStatus; 113 } 114 115 int dom_verb; 116 if (!action->other() || 117 !action->other()->GetIntegerWithoutPathExpansion( 118 activity_log_constants::kActionDomVerb, &dom_verb)) { 119 return kNoStatus; 120 } 121 122 int ret_bit = kNoStatus; 123 DomActionType::Type dom_type = static_cast<DomActionType::Type>(dom_verb); 124 if (dom_type == DomActionType::GETTER) 125 return kReadDom; 126 if (dom_type == DomActionType::SETTER) { 127 ret_bit |= kModifiedDom; 128 } else if (dom_type == DomActionType::METHOD) { 129 ret_bit |= kDomMethod; 130 } else { 131 return kNoStatus; 132 } 133 134 if (action->api_name() == "HTMLDocument.write" || 135 action->api_name() == "HTMLDocument.writeln") { 136 ret_bit |= kDocumentWrite; 137 } else if (action->api_name() == "Element.innerHTML") { 138 ret_bit |= kInnerHtml; 139 } else if (action->api_name() == "Document.createElement") { 140 std::string arg; 141 action->args()->GetString(0, &arg); 142 if (arg == "script") { 143 ret_bit |= kCreatedScript; 144 } else if (arg == "iframe") { 145 ret_bit |= kCreatedIframe; 146 } else if (arg == "div") { 147 ret_bit |= kCreatedDiv; 148 } else if (arg == "a") { 149 ret_bit |= kCreatedLink; 150 } else if (arg == "input") { 151 ret_bit |= kCreatedInput; 152 } else if (arg == "embed") { 153 ret_bit |= kCreatedEmbed; 154 } else if (arg == "object") { 155 ret_bit |= kCreatedObject; 156 } 157 } 158 159 const Action::InjectionType ad_injection = 160 action->DidInjectAd(g_browser_process->rappor_service()); 161 switch (ad_injection) { 162 case Action::INJECTION_NEW_AD: 163 ret_bit |= kAdInjected; 164 break; 165 case Action::INJECTION_REMOVED_AD: 166 ret_bit |= kAdRemoved; 167 break; 168 case Action::INJECTION_REPLACED_AD: 169 ret_bit |= kAdReplaced; 170 break; 171 case Action::INJECTION_LIKELY_NEW_AD: 172 ret_bit |= kAdLikelyInjected; 173 break; 174 case Action::INJECTION_LIKELY_REPLACED_AD: 175 ret_bit |= kAdLikelyReplaced; 176 break; 177 case Action::NO_AD_INJECTION: 178 break; 179 case Action::NUM_INJECTION_TYPES: 180 NOTREACHED(); 181 } 182 183 return ret_bit; 184 } 185 186 void UmaPolicy::HistogramOnClose(const std::string& cleaned_url, 187 content::WebContents* web_contents) { 188 // Let's try to avoid histogramming useless URLs. 189 if (cleaned_url.empty() || cleaned_url == url::kAboutBlankURL || 190 cleaned_url == chrome::kChromeUINewTabURL) 191 return; 192 193 int statuses[MAX_STATUS - 1]; 194 std::memset(statuses, 0, sizeof(statuses)); 195 196 ActiveScriptController* active_script_controller = 197 ActiveScriptController::GetForWebContents(web_contents); 198 SiteMap::iterator site_lookup = url_status_.find(cleaned_url); 199 const ExtensionMap& exts = site_lookup->second; 200 std::set<std::string> ad_injectors; 201 for (ExtensionMap::const_iterator ext_iter = exts.begin(); 202 ext_iter != exts.end(); 203 ++ext_iter) { 204 if (ext_iter->first == kNumberOfTabs) 205 continue; 206 for (int i = NONE + 1; i < MAX_STATUS; ++i) { 207 if (ext_iter->second & (1 << i)) 208 statuses[i-1]++; 209 } 210 211 if (ext_iter->second & kAnyAdActivity) 212 ad_injectors.insert(ext_iter->first); 213 } 214 if (active_script_controller) 215 active_script_controller->OnAdInjectionDetected(ad_injectors); 216 217 ExtensionRegistry* registry = ExtensionRegistry::Get(profile_); 218 for (std::set<std::string>::const_iterator iter = ad_injectors.begin(); 219 iter != ad_injectors.end(); 220 ++iter) { 221 const Extension* extension = 222 registry->GetExtensionById(*iter, ExtensionRegistry::EVERYTHING); 223 if (extension) { 224 UMA_HISTOGRAM_ENUMERATION("Extensions.AdInjection.InstallLocation", 225 extension->location(), 226 Manifest::NUM_LOCATIONS); 227 } 228 } 229 230 std::string prefix = "ExtensionActivity."; 231 if (GURL(cleaned_url).host() != "www.google.com") { 232 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT), 233 statuses[CONTENT_SCRIPT - 1]); 234 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM), 235 statuses[READ_DOM - 1]); 236 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM), 237 statuses[MODIFIED_DOM - 1]); 238 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD), 239 statuses[DOM_METHOD - 1]); 240 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE), 241 statuses[DOCUMENT_WRITE - 1]); 242 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML), 243 statuses[INNER_HTML - 1]); 244 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT), 245 statuses[CREATED_SCRIPT - 1]); 246 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME), 247 statuses[CREATED_IFRAME - 1]); 248 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV), 249 statuses[CREATED_DIV - 1]); 250 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK), 251 statuses[CREATED_LINK - 1]); 252 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT), 253 statuses[CREATED_INPUT - 1]); 254 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED), 255 statuses[CREATED_EMBED - 1]); 256 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT), 257 statuses[CREATED_OBJECT - 1]); 258 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED), 259 statuses[AD_INJECTED - 1]); 260 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED), 261 statuses[AD_REMOVED - 1]); 262 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED), 263 statuses[AD_REPLACED - 1]); 264 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED), 265 statuses[AD_LIKELY_INJECTED - 1]); 266 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED), 267 statuses[AD_LIKELY_REPLACED - 1]); 268 } else { 269 prefix += "Google."; 270 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT), 271 statuses[CONTENT_SCRIPT - 1]); 272 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM), 273 statuses[READ_DOM - 1]); 274 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM), 275 statuses[MODIFIED_DOM - 1]); 276 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD), 277 statuses[DOM_METHOD - 1]); 278 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE), 279 statuses[DOCUMENT_WRITE - 1]); 280 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML), 281 statuses[INNER_HTML - 1]); 282 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT), 283 statuses[CREATED_SCRIPT - 1]); 284 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME), 285 statuses[CREATED_IFRAME - 1]); 286 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV), 287 statuses[CREATED_DIV - 1]); 288 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK), 289 statuses[CREATED_LINK - 1]); 290 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT), 291 statuses[CREATED_INPUT - 1]); 292 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED), 293 statuses[CREATED_EMBED - 1]); 294 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT), 295 statuses[CREATED_OBJECT - 1]); 296 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED), 297 statuses[AD_INJECTED - 1]); 298 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED), 299 statuses[AD_REMOVED - 1]); 300 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED), 301 statuses[AD_REPLACED - 1]); 302 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED), 303 statuses[AD_LIKELY_INJECTED - 1]); 304 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED), 305 statuses[AD_LIKELY_REPLACED - 1]); 306 } 307 } 308 309 // Handle tab tracking. -------------------------------------------------------- 310 311 void UmaPolicy::OnBrowserAdded(Browser* browser) { 312 if (!profile_->IsSameProfile(browser->profile())) 313 return; 314 browser->tab_strip_model()->AddObserver(this); 315 } 316 317 void UmaPolicy::OnBrowserRemoved(Browser* browser) { 318 if (!profile_->IsSameProfile(browser->profile())) 319 return; 320 browser->tab_strip_model()->RemoveObserver(this); 321 } 322 323 // Use the value from SessionID::IdForTab, *not* |index|. |index| will be 324 // duplicated across tabs in a session, whereas IdForTab uniquely identifies 325 // each tab. 326 void UmaPolicy::TabChangedAt(content::WebContents* contents, 327 int index, 328 TabChangeType change_type) { 329 if (change_type != TabStripModelObserver::LOADING_ONLY) 330 return; 331 if (!contents) 332 return; 333 334 std::string url = CleanURL(contents->GetLastCommittedURL()); 335 int32 tab_id = SessionID::IdForTab(contents); 336 337 std::map<int32, std::string>::iterator tab_it = tab_list_.find(tab_id); 338 339 // Ignore tabs that haven't changed status. 340 if (tab_it != tab_list_.end() && tab_it->second == url) 341 return; 342 343 // Is this an existing tab whose URL has changed. 344 if (tab_it != tab_list_.end()) { 345 CleanupClosedPage(tab_it->second, contents); 346 tab_list_.erase(tab_id); 347 } 348 349 // Check that tab_list_ isn't over the kMaxTabsTracked budget. 350 if (tab_list_.size() >= kMaxTabsTracked) 351 return; 352 353 // Set up the new entries. 354 tab_list_[tab_id] = url; 355 SetupOpenedPage(url); 356 } 357 358 // Use the value from SessionID::IdForTab, *not* |index|. |index| will be 359 // duplicated across tabs in a session, whereas IdForTab uniquely identifies 360 // each tab. 361 void UmaPolicy::TabClosingAt(TabStripModel* tab_strip_model, 362 content::WebContents* contents, 363 int index) { 364 if (!contents) 365 return; 366 std::string url = CleanURL(contents->GetLastCommittedURL()); 367 int32 tab_id = SessionID::IdForTab(contents); 368 std::map<int, std::string>::iterator tab_it = tab_list_.find(tab_id); 369 if (tab_it != tab_list_.end()) 370 tab_list_.erase(tab_id); 371 372 CleanupClosedPage(url, contents); 373 } 374 375 void UmaPolicy::SetupOpenedPage(const std::string& url) { 376 url_status_[url][kNumberOfTabs]++; 377 } 378 379 void UmaPolicy::CleanupClosedPage(const std::string& cleaned_url, 380 content::WebContents* web_contents) { 381 SiteMap::iterator old_site_lookup = url_status_.find(cleaned_url); 382 if (old_site_lookup == url_status_.end()) 383 return; 384 old_site_lookup->second[kNumberOfTabs]--; 385 if (old_site_lookup->second[kNumberOfTabs] == 0) { 386 HistogramOnClose(cleaned_url, web_contents); 387 url_status_.erase(cleaned_url); 388 } 389 } 390 391 // Helpers. -------------------------------------------------------------------- 392 393 // We don't want to treat # ref navigations as if they were new pageloads. 394 // So we get rid of the ref if it has it. 395 // We convert to a string in the hopes that this is faster than Replacements. 396 std::string UmaPolicy::CleanURL(const GURL& gurl) { 397 if (gurl.spec().empty()) 398 return GURL(url::kAboutBlankURL).spec(); 399 if (!gurl.is_valid()) 400 return gurl.spec(); 401 if (!gurl.has_ref()) 402 return gurl.spec(); 403 std::string port = ""; 404 if (gurl.has_port()) 405 port = ":" + gurl.port(); 406 std::string query = ""; 407 if (gurl.has_query()) 408 query = "?" + gurl.query(); 409 return base::StringPrintf("%s://%s%s%s%s", 410 gurl.scheme().c_str(), 411 gurl.host().c_str(), 412 port.c_str(), 413 gurl.path().c_str(), 414 query.c_str()); 415 } 416 417 const char* UmaPolicy::GetHistogramName(PageStatus status) { 418 switch (status) { 419 case CONTENT_SCRIPT: 420 return "ContentScript"; 421 case READ_DOM: 422 return "ReadDom"; 423 case MODIFIED_DOM: 424 return "ModifiedDom"; 425 case DOM_METHOD: 426 return "InvokedDomMethod"; 427 case DOCUMENT_WRITE: 428 return "DocumentWrite"; 429 case INNER_HTML: 430 return "InnerHtml"; 431 case CREATED_SCRIPT: 432 return "CreatedScript"; 433 case CREATED_IFRAME: 434 return "CreatedIframe"; 435 case CREATED_DIV: 436 return "CreatedDiv"; 437 case CREATED_LINK: 438 return "CreatedLink"; 439 case CREATED_INPUT: 440 return "CreatedInput"; 441 case CREATED_EMBED: 442 return "CreatedEmbed"; 443 case CREATED_OBJECT: 444 return "CreatedObject"; 445 case AD_INJECTED: 446 return "AdInjected"; 447 case AD_REMOVED: 448 return "AdRemoved"; 449 case AD_REPLACED: 450 return "AdReplaced"; 451 case AD_LIKELY_INJECTED: 452 return "AdLikelyInjected"; 453 case AD_LIKELY_REPLACED: 454 return "AdLikelyReplaced"; 455 case NONE: 456 case MAX_STATUS: 457 default: 458 NOTREACHED(); 459 return ""; 460 } 461 } 462 463 } // namespace extensions 464