1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/extensions/activity_log/uma_policy.h" 6 7 #include "base/metrics/histogram.h" 8 #include "base/strings/stringprintf.h" 9 #include "chrome/browser/browser_process.h" 10 #include "chrome/browser/extensions/active_script_controller.h" 11 #include "chrome/browser/extensions/activity_log/activity_action_constants.h" 12 #include "chrome/browser/extensions/activity_log/ad_network_database.h" 13 #include "chrome/browser/sessions/session_tab_helper.h" 14 #include "chrome/browser/ui/browser.h" 15 #include "chrome/browser/ui/browser_list.h" 16 #include "chrome/browser/ui/tabs/tab_strip_model.h" 17 #include "chrome/common/url_constants.h" 18 #include "content/public/browser/web_contents.h" 19 #include "content/public/common/url_constants.h" 20 #include "extensions/browser/extension_registry.h" 21 #include "extensions/common/dom_action_types.h" 22 #include "extensions/common/extension.h" 23 #include "extensions/common/manifest.h" 24 25 namespace extensions { 26 27 namespace { 28 29 // For convenience. 30 const int kNoStatus = UmaPolicy::NONE; 31 const int kContentScript = 1 << UmaPolicy::CONTENT_SCRIPT; 32 const int kReadDom = 1 << UmaPolicy::READ_DOM; 33 const int kModifiedDom = 1 << UmaPolicy::MODIFIED_DOM; 34 const int kDomMethod = 1 << UmaPolicy::DOM_METHOD; 35 const int kDocumentWrite = 1 << UmaPolicy::DOCUMENT_WRITE; 36 const int kInnerHtml = 1 << UmaPolicy::INNER_HTML; 37 const int kCreatedScript = 1 << UmaPolicy::CREATED_SCRIPT; 38 const int kCreatedIframe = 1 << UmaPolicy::CREATED_IFRAME; 39 const int kCreatedDiv = 1 << UmaPolicy::CREATED_DIV; 40 const int kCreatedLink = 1 << UmaPolicy::CREATED_LINK; 41 const int kCreatedInput = 1 << UmaPolicy::CREATED_INPUT; 42 const int kCreatedEmbed = 1 << UmaPolicy::CREATED_EMBED; 43 const int kCreatedObject = 1 << UmaPolicy::CREATED_OBJECT; 44 const int kAdInjected = 1 << UmaPolicy::AD_INJECTED; 45 const int kAdRemoved = 1 << UmaPolicy::AD_REMOVED; 46 const int kAdReplaced = 1 << UmaPolicy::AD_REPLACED; 47 const int kAdLikelyInjected = 1 << UmaPolicy::AD_LIKELY_INJECTED; 48 const int kAdLikelyReplaced = 1 << UmaPolicy::AD_LIKELY_REPLACED; 49 50 // A mask of all the ad injection flags. 51 const int kAnyAdActivity = kAdInjected | 52 kAdRemoved | 53 kAdReplaced | 54 kAdLikelyInjected | 55 kAdLikelyReplaced; 56 57 } // namespace 58 59 // Class constants, also used in testing. -------------------------------------- 60 61 const char UmaPolicy::kNumberOfTabs[] = "num_tabs"; 62 const size_t UmaPolicy::kMaxTabsTracked = 50; 63 64 // Setup and shutdown. --------------------------------------------------------- 65 66 UmaPolicy::UmaPolicy(Profile* profile) 67 : ActivityLogPolicy(profile), profile_(profile) { 68 DCHECK(!profile->IsOffTheRecord()); 69 BrowserList::AddObserver(this); 70 } 71 72 UmaPolicy::~UmaPolicy() { 73 BrowserList::RemoveObserver(this); 74 } 75 76 // Unlike the other policies, UmaPolicy can commit suicide directly because it 77 // doesn't have a dependency on a database. 78 void UmaPolicy::Close() { 79 delete this; 80 } 81 82 // Process actions. ------------------------------------------------------------ 83 84 void UmaPolicy::ProcessAction(scoped_refptr<Action> action) { 85 if (!action->page_url().is_valid() && !action->arg_url().is_valid()) 86 return; 87 if (action->page_incognito() || action->arg_incognito()) 88 return; 89 std::string url; 90 int status = MatchActionToStatus(action); 91 if (action->page_url().is_valid()) { 92 url = CleanURL(action->page_url()); 93 } else if (status & kContentScript) { 94 // This is for the tabs.executeScript case. 95 url = CleanURL(action->arg_url()); 96 } 97 if (url.empty()) 98 return; 99 100 SiteMap::iterator site_lookup = url_status_.find(url); 101 if (site_lookup != url_status_.end()) 102 site_lookup->second[action->extension_id()] |= status; 103 } 104 105 int UmaPolicy::MatchActionToStatus(scoped_refptr<Action> action) { 106 if (action->action_type() == Action::ACTION_CONTENT_SCRIPT) 107 return kContentScript; 108 if (action->action_type() == Action::ACTION_API_CALL && 109 action->api_name() == "tabs.executeScript") 110 return kContentScript; 111 if (action->action_type() != Action::ACTION_DOM_ACCESS) 112 return kNoStatus; 113 114 int dom_verb = DomActionType::MODIFIED; 115 if (!action->other() || 116 !action->other()->GetIntegerWithoutPathExpansion( 117 activity_log_constants::kActionDomVerb, &dom_verb)) 118 return kNoStatus; 119 120 int ret_bit = kNoStatus; 121 DomActionType::Type dom_type = static_cast<DomActionType::Type>(dom_verb); 122 if (dom_type == DomActionType::GETTER) 123 return kReadDom; 124 if (dom_type == DomActionType::SETTER) 125 ret_bit |= kModifiedDom; 126 else if (dom_type == DomActionType::METHOD) 127 ret_bit |= kDomMethod; 128 else 129 return kNoStatus; 130 131 if (action->api_name() == "HTMLDocument.write" || 132 action->api_name() == "HTMLDocument.writeln") { 133 ret_bit |= kDocumentWrite; 134 } else if (action->api_name() == "Element.innerHTML") { 135 ret_bit |= kInnerHtml; 136 } else if (action->api_name() == "Document.createElement") { 137 std::string arg; 138 action->args()->GetString(0, &arg); 139 if (arg == "script") 140 ret_bit |= kCreatedScript; 141 else if (arg == "iframe") 142 ret_bit |= kCreatedIframe; 143 else if (arg == "div") 144 ret_bit |= kCreatedDiv; 145 else if (arg == "a") 146 ret_bit |= kCreatedLink; 147 else if (arg == "input") 148 ret_bit |= kCreatedInput; 149 else if (arg == "embed") 150 ret_bit |= kCreatedEmbed; 151 else if (arg == "object") 152 ret_bit |= kCreatedObject; 153 } 154 155 const Action::InjectionType ad_injection = 156 action->DidInjectAd(g_browser_process->rappor_service()); 157 switch (ad_injection) { 158 case Action::INJECTION_NEW_AD: 159 ret_bit |= kAdInjected; 160 break; 161 case Action::INJECTION_REMOVED_AD: 162 ret_bit |= kAdRemoved; 163 break; 164 case Action::INJECTION_REPLACED_AD: 165 ret_bit |= kAdReplaced; 166 break; 167 case Action::INJECTION_LIKELY_NEW_AD: 168 ret_bit |= kAdLikelyInjected; 169 break; 170 case Action::INJECTION_LIKELY_REPLACED_AD: 171 ret_bit |= kAdLikelyReplaced; 172 break; 173 case Action::NO_AD_INJECTION: 174 break; 175 case Action::NUM_INJECTION_TYPES: 176 NOTREACHED(); 177 } 178 179 return ret_bit; 180 } 181 182 void UmaPolicy::HistogramOnClose(const std::string& cleaned_url, 183 content::WebContents* web_contents) { 184 // Let's try to avoid histogramming useless URLs. 185 if (cleaned_url.empty() || cleaned_url == url::kAboutBlankURL || 186 cleaned_url == chrome::kChromeUINewTabURL) 187 return; 188 189 int statuses[MAX_STATUS - 1]; 190 std::memset(statuses, 0, sizeof(statuses)); 191 192 ActiveScriptController* active_script_controller = 193 ActiveScriptController::GetForWebContents(web_contents); 194 SiteMap::iterator site_lookup = url_status_.find(cleaned_url); 195 const ExtensionMap& exts = site_lookup->second; 196 std::set<std::string> ad_injectors; 197 for (ExtensionMap::const_iterator ext_iter = exts.begin(); 198 ext_iter != exts.end(); 199 ++ext_iter) { 200 if (ext_iter->first == kNumberOfTabs) 201 continue; 202 for (int i = NONE + 1; i < MAX_STATUS; ++i) { 203 if (ext_iter->second & (1 << i)) 204 statuses[i-1]++; 205 } 206 207 if (ext_iter->second & kAnyAdActivity) 208 ad_injectors.insert(ext_iter->first); 209 } 210 if (active_script_controller) 211 active_script_controller->OnAdInjectionDetected(ad_injectors); 212 213 ExtensionRegistry* registry = ExtensionRegistry::Get(profile_); 214 for (std::set<std::string>::const_iterator iter = ad_injectors.begin(); 215 iter != ad_injectors.end(); 216 ++iter) { 217 const Extension* extension = 218 registry->GetExtensionById(*iter, ExtensionRegistry::EVERYTHING); 219 if (extension) { 220 UMA_HISTOGRAM_ENUMERATION("Extensions.AdInjection.InstallLocation", 221 extension->location(), 222 Manifest::NUM_LOCATIONS); 223 } 224 } 225 226 std::string prefix = "ExtensionActivity."; 227 if (GURL(cleaned_url).host() != "www.google.com") { 228 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT), 229 statuses[CONTENT_SCRIPT - 1]); 230 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM), 231 statuses[READ_DOM - 1]); 232 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM), 233 statuses[MODIFIED_DOM - 1]); 234 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD), 235 statuses[DOM_METHOD - 1]); 236 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE), 237 statuses[DOCUMENT_WRITE - 1]); 238 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML), 239 statuses[INNER_HTML - 1]); 240 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT), 241 statuses[CREATED_SCRIPT - 1]); 242 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME), 243 statuses[CREATED_IFRAME - 1]); 244 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV), 245 statuses[CREATED_DIV - 1]); 246 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK), 247 statuses[CREATED_LINK - 1]); 248 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT), 249 statuses[CREATED_INPUT - 1]); 250 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED), 251 statuses[CREATED_EMBED - 1]); 252 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT), 253 statuses[CREATED_OBJECT - 1]); 254 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED), 255 statuses[AD_INJECTED - 1]); 256 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED), 257 statuses[AD_REMOVED - 1]); 258 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED), 259 statuses[AD_REPLACED - 1]); 260 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED), 261 statuses[AD_LIKELY_INJECTED - 1]); 262 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED), 263 statuses[AD_LIKELY_REPLACED - 1]); 264 } else { 265 prefix += "Google."; 266 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT), 267 statuses[CONTENT_SCRIPT - 1]); 268 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM), 269 statuses[READ_DOM - 1]); 270 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM), 271 statuses[MODIFIED_DOM - 1]); 272 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD), 273 statuses[DOM_METHOD - 1]); 274 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE), 275 statuses[DOCUMENT_WRITE - 1]); 276 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML), 277 statuses[INNER_HTML - 1]); 278 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT), 279 statuses[CREATED_SCRIPT - 1]); 280 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME), 281 statuses[CREATED_IFRAME - 1]); 282 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV), 283 statuses[CREATED_DIV - 1]); 284 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK), 285 statuses[CREATED_LINK - 1]); 286 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT), 287 statuses[CREATED_INPUT - 1]); 288 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED), 289 statuses[CREATED_EMBED - 1]); 290 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT), 291 statuses[CREATED_OBJECT - 1]); 292 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED), 293 statuses[AD_INJECTED - 1]); 294 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED), 295 statuses[AD_REMOVED - 1]); 296 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED), 297 statuses[AD_REPLACED - 1]); 298 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED), 299 statuses[AD_LIKELY_INJECTED - 1]); 300 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED), 301 statuses[AD_LIKELY_REPLACED - 1]); 302 } 303 } 304 305 // Handle tab tracking. -------------------------------------------------------- 306 307 void UmaPolicy::OnBrowserAdded(Browser* browser) { 308 if (!profile_->IsSameProfile(browser->profile())) 309 return; 310 browser->tab_strip_model()->AddObserver(this); 311 } 312 313 void UmaPolicy::OnBrowserRemoved(Browser* browser) { 314 if (!profile_->IsSameProfile(browser->profile())) 315 return; 316 browser->tab_strip_model()->RemoveObserver(this); 317 } 318 319 // Use the value from SessionTabHelper::IdForTab, *not* |index|. |index| will be 320 // duplicated across tabs in a session, whereas IdForTab uniquely identifies 321 // each tab. 322 void UmaPolicy::TabChangedAt(content::WebContents* contents, 323 int index, 324 TabChangeType change_type) { 325 if (change_type != TabStripModelObserver::LOADING_ONLY) 326 return; 327 if (!contents) 328 return; 329 330 std::string url = CleanURL(contents->GetLastCommittedURL()); 331 int32 tab_id = SessionTabHelper::IdForTab(contents); 332 333 std::map<int32, std::string>::iterator tab_it = tab_list_.find(tab_id); 334 335 // Ignore tabs that haven't changed status. 336 if (tab_it != tab_list_.end() && tab_it->second == url) 337 return; 338 339 // Is this an existing tab whose URL has changed. 340 if (tab_it != tab_list_.end()) { 341 CleanupClosedPage(tab_it->second, contents); 342 tab_list_.erase(tab_id); 343 } 344 345 // Check that tab_list_ isn't over the kMaxTabsTracked budget. 346 if (tab_list_.size() >= kMaxTabsTracked) 347 return; 348 349 // Set up the new entries. 350 tab_list_[tab_id] = url; 351 SetupOpenedPage(url); 352 } 353 354 // Use the value from SessionTabHelper::IdForTab, *not* |index|. |index| will be 355 // duplicated across tabs in a session, whereas IdForTab uniquely identifies 356 // each tab. 357 void UmaPolicy::TabClosingAt(TabStripModel* tab_strip_model, 358 content::WebContents* contents, 359 int index) { 360 if (!contents) 361 return; 362 std::string url = CleanURL(contents->GetLastCommittedURL()); 363 int32 tab_id = SessionTabHelper::IdForTab(contents); 364 std::map<int, std::string>::iterator tab_it = tab_list_.find(tab_id); 365 if (tab_it != tab_list_.end()) 366 tab_list_.erase(tab_id); 367 368 CleanupClosedPage(url, contents); 369 } 370 371 void UmaPolicy::SetupOpenedPage(const std::string& url) { 372 url_status_[url][kNumberOfTabs]++; 373 } 374 375 void UmaPolicy::CleanupClosedPage(const std::string& cleaned_url, 376 content::WebContents* web_contents) { 377 SiteMap::iterator old_site_lookup = url_status_.find(cleaned_url); 378 if (old_site_lookup == url_status_.end()) 379 return; 380 old_site_lookup->second[kNumberOfTabs]--; 381 if (old_site_lookup->second[kNumberOfTabs] == 0) { 382 HistogramOnClose(cleaned_url, web_contents); 383 url_status_.erase(cleaned_url); 384 } 385 } 386 387 // Helpers. -------------------------------------------------------------------- 388 389 // We don't want to treat # ref navigations as if they were new pageloads. 390 // So we get rid of the ref if it has it. 391 // We convert to a string in the hopes that this is faster than Replacements. 392 std::string UmaPolicy::CleanURL(const GURL& gurl) { 393 if (gurl.spec().empty()) 394 return GURL(url::kAboutBlankURL).spec(); 395 if (!gurl.is_valid()) 396 return gurl.spec(); 397 if (!gurl.has_ref()) 398 return gurl.spec(); 399 std::string port = ""; 400 if (gurl.has_port()) 401 port = ":" + gurl.port(); 402 std::string query = ""; 403 if (gurl.has_query()) 404 query = "?" + gurl.query(); 405 return base::StringPrintf("%s://%s%s%s%s", 406 gurl.scheme().c_str(), 407 gurl.host().c_str(), 408 port.c_str(), 409 gurl.path().c_str(), 410 query.c_str()); 411 } 412 413 const char* UmaPolicy::GetHistogramName(PageStatus status) { 414 switch (status) { 415 case CONTENT_SCRIPT: 416 return "ContentScript"; 417 case READ_DOM: 418 return "ReadDom"; 419 case MODIFIED_DOM: 420 return "ModifiedDom"; 421 case DOM_METHOD: 422 return "InvokedDomMethod"; 423 case DOCUMENT_WRITE: 424 return "DocumentWrite"; 425 case INNER_HTML: 426 return "InnerHtml"; 427 case CREATED_SCRIPT: 428 return "CreatedScript"; 429 case CREATED_IFRAME: 430 return "CreatedIframe"; 431 case CREATED_DIV: 432 return "CreatedDiv"; 433 case CREATED_LINK: 434 return "CreatedLink"; 435 case CREATED_INPUT: 436 return "CreatedInput"; 437 case CREATED_EMBED: 438 return "CreatedEmbed"; 439 case CREATED_OBJECT: 440 return "CreatedObject"; 441 case AD_INJECTED: 442 return "AdInjected"; 443 case AD_REMOVED: 444 return "AdRemoved"; 445 case AD_REPLACED: 446 return "AdReplaced"; 447 case AD_LIKELY_INJECTED: 448 return "AdLikelyInjected"; 449 case AD_LIKELY_REPLACED: 450 return "AdLikelyReplaced"; 451 case NONE: 452 case MAX_STATUS: 453 default: 454 NOTREACHED(); 455 return ""; 456 } 457 } 458 459 } // namespace extensions 460