Home | History | Annotate | Download | only in activity_log
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/extensions/activity_log/uma_policy.h"
      6 
      7 #include "base/metrics/histogram.h"
      8 #include "base/strings/stringprintf.h"
      9 #include "chrome/browser/browser_process.h"
     10 #include "chrome/browser/extensions/active_script_controller.h"
     11 #include "chrome/browser/extensions/activity_log/activity_action_constants.h"
     12 #include "chrome/browser/extensions/activity_log/ad_network_database.h"
     13 #include "chrome/browser/sessions/session_id.h"
     14 #include "chrome/browser/ui/browser.h"
     15 #include "chrome/browser/ui/browser_list.h"
     16 #include "chrome/browser/ui/tabs/tab_strip_model.h"
     17 #include "chrome/common/url_constants.h"
     18 #include "content/public/browser/web_contents.h"
     19 #include "content/public/common/url_constants.h"
     20 #include "extensions/browser/extension_registry.h"
     21 #include "extensions/common/dom_action_types.h"
     22 #include "extensions/common/extension.h"
     23 #include "extensions/common/manifest.h"
     24 
     25 namespace extensions {
     26 
     27 namespace {
     28 
     29 // For convenience.
     30 const int kNoStatus           = UmaPolicy::NONE;
     31 const int kContentScript      = 1 << UmaPolicy::CONTENT_SCRIPT;
     32 const int kReadDom            = 1 << UmaPolicy::READ_DOM;
     33 const int kModifiedDom        = 1 << UmaPolicy::MODIFIED_DOM;
     34 const int kDomMethod          = 1 << UmaPolicy::DOM_METHOD;
     35 const int kDocumentWrite      = 1 << UmaPolicy::DOCUMENT_WRITE;
     36 const int kInnerHtml          = 1 << UmaPolicy::INNER_HTML;
     37 const int kCreatedScript      = 1 << UmaPolicy::CREATED_SCRIPT;
     38 const int kCreatedIframe      = 1 << UmaPolicy::CREATED_IFRAME;
     39 const int kCreatedDiv         = 1 << UmaPolicy::CREATED_DIV;
     40 const int kCreatedLink        = 1 << UmaPolicy::CREATED_LINK;
     41 const int kCreatedInput       = 1 << UmaPolicy::CREATED_INPUT;
     42 const int kCreatedEmbed       = 1 << UmaPolicy::CREATED_EMBED;
     43 const int kCreatedObject      = 1 << UmaPolicy::CREATED_OBJECT;
     44 const int kAdInjected         = 1 << UmaPolicy::AD_INJECTED;
     45 const int kAdRemoved          = 1 << UmaPolicy::AD_REMOVED;
     46 const int kAdReplaced         = 1 << UmaPolicy::AD_REPLACED;
     47 const int kAdLikelyInjected   = 1 << UmaPolicy::AD_LIKELY_INJECTED;
     48 const int kAdLikelyReplaced   = 1 << UmaPolicy::AD_LIKELY_REPLACED;
     49 
     50 // A mask of all the ad injection flags.
     51 const int kAnyAdActivity = kAdInjected |
     52                            kAdRemoved |
     53                            kAdReplaced |
     54                            kAdLikelyInjected |
     55                            kAdLikelyReplaced;
     56 
     57 }  // namespace
     58 
     59 // Class constants, also used in testing. --------------------------------------
     60 
     61 const char UmaPolicy::kNumberOfTabs[]       = "num_tabs";
     62 const size_t UmaPolicy::kMaxTabsTracked     = 50;
     63 
     64 // Setup and shutdown. ---------------------------------------------------------
     65 
     66 UmaPolicy::UmaPolicy(Profile* profile)
     67     : ActivityLogPolicy(profile), profile_(profile) {
     68   DCHECK(!profile->IsOffTheRecord());
     69   BrowserList::AddObserver(this);
     70 }
     71 
     72 UmaPolicy::~UmaPolicy() {
     73   BrowserList::RemoveObserver(this);
     74 }
     75 
     76 // Unlike the other policies, UmaPolicy can commit suicide directly because it
     77 // doesn't have a dependency on a database.
     78 void UmaPolicy::Close() {
     79   delete this;
     80 }
     81 
     82 // Process actions. ------------------------------------------------------------
     83 
     84 void UmaPolicy::ProcessAction(scoped_refptr<Action> action) {
     85   if (!action->page_url().is_valid() && !action->arg_url().is_valid())
     86     return;
     87   if (action->page_incognito() || action->arg_incognito())
     88     return;
     89   std::string url;
     90   int status = MatchActionToStatus(action);
     91   if (action->page_url().is_valid()) {
     92     url = CleanURL(action->page_url());
     93   } else if (status & kContentScript) {
     94     // This is for the tabs.executeScript case.
     95     url = CleanURL(action->arg_url());
     96   }
     97   if (url.empty())
     98     return;
     99 
    100   SiteMap::iterator site_lookup = url_status_.find(url);
    101   if (site_lookup != url_status_.end())
    102     site_lookup->second[action->extension_id()] |= status;
    103 }
    104 
    105 int UmaPolicy::MatchActionToStatus(scoped_refptr<Action> action) {
    106   if (action->action_type() == Action::ACTION_CONTENT_SCRIPT) {
    107     return kContentScript;
    108   } else if (action->action_type() == Action::ACTION_API_CALL &&
    109              action->api_name() == "tabs.executeScript") {
    110     return kContentScript;
    111   } else if (action->action_type() != Action::ACTION_DOM_ACCESS) {
    112     return kNoStatus;
    113   }
    114 
    115   int dom_verb;
    116   if (!action->other() ||
    117       !action->other()->GetIntegerWithoutPathExpansion(
    118           activity_log_constants::kActionDomVerb, &dom_verb)) {
    119     return kNoStatus;
    120   }
    121 
    122   int ret_bit = kNoStatus;
    123   DomActionType::Type dom_type = static_cast<DomActionType::Type>(dom_verb);
    124   if (dom_type == DomActionType::GETTER)
    125     return kReadDom;
    126   if (dom_type == DomActionType::SETTER) {
    127     ret_bit |= kModifiedDom;
    128   } else if (dom_type == DomActionType::METHOD) {
    129     ret_bit |= kDomMethod;
    130   } else {
    131     return kNoStatus;
    132   }
    133 
    134   if (action->api_name() == "HTMLDocument.write" ||
    135       action->api_name() == "HTMLDocument.writeln") {
    136     ret_bit |= kDocumentWrite;
    137   } else if (action->api_name() == "Element.innerHTML") {
    138     ret_bit |= kInnerHtml;
    139   } else if (action->api_name() == "Document.createElement") {
    140     std::string arg;
    141     action->args()->GetString(0, &arg);
    142     if (arg == "script") {
    143       ret_bit |= kCreatedScript;
    144     } else if (arg == "iframe") {
    145       ret_bit |= kCreatedIframe;
    146     } else if (arg == "div") {
    147       ret_bit |= kCreatedDiv;
    148     } else if (arg == "a") {
    149       ret_bit |= kCreatedLink;
    150     } else if (arg == "input") {
    151       ret_bit |= kCreatedInput;
    152     } else if (arg == "embed") {
    153       ret_bit |= kCreatedEmbed;
    154     } else if (arg == "object") {
    155       ret_bit |= kCreatedObject;
    156     }
    157   }
    158 
    159   const Action::InjectionType ad_injection =
    160       action->DidInjectAd(g_browser_process->rappor_service());
    161   switch (ad_injection) {
    162     case Action::INJECTION_NEW_AD:
    163       ret_bit |= kAdInjected;
    164       break;
    165     case Action::INJECTION_REMOVED_AD:
    166       ret_bit |= kAdRemoved;
    167       break;
    168     case Action::INJECTION_REPLACED_AD:
    169       ret_bit |= kAdReplaced;
    170       break;
    171     case Action::INJECTION_LIKELY_NEW_AD:
    172       ret_bit |= kAdLikelyInjected;
    173       break;
    174     case Action::INJECTION_LIKELY_REPLACED_AD:
    175       ret_bit |= kAdLikelyReplaced;
    176       break;
    177     case Action::NO_AD_INJECTION:
    178       break;
    179     case Action::NUM_INJECTION_TYPES:
    180       NOTREACHED();
    181   }
    182 
    183   return ret_bit;
    184 }
    185 
    186 void UmaPolicy::HistogramOnClose(const std::string& cleaned_url,
    187                                  content::WebContents* web_contents) {
    188   // Let's try to avoid histogramming useless URLs.
    189   if (cleaned_url.empty() || cleaned_url == url::kAboutBlankURL ||
    190       cleaned_url == chrome::kChromeUINewTabURL)
    191     return;
    192 
    193   int statuses[MAX_STATUS - 1];
    194   std::memset(statuses, 0, sizeof(statuses));
    195 
    196   ActiveScriptController* active_script_controller =
    197       ActiveScriptController::GetForWebContents(web_contents);
    198   SiteMap::iterator site_lookup = url_status_.find(cleaned_url);
    199   const ExtensionMap& exts = site_lookup->second;
    200   std::set<std::string> ad_injectors;
    201   for (ExtensionMap::const_iterator ext_iter = exts.begin();
    202        ext_iter != exts.end();
    203        ++ext_iter) {
    204     if (ext_iter->first == kNumberOfTabs)
    205       continue;
    206     for (int i = NONE + 1; i < MAX_STATUS; ++i) {
    207       if (ext_iter->second & (1 << i))
    208         statuses[i-1]++;
    209     }
    210 
    211     if (ext_iter->second & kAnyAdActivity)
    212       ad_injectors.insert(ext_iter->first);
    213   }
    214   if (active_script_controller)
    215     active_script_controller->OnAdInjectionDetected(ad_injectors);
    216 
    217   ExtensionRegistry* registry = ExtensionRegistry::Get(profile_);
    218   for (std::set<std::string>::const_iterator iter = ad_injectors.begin();
    219        iter != ad_injectors.end();
    220        ++iter) {
    221     const Extension* extension =
    222         registry->GetExtensionById(*iter, ExtensionRegistry::EVERYTHING);
    223     if (extension) {
    224       UMA_HISTOGRAM_ENUMERATION("Extensions.AdInjection.InstallLocation",
    225                                 extension->location(),
    226                                 Manifest::NUM_LOCATIONS);
    227     }
    228   }
    229 
    230   std::string prefix = "ExtensionActivity.";
    231   if (GURL(cleaned_url).host() != "www.google.com") {
    232     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
    233                              statuses[CONTENT_SCRIPT - 1]);
    234     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM),
    235                              statuses[READ_DOM - 1]);
    236     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM),
    237                              statuses[MODIFIED_DOM - 1]);
    238     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD),
    239                              statuses[DOM_METHOD - 1]);
    240     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE),
    241                              statuses[DOCUMENT_WRITE - 1]);
    242     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML),
    243                              statuses[INNER_HTML - 1]);
    244     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT),
    245                              statuses[CREATED_SCRIPT - 1]);
    246     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME),
    247                              statuses[CREATED_IFRAME - 1]);
    248     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV),
    249                              statuses[CREATED_DIV - 1]);
    250     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK),
    251                              statuses[CREATED_LINK - 1]);
    252     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT),
    253                              statuses[CREATED_INPUT - 1]);
    254     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED),
    255                              statuses[CREATED_EMBED - 1]);
    256     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT),
    257                              statuses[CREATED_OBJECT - 1]);
    258     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED),
    259                              statuses[AD_INJECTED - 1]);
    260     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED),
    261                              statuses[AD_REMOVED - 1]);
    262     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED),
    263                              statuses[AD_REPLACED - 1]);
    264     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED),
    265                              statuses[AD_LIKELY_INJECTED - 1]);
    266     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED),
    267                              statuses[AD_LIKELY_REPLACED - 1]);
    268   } else {
    269     prefix += "Google.";
    270     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
    271                              statuses[CONTENT_SCRIPT - 1]);
    272     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM),
    273                              statuses[READ_DOM - 1]);
    274     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM),
    275                              statuses[MODIFIED_DOM - 1]);
    276     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD),
    277                              statuses[DOM_METHOD - 1]);
    278     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE),
    279                              statuses[DOCUMENT_WRITE - 1]);
    280     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML),
    281                              statuses[INNER_HTML - 1]);
    282     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT),
    283                              statuses[CREATED_SCRIPT - 1]);
    284     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME),
    285                              statuses[CREATED_IFRAME - 1]);
    286     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV),
    287                              statuses[CREATED_DIV - 1]);
    288     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK),
    289                              statuses[CREATED_LINK - 1]);
    290     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT),
    291                              statuses[CREATED_INPUT - 1]);
    292     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED),
    293                              statuses[CREATED_EMBED - 1]);
    294     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT),
    295                              statuses[CREATED_OBJECT - 1]);
    296     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED),
    297                              statuses[AD_INJECTED - 1]);
    298     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED),
    299                              statuses[AD_REMOVED - 1]);
    300     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED),
    301                              statuses[AD_REPLACED - 1]);
    302     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED),
    303                              statuses[AD_LIKELY_INJECTED - 1]);
    304     UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED),
    305                              statuses[AD_LIKELY_REPLACED - 1]);
    306   }
    307 }
    308 
    309 // Handle tab tracking. --------------------------------------------------------
    310 
    311 void UmaPolicy::OnBrowserAdded(Browser* browser) {
    312   if (!profile_->IsSameProfile(browser->profile()))
    313     return;
    314   browser->tab_strip_model()->AddObserver(this);
    315 }
    316 
    317 void UmaPolicy::OnBrowserRemoved(Browser* browser) {
    318   if (!profile_->IsSameProfile(browser->profile()))
    319     return;
    320   browser->tab_strip_model()->RemoveObserver(this);
    321 }
    322 
    323 // Use the value from SessionID::IdForTab, *not* |index|. |index| will be
    324 // duplicated across tabs in a session, whereas IdForTab uniquely identifies
    325 // each tab.
    326 void UmaPolicy::TabChangedAt(content::WebContents* contents,
    327                              int index,
    328                              TabChangeType change_type) {
    329   if (change_type != TabStripModelObserver::LOADING_ONLY)
    330     return;
    331   if (!contents)
    332     return;
    333 
    334   std::string url = CleanURL(contents->GetLastCommittedURL());
    335   int32 tab_id = SessionID::IdForTab(contents);
    336 
    337   std::map<int32, std::string>::iterator tab_it = tab_list_.find(tab_id);
    338 
    339   // Ignore tabs that haven't changed status.
    340   if (tab_it != tab_list_.end() && tab_it->second == url)
    341     return;
    342 
    343   // Is this an existing tab whose URL has changed.
    344   if (tab_it != tab_list_.end()) {
    345     CleanupClosedPage(tab_it->second, contents);
    346     tab_list_.erase(tab_id);
    347   }
    348 
    349   // Check that tab_list_ isn't over the kMaxTabsTracked budget.
    350   if (tab_list_.size() >= kMaxTabsTracked)
    351     return;
    352 
    353   // Set up the new entries.
    354   tab_list_[tab_id] = url;
    355   SetupOpenedPage(url);
    356 }
    357 
    358 // Use the value from SessionID::IdForTab, *not* |index|. |index| will be
    359 // duplicated across tabs in a session, whereas IdForTab uniquely identifies
    360 // each tab.
    361 void UmaPolicy::TabClosingAt(TabStripModel* tab_strip_model,
    362                              content::WebContents* contents,
    363                              int index) {
    364   if (!contents)
    365     return;
    366   std::string url = CleanURL(contents->GetLastCommittedURL());
    367   int32 tab_id = SessionID::IdForTab(contents);
    368   std::map<int, std::string>::iterator tab_it = tab_list_.find(tab_id);
    369   if (tab_it != tab_list_.end())
    370     tab_list_.erase(tab_id);
    371 
    372   CleanupClosedPage(url, contents);
    373 }
    374 
    375 void UmaPolicy::SetupOpenedPage(const std::string& url) {
    376   url_status_[url][kNumberOfTabs]++;
    377 }
    378 
    379 void UmaPolicy::CleanupClosedPage(const std::string& cleaned_url,
    380                                   content::WebContents* web_contents) {
    381   SiteMap::iterator old_site_lookup = url_status_.find(cleaned_url);
    382   if (old_site_lookup == url_status_.end())
    383     return;
    384   old_site_lookup->second[kNumberOfTabs]--;
    385   if (old_site_lookup->second[kNumberOfTabs] == 0) {
    386     HistogramOnClose(cleaned_url, web_contents);
    387     url_status_.erase(cleaned_url);
    388   }
    389 }
    390 
    391 // Helpers. --------------------------------------------------------------------
    392 
    393 // We don't want to treat # ref navigations as if they were new pageloads.
    394 // So we get rid of the ref if it has it.
    395 // We convert to a string in the hopes that this is faster than Replacements.
    396 std::string UmaPolicy::CleanURL(const GURL& gurl) {
    397   if (gurl.spec().empty())
    398     return GURL(url::kAboutBlankURL).spec();
    399   if (!gurl.is_valid())
    400     return gurl.spec();
    401   if (!gurl.has_ref())
    402     return gurl.spec();
    403   std::string port = "";
    404   if (gurl.has_port())
    405     port = ":" + gurl.port();
    406   std::string query = "";
    407   if (gurl.has_query())
    408     query = "?" + gurl.query();
    409   return base::StringPrintf("%s://%s%s%s%s",
    410                             gurl.scheme().c_str(),
    411                             gurl.host().c_str(),
    412                             port.c_str(),
    413                             gurl.path().c_str(),
    414                             query.c_str());
    415 }
    416 
    417 const char* UmaPolicy::GetHistogramName(PageStatus status) {
    418   switch (status) {
    419     case CONTENT_SCRIPT:
    420       return "ContentScript";
    421     case READ_DOM:
    422       return "ReadDom";
    423     case MODIFIED_DOM:
    424       return "ModifiedDom";
    425     case DOM_METHOD:
    426       return "InvokedDomMethod";
    427     case DOCUMENT_WRITE:
    428       return "DocumentWrite";
    429     case INNER_HTML:
    430       return "InnerHtml";
    431     case CREATED_SCRIPT:
    432       return "CreatedScript";
    433     case CREATED_IFRAME:
    434       return "CreatedIframe";
    435     case CREATED_DIV:
    436       return "CreatedDiv";
    437     case CREATED_LINK:
    438       return "CreatedLink";
    439     case CREATED_INPUT:
    440       return "CreatedInput";
    441     case CREATED_EMBED:
    442       return "CreatedEmbed";
    443     case CREATED_OBJECT:
    444       return "CreatedObject";
    445     case AD_INJECTED:
    446       return "AdInjected";
    447     case AD_REMOVED:
    448       return "AdRemoved";
    449     case AD_REPLACED:
    450       return "AdReplaced";
    451     case AD_LIKELY_INJECTED:
    452       return "AdLikelyInjected";
    453     case AD_LIKELY_REPLACED:
    454       return "AdLikelyReplaced";
    455     case NONE:
    456     case MAX_STATUS:
    457     default:
    458       NOTREACHED();
    459       return "";
    460   }
    461 }
    462 
    463 }  // namespace extensions
    464