Home | History | Annotate | Download | only in activity_log
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/extensions/activity_log/activity_actions.h"
      6 
      7 #include <algorithm>  // for std::find.
      8 #include <string>
      9 
     10 #include "base/command_line.h"
     11 #include "base/format_macros.h"
     12 #include "base/json/json_string_value_serializer.h"
     13 #include "base/logging.h"
     14 #include "base/macros.h"
     15 #include "base/memory/singleton.h"
     16 #include "base/metrics/histogram.h"
     17 #include "base/strings/string_number_conversions.h"
     18 #include "base/strings/string_util.h"
     19 #include "base/strings/stringprintf.h"
     20 #include "base/values.h"
     21 #include "chrome/browser/extensions/activity_log/activity_action_constants.h"
     22 #include "chrome/browser/extensions/activity_log/ad_network_database.h"
     23 #include "chrome/browser/extensions/activity_log/fullstream_ui_policy.h"
     24 #include "chrome/browser/ui/browser.h"
     25 #include "chrome/common/chrome_switches.h"
     26 #include "components/rappor/rappor_service.h"
     27 #include "content/public/browser/web_contents.h"
     28 #include "extensions/common/constants.h"
     29 #include "extensions/common/dom_action_types.h"
     30 #include "sql/statement.h"
     31 #include "url/gurl.h"
     32 
     33 namespace constants = activity_log_constants;
     34 
     35 namespace extensions {
     36 
     37 namespace {
     38 
     // Name of the Rappor metric under which hosts of possibly injected ads are
     // recorded. The "Extensions.PossibleAdInjection2" metric uses different
     // Rappor parameters than the original metric.
     // Declared as a char array rather than a `const char*` so the symbol itself
     // cannot be re-pointed, matching the other string constants in this file.
     const char kExtensionAdInjectionRapporMetricName[] =
         "Extensions.PossibleAdInjection2";

     // API names of the actions that are inspected for ad injection.
     const char kBlinkSetAttributeEvent[] = "blinkSetAttribute";
     const char kBlinkAddElementEvent[] = "blinkAddElement";

     // Element names that are considered when looking for injected ads.
     const char kIframe[] = "iframe";
     const char kAnchor[] = "a";
     const char kScript[] = "script";

     // Attribute names that are considered when looking for injected ads.
     const char kSrc[] = "src";
     const char kHref[] = "href";
     53 
     54 std::string Serialize(const base::Value* value) {
     55   std::string value_as_text;
     56   if (!value) {
     57     value_as_text = "null";
     58   } else {
     59     JSONStringValueSerializer serializer(&value_as_text);
     60     serializer.SerializeAndOmitBinaryValues(*value);
     61   }
     62   return value_as_text;
     63 }
     64 
     65 }  // namespace
     66 
     67 using api::activity_log_private::ExtensionActivity;
     68 
     69 Action::Action(const std::string& extension_id,
     70                const base::Time& time,
     71                const ActionType action_type,
     72                const std::string& api_name,
     73                int64 action_id)
     74     : extension_id_(extension_id),
     75       time_(time),
     76       action_type_(action_type),
     77       api_name_(api_name),
     78       page_incognito_(false),
     79       arg_incognito_(false),
     80       count_(0),
     81       action_id_(action_id) {}
     82 
     83 Action::~Action() {}
     84 
     85 // TODO(mvrable): As an optimization, we might return this directly if the
     86 // refcount is one.  However, there are likely to be other stray references in
     87 // many cases that will prevent this optimization.
     88 scoped_refptr<Action> Action::Clone() const {
     89   scoped_refptr<Action> clone(
     90       new Action(
     91           extension_id(), time(), action_type(), api_name(), action_id()));
     92   if (args())
     93     clone->set_args(make_scoped_ptr(args()->DeepCopy()));
     94   clone->set_page_url(page_url());
     95   clone->set_page_title(page_title());
     96   clone->set_page_incognito(page_incognito());
     97   clone->set_arg_url(arg_url());
     98   clone->set_arg_incognito(arg_incognito());
     99   if (other())
    100     clone->set_other(make_scoped_ptr(other()->DeepCopy()));
    101   return clone;
    102 }
    103 
    104 Action::InjectionType Action::DidInjectAd(
    105     rappor::RapporService* rappor_service) const {
    106   MaybeUploadUrl(rappor_service);
    107 
    108   // We should always have an AdNetworkDatabase, but, on the offchance we don't,
    109   // don't crash in a release build.
    110   if (!AdNetworkDatabase::Get()) {
    111     NOTREACHED();
    112     return NO_AD_INJECTION;
    113   }
    114 
    115   AdType ad_type = AD_TYPE_NONE;
    116   InjectionType injection_type = NO_AD_INJECTION;
    117 
    118   if (api_name_ == kBlinkSetAttributeEvent) {
    119     std::string element_name;
    120     std::string attr_name;
    121     if (args_.get()) {
    122       args_->GetString(0u, &element_name);
    123       args_->GetString(1u, &attr_name);
    124     }
    125     if (attr_name == kSrc) {
    126       if (element_name == kIframe)
    127         ad_type = AD_TYPE_IFRAME;
    128       else if (element_name == kScript)
    129         ad_type = AD_TYPE_SCRIPT;
    130     } else if (element_name == kAnchor && attr_name == kHref) {
    131       ad_type = AD_TYPE_ANCHOR;
    132     }
    133 
    134     if (ad_type != AD_TYPE_NONE)
    135       injection_type = CheckAttrModification();
    136   } else if (api_name_ == kBlinkAddElementEvent) {
    137     std::string element_name;
    138     if (args_.get())
    139       args_->GetString(0u, &element_name);
    140     if (element_name == kIframe)
    141       ad_type = AD_TYPE_IFRAME;
    142     else if (element_name == kAnchor)
    143       ad_type = AD_TYPE_ANCHOR;
    144     else if (element_name == kScript)
    145       ad_type = AD_TYPE_SCRIPT;
    146 
    147     if (ad_type != AD_TYPE_NONE)
    148       injection_type = CheckElementAddition();
    149   }
    150 
    151   if (injection_type != NO_AD_INJECTION) {
    152     UMA_HISTOGRAM_ENUMERATION(
    153         "Extensions.AdInjection.AdType", ad_type, Action::NUM_AD_TYPES);
    154   }
    155 
    156   return injection_type;
    157 }
    158 
    159 void Action::set_args(scoped_ptr<base::ListValue> args) {
    160   args_.reset(args.release());
    161 }
    162 
    163 base::ListValue* Action::mutable_args() {
    164   if (!args_.get()) {
    165     args_.reset(new base::ListValue());
    166   }
    167   return args_.get();
    168 }
    169 
    170 void Action::set_page_url(const GURL& page_url) {
    171   page_url_ = page_url;
    172 }
    173 
    174 void Action::set_arg_url(const GURL& arg_url) {
    175   arg_url_ = arg_url;
    176 }
    177 
    178 void Action::set_other(scoped_ptr<base::DictionaryValue> other) {
    179   other_.reset(other.release());
    180 }
    181 
    182 base::DictionaryValue* Action::mutable_other() {
    183   if (!other_.get()) {
    184     other_.reset(new base::DictionaryValue());
    185   }
    186   return other_.get();
    187 }
    188 
    189 std::string Action::SerializePageUrl() const {
    190   return (page_incognito() ? constants::kIncognitoUrl : "") + page_url().spec();
    191 }
    192 
    193 void Action::ParsePageUrl(const std::string& url) {
    194   set_page_incognito(StartsWithASCII(url, constants::kIncognitoUrl, true));
    195   if (page_incognito())
    196     set_page_url(GURL(url.substr(strlen(constants::kIncognitoUrl))));
    197   else
    198     set_page_url(GURL(url));
    199 }
    200 
    201 std::string Action::SerializeArgUrl() const {
    202   return (arg_incognito() ? constants::kIncognitoUrl : "") + arg_url().spec();
    203 }
    204 
    205 void Action::ParseArgUrl(const std::string& url) {
    206   set_arg_incognito(StartsWithASCII(url, constants::kIncognitoUrl, true));
    207   if (arg_incognito())
    208     set_arg_url(GURL(url.substr(strlen(constants::kIncognitoUrl))));
    209   else
    210     set_arg_url(GURL(url));
    211 }
    212 
    213 scoped_ptr<ExtensionActivity> Action::ConvertToExtensionActivity() {
    214   scoped_ptr<ExtensionActivity> result(new ExtensionActivity);
    215 
    216   // We do this translation instead of using the same enum because the database
    217   // values need to be stable; this allows us to change the extension API
    218   // without affecting the database.
    219   switch (action_type()) {
    220     case ACTION_API_CALL:
    221       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_CALL;
    222       break;
    223     case ACTION_API_EVENT:
    224       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_EVENT;
    225       break;
    226     case ACTION_CONTENT_SCRIPT:
    227       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_CONTENT_SCRIPT;
    228       break;
    229     case ACTION_DOM_ACCESS:
    230       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_DOM_ACCESS;
    231       break;
    232     case ACTION_DOM_EVENT:
    233       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_DOM_EVENT;
    234       break;
    235     case ACTION_WEB_REQUEST:
    236       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_WEB_REQUEST;
    237       break;
    238     case UNUSED_ACTION_API_BLOCKED:
    239     case ACTION_ANY:
    240     default:
    241       // This shouldn't be reached, but some people might have old or otherwise
    242       // weird db entries. Treat it like an API call if that happens.
    243       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_CALL;
    244       break;
    245   }
    246 
    247   result->extension_id.reset(new std::string(extension_id()));
    248   result->time.reset(new double(time().ToJsTime()));
    249   result->count.reset(new double(count()));
    250   result->api_call.reset(new std::string(api_name()));
    251   result->args.reset(new std::string(Serialize(args())));
    252   if (action_id() != -1)
    253     result->activity_id.reset(
    254         new std::string(base::StringPrintf("%" PRId64, action_id())));
    255   if (page_url().is_valid()) {
    256     if (!page_title().empty())
    257       result->page_title.reset(new std::string(page_title()));
    258     result->page_url.reset(new std::string(SerializePageUrl()));
    259   }
    260   if (arg_url().is_valid())
    261     result->arg_url.reset(new std::string(SerializeArgUrl()));
    262 
    263   if (other()) {
    264     scoped_ptr<ExtensionActivity::Other> other_field(
    265         new ExtensionActivity::Other);
    266     bool prerender;
    267     if (other()->GetBooleanWithoutPathExpansion(constants::kActionPrerender,
    268                                                 &prerender)) {
    269       other_field->prerender.reset(new bool(prerender));
    270     }
    271     const base::DictionaryValue* web_request;
    272     if (other()->GetDictionaryWithoutPathExpansion(constants::kActionWebRequest,
    273                                                    &web_request)) {
    274       other_field->web_request.reset(new std::string(
    275           ActivityLogPolicy::Util::Serialize(web_request)));
    276     }
    277     std::string extra;
    278     if (other()->GetStringWithoutPathExpansion(constants::kActionExtra, &extra))
    279       other_field->extra.reset(new std::string(extra));
    280     int dom_verb;
    281     if (other()->GetIntegerWithoutPathExpansion(constants::kActionDomVerb,
    282                                                 &dom_verb)) {
    283       switch (static_cast<DomActionType::Type>(dom_verb)) {
    284         case DomActionType::GETTER:
    285           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_GETTER;
    286           break;
    287         case DomActionType::SETTER:
    288           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_SETTER;
    289           break;
    290         case DomActionType::METHOD:
    291           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_METHOD;
    292           break;
    293         case DomActionType::INSERTED:
    294           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_INSERTED;
    295           break;
    296         case DomActionType::XHR:
    297           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_XHR;
    298           break;
    299         case DomActionType::WEBREQUEST:
    300           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_WEBREQUEST;
    301           break;
    302         case DomActionType::MODIFIED:
    303           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_MODIFIED;
    304           break;
    305         default:
    306           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_NONE;
    307       }
    308     } else {
    309       other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_NONE;
    310     }
    311     result->other.reset(other_field.release());
    312   }
    313 
    314   return result.Pass();
    315 }
    316 
    317 std::string Action::PrintForDebug() const {
    318   std::string result = base::StringPrintf("ACTION ID=%" PRId64, action_id());
    319   result += " EXTENSION ID=" + extension_id() + " CATEGORY=";
    320   switch (action_type_) {
    321     case ACTION_API_CALL:
    322       result += "api_call";
    323       break;
    324     case ACTION_API_EVENT:
    325       result += "api_event_callback";
    326       break;
    327     case ACTION_WEB_REQUEST:
    328       result += "webrequest";
    329       break;
    330     case ACTION_CONTENT_SCRIPT:
    331       result += "content_script";
    332       break;
    333     case UNUSED_ACTION_API_BLOCKED:
    334       // This is deprecated.
    335       result += "api_blocked";
    336       break;
    337     case ACTION_DOM_EVENT:
    338       result += "dom_event";
    339       break;
    340     case ACTION_DOM_ACCESS:
    341       result += "dom_access";
    342       break;
    343     default:
    344       result += base::StringPrintf("type%d", static_cast<int>(action_type_));
    345   }
    346 
    347   result += " API=" + api_name_;
    348   if (args_.get()) {
    349     result += " ARGS=" + Serialize(args_.get());
    350   }
    351   if (page_url_.is_valid()) {
    352     if (page_incognito_)
    353       result += " PAGE_URL=(incognito)" + page_url_.spec();
    354     else
    355       result += " PAGE_URL=" + page_url_.spec();
    356   }
    357   if (!page_title_.empty()) {
    358     base::StringValue title(page_title_);
    359     result += " PAGE_TITLE=" + Serialize(&title);
    360   }
    361   if (arg_url_.is_valid()) {
    362     if (arg_incognito_)
    363       result += " ARG_URL=(incognito)" + arg_url_.spec();
    364     else
    365       result += " ARG_URL=" + arg_url_.spec();
    366   }
    367   if (other_.get()) {
    368     result += " OTHER=" + Serialize(other_.get());
    369   }
    370 
    371   result += base::StringPrintf(" COUNT=%d", count_);
    372   return result;
    373 }
    374 
    375 bool Action::UrlCouldBeAd(const GURL& url) const {
    376   // Ads can only be valid urls that don't match the page's host (linking to the
    377   // current page should be considered valid use), and aren't local to the
    378   // extension.
    379   return url.is_valid() &&
    380          !url.is_empty() &&
    381          url.host() != page_url_.host() &&
    382          !url.SchemeIs(kExtensionScheme);
    383 }
    384 
    385 void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const {
    386   // Don't bother recording if the url is innocuous (or no |rappor_service|).
    387   if (!rappor_service)
    388     return;
    389 
    390   GURL url;
    391 
    392   if (api_name_ == kBlinkSetAttributeEvent) {
    393     std::string element_name;
    394     std::string attr_name;
    395     std::string url_string;
    396     if (args_.get()) {
    397       args_->GetString(0u, &element_name);
    398       args_->GetString(1u, &attr_name);
    399     }
    400     if (element_name == kIframe && attr_name == kSrc) {
    401       args_->GetString(3u, &url_string);
    402       url = GURL(url_string);
    403     } else if (element_name == kAnchor && attr_name == kHref) {
    404       args_->GetString(3u, &url_string);
    405       url = GURL(url_string);
    406     }
    407   } else if (api_name_ == kBlinkAddElementEvent) {
    408     std::string element_name;
    409     std::string url_string;
    410     if (args_.get())
    411       args_->GetString(0u, &element_name);
    412     if (element_name == kIframe) {
    413       args_->GetString(1u, &url_string);
    414       url = GURL(url_string);
    415     } else if (element_name == kAnchor) {
    416       args_->GetString(1u, &url_string);
    417       url = GURL(url_string);
    418     }
    419   }
    420 
    421   if (!UrlCouldBeAd(url))
    422     return;
    423 
    424   // Record the URL - an ad *may* have been injected.
    425   rappor_service->RecordSample(kExtensionAdInjectionRapporMetricName,
    426                                rappor::ETLD_PLUS_ONE_RAPPOR_TYPE,
    427                                url.host());
    428 }
    429 
    430 Action::InjectionType Action::CheckAttrModification() const {
    431   if (api_name_ != kBlinkSetAttributeEvent)
    432     return NO_AD_INJECTION;
    433 
    434   const AdNetworkDatabase* database = AdNetworkDatabase::Get();
    435 
    436   GURL prev_url;
    437   std::string prev_url_string;
    438   if (args_.get() && args_->GetString(2u, &prev_url_string))
    439     prev_url = GURL(prev_url_string);
    440 
    441   GURL new_url;
    442   std::string new_url_string;
    443   if (args_.get() && args_->GetString(3u, &new_url_string))
    444     new_url = GURL(new_url_string);
    445 
    446   bool new_url_could_be_ad = UrlCouldBeAd(new_url);
    447   bool prev_url_valid = prev_url.is_valid() && !prev_url.is_empty();
    448 
    449   bool injected_ad = new_url_could_be_ad && database->IsAdNetwork(new_url);
    450   bool replaced_ad = prev_url_valid && database->IsAdNetwork(prev_url);
    451 
    452   if (injected_ad && replaced_ad)
    453     return INJECTION_REPLACED_AD;
    454   if (injected_ad)
    455     return INJECTION_NEW_AD;
    456   if (replaced_ad)
    457     return INJECTION_REMOVED_AD;
    458 
    459   // If the extension modified the URL with an external, valid URL then there's
    460   // a good chance it's ad injection. Log it as a likely one, which also helps
    461   // us determine the effectiveness of our IsAdNetwork() recognition.
    462   if (new_url_could_be_ad) {
    463     if (prev_url_valid)
    464       return INJECTION_LIKELY_REPLACED_AD;
    465     return INJECTION_LIKELY_NEW_AD;
    466   }
    467 
    468   return NO_AD_INJECTION;
    469 }
    470 
    471 Action::InjectionType Action::CheckElementAddition() const {
    472   DCHECK_EQ(kBlinkAddElementEvent, api_name_);
    473 
    474   GURL url;
    475   std::string url_string;
    476   if (args_.get() && args_->GetString(1u, &url_string))
    477     url = GURL(url_string);
    478 
    479   if (UrlCouldBeAd(url)) {
    480     if (AdNetworkDatabase::Get()->IsAdNetwork(url))
    481       return INJECTION_NEW_AD;
    482     // If the extension injected an URL which is not local to itself or the
    483     // page, there is a good chance it could be a new ad, and our database
    484     // missed it.
    485     return INJECTION_LIKELY_NEW_AD;
    486   }
    487   return NO_AD_INJECTION;
    488 }
    489 
    490 bool ActionComparator::operator()(
    491     const scoped_refptr<Action>& lhs,
    492     const scoped_refptr<Action>& rhs) const {
    493   if (lhs->time() != rhs->time())
    494     return lhs->time() < rhs->time();
    495   else if (lhs->action_id() != rhs->action_id())
    496     return lhs->action_id() < rhs->action_id();
    497   else
    498     return ActionComparatorExcludingTimeAndActionId()(lhs, rhs);
    499 }
    500 
    501 bool ActionComparatorExcludingTimeAndActionId::operator()(
    502     const scoped_refptr<Action>& lhs,
    503     const scoped_refptr<Action>& rhs) const {
    504   if (lhs->extension_id() != rhs->extension_id())
    505     return lhs->extension_id() < rhs->extension_id();
    506   if (lhs->action_type() != rhs->action_type())
    507     return lhs->action_type() < rhs->action_type();
    508   if (lhs->api_name() != rhs->api_name())
    509     return lhs->api_name() < rhs->api_name();
    510 
    511   // args might be null; treat a null value as less than all non-null values,
    512   // including the empty string.
    513   if (!lhs->args() && rhs->args())
    514     return true;
    515   if (lhs->args() && !rhs->args())
    516     return false;
    517   if (lhs->args() && rhs->args()) {
    518     std::string lhs_args = ActivityLogPolicy::Util::Serialize(lhs->args());
    519     std::string rhs_args = ActivityLogPolicy::Util::Serialize(rhs->args());
    520     if (lhs_args != rhs_args)
    521       return lhs_args < rhs_args;
    522   }
    523 
    524   // Compare URLs as strings, and treat the incognito flag as a separate field.
    525   if (lhs->page_url().spec() != rhs->page_url().spec())
    526     return lhs->page_url().spec() < rhs->page_url().spec();
    527   if (lhs->page_incognito() != rhs->page_incognito())
    528     return lhs->page_incognito() < rhs->page_incognito();
    529 
    530   if (lhs->page_title() != rhs->page_title())
    531     return lhs->page_title() < rhs->page_title();
    532 
    533   if (lhs->arg_url().spec() != rhs->arg_url().spec())
    534     return lhs->arg_url().spec() < rhs->arg_url().spec();
    535   if (lhs->arg_incognito() != rhs->arg_incognito())
    536     return lhs->arg_incognito() < rhs->arg_incognito();
    537 
    538   // other is treated much like the args field.
    539   if (!lhs->other() && rhs->other())
    540     return true;
    541   if (lhs->other() && !rhs->other())
    542     return false;
    543   if (lhs->other() && rhs->other()) {
    544     std::string lhs_other = ActivityLogPolicy::Util::Serialize(lhs->other());
    545     std::string rhs_other = ActivityLogPolicy::Util::Serialize(rhs->other());
    546     if (lhs_other != rhs_other)
    547       return lhs_other < rhs_other;
    548   }
    549 
    550   // All fields compare as equal if this point is reached.
    551   return false;
    552 }
    553 
    554 }  // namespace extensions
    555