Home | History | Annotate | Download | only in activity_log
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/extensions/activity_log/activity_actions.h"
      6 
      7 #include <algorithm>  // for std::find.
      8 #include <string>
      9 
     10 #include "base/command_line.h"
     11 #include "base/format_macros.h"
     12 #include "base/json/json_string_value_serializer.h"
     13 #include "base/logging.h"
     14 #include "base/macros.h"
     15 #include "base/memory/singleton.h"
     16 #include "base/metrics/histogram.h"
     17 #include "base/strings/string_number_conversions.h"
     18 #include "base/strings/string_util.h"
     19 #include "base/strings/stringprintf.h"
     20 #include "base/values.h"
     21 #include "chrome/browser/extensions/activity_log/activity_action_constants.h"
     22 #include "chrome/browser/extensions/activity_log/ad_network_database.h"
     23 #include "chrome/browser/extensions/activity_log/fullstream_ui_policy.h"
     24 #include "chrome/browser/ui/browser.h"
     25 #include "chrome/common/chrome_switches.h"
     26 #include "components/rappor/rappor_service.h"
     27 #include "content/public/browser/web_contents.h"
     28 #include "extensions/common/ad_injection_constants.h"
     29 #include "extensions/common/constants.h"
     30 #include "extensions/common/dom_action_types.h"
     31 #include "sql/statement.h"
     32 #include "url/gurl.h"
     33 
     34 namespace constants = activity_log_constants;
     35 
     36 namespace extensions {
     37 
     38 namespace {
     39 
     40 namespace keys = ad_injection_constants::keys;
     41 
     42 // The list of APIs for which we upload the URL to RAPPOR.
     43 const char* kApisForRapporMetric[] = {
     44   ad_injection_constants::kHtmlIframeSrcApiName,
     45   ad_injection_constants::kHtmlEmbedSrcApiName,
     46   ad_injection_constants::kHtmlAnchorHrefApiName
     47 };
     48 
     49 // The "Extensions.PossibleAdInjection2" metric uses different Rappor
     50 // parameters than the original metric.
     51 const char* kExtensionAdInjectionRapporMetricName =
     52     "Extensions.PossibleAdInjection2";
     53 
     54 // The names of different types of HTML elements we check for ad injection.
     55 const char* kIframeElementType = "HTMLIFrameElement";
     56 const char* kEmbedElementType = "HTMLEmbedElement";
     57 const char* kAnchorElementType = "HTMLAnchorElement";
     58 
     59 std::string Serialize(const base::Value* value) {
     60   std::string value_as_text;
     61   if (!value) {
     62     value_as_text = "null";
     63   } else {
     64     JSONStringValueSerializer serializer(&value_as_text);
     65     serializer.SerializeAndOmitBinaryValues(*value);
     66   }
     67   return value_as_text;
     68 }
     69 
     70 }  // namespace
     71 
     72 using api::activity_log_private::ExtensionActivity;
     73 
     74 Action::Action(const std::string& extension_id,
     75                const base::Time& time,
     76                const ActionType action_type,
     77                const std::string& api_name,
     78                int64 action_id)
     79     : extension_id_(extension_id),
     80       time_(time),
     81       action_type_(action_type),
     82       api_name_(api_name),
     83       page_incognito_(false),
     84       arg_incognito_(false),
     85       count_(0),
     86       action_id_(action_id) {}
     87 
     88 Action::~Action() {}
     89 
     90 // TODO(mvrable): As an optimization, we might return this directly if the
     91 // refcount is one.  However, there are likely to be other stray references in
     92 // many cases that will prevent this optimization.
     93 scoped_refptr<Action> Action::Clone() const {
     94   scoped_refptr<Action> clone(
     95       new Action(
     96           extension_id(), time(), action_type(), api_name(), action_id()));
     97   if (args())
     98     clone->set_args(make_scoped_ptr(args()->DeepCopy()));
     99   clone->set_page_url(page_url());
    100   clone->set_page_title(page_title());
    101   clone->set_page_incognito(page_incognito());
    102   clone->set_arg_url(arg_url());
    103   clone->set_arg_incognito(arg_incognito());
    104   if (other())
    105     clone->set_other(make_scoped_ptr(other()->DeepCopy()));
    106   return clone;
    107 }
    108 
    109 Action::InjectionType Action::DidInjectAd(
    110     rappor::RapporService* rappor_service) const {
    111   MaybeUploadUrl(rappor_service);
    112 
    113   // We should always have an AdNetworkDatabase, but, on the offchance we don't,
    114   // don't crash in a release build.
    115   if (!AdNetworkDatabase::Get()) {
    116     NOTREACHED();
    117     return NO_AD_INJECTION;
    118   }
    119 
    120   AdType ad_type = AD_TYPE_NONE;
    121   InjectionType injection_type = NO_AD_INJECTION;
    122 
    123   if (EndsWith(api_name_,
    124                ad_injection_constants::kAppendChildApiSuffix,
    125                true /* case senstive */)) {
    126     injection_type = CheckAppendChild(&ad_type);
    127   } else {
    128     // Check if the action modified an element's src/href.
    129     if (api_name_ == ad_injection_constants::kHtmlIframeSrcApiName)
    130       ad_type = AD_TYPE_IFRAME;
    131     else if (api_name_ == ad_injection_constants::kHtmlEmbedSrcApiName)
    132       ad_type = AD_TYPE_EMBED;
    133     else if (api_name_ == ad_injection_constants::kHtmlAnchorHrefApiName)
    134       ad_type = AD_TYPE_ANCHOR;
    135 
    136     if (ad_type != AD_TYPE_NONE)
    137       injection_type = CheckSrcModification();
    138   }
    139 
    140   if (injection_type != NO_AD_INJECTION) {
    141     UMA_HISTOGRAM_ENUMERATION(
    142         "Extensions.AdInjection.AdType", ad_type, Action::NUM_AD_TYPES);
    143   }
    144 
    145   return injection_type;
    146 }
    147 
    148 void Action::set_args(scoped_ptr<base::ListValue> args) {
    149   args_.reset(args.release());
    150 }
    151 
    152 base::ListValue* Action::mutable_args() {
    153   if (!args_.get()) {
    154     args_.reset(new base::ListValue());
    155   }
    156   return args_.get();
    157 }
    158 
    159 void Action::set_page_url(const GURL& page_url) {
    160   page_url_ = page_url;
    161 }
    162 
    163 void Action::set_arg_url(const GURL& arg_url) {
    164   arg_url_ = arg_url;
    165 }
    166 
    167 void Action::set_other(scoped_ptr<base::DictionaryValue> other) {
    168   other_.reset(other.release());
    169 }
    170 
    171 base::DictionaryValue* Action::mutable_other() {
    172   if (!other_.get()) {
    173     other_.reset(new base::DictionaryValue());
    174   }
    175   return other_.get();
    176 }
    177 
    178 std::string Action::SerializePageUrl() const {
    179   return (page_incognito() ? constants::kIncognitoUrl : "") + page_url().spec();
    180 }
    181 
    182 void Action::ParsePageUrl(const std::string& url) {
    183   set_page_incognito(StartsWithASCII(url, constants::kIncognitoUrl, true));
    184   if (page_incognito())
    185     set_page_url(GURL(url.substr(strlen(constants::kIncognitoUrl))));
    186   else
    187     set_page_url(GURL(url));
    188 }
    189 
    190 std::string Action::SerializeArgUrl() const {
    191   return (arg_incognito() ? constants::kIncognitoUrl : "") + arg_url().spec();
    192 }
    193 
    194 void Action::ParseArgUrl(const std::string& url) {
    195   set_arg_incognito(StartsWithASCII(url, constants::kIncognitoUrl, true));
    196   if (arg_incognito())
    197     set_arg_url(GURL(url.substr(strlen(constants::kIncognitoUrl))));
    198   else
    199     set_arg_url(GURL(url));
    200 }
    201 
    202 scoped_ptr<ExtensionActivity> Action::ConvertToExtensionActivity() {
    203   scoped_ptr<ExtensionActivity> result(new ExtensionActivity);
    204 
    205   // We do this translation instead of using the same enum because the database
    206   // values need to be stable; this allows us to change the extension API
    207   // without affecting the database.
    208   switch (action_type()) {
    209     case ACTION_API_CALL:
    210       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_CALL;
    211       break;
    212     case ACTION_API_EVENT:
    213       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_EVENT;
    214       break;
    215     case ACTION_CONTENT_SCRIPT:
    216       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_CONTENT_SCRIPT;
    217       break;
    218     case ACTION_DOM_ACCESS:
    219       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_DOM_ACCESS;
    220       break;
    221     case ACTION_DOM_EVENT:
    222       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_DOM_EVENT;
    223       break;
    224     case ACTION_WEB_REQUEST:
    225       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_WEB_REQUEST;
    226       break;
    227     case UNUSED_ACTION_API_BLOCKED:
    228     case ACTION_ANY:
    229     default:
    230       // This shouldn't be reached, but some people might have old or otherwise
    231       // weird db entries. Treat it like an API call if that happens.
    232       result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_CALL;
    233       break;
    234   }
    235 
    236   result->extension_id.reset(new std::string(extension_id()));
    237   result->time.reset(new double(time().ToJsTime()));
    238   result->count.reset(new double(count()));
    239   result->api_call.reset(new std::string(api_name()));
    240   result->args.reset(new std::string(Serialize(args())));
    241   if (action_id() != -1)
    242     result->activity_id.reset(
    243         new std::string(base::StringPrintf("%" PRId64, action_id())));
    244   if (page_url().is_valid()) {
    245     if (!page_title().empty())
    246       result->page_title.reset(new std::string(page_title()));
    247     result->page_url.reset(new std::string(SerializePageUrl()));
    248   }
    249   if (arg_url().is_valid())
    250     result->arg_url.reset(new std::string(SerializeArgUrl()));
    251 
    252   if (other()) {
    253     scoped_ptr<ExtensionActivity::Other> other_field(
    254         new ExtensionActivity::Other);
    255     bool prerender;
    256     if (other()->GetBooleanWithoutPathExpansion(constants::kActionPrerender,
    257                                                 &prerender)) {
    258       other_field->prerender.reset(new bool(prerender));
    259     }
    260     const base::DictionaryValue* web_request;
    261     if (other()->GetDictionaryWithoutPathExpansion(constants::kActionWebRequest,
    262                                                    &web_request)) {
    263       other_field->web_request.reset(new std::string(
    264           ActivityLogPolicy::Util::Serialize(web_request)));
    265     }
    266     std::string extra;
    267     if (other()->GetStringWithoutPathExpansion(constants::kActionExtra, &extra))
    268       other_field->extra.reset(new std::string(extra));
    269     int dom_verb;
    270     if (other()->GetIntegerWithoutPathExpansion(constants::kActionDomVerb,
    271                                                 &dom_verb)) {
    272       switch (static_cast<DomActionType::Type>(dom_verb)) {
    273         case DomActionType::GETTER:
    274           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_GETTER;
    275           break;
    276         case DomActionType::SETTER:
    277           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_SETTER;
    278           break;
    279         case DomActionType::METHOD:
    280           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_METHOD;
    281           break;
    282         case DomActionType::INSERTED:
    283           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_INSERTED;
    284           break;
    285         case DomActionType::XHR:
    286           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_XHR;
    287           break;
    288         case DomActionType::WEBREQUEST:
    289           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_WEBREQUEST;
    290           break;
    291         case DomActionType::MODIFIED:
    292           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_MODIFIED;
    293           break;
    294         default:
    295           other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_NONE;
    296       }
    297     } else {
    298       other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_NONE;
    299     }
    300     result->other.reset(other_field.release());
    301   }
    302 
    303   return result.Pass();
    304 }
    305 
    306 std::string Action::PrintForDebug() const {
    307   std::string result = base::StringPrintf("ACTION ID=%" PRId64, action_id());
    308   result += " EXTENSION ID=" + extension_id() + " CATEGORY=";
    309   switch (action_type_) {
    310     case ACTION_API_CALL:
    311       result += "api_call";
    312       break;
    313     case ACTION_API_EVENT:
    314       result += "api_event_callback";
    315       break;
    316     case ACTION_WEB_REQUEST:
    317       result += "webrequest";
    318       break;
    319     case ACTION_CONTENT_SCRIPT:
    320       result += "content_script";
    321       break;
    322     case UNUSED_ACTION_API_BLOCKED:
    323       // This is deprecated.
    324       result += "api_blocked";
    325       break;
    326     case ACTION_DOM_EVENT:
    327       result += "dom_event";
    328       break;
    329     case ACTION_DOM_ACCESS:
    330       result += "dom_access";
    331       break;
    332     default:
    333       result += base::StringPrintf("type%d", static_cast<int>(action_type_));
    334   }
    335 
    336   result += " API=" + api_name_;
    337   if (args_.get()) {
    338     result += " ARGS=" + Serialize(args_.get());
    339   }
    340   if (page_url_.is_valid()) {
    341     if (page_incognito_)
    342       result += " PAGE_URL=(incognito)" + page_url_.spec();
    343     else
    344       result += " PAGE_URL=" + page_url_.spec();
    345   }
    346   if (!page_title_.empty()) {
    347     base::StringValue title(page_title_);
    348     result += " PAGE_TITLE=" + Serialize(&title);
    349   }
    350   if (arg_url_.is_valid()) {
    351     if (arg_incognito_)
    352       result += " ARG_URL=(incognito)" + arg_url_.spec();
    353     else
    354       result += " ARG_URL=" + arg_url_.spec();
    355   }
    356   if (other_.get()) {
    357     result += " OTHER=" + Serialize(other_.get());
    358   }
    359 
    360   result += base::StringPrintf(" COUNT=%d", count_);
    361   return result;
    362 }
    363 
    364 bool Action::UrlCouldBeAd(const GURL& url) const {
    365   // Ads can only be valid urls that don't match the page's host (linking to the
    366   // current page should be considered valid use), and aren't local to the
    367   // extension.
    368   return url.is_valid() &&
    369          !url.is_empty() &&
    370          url.host() != page_url_.host() &&
    371          !url.SchemeIs(kExtensionScheme);
    372 }
    373 
    374 void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const {
    375   // Don't bother recording if the url is innocuous (or no |rappor_service|).
    376   if (!rappor_service || !UrlCouldBeAd(arg_url_))
    377     return;
    378 
    379   bool can_inject_ads = false;
    380   for (size_t i = 0; i < arraysize(kApisForRapporMetric); ++i) {
    381     if (api_name_ == kApisForRapporMetric[i]) {
    382       can_inject_ads = true;
    383       break;
    384     }
    385   }
    386 
    387   if (!can_inject_ads)
    388     return;
    389 
    390   // Record the URL - an ad *may* have been injected.
    391   rappor_service->RecordSample(kExtensionAdInjectionRapporMetricName,
    392                                rappor::ETLD_PLUS_ONE_RAPPOR_TYPE,
    393                                arg_url_.host());
    394 }
    395 
    396 Action::InjectionType Action::CheckSrcModification() const {
    397   const AdNetworkDatabase* database = AdNetworkDatabase::Get();
    398 
    399   bool arg_url_could_be_ad = UrlCouldBeAd(arg_url_);
    400 
    401   GURL prev_url;
    402   std::string prev_url_string;
    403   if (args_.get() && args_->GetString(1u, &prev_url_string))
    404     prev_url = GURL(prev_url_string);
    405 
    406   bool prev_url_valid = prev_url.is_valid() && !prev_url.is_empty();
    407 
    408   bool injected_ad = arg_url_could_be_ad && database->IsAdNetwork(arg_url_);
    409   bool replaced_ad = prev_url_valid && database->IsAdNetwork(prev_url);
    410 
    411   if (injected_ad && replaced_ad)
    412     return INJECTION_REPLACED_AD;
    413   if (injected_ad)
    414     return INJECTION_NEW_AD;
    415   if (replaced_ad)
    416     return INJECTION_REMOVED_AD;
    417 
    418   // If the extension modified the URL with an external, valid URL then there's
    419   // a good chance it's ad injection. Log it as a likely one, which also helps
    420   // us determine the effectiveness of our IsAdNetwork() recognition.
    421   if (arg_url_could_be_ad) {
    422     if (prev_url_valid)
    423       return INJECTION_LIKELY_REPLACED_AD;
    424     return INJECTION_LIKELY_NEW_AD;
    425   }
    426 
    427   return NO_AD_INJECTION;
    428 }
    429 
    430 Action::InjectionType Action::CheckAppendChild(AdType* ad_type_out) const {
    431   const base::DictionaryValue* child = NULL;
    432   if (!args_->GetDictionary(0u, &child))
    433     return NO_AD_INJECTION;
    434 
    435   return CheckDomObject(child, ad_type_out);
    436 }
    437 
    438 Action::InjectionType Action::CheckDomObject(
    439     const base::DictionaryValue* object,
    440     AdType* ad_type_out) const {
    441   DCHECK(ad_type_out);
    442   std::string type;
    443   object->GetString(keys::kType, &type);
    444 
    445   AdType ad_type = AD_TYPE_NONE;
    446   std::string url_key;
    447   if (type == kIframeElementType) {
    448     ad_type = AD_TYPE_IFRAME;
    449     url_key = keys::kSrc;
    450   } else if (type == kEmbedElementType) {
    451     ad_type = AD_TYPE_EMBED;
    452     url_key = keys::kSrc;
    453   } else if (type == kAnchorElementType) {
    454     ad_type = AD_TYPE_ANCHOR;
    455     url_key = keys::kHref;
    456   }
    457 
    458   if (!url_key.empty()) {
    459     std::string url;
    460     if (object->GetString(url_key, &url)) {
    461       GURL gurl(url);
    462       if (UrlCouldBeAd(gurl)) {
    463         *ad_type_out = ad_type;
    464         if (AdNetworkDatabase::Get()->IsAdNetwork(gurl))
    465           return INJECTION_NEW_AD;
    466         // If the extension injected an URL which is not local to itself or the
    467         // page, there is a good chance it could be a new ad, and our database
    468         // missed it.
    469         return INJECTION_LIKELY_NEW_AD;
    470       }
    471     }
    472   }
    473 
    474   const base::ListValue* children = NULL;
    475   if (object->GetList(keys::kChildren, &children)) {
    476     const base::DictionaryValue* child = NULL;
    477     for (size_t i = 0;
    478          i < children->GetSize() &&
    479              i < ad_injection_constants::kMaximumChildrenToCheck;
    480          ++i) {
    481       if (children->GetDictionary(i, &child)) {
    482         InjectionType type = CheckDomObject(child, ad_type_out);
    483         if (type != NO_AD_INJECTION)
    484           return type;
    485       }
    486     }
    487   }
    488 
    489   return NO_AD_INJECTION;
    490 }
    491 
    492 bool ActionComparator::operator()(
    493     const scoped_refptr<Action>& lhs,
    494     const scoped_refptr<Action>& rhs) const {
    495   if (lhs->time() != rhs->time())
    496     return lhs->time() < rhs->time();
    497   else if (lhs->action_id() != rhs->action_id())
    498     return lhs->action_id() < rhs->action_id();
    499   else
    500     return ActionComparatorExcludingTimeAndActionId()(lhs, rhs);
    501 }
    502 
    503 bool ActionComparatorExcludingTimeAndActionId::operator()(
    504     const scoped_refptr<Action>& lhs,
    505     const scoped_refptr<Action>& rhs) const {
    506   if (lhs->extension_id() != rhs->extension_id())
    507     return lhs->extension_id() < rhs->extension_id();
    508   if (lhs->action_type() != rhs->action_type())
    509     return lhs->action_type() < rhs->action_type();
    510   if (lhs->api_name() != rhs->api_name())
    511     return lhs->api_name() < rhs->api_name();
    512 
    513   // args might be null; treat a null value as less than all non-null values,
    514   // including the empty string.
    515   if (!lhs->args() && rhs->args())
    516     return true;
    517   if (lhs->args() && !rhs->args())
    518     return false;
    519   if (lhs->args() && rhs->args()) {
    520     std::string lhs_args = ActivityLogPolicy::Util::Serialize(lhs->args());
    521     std::string rhs_args = ActivityLogPolicy::Util::Serialize(rhs->args());
    522     if (lhs_args != rhs_args)
    523       return lhs_args < rhs_args;
    524   }
    525 
    526   // Compare URLs as strings, and treat the incognito flag as a separate field.
    527   if (lhs->page_url().spec() != rhs->page_url().spec())
    528     return lhs->page_url().spec() < rhs->page_url().spec();
    529   if (lhs->page_incognito() != rhs->page_incognito())
    530     return lhs->page_incognito() < rhs->page_incognito();
    531 
    532   if (lhs->page_title() != rhs->page_title())
    533     return lhs->page_title() < rhs->page_title();
    534 
    535   if (lhs->arg_url().spec() != rhs->arg_url().spec())
    536     return lhs->arg_url().spec() < rhs->arg_url().spec();
    537   if (lhs->arg_incognito() != rhs->arg_incognito())
    538     return lhs->arg_incognito() < rhs->arg_incognito();
    539 
    540   // other is treated much like the args field.
    541   if (!lhs->other() && rhs->other())
    542     return true;
    543   if (lhs->other() && !rhs->other())
    544     return false;
    545   if (lhs->other() && rhs->other()) {
    546     std::string lhs_other = ActivityLogPolicy::Util::Serialize(lhs->other());
    547     std::string rhs_other = ActivityLogPolicy::Util::Serialize(rhs->other());
    548     if (lhs_other != rhs_other)
    549       return lhs_other < rhs_other;
    550   }
    551 
    552   // All fields compare as equal if this point is reached.
    553   return false;
    554 }
    555 
    556 }  // namespace extensions
    557