1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/extensions/activity_log/activity_actions.h" 6 7 #include <algorithm> // for std::find. 8 #include <string> 9 10 #include "base/command_line.h" 11 #include "base/format_macros.h" 12 #include "base/json/json_string_value_serializer.h" 13 #include "base/logging.h" 14 #include "base/macros.h" 15 #include "base/memory/singleton.h" 16 #include "base/metrics/histogram.h" 17 #include "base/strings/string_number_conversions.h" 18 #include "base/strings/string_util.h" 19 #include "base/strings/stringprintf.h" 20 #include "base/values.h" 21 #include "chrome/browser/extensions/activity_log/activity_action_constants.h" 22 #include "chrome/browser/extensions/activity_log/ad_network_database.h" 23 #include "chrome/browser/extensions/activity_log/fullstream_ui_policy.h" 24 #include "chrome/browser/ui/browser.h" 25 #include "chrome/common/chrome_switches.h" 26 #include "components/rappor/rappor_service.h" 27 #include "content/public/browser/web_contents.h" 28 #include "extensions/common/ad_injection_constants.h" 29 #include "extensions/common/constants.h" 30 #include "extensions/common/dom_action_types.h" 31 #include "sql/statement.h" 32 #include "url/gurl.h" 33 34 namespace constants = activity_log_constants; 35 36 namespace extensions { 37 38 namespace { 39 40 namespace keys = ad_injection_constants::keys; 41 42 // The list of APIs for which we upload the URL to RAPPOR. 43 const char* kApisForRapporMetric[] = { 44 ad_injection_constants::kHtmlIframeSrcApiName, 45 ad_injection_constants::kHtmlEmbedSrcApiName, 46 ad_injection_constants::kHtmlAnchorHrefApiName 47 }; 48 49 // The "Extensions.PossibleAdInjection2" metric uses different Rappor 50 // parameters than the original metric. 51 const char* kExtensionAdInjectionRapporMetricName = 52 "Extensions.PossibleAdInjection2"; 53 54 // The names of different types of HTML elements we check for ad injection. 55 const char* kIframeElementType = "HTMLIFrameElement"; 56 const char* kEmbedElementType = "HTMLEmbedElement"; 57 const char* kAnchorElementType = "HTMLAnchorElement"; 58 59 std::string Serialize(const base::Value* value) { 60 std::string value_as_text; 61 if (!value) { 62 value_as_text = "null"; 63 } else { 64 JSONStringValueSerializer serializer(&value_as_text); 65 serializer.SerializeAndOmitBinaryValues(*value); 66 } 67 return value_as_text; 68 } 69 70 } // namespace 71 72 using api::activity_log_private::ExtensionActivity; 73 74 Action::Action(const std::string& extension_id, 75 const base::Time& time, 76 const ActionType action_type, 77 const std::string& api_name, 78 int64 action_id) 79 : extension_id_(extension_id), 80 time_(time), 81 action_type_(action_type), 82 api_name_(api_name), 83 page_incognito_(false), 84 arg_incognito_(false), 85 count_(0), 86 action_id_(action_id) {} 87 88 Action::~Action() {} 89 90 // TODO(mvrable): As an optimization, we might return this directly if the 91 // refcount is one. However, there are likely to be other stray references in 92 // many cases that will prevent this optimization. 93 scoped_refptr<Action> Action::Clone() const { 94 scoped_refptr<Action> clone( 95 new Action( 96 extension_id(), time(), action_type(), api_name(), action_id())); 97 if (args()) 98 clone->set_args(make_scoped_ptr(args()->DeepCopy())); 99 clone->set_page_url(page_url()); 100 clone->set_page_title(page_title()); 101 clone->set_page_incognito(page_incognito()); 102 clone->set_arg_url(arg_url()); 103 clone->set_arg_incognito(arg_incognito()); 104 if (other()) 105 clone->set_other(make_scoped_ptr(other()->DeepCopy())); 106 return clone; 107 } 108 109 Action::InjectionType Action::DidInjectAd( 110 rappor::RapporService* rappor_service) const { 111 MaybeUploadUrl(rappor_service); 112 113 // We should always have an AdNetworkDatabase, but, on the offchance we don't, 114 // don't crash in a release build. 115 if (!AdNetworkDatabase::Get()) { 116 NOTREACHED(); 117 return NO_AD_INJECTION; 118 } 119 120 AdType ad_type = AD_TYPE_NONE; 121 InjectionType injection_type = NO_AD_INJECTION; 122 123 if (EndsWith(api_name_, 124 ad_injection_constants::kAppendChildApiSuffix, 125 true /* case senstive */)) { 126 injection_type = CheckAppendChild(&ad_type); 127 } else { 128 // Check if the action modified an element's src/href. 129 if (api_name_ == ad_injection_constants::kHtmlIframeSrcApiName) 130 ad_type = AD_TYPE_IFRAME; 131 else if (api_name_ == ad_injection_constants::kHtmlEmbedSrcApiName) 132 ad_type = AD_TYPE_EMBED; 133 else if (api_name_ == ad_injection_constants::kHtmlAnchorHrefApiName) 134 ad_type = AD_TYPE_ANCHOR; 135 136 if (ad_type != AD_TYPE_NONE) 137 injection_type = CheckSrcModification(); 138 } 139 140 if (injection_type != NO_AD_INJECTION) { 141 UMA_HISTOGRAM_ENUMERATION( 142 "Extensions.AdInjection.AdType", ad_type, Action::NUM_AD_TYPES); 143 } 144 145 return injection_type; 146 } 147 148 void Action::set_args(scoped_ptr<base::ListValue> args) { 149 args_.reset(args.release()); 150 } 151 152 base::ListValue* Action::mutable_args() { 153 if (!args_.get()) { 154 args_.reset(new base::ListValue()); 155 } 156 return args_.get(); 157 } 158 159 void Action::set_page_url(const GURL& page_url) { 160 page_url_ = page_url; 161 } 162 163 void Action::set_arg_url(const GURL& arg_url) { 164 arg_url_ = arg_url; 165 } 166 167 void Action::set_other(scoped_ptr<base::DictionaryValue> other) { 168 other_.reset(other.release()); 169 } 170 171 base::DictionaryValue* Action::mutable_other() { 172 if (!other_.get()) { 173 other_.reset(new base::DictionaryValue()); 174 } 175 return other_.get(); 176 } 177 178 std::string Action::SerializePageUrl() const { 179 return (page_incognito() ? constants::kIncognitoUrl : "") + page_url().spec(); 180 } 181 182 void Action::ParsePageUrl(const std::string& url) { 183 set_page_incognito(StartsWithASCII(url, constants::kIncognitoUrl, true)); 184 if (page_incognito()) 185 set_page_url(GURL(url.substr(strlen(constants::kIncognitoUrl)))); 186 else 187 set_page_url(GURL(url)); 188 } 189 190 std::string Action::SerializeArgUrl() const { 191 return (arg_incognito() ? constants::kIncognitoUrl : "") + arg_url().spec(); 192 } 193 194 void Action::ParseArgUrl(const std::string& url) { 195 set_arg_incognito(StartsWithASCII(url, constants::kIncognitoUrl, true)); 196 if (arg_incognito()) 197 set_arg_url(GURL(url.substr(strlen(constants::kIncognitoUrl)))); 198 else 199 set_arg_url(GURL(url)); 200 } 201 202 scoped_ptr<ExtensionActivity> Action::ConvertToExtensionActivity() { 203 scoped_ptr<ExtensionActivity> result(new ExtensionActivity); 204 205 // We do this translation instead of using the same enum because the database 206 // values need to be stable; this allows us to change the extension API 207 // without affecting the database. 208 switch (action_type()) { 209 case ACTION_API_CALL: 210 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_CALL; 211 break; 212 case ACTION_API_EVENT: 213 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_EVENT; 214 break; 215 case ACTION_CONTENT_SCRIPT: 216 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_CONTENT_SCRIPT; 217 break; 218 case ACTION_DOM_ACCESS: 219 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_DOM_ACCESS; 220 break; 221 case ACTION_DOM_EVENT: 222 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_DOM_EVENT; 223 break; 224 case ACTION_WEB_REQUEST: 225 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_WEB_REQUEST; 226 break; 227 case UNUSED_ACTION_API_BLOCKED: 228 case ACTION_ANY: 229 default: 230 // This shouldn't be reached, but some people might have old or otherwise 231 // weird db entries. Treat it like an API call if that happens. 232 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_CALL; 233 break; 234 } 235 236 result->extension_id.reset(new std::string(extension_id())); 237 result->time.reset(new double(time().ToJsTime())); 238 result->count.reset(new double(count())); 239 result->api_call.reset(new std::string(api_name())); 240 result->args.reset(new std::string(Serialize(args()))); 241 if (action_id() != -1) 242 result->activity_id.reset( 243 new std::string(base::StringPrintf("%" PRId64, action_id()))); 244 if (page_url().is_valid()) { 245 if (!page_title().empty()) 246 result->page_title.reset(new std::string(page_title())); 247 result->page_url.reset(new std::string(SerializePageUrl())); 248 } 249 if (arg_url().is_valid()) 250 result->arg_url.reset(new std::string(SerializeArgUrl())); 251 252 if (other()) { 253 scoped_ptr<ExtensionActivity::Other> other_field( 254 new ExtensionActivity::Other); 255 bool prerender; 256 if (other()->GetBooleanWithoutPathExpansion(constants::kActionPrerender, 257 &prerender)) { 258 other_field->prerender.reset(new bool(prerender)); 259 } 260 const base::DictionaryValue* web_request; 261 if (other()->GetDictionaryWithoutPathExpansion(constants::kActionWebRequest, 262 &web_request)) { 263 other_field->web_request.reset(new std::string( 264 ActivityLogPolicy::Util::Serialize(web_request))); 265 } 266 std::string extra; 267 if (other()->GetStringWithoutPathExpansion(constants::kActionExtra, &extra)) 268 other_field->extra.reset(new std::string(extra)); 269 int dom_verb; 270 if (other()->GetIntegerWithoutPathExpansion(constants::kActionDomVerb, 271 &dom_verb)) { 272 switch (static_cast<DomActionType::Type>(dom_verb)) { 273 case DomActionType::GETTER: 274 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_GETTER; 275 break; 276 case DomActionType::SETTER: 277 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_SETTER; 278 break; 279 case DomActionType::METHOD: 280 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_METHOD; 281 break; 282 case DomActionType::INSERTED: 283 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_INSERTED; 284 break; 285 case DomActionType::XHR: 286 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_XHR; 287 break; 288 case DomActionType::WEBREQUEST: 289 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_WEBREQUEST; 290 break; 291 case DomActionType::MODIFIED: 292 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_MODIFIED; 293 break; 294 default: 295 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_NONE; 296 } 297 } else { 298 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_NONE; 299 } 300 result->other.reset(other_field.release()); 301 } 302 303 return result.Pass(); 304 } 305 306 std::string Action::PrintForDebug() const { 307 std::string result = base::StringPrintf("ACTION ID=%" PRId64, action_id()); 308 result += " EXTENSION ID=" + extension_id() + " CATEGORY="; 309 switch (action_type_) { 310 case ACTION_API_CALL: 311 result += "api_call"; 312 break; 313 case ACTION_API_EVENT: 314 result += "api_event_callback"; 315 break; 316 case ACTION_WEB_REQUEST: 317 result += "webrequest"; 318 break; 319 case ACTION_CONTENT_SCRIPT: 320 result += "content_script"; 321 break; 322 case UNUSED_ACTION_API_BLOCKED: 323 // This is deprecated. 324 result += "api_blocked"; 325 break; 326 case ACTION_DOM_EVENT: 327 result += "dom_event"; 328 break; 329 case ACTION_DOM_ACCESS: 330 result += "dom_access"; 331 break; 332 default: 333 result += base::StringPrintf("type%d", static_cast<int>(action_type_)); 334 } 335 336 result += " API=" + api_name_; 337 if (args_.get()) { 338 result += " ARGS=" + Serialize(args_.get()); 339 } 340 if (page_url_.is_valid()) { 341 if (page_incognito_) 342 result += " PAGE_URL=(incognito)" + page_url_.spec(); 343 else 344 result += " PAGE_URL=" + page_url_.spec(); 345 } 346 if (!page_title_.empty()) { 347 base::StringValue title(page_title_); 348 result += " PAGE_TITLE=" + Serialize(&title); 349 } 350 if (arg_url_.is_valid()) { 351 if (arg_incognito_) 352 result += " ARG_URL=(incognito)" + arg_url_.spec(); 353 else 354 result += " ARG_URL=" + arg_url_.spec(); 355 } 356 if (other_.get()) { 357 result += " OTHER=" + Serialize(other_.get()); 358 } 359 360 result += base::StringPrintf(" COUNT=%d", count_); 361 return result; 362 } 363 364 bool Action::UrlCouldBeAd(const GURL& url) const { 365 // Ads can only be valid urls that don't match the page's host (linking to the 366 // current page should be considered valid use), and aren't local to the 367 // extension. 368 return url.is_valid() && 369 !url.is_empty() && 370 url.host() != page_url_.host() && 371 !url.SchemeIs(kExtensionScheme); 372 } 373 374 void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const { 375 // Don't bother recording if the url is innocuous (or no |rappor_service|). 376 if (!rappor_service || !UrlCouldBeAd(arg_url_)) 377 return; 378 379 bool can_inject_ads = false; 380 for (size_t i = 0; i < arraysize(kApisForRapporMetric); ++i) { 381 if (api_name_ == kApisForRapporMetric[i]) { 382 can_inject_ads = true; 383 break; 384 } 385 } 386 387 if (!can_inject_ads) 388 return; 389 390 // Record the URL - an ad *may* have been injected. 391 rappor_service->RecordSample(kExtensionAdInjectionRapporMetricName, 392 rappor::ETLD_PLUS_ONE_RAPPOR_TYPE, 393 arg_url_.host()); 394 } 395 396 Action::InjectionType Action::CheckSrcModification() const { 397 const AdNetworkDatabase* database = AdNetworkDatabase::Get(); 398 399 bool arg_url_could_be_ad = UrlCouldBeAd(arg_url_); 400 401 GURL prev_url; 402 std::string prev_url_string; 403 if (args_.get() && args_->GetString(1u, &prev_url_string)) 404 prev_url = GURL(prev_url_string); 405 406 bool prev_url_valid = prev_url.is_valid() && !prev_url.is_empty(); 407 408 bool injected_ad = arg_url_could_be_ad && database->IsAdNetwork(arg_url_); 409 bool replaced_ad = prev_url_valid && database->IsAdNetwork(prev_url); 410 411 if (injected_ad && replaced_ad) 412 return INJECTION_REPLACED_AD; 413 if (injected_ad) 414 return INJECTION_NEW_AD; 415 if (replaced_ad) 416 return INJECTION_REMOVED_AD; 417 418 // If the extension modified the URL with an external, valid URL then there's 419 // a good chance it's ad injection. Log it as a likely one, which also helps 420 // us determine the effectiveness of our IsAdNetwork() recognition. 421 if (arg_url_could_be_ad) { 422 if (prev_url_valid) 423 return INJECTION_LIKELY_REPLACED_AD; 424 return INJECTION_LIKELY_NEW_AD; 425 } 426 427 return NO_AD_INJECTION; 428 } 429 430 Action::InjectionType Action::CheckAppendChild(AdType* ad_type_out) const { 431 const base::DictionaryValue* child = NULL; 432 if (!args_->GetDictionary(0u, &child)) 433 return NO_AD_INJECTION; 434 435 return CheckDomObject(child, ad_type_out); 436 } 437 438 Action::InjectionType Action::CheckDomObject( 439 const base::DictionaryValue* object, 440 AdType* ad_type_out) const { 441 DCHECK(ad_type_out); 442 std::string type; 443 object->GetString(keys::kType, &type); 444 445 AdType ad_type = AD_TYPE_NONE; 446 std::string url_key; 447 if (type == kIframeElementType) { 448 ad_type = AD_TYPE_IFRAME; 449 url_key = keys::kSrc; 450 } else if (type == kEmbedElementType) { 451 ad_type = AD_TYPE_EMBED; 452 url_key = keys::kSrc; 453 } else if (type == kAnchorElementType) { 454 ad_type = AD_TYPE_ANCHOR; 455 url_key = keys::kHref; 456 } 457 458 if (!url_key.empty()) { 459 std::string url; 460 if (object->GetString(url_key, &url)) { 461 GURL gurl(url); 462 if (UrlCouldBeAd(gurl)) { 463 *ad_type_out = ad_type; 464 if (AdNetworkDatabase::Get()->IsAdNetwork(gurl)) 465 return INJECTION_NEW_AD; 466 // If the extension injected an URL which is not local to itself or the 467 // page, there is a good chance it could be a new ad, and our database 468 // missed it. 469 return INJECTION_LIKELY_NEW_AD; 470 } 471 } 472 } 473 474 const base::ListValue* children = NULL; 475 if (object->GetList(keys::kChildren, &children)) { 476 const base::DictionaryValue* child = NULL; 477 for (size_t i = 0; 478 i < children->GetSize() && 479 i < ad_injection_constants::kMaximumChildrenToCheck; 480 ++i) { 481 if (children->GetDictionary(i, &child)) { 482 InjectionType type = CheckDomObject(child, ad_type_out); 483 if (type != NO_AD_INJECTION) 484 return type; 485 } 486 } 487 } 488 489 return NO_AD_INJECTION; 490 } 491 492 bool ActionComparator::operator()( 493 const scoped_refptr<Action>& lhs, 494 const scoped_refptr<Action>& rhs) const { 495 if (lhs->time() != rhs->time()) 496 return lhs->time() < rhs->time(); 497 else if (lhs->action_id() != rhs->action_id()) 498 return lhs->action_id() < rhs->action_id(); 499 else 500 return ActionComparatorExcludingTimeAndActionId()(lhs, rhs); 501 } 502 503 bool ActionComparatorExcludingTimeAndActionId::operator()( 504 const scoped_refptr<Action>& lhs, 505 const scoped_refptr<Action>& rhs) const { 506 if (lhs->extension_id() != rhs->extension_id()) 507 return lhs->extension_id() < rhs->extension_id(); 508 if (lhs->action_type() != rhs->action_type()) 509 return lhs->action_type() < rhs->action_type(); 510 if (lhs->api_name() != rhs->api_name()) 511 return lhs->api_name() < rhs->api_name(); 512 513 // args might be null; treat a null value as less than all non-null values, 514 // including the empty string. 515 if (!lhs->args() && rhs->args()) 516 return true; 517 if (lhs->args() && !rhs->args()) 518 return false; 519 if (lhs->args() && rhs->args()) { 520 std::string lhs_args = ActivityLogPolicy::Util::Serialize(lhs->args()); 521 std::string rhs_args = ActivityLogPolicy::Util::Serialize(rhs->args()); 522 if (lhs_args != rhs_args) 523 return lhs_args < rhs_args; 524 } 525 526 // Compare URLs as strings, and treat the incognito flag as a separate field. 527 if (lhs->page_url().spec() != rhs->page_url().spec()) 528 return lhs->page_url().spec() < rhs->page_url().spec(); 529 if (lhs->page_incognito() != rhs->page_incognito()) 530 return lhs->page_incognito() < rhs->page_incognito(); 531 532 if (lhs->page_title() != rhs->page_title()) 533 return lhs->page_title() < rhs->page_title(); 534 535 if (lhs->arg_url().spec() != rhs->arg_url().spec()) 536 return lhs->arg_url().spec() < rhs->arg_url().spec(); 537 if (lhs->arg_incognito() != rhs->arg_incognito()) 538 return lhs->arg_incognito() < rhs->arg_incognito(); 539 540 // other is treated much like the args field. 541 if (!lhs->other() && rhs->other()) 542 return true; 543 if (lhs->other() && !rhs->other()) 544 return false; 545 if (lhs->other() && rhs->other()) { 546 std::string lhs_other = ActivityLogPolicy::Util::Serialize(lhs->other()); 547 std::string rhs_other = ActivityLogPolicy::Util::Serialize(rhs->other()); 548 if (lhs_other != rhs_other) 549 return lhs_other < rhs_other; 550 } 551 552 // All fields compare as equal if this point is reached. 553 return false; 554 } 555 556 } // namespace extensions 557