Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Implementation of the MalwareDetails class.
      6 
      7 #include "chrome/browser/safe_browsing/malware_details.h"
      8 
      9 #include "base/bind.h"
     10 #include "base/lazy_instance.h"
     11 #include "chrome/browser/net/chrome_url_request_context.h"
     12 #include "chrome/browser/profiles/profile.h"
     13 #include "chrome/browser/safe_browsing/malware_details_cache.h"
     14 #include "chrome/browser/safe_browsing/malware_details_history.h"
     15 #include "chrome/browser/safe_browsing/report.pb.h"
     16 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
     17 #include "content/public/browser/browser_thread.h"
     18 #include "content/public/browser/navigation_controller.h"
     19 #include "content/public/browser/navigation_entry.h"
     20 #include "content/public/browser/render_view_host.h"
     21 #include "content/public/browser/web_contents.h"
     22 #include "net/base/io_buffer.h"
     23 #include "net/disk_cache/disk_cache.h"
     24 #include "net/url_request/url_request_context_getter.h"
     25 
     26 using content::BrowserThread;
     27 using content::NavigationEntry;
     28 using content::WebContents;
     29 using safe_browsing::ClientMalwareReportRequest;
     30 
     31 // Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
     32 static const uint32 kMaxDomNodes = 500;
     33 
     34 // static
     35 MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
     36 
     37 // The default MalwareDetailsFactory.  Global, made a singleton so we
     38 // don't leak it.
     39 class MalwareDetailsFactoryImpl
     40     : public MalwareDetailsFactory {
     41  public:
     42   virtual MalwareDetails* CreateMalwareDetails(
     43       SafeBrowsingUIManager* ui_manager,
     44       WebContents* web_contents,
     45       const SafeBrowsingUIManager::UnsafeResource& unsafe_resource) OVERRIDE {
     46     return new MalwareDetails(ui_manager, web_contents, unsafe_resource);
     47   }
     48 
     49  private:
     50   friend struct base::DefaultLazyInstanceTraits<
     51       MalwareDetailsFactoryImpl>;
     52 
     53   MalwareDetailsFactoryImpl() { }
     54 
     55   DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
     56 };
     57 
     58 static base::LazyInstance<MalwareDetailsFactoryImpl>
     59     g_malware_details_factory_impl = LAZY_INSTANCE_INITIALIZER;
     60 
     61 // Create a MalwareDetails for the given tab.
     62 /* static */
     63 MalwareDetails* MalwareDetails::NewMalwareDetails(
     64     SafeBrowsingUIManager* ui_manager,
     65     WebContents* web_contents,
     66     const UnsafeResource& resource) {
     67   // Set up the factory if this has not been done already (tests do that
     68   // before this method is called).
     69   if (!factory_)
     70     factory_ = g_malware_details_factory_impl.Pointer();
     71   return factory_->CreateMalwareDetails(ui_manager, web_contents, resource);
     72 }
     73 
     74 // Create a MalwareDetails for the given tab. Runs in the UI thread.
     75 MalwareDetails::MalwareDetails(
     76     SafeBrowsingUIManager* ui_manager,
     77     content::WebContents* web_contents,
     78     const UnsafeResource& resource)
     79     : content::WebContentsObserver(web_contents),
     80       profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())),
     81       request_context_getter_(profile_->GetRequestContext()),
     82       ui_manager_(ui_manager),
     83       resource_(resource),
     84       cache_result_(false),
     85       cache_collector_(new MalwareDetailsCacheCollector),
     86       redirects_collector_(
     87           new MalwareDetailsRedirectsCollector(profile_)) {
     88   StartCollection();
     89 }
     90 
     91 MalwareDetails::~MalwareDetails() {
     92 }
     93 
     94 bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
     95   bool handled = true;
     96   IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
     97     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
     98                         OnReceivedMalwareDOMDetails)
     99     IPC_MESSAGE_UNHANDLED(handled = false)
    100   IPC_END_MESSAGE_MAP()
    101   return handled;
    102 }
    103 
    104 bool MalwareDetails::IsPublicUrl(const GURL& url) const {
    105   return url.SchemeIs("http");  // TODO(panayiotis): also skip internal urls.
    106 }
    107 
    108 // Looks for a Resource for the given url in resources_.  If found, it
    109 // updates |resource|. Otherwise, it creates a new message, adds it to
    110 // resources_ and updates |resource| to point to it.
    111 ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
    112     const GURL& url) {
    113   safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
    114   if (it != resources_.end()) {
    115     return it->second.get();
    116   }
    117 
    118   // Create the resource for |url|.
    119   int id = resources_.size();
    120   linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
    121       new ClientMalwareReportRequest::Resource());
    122   new_resource->set_url(url.spec());
    123   new_resource->set_id(id);
    124   resources_[url.spec()] = new_resource;
    125   return new_resource.get();
    126 }
    127 
    128 void MalwareDetails::AddUrl(const GURL& url,
    129                             const GURL& parent,
    130                             const std::string& tagname,
    131                             const std::vector<GURL>* children) {
    132   if (!url.is_valid() || !IsPublicUrl(url))
    133     return;
    134 
    135   // Find (or create) the resource for the url.
    136   ClientMalwareReportRequest::Resource* url_resource =
    137       FindOrCreateResource(url);
    138   if (!tagname.empty()) {
    139     url_resource->set_tag_name(tagname);
    140   }
    141   if (!parent.is_empty() && IsPublicUrl(parent)) {
    142     // Add the resource for the parent.
    143     ClientMalwareReportRequest::Resource* parent_resource =
    144         FindOrCreateResource(parent);
    145     // Update the parent-child relation
    146     url_resource->set_parent_id(parent_resource->id());
    147   }
    148   if (children) {
    149     for (std::vector<GURL>::const_iterator it = children->begin();
    150          it != children->end(); it++) {
    151       ClientMalwareReportRequest::Resource* child_resource =
    152           FindOrCreateResource(*it);
    153       url_resource->add_child_ids(child_resource->id());
    154     }
    155   }
    156 }
    157 
    158 void MalwareDetails::StartCollection() {
    159   DVLOG(1) << "Starting to compute malware details.";
    160   report_.reset(new ClientMalwareReportRequest());
    161 
    162   if (IsPublicUrl(resource_.url)) {
    163     report_->set_malware_url(resource_.url.spec());
    164   }
    165 
    166   GURL page_url = web_contents()->GetURL();
    167   if (IsPublicUrl(page_url)) {
    168     report_->set_page_url(page_url.spec());
    169   }
    170 
    171   GURL referrer_url;
    172   NavigationEntry* nav_entry = web_contents()->GetController().GetActiveEntry();
    173   if (nav_entry) {
    174     referrer_url = nav_entry->GetReferrer().url;
    175     if (IsPublicUrl(referrer_url)) {
    176       report_->set_referrer_url(referrer_url.spec());
    177     }
    178   }
    179 
    180   // Add the nodes, starting from the page url.
    181   AddUrl(page_url, GURL(), std::string(), NULL);
    182 
    183   // Add the resource_url and its original url, if non-empty and different.
    184   if (!resource_.original_url.is_empty() &&
    185       resource_.url != resource_.original_url) {
    186     // Add original_url, as the parent of resource_url.
    187     AddUrl(resource_.original_url, GURL(), std::string(), NULL);
    188     AddUrl(resource_.url, resource_.original_url, std::string(), NULL);
    189   } else {
    190     AddUrl(resource_.url, GURL(), std::string(), NULL);
    191   }
    192 
    193   // Add the redirect urls, if non-empty. The redirect urls do not include the
    194   // original url, but include the unsafe url which is the last one of the
    195   // redirect urls chain
    196   GURL parent_url;
    197   // Set the original url as the parent of the first redirect url if it's not
    198   // empty.
    199   if (!resource_.original_url.is_empty()) {
    200     parent_url = resource_.original_url;
    201   }
    202   // Set the previous redirect url as the parent of the next one
    203   for (unsigned int i = 0; i < resource_.redirect_urls.size(); ++i) {
    204     AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL);
    205     parent_url = resource_.redirect_urls[i];
    206   }
    207 
    208   // Add the referrer url.
    209   if (nav_entry && !referrer_url.is_empty()) {
    210     AddUrl(referrer_url, GURL(), std::string(), NULL);
    211   }
    212 
    213   // Get URLs of frames, scripts etc from the DOM.
    214   // OnReceivedMalwareDOMDetails will be called when the renderer replies.
    215   content::RenderViewHost* view = web_contents()->GetRenderViewHost();
    216   view->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view->GetRoutingID()));
    217 }
    218 
    219 // When the renderer is done, this is called.
    220 void MalwareDetails::OnReceivedMalwareDOMDetails(
    221     const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
    222   // Schedule this in IO thread, so it doesn't conflict with future users
    223   // of our data structures (eg GetSerializedReport).
    224   BrowserThread::PostTask(
    225       BrowserThread::IO, FROM_HERE,
    226       base::Bind(&MalwareDetails::AddDOMDetails, this, params));
    227 }
    228 
    229 void MalwareDetails::AddDOMDetails(
    230     const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
    231   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    232   DVLOG(1) << "Nodes from the DOM: " << params.size();
    233 
    234   // If we have already started getting redirects from history service,
    235   // don't modify state, otherwise will invalidate the iterators.
    236   if (redirects_collector_->HasStarted())
    237     return;
    238 
    239   // If we have already started collecting data from the HTTP cache, don't
    240   // modify our state.
    241   if (cache_collector_->HasStarted())
    242     return;
    243 
    244   // Add the urls from the DOM to |resources_|.  The renderer could be
    245   // sending bogus messages, so limit the number of nodes we accept.
    246   for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
    247     SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
    248     DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
    249     AddUrl(node.url, node.parent, node.tag_name, &(node.children));
    250   }
    251 }
    252 
    253 // Called from the SB Service on the IO thread, after the user has
    254 // closed the tab, or clicked proceed or goback.  Since the user needs
    255 // to take an action, we expect this to be called after
    256 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include
    257 // the DOM data in our report.
    258 void MalwareDetails::FinishCollection() {
    259   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    260 
    261   std::vector<GURL> urls;
    262   for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
    263        it != resources_.end(); it++) {
    264     urls.push_back(GURL(it->first));
    265   }
    266   redirects_collector_->StartHistoryCollection(
    267       urls,
    268       base::Bind(&MalwareDetails::OnRedirectionCollectionReady, this));
    269 }
    270 
    271 void MalwareDetails::OnRedirectionCollectionReady() {
    272   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    273   const std::vector<safe_browsing::RedirectChain>& redirects =
    274       redirects_collector_->GetCollectedUrls();
    275 
    276   for (size_t i = 0; i < redirects.size(); ++i)
    277     AddRedirectUrlList(redirects[i]);
    278 
    279   // Call the cache collector
    280   cache_collector_->StartCacheCollection(
    281       request_context_getter_.get(),
    282       &resources_,
    283       &cache_result_,
    284       base::Bind(&MalwareDetails::OnCacheCollectionReady, this));
    285 }
    286 
    287 void MalwareDetails::AddRedirectUrlList(const std::vector<GURL>& urls) {
    288   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    289   for (size_t i = 0; i < urls.size()-1; ++i) {
    290     AddUrl(urls[i], urls[i + 1], std::string(), NULL);
    291   }
    292 }
    293 
    294 void MalwareDetails::OnCacheCollectionReady() {
    295   DVLOG(1) << "OnCacheCollectionReady.";
    296   // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
    297   for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
    298        it != resources_.end(); it++) {
    299     ClientMalwareReportRequest::Resource* pb_resource =
    300         report_->add_resources();
    301     pb_resource->CopyFrom(*(it->second));
    302   }
    303 
    304   report_->set_complete(cache_result_);
    305 
    306   // Send the report, using the SafeBrowsingService.
    307   std::string serialized;
    308   if (!report_->SerializeToString(&serialized)) {
    309     DLOG(ERROR) << "Unable to serialize the malware report.";
    310     return;
    311   }
    312 
    313   ui_manager_->SendSerializedMalwareDetails(serialized);
    314 }
    315