Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Implementation of the MalwareDetails class.
      6 
      7 #include "chrome/browser/safe_browsing/malware_details.h"
      8 
      9 #include "base/bind.h"
     10 #include "base/lazy_instance.h"
     11 #include "chrome/browser/profiles/profile.h"
     12 #include "chrome/browser/safe_browsing/malware_details_cache.h"
     13 #include "chrome/browser/safe_browsing/malware_details_history.h"
     14 #include "chrome/browser/safe_browsing/report.pb.h"
     15 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
     16 #include "content/public/browser/browser_thread.h"
     17 #include "content/public/browser/navigation_controller.h"
     18 #include "content/public/browser/navigation_entry.h"
     19 #include "content/public/browser/render_view_host.h"
     20 #include "content/public/browser/web_contents.h"
     21 #include "net/url_request/url_request_context_getter.h"
     22 
     23 using content::BrowserThread;
     24 using content::NavigationEntry;
     25 using content::WebContents;
     26 using safe_browsing::ClientMalwareReportRequest;
     27 
// Upper bound on the number of DOM nodes accepted from one renderer reply;
// AddDOMDetails() ignores every node past this count.
// Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
static const uint32 kMaxDomNodes = 500;
     30 
// static
// Factory consulted by NewMalwareDetails(). Starts out NULL; when it is still
// NULL at the first NewMalwareDetails() call, the default factory is
// installed there.
MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
     33 
// The default MalwareDetailsFactory.  Global, made a singleton so we
// don't leak it.
class MalwareDetailsFactoryImpl : public MalwareDetailsFactory {
 public:
  // Allocates a plain MalwareDetails with |new| and returns the raw pointer,
  // forwarding all three arguments unchanged to the MalwareDetails
  // constructor.
  virtual MalwareDetails* CreateMalwareDetails(
      SafeBrowsingUIManager* ui_manager,
      WebContents* web_contents,
      const SafeBrowsingUIManager::UnsafeResource& unsafe_resource) OVERRIDE {
    return new MalwareDetails(ui_manager, web_contents, unsafe_resource);
  }

 private:
  // The constructor is private; the lazy-instance traits are befriended so
  // that they can still construct the object.
  friend struct base::DefaultLazyInstanceTraits<MalwareDetailsFactoryImpl>;

  MalwareDetailsFactoryImpl() {}

  DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
};
     52 
// Lazily-created instance of the default factory. NewMalwareDetails() takes
// its address via Pointer() when no other factory has been installed.
static base::LazyInstance<MalwareDetailsFactoryImpl>
    g_malware_details_factory_impl = LAZY_INSTANCE_INITIALIZER;
     55 
     56 // Create a MalwareDetails for the given tab.
     57 /* static */
     58 MalwareDetails* MalwareDetails::NewMalwareDetails(
     59     SafeBrowsingUIManager* ui_manager,
     60     WebContents* web_contents,
     61     const UnsafeResource& resource) {
     62   // Set up the factory if this has not been done already (tests do that
     63   // before this method is called).
     64   if (!factory_)
     65     factory_ = g_malware_details_factory_impl.Pointer();
     66   return factory_->CreateMalwareDetails(ui_manager, web_contents, resource);
     67 }
     68 
// Create a MalwareDetails for the given tab. Runs in the UI thread.
//
// Observes |web_contents|, derives the Profile from its browser context, and
// creates the cache and redirect collectors. Collection starts immediately:
// StartCollection() is invoked from the constructor body.
MalwareDetails::MalwareDetails(
    SafeBrowsingUIManager* ui_manager,
    content::WebContents* web_contents,
    const UnsafeResource& resource)
    : content::WebContentsObserver(web_contents),
      profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())),
      // NOTE(review): reads |profile_|, so this relies on |profile_| being
      // declared before |request_context_getter_| in the class definition --
      // confirm against the header.
      request_context_getter_(profile_->GetRequestContext()),
      ui_manager_(ui_manager),
      resource_(resource),
      cache_result_(false),
      cache_collector_(new MalwareDetailsCacheCollector),
      redirects_collector_(
          new MalwareDetailsRedirectsCollector(profile_)) {
  StartCollection();
}
     85 
// Nothing to release explicitly here; the destructor body is empty.
MalwareDetails::~MalwareDetails() {
}
     88 
// Dispatches incoming IPC messages. SafeBrowsingHostMsg_MalwareDOMDetails is
// routed to OnReceivedMalwareDOMDetails(); any other message is left
// unhandled. Returns true when the message was handled here, false otherwise.
bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
    IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
                        OnReceivedMalwareDOMDetails)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}
     98 
// Returns true when |url| may be included in the report. Currently only the
// scheme is tested: a URL counts as public exactly when SchemeIs("http")
// holds, so every other scheme is rejected.
bool MalwareDetails::IsPublicUrl(const GURL& url) const {
  return url.SchemeIs("http");  // TODO(panayiotis): also skip internal urls.
}
    102 
    103 // Looks for a Resource for the given url in resources_.  If found, it
    104 // updates |resource|. Otherwise, it creates a new message, adds it to
    105 // resources_ and updates |resource| to point to it.
    106 ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
    107     const GURL& url) {
    108   safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
    109   if (it != resources_.end())
    110     return it->second.get();
    111 
    112   // Create the resource for |url|.
    113   int id = resources_.size();
    114   linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
    115       new ClientMalwareReportRequest::Resource());
    116   new_resource->set_url(url.spec());
    117   new_resource->set_id(id);
    118   resources_[url.spec()] = new_resource;
    119   return new_resource.get();
    120 }
    121 
    122 void MalwareDetails::AddUrl(const GURL& url,
    123                             const GURL& parent,
    124                             const std::string& tagname,
    125                             const std::vector<GURL>* children) {
    126   if (!url.is_valid() || !IsPublicUrl(url))
    127     return;
    128 
    129   // Find (or create) the resource for the url.
    130   ClientMalwareReportRequest::Resource* url_resource =
    131       FindOrCreateResource(url);
    132   if (!tagname.empty())
    133     url_resource->set_tag_name(tagname);
    134   if (!parent.is_empty() && IsPublicUrl(parent)) {
    135     // Add the resource for the parent.
    136     ClientMalwareReportRequest::Resource* parent_resource =
    137         FindOrCreateResource(parent);
    138     // Update the parent-child relation
    139     url_resource->set_parent_id(parent_resource->id());
    140   }
    141   if (children) {
    142     for (std::vector<GURL>::const_iterator it = children->begin();
    143          it != children->end(); ++it) {
    144       ClientMalwareReportRequest::Resource* child_resource =
    145           FindOrCreateResource(*it);
    146       url_resource->add_child_ids(child_resource->id());
    147     }
    148   }
    149 }
    150 
    151 void MalwareDetails::StartCollection() {
    152   DVLOG(1) << "Starting to compute malware details.";
    153   report_.reset(new ClientMalwareReportRequest());
    154 
    155   if (IsPublicUrl(resource_.url))
    156     report_->set_malware_url(resource_.url.spec());
    157 
    158   GURL page_url = web_contents()->GetURL();
    159   if (IsPublicUrl(page_url))
    160     report_->set_page_url(page_url.spec());
    161 
    162   GURL referrer_url;
    163   NavigationEntry* nav_entry = web_contents()->GetController().GetActiveEntry();
    164   if (nav_entry) {
    165     referrer_url = nav_entry->GetReferrer().url;
    166     if (IsPublicUrl(referrer_url)) {
    167       report_->set_referrer_url(referrer_url.spec());
    168     }
    169   }
    170 
    171   // Add the nodes, starting from the page url.
    172   AddUrl(page_url, GURL(), std::string(), NULL);
    173 
    174   // Add the resource_url and its original url, if non-empty and different.
    175   if (!resource_.original_url.is_empty() &&
    176       resource_.url != resource_.original_url) {
    177     // Add original_url, as the parent of resource_url.
    178     AddUrl(resource_.original_url, GURL(), std::string(), NULL);
    179     AddUrl(resource_.url, resource_.original_url, std::string(), NULL);
    180   } else {
    181     AddUrl(resource_.url, GURL(), std::string(), NULL);
    182   }
    183 
    184   // Add the redirect urls, if non-empty. The redirect urls do not include the
    185   // original url, but include the unsafe url which is the last one of the
    186   // redirect urls chain
    187   GURL parent_url;
    188   // Set the original url as the parent of the first redirect url if it's not
    189   // empty.
    190   if (!resource_.original_url.is_empty())
    191     parent_url = resource_.original_url;
    192 
    193   // Set the previous redirect url as the parent of the next one
    194   for (size_t i = 0; i < resource_.redirect_urls.size(); ++i) {
    195     AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL);
    196     parent_url = resource_.redirect_urls[i];
    197   }
    198 
    199   // Add the referrer url.
    200   if (nav_entry && !referrer_url.is_empty())
    201     AddUrl(referrer_url, GURL(), std::string(), NULL);
    202 
    203   // Get URLs of frames, scripts etc from the DOM.
    204   // OnReceivedMalwareDOMDetails will be called when the renderer replies.
    205   content::RenderViewHost* view = web_contents()->GetRenderViewHost();
    206   view->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view->GetRoutingID()));
    207 }
    208 
// When the renderer is done, this is called.
// |params| holds the DOM nodes reported by the renderer. They are not
// processed here; the work is forwarded to AddDOMDetails() on the IO thread.
void MalwareDetails::OnReceivedMalwareDOMDetails(
    const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
  // Schedule this in IO thread, so it doesn't conflict with future users
  // of our data structures (eg GetSerializedReport).
  BrowserThread::PostTask(
      BrowserThread::IO, FROM_HERE,
      base::Bind(&MalwareDetails::AddDOMDetails, this, params));
}
    218 
    219 void MalwareDetails::AddDOMDetails(
    220     const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
    221   DCHECK_CURRENTLY_ON(BrowserThread::IO);
    222   DVLOG(1) << "Nodes from the DOM: " << params.size();
    223 
    224   // If we have already started getting redirects from history service,
    225   // don't modify state, otherwise will invalidate the iterators.
    226   if (redirects_collector_->HasStarted())
    227     return;
    228 
    229   // If we have already started collecting data from the HTTP cache, don't
    230   // modify our state.
    231   if (cache_collector_->HasStarted())
    232     return;
    233 
    234   // Add the urls from the DOM to |resources_|.  The renderer could be
    235   // sending bogus messages, so limit the number of nodes we accept.
    236   for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
    237     SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
    238     DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
    239     AddUrl(node.url, node.parent, node.tag_name, &(node.children));
    240   }
    241 }
    242 
    243 // Called from the SB Service on the IO thread, after the user has
    244 // closed the tab, or clicked proceed or goback.  Since the user needs
    245 // to take an action, we expect this to be called after
    246 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include
    247 // the DOM data in our report.
    248 void MalwareDetails::FinishCollection() {
    249   DCHECK_CURRENTLY_ON(BrowserThread::IO);
    250 
    251   std::vector<GURL> urls;
    252   for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
    253        it != resources_.end(); ++it) {
    254     urls.push_back(GURL(it->first));
    255   }
    256   redirects_collector_->StartHistoryCollection(
    257       urls,
    258       base::Bind(&MalwareDetails::OnRedirectionCollectionReady, this));
    259 }
    260 
    261 void MalwareDetails::OnRedirectionCollectionReady() {
    262   DCHECK_CURRENTLY_ON(BrowserThread::IO);
    263   const std::vector<safe_browsing::RedirectChain>& redirects =
    264       redirects_collector_->GetCollectedUrls();
    265 
    266   for (size_t i = 0; i < redirects.size(); ++i)
    267     AddRedirectUrlList(redirects[i]);
    268 
    269   // Call the cache collector
    270   cache_collector_->StartCacheCollection(
    271       request_context_getter_.get(),
    272       &resources_,
    273       &cache_result_,
    274       base::Bind(&MalwareDetails::OnCacheCollectionReady, this));
    275 }
    276 
    277 void MalwareDetails::AddRedirectUrlList(const std::vector<GURL>& urls) {
    278   DCHECK_CURRENTLY_ON(BrowserThread::IO);
    279   for (size_t i = 0; i < urls.size() - 1; ++i) {
    280     AddUrl(urls[i], urls[i + 1], std::string(), NULL);
    281   }
    282 }
    283 
    284 void MalwareDetails::OnCacheCollectionReady() {
    285   DVLOG(1) << "OnCacheCollectionReady.";
    286   // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
    287   for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
    288        it != resources_.end(); ++it) {
    289     ClientMalwareReportRequest::Resource* pb_resource =
    290         report_->add_resources();
    291     pb_resource->CopyFrom(*(it->second));
    292   }
    293 
    294   report_->set_complete(cache_result_);
    295 
    296   // Send the report, using the SafeBrowsingService.
    297   std::string serialized;
    298   if (!report_->SerializeToString(&serialized)) {
    299     DLOG(ERROR) << "Unable to serialize the malware report.";
    300     return;
    301   }
    302 
    303   ui_manager_->SendSerializedMalwareDetails(serialized);
    304 }
    305