Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Implementation of the MalwareDetails class.
      6 
      7 #include "chrome/browser/safe_browsing/malware_details.h"
      8 
      9 #include "base/callback.h"
     10 #include "base/lazy_instance.h"
     11 #include "chrome/browser/net/chrome_url_request_context.h"
     12 #include "chrome/browser/profiles/profile.h"
     13 #include "chrome/browser/safe_browsing/malware_details_cache.h"
     14 #include "chrome/browser/safe_browsing/report.pb.h"
     15 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
     16 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
     17 #include "content/browser/browser_thread.h"
     18 #include "content/browser/renderer_host/render_view_host.h"
     19 #include "content/browser/tab_contents/navigation_entry.h"
     20 #include "content/browser/tab_contents/tab_contents.h"
     21 #include "net/base/io_buffer.h"
     22 #include "net/disk_cache/disk_cache.h"
     23 #include "net/url_request/url_request_context_getter.h"
     24 
     25 using safe_browsing::ClientMalwareReportRequest;
     26 
// Maximum number of DOM nodes accepted from the renderer.  Keep in sync with
// kMaxNodes in renderer/safe_browsing/malware_dom_details.
     28 static const uint32 kMaxDomNodes = 500;
     29 
     30 // static
     31 MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
     32 
     33 // The default MalwareDetailsFactory.  Global, made a singleton so we
     34 // don't leak it.
     35 class MalwareDetailsFactoryImpl
     36     : public MalwareDetailsFactory {
     37  public:
     38   MalwareDetails* CreateMalwareDetails(
     39       SafeBrowsingService* sb_service,
     40       TabContents* tab_contents,
     41       const SafeBrowsingService::UnsafeResource& unsafe_resource) {
     42     return new MalwareDetails(sb_service, tab_contents, unsafe_resource);
     43   }
     44 
     45  private:
     46   friend struct base::DefaultLazyInstanceTraits<
     47       MalwareDetailsFactoryImpl>;
     48 
     49   MalwareDetailsFactoryImpl() { }
     50 
     51   DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
     52 };
     53 
     54 static base::LazyInstance<MalwareDetailsFactoryImpl>
     55     g_malware_details_factory_impl(base::LINKER_INITIALIZED);
     56 
     57 // Create a MalwareDetails for the given tab.
     58 /* static */
     59 MalwareDetails* MalwareDetails::NewMalwareDetails(
     60     SafeBrowsingService* sb_service,
     61     TabContents* tab_contents,
     62     const SafeBrowsingService::UnsafeResource& resource) {
     63   // Set up the factory if this has not been done already (tests do that
     64   // before this method is called).
     65   if (!factory_)
     66     factory_ = g_malware_details_factory_impl.Pointer();
     67   return factory_->CreateMalwareDetails(sb_service, tab_contents, resource);
     68 }
     69 
     70 // Create a MalwareDetails for the given tab. Runs in the UI thread.
     71 MalwareDetails::MalwareDetails(
     72     SafeBrowsingService* sb_service,
     73     TabContents* tab_contents,
     74     const SafeBrowsingService::UnsafeResource& resource)
     75     : TabContentsObserver(tab_contents),
     76       request_context_getter_(tab_contents->profile()->GetRequestContext()),
     77       sb_service_(sb_service),
     78       resource_(resource),
     79       cache_collector_(new MalwareDetailsCacheCollector) {
     80   StartCollection();
     81 }
     82 
     83 MalwareDetails::~MalwareDetails() {
     84 }
     85 
     86 bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
     87   bool handled = true;
     88   IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
     89     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
     90                         OnReceivedMalwareDOMDetails)
     91     IPC_MESSAGE_UNHANDLED(handled = false)
     92   IPC_END_MESSAGE_MAP()
     93   return handled;
     94 }
     95 
     96 bool MalwareDetails::IsPublicUrl(const GURL& url) const {
     97   return url.SchemeIs("http");  // TODO(panayiotis): also skip internal urls.
     98 }
     99 
    100 // Looks for a Resource for the given url in resources_.  If found, it
    101 // updates |resource|. Otherwise, it creates a new message, adds it to
    102 // resources_ and updates |resource| to point to it.
    103 ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
    104     const GURL& url) {
    105   safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
    106   if (it != resources_.end()) {
    107     return it->second.get();
    108   }
    109 
    110   // Create the resource for |url|.
    111   int id = resources_.size();
    112   linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
    113       new ClientMalwareReportRequest::Resource());
    114   new_resource->set_url(url.spec());
    115   new_resource->set_id(id);
    116   resources_[url.spec()] = new_resource;
    117   return new_resource.get();
    118 }
    119 
    120 void MalwareDetails::AddUrl(const GURL& url,
    121                             const GURL& parent,
    122                             const std::string& tagname,
    123                             const std::vector<GURL>* children) {
    124   if (!IsPublicUrl(url))
    125     return;
    126 
    127   // Find (or create) the resource for the url.
    128   ClientMalwareReportRequest::Resource* url_resource =
    129       FindOrCreateResource(url);
    130   if (!tagname.empty()) {
    131     url_resource->set_tag_name(tagname);
    132   }
    133   if (!parent.is_empty() && IsPublicUrl(parent)) {
    134     // Add the resource for the parent.
    135     ClientMalwareReportRequest::Resource* parent_resource =
    136         FindOrCreateResource(parent);
    137     // Update the parent-child relation
    138     url_resource->set_parent_id(parent_resource->id());
    139   }
    140   if (children) {
    141     for (std::vector<GURL>::const_iterator it = children->begin();
    142          it != children->end(); it++) {
    143       ClientMalwareReportRequest::Resource* child_resource =
    144           FindOrCreateResource(*it);
    145       url_resource->add_child_ids(child_resource->id());
    146     }
    147   }
    148 }
    149 
    150 void MalwareDetails::StartCollection() {
    151   DVLOG(1) << "Starting to compute malware details.";
    152   report_.reset(new ClientMalwareReportRequest());
    153 
    154   if (IsPublicUrl(resource_.url)) {
    155     report_->set_malware_url(resource_.url.spec());
    156   }
    157 
    158   GURL page_url = tab_contents()->GetURL();
    159   if (IsPublicUrl(page_url)) {
    160     report_->set_page_url(page_url.spec());
    161   }
    162 
    163   GURL referrer_url;
    164   NavigationEntry* nav_entry = tab_contents()->controller().GetActiveEntry();
    165   if (nav_entry) {
    166     referrer_url = nav_entry->referrer();
    167     if (IsPublicUrl(referrer_url)) {
    168       report_->set_referrer_url(referrer_url.spec());
    169     }
    170   }
    171 
    172   // Add the nodes, starting from the page url.
    173   AddUrl(page_url, GURL(), "", NULL);
    174 
    175   // Add the resource_url and its original url, if non-empty and different.
    176   if (!resource_.original_url.is_empty() &&
    177       resource_.url != resource_.original_url) {
    178     // Add original_url, as the parent of resource_url.
    179     AddUrl(resource_.original_url, GURL(), "", NULL);
    180     AddUrl(resource_.url, resource_.original_url, "", NULL);
    181   } else {
    182     AddUrl(resource_.url, GURL(), "", NULL);
    183   }
    184 
    185   // Add the redirect urls, if non-empty. The redirect urls do not include the
    186   // original url, but include the unsafe url which is the last one of the
    187   // redirect urls chain
    188   GURL parent_url;
    189   // Set the original url as the parent of the first redirect url if it's not
    190   // empty.
    191   if (!resource_.original_url.is_empty()) {
    192     parent_url = resource_.original_url;
    193   }
    194   // Set the previous redirect url as the parent of the next one
    195   for (unsigned int i = 0; i < resource_.redirect_urls.size(); ++i) {
    196     AddUrl(resource_.redirect_urls[i], parent_url, "", NULL);
    197     parent_url = resource_.redirect_urls[i];
    198   }
    199 
    200   // Add the referrer url.
    201   if (nav_entry && !referrer_url.is_empty()) {
    202     AddUrl(referrer_url, GURL(), "", NULL);
    203   }
    204 
    205   // Get URLs of frames, scripts etc from the DOM.
    206   // OnReceivedMalwareDOMDetails will be called when the renderer replies.
    207   tab_contents()->render_view_host()->GetMalwareDOMDetails();
    208 }
    209 
    210 // When the renderer is done, this is called.
    211 void MalwareDetails::OnReceivedMalwareDOMDetails(
    212     const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
    213   // Schedule this in IO thread, so it doesn't conflict with future users
    214   // of our data structures (eg GetSerializedReport).
    215   BrowserThread::PostTask(
    216       BrowserThread::IO, FROM_HERE,
    217       NewRunnableMethod(
    218           this, &MalwareDetails::AddDOMDetails, params));
    219 }
    220 
    221 void MalwareDetails::AddDOMDetails(
    222     const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
    223   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    224   DVLOG(1) << "Nodes from the DOM: " << params.size();
    225 
    226   // If we have already started collecting data from the HTTP cache, don't
    227   // modify our state.
    228   if (cache_collector_->HasStarted())
    229     return;
    230 
    231   // Add the urls from the DOM to |resources_|.  The renderer could be
    232   // sending bogus messages, so limit the number of nodes we accept.
    233   for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
    234     SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
    235     DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
    236     AddUrl(node.url, node.parent, node.tag_name, &(node.children));
    237   }
    238 }
    239 
    240 // Called from the SB Service on the IO thread, after the user has
    241 // closed the tab, or clicked proceed or goback.  Since the user needs
    242 // to take an action, we expect this to be called after
    243 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include
    244 // the DOM data in our report.
    245 void MalwareDetails::FinishCollection() {
    246   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    247 
    248   cache_collector_->StartCacheCollection(
    249       request_context_getter_,
    250       &resources_,
    251       &cache_result_,
    252       NewRunnableMethod(this, &MalwareDetails::OnCacheCollectionReady));
    253 }
    254 
    255 void MalwareDetails::OnCacheCollectionReady() {
    256   DVLOG(1) << "OnCacheCollectionReady.";
    257   // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
    258   for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
    259        it != resources_.end(); it++) {
    260     ClientMalwareReportRequest::Resource* pb_resource =
    261         report_->add_resources();
    262     pb_resource->CopyFrom(*(it->second));
    263   }
    264 
    265   report_->set_complete(cache_result_);
    266 
    267   // Send the report, using the SafeBrowsingService.
    268   std::string serialized;
    269   if (!report_->SerializeToString(&serialized)) {
    270     DLOG(ERROR) << "Unable to serialize the malware report.";
    271     return;
    272   }
    273 
    274   sb_service_->SendSerializedMalwareDetails(serialized);
    275 }
    276