// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Implementation of the MalwareDetails class.

#include "chrome/browser/safe_browsing/malware_details.h"

#include "base/callback.h"
#include "base/lazy_instance.h"
#include "chrome/browser/net/chrome_url_request_context.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/safe_browsing/malware_details_cache.h"
#include "chrome/browser/safe_browsing/report.pb.h"
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/common/safe_browsing/safebrowsing_messages.h"
#include "content/browser/browser_thread.h"
#include "content/browser/renderer_host/render_view_host.h"
#include "content/browser/tab_contents/navigation_entry.h"
#include "content/browser/tab_contents/tab_contents.h"
#include "net/base/io_buffer.h"
#include "net/disk_cache/disk_cache.h"
#include "net/url_request/url_request_context_getter.h"

using safe_browsing::ClientMalwareReportRequest;

// Upper bound on the number of DOM nodes accepted from the renderer in
// AddDOMDetails(); nodes past this count are ignored.
// Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
static const uint32 kMaxDomNodes = 500;

// static
// Factory used by NewMalwareDetails(). Stays NULL until that method installs
// the default implementation, unless a factory was set beforehand.
MalwareDetailsFactory* MalwareDetails::factory_ = NULL;

// The default MalwareDetailsFactory. Global, made a singleton so we
// don't leak it.
35 class MalwareDetailsFactoryImpl 36 : public MalwareDetailsFactory { 37 public: 38 MalwareDetails* CreateMalwareDetails( 39 SafeBrowsingService* sb_service, 40 TabContents* tab_contents, 41 const SafeBrowsingService::UnsafeResource& unsafe_resource) { 42 return new MalwareDetails(sb_service, tab_contents, unsafe_resource); 43 } 44 45 private: 46 friend struct base::DefaultLazyInstanceTraits< 47 MalwareDetailsFactoryImpl>; 48 49 MalwareDetailsFactoryImpl() { } 50 51 DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl); 52 }; 53 54 static base::LazyInstance<MalwareDetailsFactoryImpl> 55 g_malware_details_factory_impl(base::LINKER_INITIALIZED); 56 57 // Create a MalwareDetails for the given tab. 58 /* static */ 59 MalwareDetails* MalwareDetails::NewMalwareDetails( 60 SafeBrowsingService* sb_service, 61 TabContents* tab_contents, 62 const SafeBrowsingService::UnsafeResource& resource) { 63 // Set up the factory if this has not been done already (tests do that 64 // before this method is called). 65 if (!factory_) 66 factory_ = g_malware_details_factory_impl.Pointer(); 67 return factory_->CreateMalwareDetails(sb_service, tab_contents, resource); 68 } 69 70 // Create a MalwareDetails for the given tab. Runs in the UI thread. 
71 MalwareDetails::MalwareDetails( 72 SafeBrowsingService* sb_service, 73 TabContents* tab_contents, 74 const SafeBrowsingService::UnsafeResource& resource) 75 : TabContentsObserver(tab_contents), 76 request_context_getter_(tab_contents->profile()->GetRequestContext()), 77 sb_service_(sb_service), 78 resource_(resource), 79 cache_collector_(new MalwareDetailsCacheCollector) { 80 StartCollection(); 81 } 82 83 MalwareDetails::~MalwareDetails() { 84 } 85 86 bool MalwareDetails::OnMessageReceived(const IPC::Message& message) { 87 bool handled = true; 88 IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message) 89 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails, 90 OnReceivedMalwareDOMDetails) 91 IPC_MESSAGE_UNHANDLED(handled = false) 92 IPC_END_MESSAGE_MAP() 93 return handled; 94 } 95 96 bool MalwareDetails::IsPublicUrl(const GURL& url) const { 97 return url.SchemeIs("http"); // TODO(panayiotis): also skip internal urls. 98 } 99 100 // Looks for a Resource for the given url in resources_. If found, it 101 // updates |resource|. Otherwise, it creates a new message, adds it to 102 // resources_ and updates |resource| to point to it. 103 ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource( 104 const GURL& url) { 105 safe_browsing::ResourceMap::iterator it = resources_.find(url.spec()); 106 if (it != resources_.end()) { 107 return it->second.get(); 108 } 109 110 // Create the resource for |url|. 111 int id = resources_.size(); 112 linked_ptr<ClientMalwareReportRequest::Resource> new_resource( 113 new ClientMalwareReportRequest::Resource()); 114 new_resource->set_url(url.spec()); 115 new_resource->set_id(id); 116 resources_[url.spec()] = new_resource; 117 return new_resource.get(); 118 } 119 120 void MalwareDetails::AddUrl(const GURL& url, 121 const GURL& parent, 122 const std::string& tagname, 123 const std::vector<GURL>* children) { 124 if (!IsPublicUrl(url)) 125 return; 126 127 // Find (or create) the resource for the url. 
128 ClientMalwareReportRequest::Resource* url_resource = 129 FindOrCreateResource(url); 130 if (!tagname.empty()) { 131 url_resource->set_tag_name(tagname); 132 } 133 if (!parent.is_empty() && IsPublicUrl(parent)) { 134 // Add the resource for the parent. 135 ClientMalwareReportRequest::Resource* parent_resource = 136 FindOrCreateResource(parent); 137 // Update the parent-child relation 138 url_resource->set_parent_id(parent_resource->id()); 139 } 140 if (children) { 141 for (std::vector<GURL>::const_iterator it = children->begin(); 142 it != children->end(); it++) { 143 ClientMalwareReportRequest::Resource* child_resource = 144 FindOrCreateResource(*it); 145 url_resource->add_child_ids(child_resource->id()); 146 } 147 } 148 } 149 150 void MalwareDetails::StartCollection() { 151 DVLOG(1) << "Starting to compute malware details."; 152 report_.reset(new ClientMalwareReportRequest()); 153 154 if (IsPublicUrl(resource_.url)) { 155 report_->set_malware_url(resource_.url.spec()); 156 } 157 158 GURL page_url = tab_contents()->GetURL(); 159 if (IsPublicUrl(page_url)) { 160 report_->set_page_url(page_url.spec()); 161 } 162 163 GURL referrer_url; 164 NavigationEntry* nav_entry = tab_contents()->controller().GetActiveEntry(); 165 if (nav_entry) { 166 referrer_url = nav_entry->referrer(); 167 if (IsPublicUrl(referrer_url)) { 168 report_->set_referrer_url(referrer_url.spec()); 169 } 170 } 171 172 // Add the nodes, starting from the page url. 173 AddUrl(page_url, GURL(), "", NULL); 174 175 // Add the resource_url and its original url, if non-empty and different. 176 if (!resource_.original_url.is_empty() && 177 resource_.url != resource_.original_url) { 178 // Add original_url, as the parent of resource_url. 179 AddUrl(resource_.original_url, GURL(), "", NULL); 180 AddUrl(resource_.url, resource_.original_url, "", NULL); 181 } else { 182 AddUrl(resource_.url, GURL(), "", NULL); 183 } 184 185 // Add the redirect urls, if non-empty. 
The redirect urls do not include the 186 // original url, but include the unsafe url which is the last one of the 187 // redirect urls chain 188 GURL parent_url; 189 // Set the original url as the parent of the first redirect url if it's not 190 // empty. 191 if (!resource_.original_url.is_empty()) { 192 parent_url = resource_.original_url; 193 } 194 // Set the previous redirect url as the parent of the next one 195 for (unsigned int i = 0; i < resource_.redirect_urls.size(); ++i) { 196 AddUrl(resource_.redirect_urls[i], parent_url, "", NULL); 197 parent_url = resource_.redirect_urls[i]; 198 } 199 200 // Add the referrer url. 201 if (nav_entry && !referrer_url.is_empty()) { 202 AddUrl(referrer_url, GURL(), "", NULL); 203 } 204 205 // Get URLs of frames, scripts etc from the DOM. 206 // OnReceivedMalwareDOMDetails will be called when the renderer replies. 207 tab_contents()->render_view_host()->GetMalwareDOMDetails(); 208 } 209 210 // When the renderer is done, this is called. 211 void MalwareDetails::OnReceivedMalwareDOMDetails( 212 const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) { 213 // Schedule this in IO thread, so it doesn't conflict with future users 214 // of our data structures (eg GetSerializedReport). 215 BrowserThread::PostTask( 216 BrowserThread::IO, FROM_HERE, 217 NewRunnableMethod( 218 this, &MalwareDetails::AddDOMDetails, params)); 219 } 220 221 void MalwareDetails::AddDOMDetails( 222 const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) { 223 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 224 DVLOG(1) << "Nodes from the DOM: " << params.size(); 225 226 // If we have already started collecting data from the HTTP cache, don't 227 // modify our state. 228 if (cache_collector_->HasStarted()) 229 return; 230 231 // Add the urls from the DOM to |resources_|. The renderer could be 232 // sending bogus messages, so limit the number of nodes we accept. 
233 for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) { 234 SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i]; 235 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; 236 AddUrl(node.url, node.parent, node.tag_name, &(node.children)); 237 } 238 } 239 240 // Called from the SB Service on the IO thread, after the user has 241 // closed the tab, or clicked proceed or goback. Since the user needs 242 // to take an action, we expect this to be called after 243 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include 244 // the DOM data in our report. 245 void MalwareDetails::FinishCollection() { 246 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 247 248 cache_collector_->StartCacheCollection( 249 request_context_getter_, 250 &resources_, 251 &cache_result_, 252 NewRunnableMethod(this, &MalwareDetails::OnCacheCollectionReady)); 253 } 254 255 void MalwareDetails::OnCacheCollectionReady() { 256 DVLOG(1) << "OnCacheCollectionReady."; 257 // Add all the urls in our |resources_| maps to the |report_| protocol buffer. 258 for (safe_browsing::ResourceMap::const_iterator it = resources_.begin(); 259 it != resources_.end(); it++) { 260 ClientMalwareReportRequest::Resource* pb_resource = 261 report_->add_resources(); 262 pb_resource->CopyFrom(*(it->second)); 263 } 264 265 report_->set_complete(cache_result_); 266 267 // Send the report, using the SafeBrowsingService. 268 std::string serialized; 269 if (!report_->SerializeToString(&serialized)) { 270 DLOG(ERROR) << "Unable to serialize the malware report."; 271 return; 272 } 273 274 sb_service_->SendSerializedMalwareDetails(serialized); 275 } 276