1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Implementation of the MalwareDetails class. 6 7 #include "chrome/browser/safe_browsing/malware_details.h" 8 9 #include "base/bind.h" 10 #include "base/lazy_instance.h" 11 #include "chrome/browser/net/chrome_url_request_context.h" 12 #include "chrome/browser/profiles/profile.h" 13 #include "chrome/browser/safe_browsing/malware_details_cache.h" 14 #include "chrome/browser/safe_browsing/malware_details_history.h" 15 #include "chrome/browser/safe_browsing/report.pb.h" 16 #include "chrome/common/safe_browsing/safebrowsing_messages.h" 17 #include "content/public/browser/browser_thread.h" 18 #include "content/public/browser/navigation_controller.h" 19 #include "content/public/browser/navigation_entry.h" 20 #include "content/public/browser/render_view_host.h" 21 #include "content/public/browser/web_contents.h" 22 #include "net/base/io_buffer.h" 23 #include "net/disk_cache/disk_cache.h" 24 #include "net/url_request/url_request_context_getter.h" 25 26 using content::BrowserThread; 27 using content::NavigationEntry; 28 using content::WebContents; 29 using safe_browsing::ClientMalwareReportRequest; 30 31 // Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details 32 static const uint32 kMaxDomNodes = 500; 33 34 // static 35 MalwareDetailsFactory* MalwareDetails::factory_ = NULL; 36 37 // The default MalwareDetailsFactory. Global, made a singleton so we 38 // don't leak it. 39 class MalwareDetailsFactoryImpl 40 : public MalwareDetailsFactory { 41 public: 42 virtual MalwareDetails* CreateMalwareDetails( 43 SafeBrowsingUIManager* ui_manager, 44 WebContents* web_contents, 45 const SafeBrowsingUIManager::UnsafeResource& unsafe_resource) OVERRIDE { 46 return new MalwareDetails(ui_manager, web_contents, unsafe_resource); 47 } 48 49 private: 50 friend struct base::DefaultLazyInstanceTraits< 51 MalwareDetailsFactoryImpl>; 52 53 MalwareDetailsFactoryImpl() { } 54 55 DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl); 56 }; 57 58 static base::LazyInstance<MalwareDetailsFactoryImpl> 59 g_malware_details_factory_impl = LAZY_INSTANCE_INITIALIZER; 60 61 // Create a MalwareDetails for the given tab. 62 /* static */ 63 MalwareDetails* MalwareDetails::NewMalwareDetails( 64 SafeBrowsingUIManager* ui_manager, 65 WebContents* web_contents, 66 const UnsafeResource& resource) { 67 // Set up the factory if this has not been done already (tests do that 68 // before this method is called). 69 if (!factory_) 70 factory_ = g_malware_details_factory_impl.Pointer(); 71 return factory_->CreateMalwareDetails(ui_manager, web_contents, resource); 72 } 73 74 // Create a MalwareDetails for the given tab. Runs in the UI thread. 75 MalwareDetails::MalwareDetails( 76 SafeBrowsingUIManager* ui_manager, 77 content::WebContents* web_contents, 78 const UnsafeResource& resource) 79 : content::WebContentsObserver(web_contents), 80 profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())), 81 request_context_getter_(profile_->GetRequestContext()), 82 ui_manager_(ui_manager), 83 resource_(resource), 84 cache_result_(false), 85 cache_collector_(new MalwareDetailsCacheCollector), 86 redirects_collector_( 87 new MalwareDetailsRedirectsCollector(profile_)) { 88 StartCollection(); 89 } 90 91 MalwareDetails::~MalwareDetails() { 92 } 93 94 bool MalwareDetails::OnMessageReceived(const IPC::Message& message) { 95 bool handled = true; 96 IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message) 97 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails, 98 OnReceivedMalwareDOMDetails) 99 IPC_MESSAGE_UNHANDLED(handled = false) 100 IPC_END_MESSAGE_MAP() 101 return handled; 102 } 103 104 bool MalwareDetails::IsPublicUrl(const GURL& url) const { 105 return url.SchemeIs("http"); // TODO(panayiotis): also skip internal urls. 106 } 107 108 // Looks for a Resource for the given url in resources_. If found, it 109 // updates |resource|. Otherwise, it creates a new message, adds it to 110 // resources_ and updates |resource| to point to it. 111 ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource( 112 const GURL& url) { 113 safe_browsing::ResourceMap::iterator it = resources_.find(url.spec()); 114 if (it != resources_.end()) { 115 return it->second.get(); 116 } 117 118 // Create the resource for |url|. 119 int id = resources_.size(); 120 linked_ptr<ClientMalwareReportRequest::Resource> new_resource( 121 new ClientMalwareReportRequest::Resource()); 122 new_resource->set_url(url.spec()); 123 new_resource->set_id(id); 124 resources_[url.spec()] = new_resource; 125 return new_resource.get(); 126 } 127 128 void MalwareDetails::AddUrl(const GURL& url, 129 const GURL& parent, 130 const std::string& tagname, 131 const std::vector<GURL>* children) { 132 if (!url.is_valid() || !IsPublicUrl(url)) 133 return; 134 135 // Find (or create) the resource for the url. 136 ClientMalwareReportRequest::Resource* url_resource = 137 FindOrCreateResource(url); 138 if (!tagname.empty()) { 139 url_resource->set_tag_name(tagname); 140 } 141 if (!parent.is_empty() && IsPublicUrl(parent)) { 142 // Add the resource for the parent. 143 ClientMalwareReportRequest::Resource* parent_resource = 144 FindOrCreateResource(parent); 145 // Update the parent-child relation 146 url_resource->set_parent_id(parent_resource->id()); 147 } 148 if (children) { 149 for (std::vector<GURL>::const_iterator it = children->begin(); 150 it != children->end(); it++) { 151 ClientMalwareReportRequest::Resource* child_resource = 152 FindOrCreateResource(*it); 153 url_resource->add_child_ids(child_resource->id()); 154 } 155 } 156 } 157 158 void MalwareDetails::StartCollection() { 159 DVLOG(1) << "Starting to compute malware details."; 160 report_.reset(new ClientMalwareReportRequest()); 161 162 if (IsPublicUrl(resource_.url)) { 163 report_->set_malware_url(resource_.url.spec()); 164 } 165 166 GURL page_url = web_contents()->GetURL(); 167 if (IsPublicUrl(page_url)) { 168 report_->set_page_url(page_url.spec()); 169 } 170 171 GURL referrer_url; 172 NavigationEntry* nav_entry = web_contents()->GetController().GetActiveEntry(); 173 if (nav_entry) { 174 referrer_url = nav_entry->GetReferrer().url; 175 if (IsPublicUrl(referrer_url)) { 176 report_->set_referrer_url(referrer_url.spec()); 177 } 178 } 179 180 // Add the nodes, starting from the page url. 181 AddUrl(page_url, GURL(), std::string(), NULL); 182 183 // Add the resource_url and its original url, if non-empty and different. 184 if (!resource_.original_url.is_empty() && 185 resource_.url != resource_.original_url) { 186 // Add original_url, as the parent of resource_url. 187 AddUrl(resource_.original_url, GURL(), std::string(), NULL); 188 AddUrl(resource_.url, resource_.original_url, std::string(), NULL); 189 } else { 190 AddUrl(resource_.url, GURL(), std::string(), NULL); 191 } 192 193 // Add the redirect urls, if non-empty. The redirect urls do not include the 194 // original url, but include the unsafe url which is the last one of the 195 // redirect urls chain 196 GURL parent_url; 197 // Set the original url as the parent of the first redirect url if it's not 198 // empty. 199 if (!resource_.original_url.is_empty()) { 200 parent_url = resource_.original_url; 201 } 202 // Set the previous redirect url as the parent of the next one 203 for (unsigned int i = 0; i < resource_.redirect_urls.size(); ++i) { 204 AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL); 205 parent_url = resource_.redirect_urls[i]; 206 } 207 208 // Add the referrer url. 209 if (nav_entry && !referrer_url.is_empty()) { 210 AddUrl(referrer_url, GURL(), std::string(), NULL); 211 } 212 213 // Get URLs of frames, scripts etc from the DOM. 214 // OnReceivedMalwareDOMDetails will be called when the renderer replies. 215 content::RenderViewHost* view = web_contents()->GetRenderViewHost(); 216 view->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view->GetRoutingID())); 217 } 218 219 // When the renderer is done, this is called. 220 void MalwareDetails::OnReceivedMalwareDOMDetails( 221 const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) { 222 // Schedule this in IO thread, so it doesn't conflict with future users 223 // of our data structures (eg GetSerializedReport). 224 BrowserThread::PostTask( 225 BrowserThread::IO, FROM_HERE, 226 base::Bind(&MalwareDetails::AddDOMDetails, this, params)); 227 } 228 229 void MalwareDetails::AddDOMDetails( 230 const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) { 231 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 232 DVLOG(1) << "Nodes from the DOM: " << params.size(); 233 234 // If we have already started getting redirects from history service, 235 // don't modify state, otherwise will invalidate the iterators. 236 if (redirects_collector_->HasStarted()) 237 return; 238 239 // If we have already started collecting data from the HTTP cache, don't 240 // modify our state. 241 if (cache_collector_->HasStarted()) 242 return; 243 244 // Add the urls from the DOM to |resources_|. The renderer could be 245 // sending bogus messages, so limit the number of nodes we accept. 246 for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) { 247 SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i]; 248 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; 249 AddUrl(node.url, node.parent, node.tag_name, &(node.children)); 250 } 251 } 252 253 // Called from the SB Service on the IO thread, after the user has 254 // closed the tab, or clicked proceed or goback. Since the user needs 255 // to take an action, we expect this to be called after 256 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include 257 // the DOM data in our report. 258 void MalwareDetails::FinishCollection() { 259 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 260 261 std::vector<GURL> urls; 262 for (safe_browsing::ResourceMap::const_iterator it = resources_.begin(); 263 it != resources_.end(); it++) { 264 urls.push_back(GURL(it->first)); 265 } 266 redirects_collector_->StartHistoryCollection( 267 urls, 268 base::Bind(&MalwareDetails::OnRedirectionCollectionReady, this)); 269 } 270 271 void MalwareDetails::OnRedirectionCollectionReady() { 272 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 273 const std::vector<safe_browsing::RedirectChain>& redirects = 274 redirects_collector_->GetCollectedUrls(); 275 276 for (size_t i = 0; i < redirects.size(); ++i) 277 AddRedirectUrlList(redirects[i]); 278 279 // Call the cache collector 280 cache_collector_->StartCacheCollection( 281 request_context_getter_.get(), 282 &resources_, 283 &cache_result_, 284 base::Bind(&MalwareDetails::OnCacheCollectionReady, this)); 285 } 286 287 void MalwareDetails::AddRedirectUrlList(const std::vector<GURL>& urls) { 288 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 289 for (size_t i = 0; i < urls.size()-1; ++i) { 290 AddUrl(urls[i], urls[i + 1], std::string(), NULL); 291 } 292 } 293 294 void MalwareDetails::OnCacheCollectionReady() { 295 DVLOG(1) << "OnCacheCollectionReady."; 296 // Add all the urls in our |resources_| maps to the |report_| protocol buffer. 297 for (safe_browsing::ResourceMap::const_iterator it = resources_.begin(); 298 it != resources_.end(); it++) { 299 ClientMalwareReportRequest::Resource* pb_resource = 300 report_->add_resources(); 301 pb_resource->CopyFrom(*(it->second)); 302 } 303 304 report_->set_complete(cache_result_); 305 306 // Send the report, using the SafeBrowsingService. 307 std::string serialized; 308 if (!report_->SerializeToString(&serialized)) { 309 DLOG(ERROR) << "Unable to serialize the malware report."; 310 return; 311 } 312 313 ui_manager_->SendSerializedMalwareDetails(serialized); 314 } 315