1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Implementation of the MalwareDetails class. 6 7 #include "chrome/browser/safe_browsing/malware_details.h" 8 9 #include "base/bind.h" 10 #include "base/lazy_instance.h" 11 #include "chrome/browser/profiles/profile.h" 12 #include "chrome/browser/safe_browsing/malware_details_cache.h" 13 #include "chrome/browser/safe_browsing/malware_details_history.h" 14 #include "chrome/browser/safe_browsing/report.pb.h" 15 #include "chrome/common/safe_browsing/safebrowsing_messages.h" 16 #include "content/public/browser/browser_thread.h" 17 #include "content/public/browser/navigation_controller.h" 18 #include "content/public/browser/navigation_entry.h" 19 #include "content/public/browser/render_view_host.h" 20 #include "content/public/browser/web_contents.h" 21 #include "net/url_request/url_request_context_getter.h" 22 23 using content::BrowserThread; 24 using content::NavigationEntry; 25 using content::WebContents; 26 using safe_browsing::ClientMalwareReportRequest; 27 28 // Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details 29 static const uint32 kMaxDomNodes = 500; 30 31 // static 32 MalwareDetailsFactory* MalwareDetails::factory_ = NULL; 33 34 // The default MalwareDetailsFactory. Global, made a singleton so we 35 // don't leak it. 36 class MalwareDetailsFactoryImpl : public MalwareDetailsFactory { 37 public: 38 virtual MalwareDetails* CreateMalwareDetails( 39 SafeBrowsingUIManager* ui_manager, 40 WebContents* web_contents, 41 const SafeBrowsingUIManager::UnsafeResource& unsafe_resource) OVERRIDE { 42 return new MalwareDetails(ui_manager, web_contents, unsafe_resource); 43 } 44 45 private: 46 friend struct base::DefaultLazyInstanceTraits<MalwareDetailsFactoryImpl>; 47 48 MalwareDetailsFactoryImpl() {} 49 50 DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl); 51 }; 52 53 static base::LazyInstance<MalwareDetailsFactoryImpl> 54 g_malware_details_factory_impl = LAZY_INSTANCE_INITIALIZER; 55 56 // Create a MalwareDetails for the given tab. 57 /* static */ 58 MalwareDetails* MalwareDetails::NewMalwareDetails( 59 SafeBrowsingUIManager* ui_manager, 60 WebContents* web_contents, 61 const UnsafeResource& resource) { 62 // Set up the factory if this has not been done already (tests do that 63 // before this method is called). 64 if (!factory_) 65 factory_ = g_malware_details_factory_impl.Pointer(); 66 return factory_->CreateMalwareDetails(ui_manager, web_contents, resource); 67 } 68 69 // Create a MalwareDetails for the given tab. Runs in the UI thread. 70 MalwareDetails::MalwareDetails( 71 SafeBrowsingUIManager* ui_manager, 72 content::WebContents* web_contents, 73 const UnsafeResource& resource) 74 : content::WebContentsObserver(web_contents), 75 profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())), 76 request_context_getter_(profile_->GetRequestContext()), 77 ui_manager_(ui_manager), 78 resource_(resource), 79 cache_result_(false), 80 cache_collector_(new MalwareDetailsCacheCollector), 81 redirects_collector_( 82 new MalwareDetailsRedirectsCollector(profile_)) { 83 StartCollection(); 84 } 85 86 MalwareDetails::~MalwareDetails() { 87 } 88 89 bool MalwareDetails::OnMessageReceived(const IPC::Message& message) { 90 bool handled = true; 91 IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message) 92 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails, 93 OnReceivedMalwareDOMDetails) 94 IPC_MESSAGE_UNHANDLED(handled = false) 95 IPC_END_MESSAGE_MAP() 96 return handled; 97 } 98 99 bool MalwareDetails::IsPublicUrl(const GURL& url) const { 100 return url.SchemeIs("http"); // TODO(panayiotis): also skip internal urls. 101 } 102 103 // Looks for a Resource for the given url in resources_. If found, it 104 // updates |resource|. Otherwise, it creates a new message, adds it to 105 // resources_ and updates |resource| to point to it. 106 ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource( 107 const GURL& url) { 108 safe_browsing::ResourceMap::iterator it = resources_.find(url.spec()); 109 if (it != resources_.end()) 110 return it->second.get(); 111 112 // Create the resource for |url|. 113 int id = resources_.size(); 114 linked_ptr<ClientMalwareReportRequest::Resource> new_resource( 115 new ClientMalwareReportRequest::Resource()); 116 new_resource->set_url(url.spec()); 117 new_resource->set_id(id); 118 resources_[url.spec()] = new_resource; 119 return new_resource.get(); 120 } 121 122 void MalwareDetails::AddUrl(const GURL& url, 123 const GURL& parent, 124 const std::string& tagname, 125 const std::vector<GURL>* children) { 126 if (!url.is_valid() || !IsPublicUrl(url)) 127 return; 128 129 // Find (or create) the resource for the url. 130 ClientMalwareReportRequest::Resource* url_resource = 131 FindOrCreateResource(url); 132 if (!tagname.empty()) 133 url_resource->set_tag_name(tagname); 134 if (!parent.is_empty() && IsPublicUrl(parent)) { 135 // Add the resource for the parent. 136 ClientMalwareReportRequest::Resource* parent_resource = 137 FindOrCreateResource(parent); 138 // Update the parent-child relation 139 url_resource->set_parent_id(parent_resource->id()); 140 } 141 if (children) { 142 for (std::vector<GURL>::const_iterator it = children->begin(); 143 it != children->end(); ++it) { 144 ClientMalwareReportRequest::Resource* child_resource = 145 FindOrCreateResource(*it); 146 url_resource->add_child_ids(child_resource->id()); 147 } 148 } 149 } 150 151 void MalwareDetails::StartCollection() { 152 DVLOG(1) << "Starting to compute malware details."; 153 report_.reset(new ClientMalwareReportRequest()); 154 155 if (IsPublicUrl(resource_.url)) 156 report_->set_malware_url(resource_.url.spec()); 157 158 GURL page_url = web_contents()->GetURL(); 159 if (IsPublicUrl(page_url)) 160 report_->set_page_url(page_url.spec()); 161 162 GURL referrer_url; 163 NavigationEntry* nav_entry = web_contents()->GetController().GetActiveEntry(); 164 if (nav_entry) { 165 referrer_url = nav_entry->GetReferrer().url; 166 if (IsPublicUrl(referrer_url)) { 167 report_->set_referrer_url(referrer_url.spec()); 168 } 169 } 170 171 // Add the nodes, starting from the page url. 172 AddUrl(page_url, GURL(), std::string(), NULL); 173 174 // Add the resource_url and its original url, if non-empty and different. 175 if (!resource_.original_url.is_empty() && 176 resource_.url != resource_.original_url) { 177 // Add original_url, as the parent of resource_url. 178 AddUrl(resource_.original_url, GURL(), std::string(), NULL); 179 AddUrl(resource_.url, resource_.original_url, std::string(), NULL); 180 } else { 181 AddUrl(resource_.url, GURL(), std::string(), NULL); 182 } 183 184 // Add the redirect urls, if non-empty. The redirect urls do not include the 185 // original url, but include the unsafe url which is the last one of the 186 // redirect urls chain 187 GURL parent_url; 188 // Set the original url as the parent of the first redirect url if it's not 189 // empty. 190 if (!resource_.original_url.is_empty()) 191 parent_url = resource_.original_url; 192 193 // Set the previous redirect url as the parent of the next one 194 for (size_t i = 0; i < resource_.redirect_urls.size(); ++i) { 195 AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL); 196 parent_url = resource_.redirect_urls[i]; 197 } 198 199 // Add the referrer url. 200 if (nav_entry && !referrer_url.is_empty()) 201 AddUrl(referrer_url, GURL(), std::string(), NULL); 202 203 // Get URLs of frames, scripts etc from the DOM. 204 // OnReceivedMalwareDOMDetails will be called when the renderer replies. 205 content::RenderViewHost* view = web_contents()->GetRenderViewHost(); 206 view->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view->GetRoutingID())); 207 } 208 209 // When the renderer is done, this is called. 210 void MalwareDetails::OnReceivedMalwareDOMDetails( 211 const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) { 212 // Schedule this in IO thread, so it doesn't conflict with future users 213 // of our data structures (eg GetSerializedReport). 214 BrowserThread::PostTask( 215 BrowserThread::IO, FROM_HERE, 216 base::Bind(&MalwareDetails::AddDOMDetails, this, params)); 217 } 218 219 void MalwareDetails::AddDOMDetails( 220 const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) { 221 DCHECK_CURRENTLY_ON(BrowserThread::IO); 222 DVLOG(1) << "Nodes from the DOM: " << params.size(); 223 224 // If we have already started getting redirects from history service, 225 // don't modify state, otherwise will invalidate the iterators. 226 if (redirects_collector_->HasStarted()) 227 return; 228 229 // If we have already started collecting data from the HTTP cache, don't 230 // modify our state. 231 if (cache_collector_->HasStarted()) 232 return; 233 234 // Add the urls from the DOM to |resources_|. The renderer could be 235 // sending bogus messages, so limit the number of nodes we accept. 236 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { 237 SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i]; 238 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; 239 AddUrl(node.url, node.parent, node.tag_name, &(node.children)); 240 } 241 } 242 243 // Called from the SB Service on the IO thread, after the user has 244 // closed the tab, or clicked proceed or goback. Since the user needs 245 // to take an action, we expect this to be called after 246 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include 247 // the DOM data in our report. 248 void MalwareDetails::FinishCollection() { 249 DCHECK_CURRENTLY_ON(BrowserThread::IO); 250 251 std::vector<GURL> urls; 252 for (safe_browsing::ResourceMap::const_iterator it = resources_.begin(); 253 it != resources_.end(); ++it) { 254 urls.push_back(GURL(it->first)); 255 } 256 redirects_collector_->StartHistoryCollection( 257 urls, 258 base::Bind(&MalwareDetails::OnRedirectionCollectionReady, this)); 259 } 260 261 void MalwareDetails::OnRedirectionCollectionReady() { 262 DCHECK_CURRENTLY_ON(BrowserThread::IO); 263 const std::vector<safe_browsing::RedirectChain>& redirects = 264 redirects_collector_->GetCollectedUrls(); 265 266 for (size_t i = 0; i < redirects.size(); ++i) 267 AddRedirectUrlList(redirects[i]); 268 269 // Call the cache collector 270 cache_collector_->StartCacheCollection( 271 request_context_getter_.get(), 272 &resources_, 273 &cache_result_, 274 base::Bind(&MalwareDetails::OnCacheCollectionReady, this)); 275 } 276 277 void MalwareDetails::AddRedirectUrlList(const std::vector<GURL>& urls) { 278 DCHECK_CURRENTLY_ON(BrowserThread::IO); 279 for (size_t i = 0; i < urls.size() - 1; ++i) { 280 AddUrl(urls[i], urls[i + 1], std::string(), NULL); 281 } 282 } 283 284 void MalwareDetails::OnCacheCollectionReady() { 285 DVLOG(1) << "OnCacheCollectionReady."; 286 // Add all the urls in our |resources_| maps to the |report_| protocol buffer. 287 for (safe_browsing::ResourceMap::const_iterator it = resources_.begin(); 288 it != resources_.end(); ++it) { 289 ClientMalwareReportRequest::Resource* pb_resource = 290 report_->add_resources(); 291 pb_resource->CopyFrom(*(it->second)); 292 } 293 294 report_->set_complete(cache_result_); 295 296 // Send the report, using the SafeBrowsingService. 297 std::string serialized; 298 if (!report_->SerializeToString(&serialized)) { 299 DLOG(ERROR) << "Unable to serialize the malware report."; 300 return; 301 } 302 303 ui_manager_->SendSerializedMalwareDetails(serialized); 304 } 305