1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Implementation of the MalwareDetails class. 6 7 #include "chrome/browser/safe_browsing/malware_details.h" 8 9 #include "base/bind.h" 10 #include "base/lazy_instance.h" 11 #include "base/md5.h" 12 #include "base/strings/string_util.h" 13 #include "chrome/browser/net/chrome_url_request_context.h" 14 #include "chrome/browser/safe_browsing/malware_details_cache.h" 15 #include "chrome/browser/safe_browsing/report.pb.h" 16 #include "chrome/browser/safe_browsing/safe_browsing_service.h" 17 #include "content/public/browser/browser_thread.h" 18 #include "net/base/host_port_pair.h" 19 #include "net/base/load_flags.h" 20 #include "net/base/net_errors.h" 21 #include "net/http/http_response_headers.h" 22 #include "net/url_request/url_fetcher.h" 23 #include "net/url_request/url_request_context_getter.h" 24 #include "net/url_request/url_request_status.h" 25 26 using content::BrowserThread; 27 using safe_browsing::ClientMalwareReportRequest; 28 29 // Only send small files for now, a better strategy would use the size 30 // of the whole report and the user's bandwidth. 31 static const uint32 kMaxBodySizeBytes = 1024; 32 33 MalwareDetailsCacheCollector::MalwareDetailsCacheCollector() 34 : resources_(NULL), result_(NULL), has_started_(false) {} 35 36 void MalwareDetailsCacheCollector::StartCacheCollection( 37 net::URLRequestContextGetter* request_context_getter, 38 safe_browsing::ResourceMap* resources, 39 bool* result, 40 const base::Closure& callback) { 41 // Start the data collection from the HTTP cache. We use a URLFetcher 42 // and set the right flags so we only hit the cache. 43 DVLOG(1) << "Getting cache data for all urls..."; 44 request_context_getter_ = request_context_getter; 45 resources_ = resources; 46 resources_it_ = resources_->begin(); 47 result_ = result; 48 callback_ = callback; 49 has_started_ = true; 50 51 // Post a task in the message loop, so the callers don't need to 52 // check if we call their callback immediately. 53 BrowserThread::PostTask( 54 BrowserThread::IO, FROM_HERE, 55 base::Bind(&MalwareDetailsCacheCollector::OpenEntry, this)); 56 } 57 58 bool MalwareDetailsCacheCollector::HasStarted() { 59 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 60 return has_started_; 61 } 62 63 MalwareDetailsCacheCollector::~MalwareDetailsCacheCollector() {} 64 65 // Fetch a URL and advance to the next one when done. 66 void MalwareDetailsCacheCollector::OpenEntry() { 67 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 68 DVLOG(1) << "OpenEntry"; 69 70 if (resources_it_ == resources_->end()) { // We are done. 71 AllDone(true); 72 return; 73 } 74 75 if (!request_context_getter_.get()) { 76 DVLOG(1) << "Missing request context getter"; 77 AllDone(false); 78 return; 79 } 80 81 current_fetch_.reset(net::URLFetcher::Create( 82 GURL(resources_it_->first), net::URLFetcher::GET, this)); 83 current_fetch_->SetRequestContext(request_context_getter_.get()); 84 // Only from cache, and don't save cookies. 85 current_fetch_->SetLoadFlags(net::LOAD_ONLY_FROM_CACHE | 86 net::LOAD_DO_NOT_SAVE_COOKIES); 87 current_fetch_->SetAutomaticallyRetryOn5xx(false); // No retries. 88 current_fetch_->Start(); // OnURLFetchComplete will be called when done. 89 } 90 91 ClientMalwareReportRequest::Resource* MalwareDetailsCacheCollector::GetResource( 92 const GURL& url) { 93 safe_browsing::ResourceMap::iterator it = resources_->find(url.spec()); 94 if (it != resources_->end()) { 95 return it->second.get(); 96 } 97 return NULL; 98 } 99 100 void MalwareDetailsCacheCollector::OnURLFetchComplete( 101 const net::URLFetcher* source) { 102 DVLOG(1) << "OnUrlFetchComplete"; 103 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 104 DCHECK(current_fetch_.get()); 105 if (source->GetStatus().status() != net::URLRequestStatus::SUCCESS && 106 source->GetStatus().error() == net::ERR_CACHE_MISS) { 107 // Cache miss, skip this resource. 108 DVLOG(1) << "Cache miss for url: " << source->GetURL(); 109 AdvanceEntry(); 110 return; 111 } 112 113 if (source->GetStatus().status() != net::URLRequestStatus::SUCCESS) { 114 // Some other error occurred, e.g. the request could have been cancelled. 115 DVLOG(1) << "Unsuccessful fetch: " << source->GetURL(); 116 AdvanceEntry(); 117 return; 118 } 119 120 // Set the response headers and body to the right resource, which 121 // might not be the same as the one we asked for. 122 // For redirects, resources_it_->first != url.spec(). 123 ClientMalwareReportRequest::Resource* resource = 124 GetResource(source->GetURL()); 125 if (!resource) { 126 DVLOG(1) << "Cannot find resource for url:" << source->GetURL(); 127 AdvanceEntry(); 128 return; 129 } 130 131 ReadResponse(resource, source); 132 std::string data; 133 source->GetResponseAsString(&data); 134 ReadData(resource, data); 135 AdvanceEntry(); 136 } 137 138 void MalwareDetailsCacheCollector::ReadResponse( 139 ClientMalwareReportRequest::Resource* pb_resource, 140 const net::URLFetcher* source) { 141 DVLOG(1) << "ReadResponse"; 142 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 143 net::HttpResponseHeaders* headers = source->GetResponseHeaders(); 144 if (!headers) { 145 DVLOG(1) << "Missing response headers."; 146 return; 147 } 148 149 ClientMalwareReportRequest::HTTPResponse* pb_response = 150 pb_resource->mutable_response(); 151 pb_response->mutable_firstline()->set_code(headers->response_code()); 152 void* iter = NULL; 153 std::string name, value; 154 while (headers->EnumerateHeaderLines(&iter, &name, &value)) { 155 ClientMalwareReportRequest::HTTPHeader* pb_header = 156 pb_response->add_headers(); 157 pb_header->set_name(name); 158 // Strip any Set-Cookie headers. 159 if (LowerCaseEqualsASCII(name, "set-cookie")) { 160 pb_header->set_value(""); 161 } else { 162 pb_header->set_value(value); 163 } 164 } 165 166 if (!source->WasFetchedViaProxy()) { 167 pb_response->set_remote_ip(source->GetSocketAddress().ToString()); 168 } 169 } 170 171 void MalwareDetailsCacheCollector::ReadData( 172 ClientMalwareReportRequest::Resource* pb_resource, 173 const std::string& data) { 174 DVLOG(1) << "ReadData"; 175 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 176 ClientMalwareReportRequest::HTTPResponse* pb_response = 177 pb_resource->mutable_response(); 178 if (data.size() <= kMaxBodySizeBytes) { // Only send small bodies for now. 179 pb_response->set_body(data); 180 } 181 pb_response->set_bodylength(data.size()); 182 base::MD5Digest digest; 183 base::MD5Sum(data.c_str(), data.size(), &digest); 184 pb_response->set_bodydigest(base::MD5DigestToBase16(digest)); 185 } 186 187 void MalwareDetailsCacheCollector::AdvanceEntry() { 188 DVLOG(1) << "AdvanceEntry"; 189 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 190 // Advance to the next resource. 191 ++resources_it_; 192 current_fetch_.reset(NULL); 193 194 // Create a task so we don't take over the IO thread for too long. 195 BrowserThread::PostTask( 196 BrowserThread::IO, FROM_HERE, 197 base::Bind(&MalwareDetailsCacheCollector::OpenEntry, this)); 198 } 199 200 void MalwareDetailsCacheCollector::AllDone(bool success) { 201 DVLOG(1) << "AllDone"; 202 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 203 *result_ = success; 204 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, callback_); 205 callback_.Reset(); 206 } 207