1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // Implementation of the MalwareDetails class. 6 7 #include "chrome/browser/safe_browsing/malware_details.h" 8 9 #include "base/callback.h" 10 #include "base/lazy_instance.h" 11 #include "base/md5.h" 12 #include "base/string_util.h" 13 #include "chrome/browser/net/chrome_url_request_context.h" 14 #include "chrome/browser/safe_browsing/malware_details_cache.h" 15 #include "chrome/browser/safe_browsing/safe_browsing_service.h" 16 #include "chrome/browser/safe_browsing/report.pb.h" 17 #include "content/browser/browser_thread.h" 18 #include "net/base/load_flags.h" 19 #include "net/http/http_response_headers.h" 20 #include "net/url_request/url_request_context_getter.h" 21 #include "net/url_request/url_request_status.h" 22 23 using safe_browsing::ClientMalwareReportRequest; 24 25 // Only send small files for now, a better strategy would use the size 26 // of the whole report and the user's bandwidth. 27 static const uint32 kMaxBodySizeBytes = 1024; 28 29 MalwareDetailsCacheCollector::MalwareDetailsCacheCollector() 30 : has_started_(false), 31 current_fetch_(NULL) { 32 } 33 34 MalwareDetailsCacheCollector::~MalwareDetailsCacheCollector() { 35 } 36 37 void MalwareDetailsCacheCollector::StartCacheCollection( 38 net::URLRequestContextGetter* request_context_getter, 39 safe_browsing::ResourceMap* resources, 40 bool* result, 41 Task* callback) { 42 // Start the data collection from the HTTP cache. We use a URLFetcher 43 // and set the right flags so we only hit the cache. 44 DVLOG(1) << "Getting cache data for all urls..."; 45 request_context_getter_ = request_context_getter; 46 resources_ = resources; 47 resources_it_ = resources_->begin(); 48 result_ = result; 49 callback_ = callback; 50 has_started_ = true; 51 52 // Post a task in the message loop, so the callers don't need to 53 // check if we call their callback immediately. 54 BrowserThread::PostTask( 55 BrowserThread::IO, FROM_HERE, 56 NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry)); 57 } 58 59 bool MalwareDetailsCacheCollector::HasStarted() { 60 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 61 return has_started_; 62 } 63 64 // Fetch a URL and advance to the next one when done. 65 void MalwareDetailsCacheCollector::OpenEntry() { 66 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 67 DVLOG(1) << "OpenEntry"; 68 69 if (resources_it_ == resources_->end()) { // We are done. 70 AllDone(true); 71 return; 72 } 73 74 if (!request_context_getter_) { 75 DVLOG(1) << "Missing request context getter"; 76 AllDone(false); 77 return; 78 } 79 80 current_fetch_.reset(new URLFetcher( 81 GURL(resources_it_->first), 82 URLFetcher::GET, 83 this)); 84 current_fetch_->set_request_context(request_context_getter_); 85 // Only from cache, and don't save cookies. 86 current_fetch_->set_load_flags(net::LOAD_ONLY_FROM_CACHE | 87 net::LOAD_DO_NOT_SAVE_COOKIES); 88 current_fetch_->set_automatically_retry_on_5xx(false); // No retries. 89 current_fetch_->Start(); // OnURLFetchComplete will be called when done. 90 } 91 92 ClientMalwareReportRequest::Resource* MalwareDetailsCacheCollector::GetResource( 93 const GURL& url) { 94 safe_browsing::ResourceMap::iterator it = resources_->find(url.spec()); 95 if (it != resources_->end()) { 96 return it->second.get(); 97 } 98 return NULL; 99 } 100 101 void MalwareDetailsCacheCollector::OnURLFetchComplete( 102 const URLFetcher* source, 103 const GURL& url, 104 const net::URLRequestStatus& status, 105 int response_code, 106 const ResponseCookies& cookies, 107 const std::string& data) { 108 DVLOG(1) << "OnUrlFetchComplete"; 109 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 110 DCHECK(current_fetch_.get()); 111 if (status.status() != net::URLRequestStatus::SUCCESS && 112 status.os_error() == net::ERR_CACHE_MISS) { 113 // Cache miss, skip this resource. 114 DVLOG(1) << "Cache miss for url: " << url; 115 AdvanceEntry(); 116 return; 117 } 118 119 if (status.status() != net::URLRequestStatus::SUCCESS) { 120 // Some other error occurred, e.g. the request could have been cancelled. 121 DVLOG(1) << "Unsuccessful fetch: " << url; 122 AdvanceEntry(); 123 return; 124 } 125 126 // Set the response headers and body to the right resource, which 127 // might not be the same as the one we asked for. 128 // For redirects, resources_it_->first != url.spec(). 129 ClientMalwareReportRequest::Resource* resource = GetResource(url); 130 if (!resource) { 131 DVLOG(1) << "Cannot find resource for url:" << url; 132 AdvanceEntry(); 133 return; 134 } 135 136 ReadResponse(resource, source); 137 ReadData(resource, data); 138 AdvanceEntry(); 139 } 140 141 void MalwareDetailsCacheCollector::ReadResponse( 142 ClientMalwareReportRequest::Resource* pb_resource, 143 const URLFetcher* source) { 144 DVLOG(1) << "ReadResponse"; 145 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 146 net::HttpResponseHeaders* headers = source->response_headers(); 147 if (!headers) { 148 DVLOG(1) << "Missing response headers."; 149 return; 150 } 151 152 ClientMalwareReportRequest::HTTPResponse* pb_response = 153 pb_resource->mutable_response(); 154 pb_response->mutable_firstline()->set_code(headers->response_code()); 155 void* iter = NULL; 156 std::string name, value; 157 while (headers->EnumerateHeaderLines(&iter, &name, &value)) { 158 ClientMalwareReportRequest::HTTPHeader* pb_header = 159 pb_response->add_headers(); 160 pb_header->set_name(name); 161 // Strip any Set-Cookie headers. 162 if (LowerCaseEqualsASCII(name, "set-cookie")) { 163 pb_header->set_value(""); 164 } else { 165 pb_header->set_value(value); 166 } 167 } 168 } 169 170 void MalwareDetailsCacheCollector::ReadData( 171 ClientMalwareReportRequest::Resource* pb_resource, 172 const std::string& data) { 173 DVLOG(1) << "ReadData"; 174 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 175 ClientMalwareReportRequest::HTTPResponse* pb_response = 176 pb_resource->mutable_response(); 177 if (data.size() <= kMaxBodySizeBytes) { // Only send small bodies for now. 178 pb_response->set_body(data); 179 } 180 pb_response->set_bodylength(data.size()); 181 MD5Digest digest; 182 MD5Sum(data.c_str(), data.size(), &digest); 183 pb_response->set_bodydigest(MD5DigestToBase16(digest)); 184 } 185 186 void MalwareDetailsCacheCollector::AdvanceEntry() { 187 DVLOG(1) << "AdvanceEntry"; 188 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 189 // Advance to the next resource. 190 ++resources_it_; 191 current_fetch_.reset(NULL); 192 193 // Create a task so we don't take over the IO thread for too long. 194 BrowserThread::PostTask( 195 BrowserThread::IO, FROM_HERE, 196 NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry)); 197 } 198 199 void MalwareDetailsCacheCollector::AllDone(bool success) { 200 DVLOG(1) << "AllDone"; 201 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 202 *result_ = success; 203 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, callback_); 204 } 205