Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // Implementation of the MalwareDetails class.
      6 
      7 #include "chrome/browser/safe_browsing/malware_details.h"
      8 
      9 #include "base/bind.h"
     10 #include "base/lazy_instance.h"
     11 #include "base/md5.h"
     12 #include "base/strings/string_util.h"
     13 #include "chrome/browser/net/chrome_url_request_context.h"
     14 #include "chrome/browser/safe_browsing/malware_details_cache.h"
     15 #include "chrome/browser/safe_browsing/report.pb.h"
     16 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
     17 #include "content/public/browser/browser_thread.h"
     18 #include "net/base/host_port_pair.h"
     19 #include "net/base/load_flags.h"
     20 #include "net/base/net_errors.h"
     21 #include "net/http/http_response_headers.h"
     22 #include "net/url_request/url_fetcher.h"
     23 #include "net/url_request/url_request_context_getter.h"
     24 #include "net/url_request/url_request_status.h"
     25 
     26 using content::BrowserThread;
     27 using safe_browsing::ClientMalwareReportRequest;
     28 
     29 // Only send small files for now, a better strategy would use the size
     30 // of the whole report and the user's bandwidth.
     31 static const uint32 kMaxBodySizeBytes = 1024;
     32 
     33 MalwareDetailsCacheCollector::MalwareDetailsCacheCollector()
     34     : resources_(NULL), result_(NULL), has_started_(false) {}
     35 
     36 void MalwareDetailsCacheCollector::StartCacheCollection(
     37     net::URLRequestContextGetter* request_context_getter,
     38     safe_browsing::ResourceMap* resources,
     39     bool* result,
     40     const base::Closure& callback) {
     41   // Start the data collection from the HTTP cache. We use a URLFetcher
     42   // and set the right flags so we only hit the cache.
     43   DVLOG(1) << "Getting cache data for all urls...";
     44   request_context_getter_ = request_context_getter;
     45   resources_ = resources;
     46   resources_it_ = resources_->begin();
     47   result_ = result;
     48   callback_ = callback;
     49   has_started_ = true;
     50 
     51   // Post a task in the message loop, so the callers don't need to
     52   // check if we call their callback immediately.
     53   BrowserThread::PostTask(
     54       BrowserThread::IO, FROM_HERE,
     55       base::Bind(&MalwareDetailsCacheCollector::OpenEntry, this));
     56 }
     57 
     58 bool MalwareDetailsCacheCollector::HasStarted() {
     59   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
     60   return has_started_;
     61 }
     62 
     63 MalwareDetailsCacheCollector::~MalwareDetailsCacheCollector() {}
     64 
     65 // Fetch a URL and advance to the next one when done.
     66 void MalwareDetailsCacheCollector::OpenEntry() {
     67   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
     68   DVLOG(1) << "OpenEntry";
     69 
     70   if (resources_it_ == resources_->end()) { // We are done.
     71     AllDone(true);
     72     return;
     73   }
     74 
     75   if (!request_context_getter_.get()) {
     76     DVLOG(1) << "Missing request context getter";
     77     AllDone(false);
     78     return;
     79   }
     80 
     81   current_fetch_.reset(net::URLFetcher::Create(
     82       GURL(resources_it_->first), net::URLFetcher::GET, this));
     83   current_fetch_->SetRequestContext(request_context_getter_.get());
     84   // Only from cache, and don't save cookies.
     85   current_fetch_->SetLoadFlags(net::LOAD_ONLY_FROM_CACHE |
     86                                net::LOAD_DO_NOT_SAVE_COOKIES);
     87   current_fetch_->SetAutomaticallyRetryOn5xx(false);  // No retries.
     88   current_fetch_->Start();  // OnURLFetchComplete will be called when done.
     89 }
     90 
     91 ClientMalwareReportRequest::Resource* MalwareDetailsCacheCollector::GetResource(
     92     const GURL& url) {
     93   safe_browsing::ResourceMap::iterator it = resources_->find(url.spec());
     94   if (it != resources_->end()) {
     95     return it->second.get();
     96   }
     97   return NULL;
     98 }
     99 
    100 void MalwareDetailsCacheCollector::OnURLFetchComplete(
    101     const net::URLFetcher* source) {
    102   DVLOG(1) << "OnUrlFetchComplete";
    103   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    104   DCHECK(current_fetch_.get());
    105   if (source->GetStatus().status() != net::URLRequestStatus::SUCCESS &&
    106       source->GetStatus().error() == net::ERR_CACHE_MISS) {
    107     // Cache miss, skip this resource.
    108     DVLOG(1) << "Cache miss for url: " << source->GetURL();
    109     AdvanceEntry();
    110     return;
    111   }
    112 
    113   if (source->GetStatus().status() != net::URLRequestStatus::SUCCESS) {
    114     // Some other error occurred, e.g. the request could have been cancelled.
    115     DVLOG(1) << "Unsuccessful fetch: " << source->GetURL();
    116     AdvanceEntry();
    117     return;
    118   }
    119 
    120   // Set the response headers and body to the right resource, which
    121   // might not be the same as the one we asked for.
    122   // For redirects, resources_it_->first != url.spec().
    123   ClientMalwareReportRequest::Resource* resource =
    124       GetResource(source->GetURL());
    125   if (!resource) {
    126     DVLOG(1) << "Cannot find resource for url:" << source->GetURL();
    127     AdvanceEntry();
    128     return;
    129   }
    130 
    131   ReadResponse(resource, source);
    132   std::string data;
    133   source->GetResponseAsString(&data);
    134   ReadData(resource, data);
    135   AdvanceEntry();
    136 }
    137 
    138 void MalwareDetailsCacheCollector::ReadResponse(
    139     ClientMalwareReportRequest::Resource* pb_resource,
    140     const net::URLFetcher* source) {
    141   DVLOG(1) << "ReadResponse";
    142   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    143   net::HttpResponseHeaders* headers = source->GetResponseHeaders();
    144   if (!headers) {
    145     DVLOG(1) << "Missing response headers.";
    146     return;
    147   }
    148 
    149   ClientMalwareReportRequest::HTTPResponse* pb_response =
    150       pb_resource->mutable_response();
    151   pb_response->mutable_firstline()->set_code(headers->response_code());
    152   void* iter = NULL;
    153   std::string name, value;
    154   while (headers->EnumerateHeaderLines(&iter, &name, &value)) {
    155     ClientMalwareReportRequest::HTTPHeader* pb_header =
    156         pb_response->add_headers();
    157     pb_header->set_name(name);
    158     // Strip any Set-Cookie headers.
    159     if (LowerCaseEqualsASCII(name, "set-cookie")) {
    160       pb_header->set_value("");
    161     } else {
    162       pb_header->set_value(value);
    163     }
    164   }
    165 
    166   if (!source->WasFetchedViaProxy()) {
    167     pb_response->set_remote_ip(source->GetSocketAddress().ToString());
    168   }
    169 }
    170 
    171 void MalwareDetailsCacheCollector::ReadData(
    172     ClientMalwareReportRequest::Resource* pb_resource,
    173     const std::string& data) {
    174   DVLOG(1) << "ReadData";
    175   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    176   ClientMalwareReportRequest::HTTPResponse* pb_response =
    177       pb_resource->mutable_response();
    178   if (data.size() <= kMaxBodySizeBytes) {  // Only send small bodies for now.
    179     pb_response->set_body(data);
    180   }
    181   pb_response->set_bodylength(data.size());
    182   base::MD5Digest digest;
    183   base::MD5Sum(data.c_str(), data.size(), &digest);
    184   pb_response->set_bodydigest(base::MD5DigestToBase16(digest));
    185 }
    186 
    187 void MalwareDetailsCacheCollector::AdvanceEntry() {
    188   DVLOG(1) << "AdvanceEntry";
    189   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    190   // Advance to the next resource.
    191   ++resources_it_;
    192   current_fetch_.reset(NULL);
    193 
    194   // Create a task so we don't take over the IO thread for too long.
    195   BrowserThread::PostTask(
    196       BrowserThread::IO, FROM_HERE,
    197       base::Bind(&MalwareDetailsCacheCollector::OpenEntry, this));
    198 }
    199 
    200 void MalwareDetailsCacheCollector::AllDone(bool success) {
    201   DVLOG(1) << "AllDone";
    202   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    203   *result_ = success;
    204   BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, callback_);
    205   callback_.Reset();
    206 }
    207