Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
// Implementation of the MalwareDetailsCacheCollector class.
      6 
      7 #include "chrome/browser/safe_browsing/malware_details.h"
      8 
      9 #include "base/callback.h"
     10 #include "base/lazy_instance.h"
     11 #include "base/md5.h"
     12 #include "base/string_util.h"
     13 #include "chrome/browser/net/chrome_url_request_context.h"
     14 #include "chrome/browser/safe_browsing/malware_details_cache.h"
     15 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
     16 #include "chrome/browser/safe_browsing/report.pb.h"
     17 #include "content/browser/browser_thread.h"
     18 #include "net/base/load_flags.h"
     19 #include "net/http/http_response_headers.h"
     20 #include "net/url_request/url_request_context_getter.h"
     21 #include "net/url_request/url_request_status.h"
     22 
     23 using safe_browsing::ClientMalwareReportRequest;
     24 
// Upper bound, in bytes, on a response body that is copied into the report.
// Only small bodies are sent for now; a better strategy would take into
// account the size of the whole report and the user's bandwidth.
static const uint32 kMaxBodySizeBytes = 1024;
     28 
     29 MalwareDetailsCacheCollector::MalwareDetailsCacheCollector()
     30     : has_started_(false),
     31       current_fetch_(NULL) {
     32 }
     33 
     34 MalwareDetailsCacheCollector::~MalwareDetailsCacheCollector() {
     35 }
     36 
     37 void MalwareDetailsCacheCollector::StartCacheCollection(
     38     net::URLRequestContextGetter* request_context_getter,
     39     safe_browsing::ResourceMap* resources,
     40     bool* result,
     41     Task* callback) {
     42   // Start the data collection from the HTTP cache. We use a URLFetcher
     43   // and set the right flags so we only hit the cache.
     44   DVLOG(1) << "Getting cache data for all urls...";
     45   request_context_getter_ = request_context_getter;
     46   resources_ = resources;
     47   resources_it_ = resources_->begin();
     48   result_ = result;
     49   callback_ = callback;
     50   has_started_ = true;
     51 
     52   // Post a task in the message loop, so the callers don't need to
     53   // check if we call their callback immediately.
     54   BrowserThread::PostTask(
     55       BrowserThread::IO, FROM_HERE,
     56       NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry));
     57 }
     58 
     59 bool MalwareDetailsCacheCollector::HasStarted() {
     60   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
     61   return has_started_;
     62 }
     63 
     64 // Fetch a URL and advance to the next one when done.
     65 void MalwareDetailsCacheCollector::OpenEntry() {
     66   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
     67   DVLOG(1) << "OpenEntry";
     68 
     69   if (resources_it_ == resources_->end()) { // We are done.
     70     AllDone(true);
     71     return;
     72   }
     73 
     74   if (!request_context_getter_) {
     75     DVLOG(1) << "Missing request context getter";
     76     AllDone(false);
     77     return;
     78   }
     79 
     80   current_fetch_.reset(new URLFetcher(
     81       GURL(resources_it_->first),
     82       URLFetcher::GET,
     83       this));
     84   current_fetch_->set_request_context(request_context_getter_);
     85   // Only from cache, and don't save cookies.
     86   current_fetch_->set_load_flags(net::LOAD_ONLY_FROM_CACHE |
     87                                  net::LOAD_DO_NOT_SAVE_COOKIES);
     88   current_fetch_->set_automatically_retry_on_5xx(false);  // No retries.
     89   current_fetch_->Start();  // OnURLFetchComplete will be called when done.
     90 }
     91 
     92 ClientMalwareReportRequest::Resource* MalwareDetailsCacheCollector::GetResource(
     93     const GURL& url) {
     94   safe_browsing::ResourceMap::iterator it = resources_->find(url.spec());
     95   if (it != resources_->end()) {
     96     return it->second.get();
     97   }
     98   return NULL;
     99 }
    100 
    101 void MalwareDetailsCacheCollector::OnURLFetchComplete(
    102     const URLFetcher* source,
    103     const GURL& url,
    104     const net::URLRequestStatus& status,
    105     int response_code,
    106     const ResponseCookies& cookies,
    107     const std::string& data) {
    108   DVLOG(1) << "OnUrlFetchComplete";
    109   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    110   DCHECK(current_fetch_.get());
    111   if (status.status() != net::URLRequestStatus::SUCCESS &&
    112       status.os_error() == net::ERR_CACHE_MISS) {
    113     // Cache miss, skip this resource.
    114     DVLOG(1) << "Cache miss for url: " << url;
    115     AdvanceEntry();
    116     return;
    117   }
    118 
    119   if (status.status() != net::URLRequestStatus::SUCCESS) {
    120     // Some other error occurred, e.g. the request could have been cancelled.
    121     DVLOG(1) << "Unsuccessful fetch: " << url;
    122     AdvanceEntry();
    123     return;
    124   }
    125 
    126   // Set the response headers and body to the right resource, which
    127   // might not be the same as the one we asked for.
    128   // For redirects, resources_it_->first != url.spec().
    129   ClientMalwareReportRequest::Resource* resource = GetResource(url);
    130   if (!resource) {
    131     DVLOG(1) << "Cannot find resource for url:" << url;
    132     AdvanceEntry();
    133     return;
    134   }
    135 
    136   ReadResponse(resource, source);
    137   ReadData(resource, data);
    138   AdvanceEntry();
    139 }
    140 
    141 void MalwareDetailsCacheCollector::ReadResponse(
    142     ClientMalwareReportRequest::Resource* pb_resource,
    143     const URLFetcher* source) {
    144   DVLOG(1) << "ReadResponse";
    145   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    146   net::HttpResponseHeaders* headers = source->response_headers();
    147   if (!headers) {
    148     DVLOG(1) << "Missing response headers.";
    149     return;
    150   }
    151 
    152   ClientMalwareReportRequest::HTTPResponse* pb_response =
    153       pb_resource->mutable_response();
    154   pb_response->mutable_firstline()->set_code(headers->response_code());
    155   void* iter = NULL;
    156   std::string name, value;
    157   while (headers->EnumerateHeaderLines(&iter, &name, &value)) {
    158     ClientMalwareReportRequest::HTTPHeader* pb_header =
    159         pb_response->add_headers();
    160     pb_header->set_name(name);
    161     // Strip any Set-Cookie headers.
    162     if (LowerCaseEqualsASCII(name, "set-cookie")) {
    163       pb_header->set_value("");
    164     } else {
    165       pb_header->set_value(value);
    166     }
    167   }
    168 }
    169 
    170 void MalwareDetailsCacheCollector::ReadData(
    171     ClientMalwareReportRequest::Resource* pb_resource,
    172     const std::string& data) {
    173   DVLOG(1) << "ReadData";
    174   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    175   ClientMalwareReportRequest::HTTPResponse* pb_response =
    176       pb_resource->mutable_response();
    177   if (data.size() <= kMaxBodySizeBytes) {  // Only send small bodies for now.
    178     pb_response->set_body(data);
    179   }
    180   pb_response->set_bodylength(data.size());
    181   MD5Digest digest;
    182   MD5Sum(data.c_str(), data.size(), &digest);
    183   pb_response->set_bodydigest(MD5DigestToBase16(digest));
    184 }
    185 
    186 void MalwareDetailsCacheCollector::AdvanceEntry() {
    187   DVLOG(1) << "AdvanceEntry";
    188   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    189   // Advance to the next resource.
    190   ++resources_it_;
    191   current_fetch_.reset(NULL);
    192 
    193   // Create a task so we don't take over the IO thread for too long.
    194   BrowserThread::PostTask(
    195       BrowserThread::IO, FROM_HERE,
    196       NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry));
    197 }
    198 
    199 void MalwareDetailsCacheCollector::AllDone(bool success) {
    200   DVLOG(1) << "AllDone";
    201   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    202   *result_ = success;
    203   BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, callback_);
    204 }
    205