Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
      6 
      7 #include "base/command_line.h"
      8 #include "base/file_path.h"
      9 #include "base/file_util_proxy.h"
     10 #include "base/logging.h"
     11 #include "base/memory/scoped_ptr.h"
     12 #include "base/message_loop.h"
     13 #include "base/metrics/histogram.h"
     14 #include "base/platform_file.h"
     15 #include "base/stl_util-inl.h"
     16 #include "base/task.h"
     17 #include "base/time.h"
     18 #include "chrome/common/net/http_return.h"
     19 #include "chrome/common/net/url_fetcher.h"
     20 #include "chrome/common/safe_browsing/csd.pb.h"
     21 #include "content/browser/browser_thread.h"
     22 #include "googleurl/src/gurl.h"
     23 #include "net/base/load_flags.h"
     24 #include "net/url_request/url_request_context_getter.h"
     25 #include "net/url_request/url_request_status.h"
     26 
     27 namespace safe_browsing {
     28 
// Report-count threshold that OverReportLimit() compares GetNumReports()
// against.
const int ClientSideDetectionService::kMaxReportsPerInterval = 3;

// Length of the sliding window over which sent reports are counted
// (see GetNumReports()).
const base::TimeDelta ClientSideDetectionService::kReportsInterval =
    base::TimeDelta::FromDays(1);
// Maximum age at which GetValidCachedResult() still returns a cached
// non-phishing verdict.
const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval =
    base::TimeDelta::FromDays(1);
// Maximum age at which GetValidCachedResult() still returns a cached
// phishing verdict.
const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval =
    base::TimeDelta::FromMinutes(30);

// URL that serialized ClientPhishingRequest messages are POSTed to.
const char ClientSideDetectionService::kClientReportPhishingUrl[] =
    "https://sb-ssl.google.com/safebrowsing/clientreport/phishing";
// Note: when updating the model version, don't forget to change the filename
// in chrome/common/chrome_constants.cc as well, or else existing users won't
// download the new model.
//
// TODO(bryner): add version metadata so that clients can download new models
// without needing a new model filename.
const char ClientSideDetectionService::kClientModelUrl[] =
    "https://ssl.gstatic.com/safebrowsing/csd/client_model_v1.pb";
     48 
// Bookkeeping for one in-flight phishing report.  An instance is stored in
// |client_phishing_reports_| keyed by the URLFetcher that carries the request.
struct ClientSideDetectionService::ClientReportInfo {
  // Callback to run once the verdict is known; owned by this struct.
  scoped_ptr<ClientReportPhishingRequestCallback> callback;
  // URL taken from the request; passed back to |callback| with the verdict.
  GURL phishing_url;
};
     53 
     54 ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time)
     55     : is_phishing(phish),
     56       timestamp(time) {}
     57 
     58 ClientSideDetectionService::ClientSideDetectionService(
     59     const FilePath& model_path,
     60     net::URLRequestContextGetter* request_context_getter)
     61     : model_path_(model_path),
     62       model_status_(UNKNOWN_STATUS),
     63       model_file_(base::kInvalidPlatformFileValue),
     64       ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)),
     65       ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)),
     66       request_context_getter_(request_context_getter) {}
     67 
     68 ClientSideDetectionService::~ClientSideDetectionService() {
     69   method_factory_.RevokeAll();
     70   STLDeleteContainerPairPointers(client_phishing_reports_.begin(),
     71                                  client_phishing_reports_.end());
     72   client_phishing_reports_.clear();
     73   STLDeleteElements(&open_callbacks_);
     74   CloseModelFile();
     75 }
     76 
     77 /* static */
     78 ClientSideDetectionService* ClientSideDetectionService::Create(
     79     const FilePath& model_path,
     80     net::URLRequestContextGetter* request_context_getter) {
     81   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
     82   scoped_ptr<ClientSideDetectionService> service(
     83       new ClientSideDetectionService(model_path, request_context_getter));
     84   if (!service->InitializePrivateNetworks()) {
     85     UMA_HISTOGRAM_COUNTS("SBClientPhishing.InitPrivateNetworksFailed", 1);
     86     return NULL;
     87   }
     88 
     89   // We try to open the model file right away and start fetching it if
     90   // it does not already exist on disk.
     91   base::FileUtilProxy::CreateOrOpenCallback* cb =
     92       service.get()->callback_factory_.NewCallback(
     93           &ClientSideDetectionService::OpenModelFileDone);
     94   if (!base::FileUtilProxy::CreateOrOpen(
     95           BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
     96           model_path,
     97           base::PLATFORM_FILE_OPEN | base::PLATFORM_FILE_READ,
     98           cb)) {
     99     delete cb;
    100     return NULL;
    101   }
    102 
    103   // Delete the previous-version model file.
    104   // TODO(bryner): Remove this for M14.
    105   base::FileUtilProxy::Delete(
    106       BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
    107       model_path.DirName().AppendASCII("Safe Browsing Phishing Model"),
    108       false /* not recursive */,
    109       NULL /* not interested in result */);
    110   return service.release();
    111 }
    112 
    113 void ClientSideDetectionService::GetModelFile(OpenModelDoneCallback* callback) {
    114   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    115   MessageLoop::current()->PostTask(
    116       FROM_HERE,
    117       method_factory_.NewRunnableMethod(
    118           &ClientSideDetectionService::StartGetModelFile, callback));
    119 }
    120 
    121 void ClientSideDetectionService::SendClientReportPhishingRequest(
    122     ClientPhishingRequest* verdict,
    123     ClientReportPhishingRequestCallback* callback) {
    124   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    125   MessageLoop::current()->PostTask(
    126       FROM_HERE,
    127       method_factory_.NewRunnableMethod(
    128           &ClientSideDetectionService::StartClientReportPhishingRequest,
    129           verdict, callback));
    130 }
    131 
    132 bool ClientSideDetectionService::IsPrivateIPAddress(
    133     const std::string& ip_address) const {
    134   net::IPAddressNumber ip_number;
    135   if (!net::ParseIPLiteralToNumber(ip_address, &ip_number)) {
    136     DLOG(WARNING) << "Unable to parse IP address: " << ip_address;
    137     // Err on the side of safety and assume this might be private.
    138     return true;
    139   }
    140 
    141   for (std::vector<AddressRange>::const_iterator it =
    142            private_networks_.begin();
    143        it != private_networks_.end(); ++it) {
    144     if (net::IPNumberMatchesPrefix(ip_number, it->first, it->second)) {
    145       return true;
    146     }
    147   }
    148   return false;
    149 }
    150 
    151 void ClientSideDetectionService::OnURLFetchComplete(
    152     const URLFetcher* source,
    153     const GURL& url,
    154     const net::URLRequestStatus& status,
    155     int response_code,
    156     const ResponseCookies& cookies,
    157     const std::string& data) {
    158   if (source == model_fetcher_.get()) {
    159     HandleModelResponse(source, url, status, response_code, cookies, data);
    160   } else if (client_phishing_reports_.find(source) !=
    161              client_phishing_reports_.end()) {
    162     HandlePhishingVerdict(source, url, status, response_code, cookies, data);
    163   } else {
    164     NOTREACHED();
    165   }
    166 }
    167 
    168 void ClientSideDetectionService::SetModelStatus(ModelStatus status) {
    169   DCHECK_NE(READY_STATUS, model_status_);
    170   model_status_ = status;
    171   if (READY_STATUS == status || ERROR_STATUS == status) {
    172     for (size_t i = 0; i < open_callbacks_.size(); ++i) {
    173       open_callbacks_[i]->Run(model_file_);
    174     }
    175     STLDeleteElements(&open_callbacks_);
    176   } else {
    177     NOTREACHED();
    178   }
    179 }
    180 
    181 void ClientSideDetectionService::OpenModelFileDone(
    182     base::PlatformFileError error_code,
    183     base::PassPlatformFile file,
    184     bool created) {
    185   DCHECK(!created);
    186   if (base::PLATFORM_FILE_OK == error_code) {
    187     // The model file already exists.  There is no need to fetch the model.
    188     model_file_ = file.ReleaseValue();
    189     SetModelStatus(READY_STATUS);
    190   } else if (base::PLATFORM_FILE_ERROR_NOT_FOUND == error_code) {
    191     // We need to fetch the model since it does not exist yet.
    192     model_fetcher_.reset(URLFetcher::Create(0 /* ID is not used */,
    193                                             GURL(kClientModelUrl),
    194                                             URLFetcher::GET,
    195                                             this));
    196     model_fetcher_->set_request_context(request_context_getter_.get());
    197     model_fetcher_->Start();
    198   } else {
    199     // It is not clear what we should do in this case.  For now we simply fail.
    200     // Hopefully, we'll be able to read the model during the next browser
    201     // restart.
    202     SetModelStatus(ERROR_STATUS);
    203   }
    204 }
    205 
    206 void ClientSideDetectionService::CreateModelFileDone(
    207     base::PlatformFileError error_code,
    208     base::PassPlatformFile file,
    209     bool created) {
    210   model_file_ = file.ReleaseValue();
    211   base::FileUtilProxy::WriteCallback* cb = callback_factory_.NewCallback(
    212       &ClientSideDetectionService::WriteModelFileDone);
    213   if (!created ||
    214       base::PLATFORM_FILE_OK != error_code ||
    215       !base::FileUtilProxy::Write(
    216           BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
    217           model_file_,
    218           0 /* offset */, tmp_model_string_->data(), tmp_model_string_->size(),
    219           cb)) {
    220     delete cb;
    221     // An error occurred somewhere.  We close the model file if necessary and
    222     // then run all the pending callbacks giving them an invalid model file.
    223     CloseModelFile();
    224     SetModelStatus(ERROR_STATUS);
    225   }
    226 }
    227 
    228 void ClientSideDetectionService::WriteModelFileDone(
    229     base::PlatformFileError error_code,
    230     int bytes_written) {
    231   if (base::PLATFORM_FILE_OK == error_code) {
    232     SetModelStatus(READY_STATUS);
    233   } else {
    234     // TODO(noelutz): maybe we should retry writing the model since we
    235     // did already fetch the model?
    236     CloseModelFile();
    237     SetModelStatus(ERROR_STATUS);
    238   }
    239   // Delete the model string that we kept around while we were writing the
    240   // string to disk - we don't need it anymore.
    241   tmp_model_string_.reset();
    242 }
    243 
    244 void ClientSideDetectionService::CloseModelFile() {
    245   if (model_file_ != base::kInvalidPlatformFileValue) {
    246     base::FileUtilProxy::Close(
    247         BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
    248         model_file_,
    249         NULL);
    250   }
    251   model_file_ = base::kInvalidPlatformFileValue;
    252 }
    253 
    254 void ClientSideDetectionService::StartGetModelFile(
    255     OpenModelDoneCallback* callback) {
    256   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    257   if (UNKNOWN_STATUS == model_status_) {
    258     // Store the callback which will be called once we know the status of the
    259     // model file.
    260     open_callbacks_.push_back(callback);
    261   } else {
    262     // The model is either in READY or ERROR state which means we can
    263     // call the callback right away.
    264     callback->Run(model_file_);
    265     delete callback;
    266   }
    267 }
    268 
    269 void ClientSideDetectionService::StartClientReportPhishingRequest(
    270     ClientPhishingRequest* verdict,
    271     ClientReportPhishingRequestCallback* callback) {
    272   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    273   scoped_ptr<ClientPhishingRequest> request(verdict);
    274   scoped_ptr<ClientReportPhishingRequestCallback> cb(callback);
    275 
    276   std::string request_data;
    277   if (!request->SerializeToString(&request_data)) {
    278     UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1);
    279     VLOG(1) << "Unable to serialize the CSD request. Proto file changed?";
    280     cb->Run(GURL(request->url()), false);
    281     return;
    282   }
    283 
    284   URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */,
    285                                            GURL(kClientReportPhishingUrl),
    286                                            URLFetcher::POST,
    287                                            this);
    288 
    289   // Remember which callback and URL correspond to the current fetcher object.
    290   ClientReportInfo* info = new ClientReportInfo;
    291   info->callback.swap(cb);  // takes ownership of the callback.
    292   info->phishing_url = GURL(request->url());
    293   client_phishing_reports_[fetcher] = info;
    294 
    295   fetcher->set_load_flags(net::LOAD_DISABLE_CACHE);
    296   fetcher->set_request_context(request_context_getter_.get());
    297   fetcher->set_upload_data("application/octet-stream", request_data);
    298   fetcher->Start();
    299 
    300   // Record that we made a request
    301   phishing_report_times_.push(base::Time::Now());
    302 }
    303 
    304 void ClientSideDetectionService::HandleModelResponse(
    305     const URLFetcher* source,
    306     const GURL& url,
    307     const net::URLRequestStatus& status,
    308     int response_code,
    309     const ResponseCookies& cookies,
    310     const std::string& data) {
    311   if (status.is_success() && RC_REQUEST_OK == response_code) {
    312     // Copy the model because it has to be accessible after this function
    313     // returns.  Once we have written the model to a file we will delete the
    314     // temporary model string. TODO(noelutz): don't store the model to disk if
    315     // it's invalid.
    316     tmp_model_string_.reset(new std::string(data));
    317     base::FileUtilProxy::CreateOrOpenCallback* cb =
    318         callback_factory_.NewCallback(
    319             &ClientSideDetectionService::CreateModelFileDone);
    320     if (!base::FileUtilProxy::CreateOrOpen(
    321             BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE),
    322             model_path_,
    323             base::PLATFORM_FILE_CREATE_ALWAYS |
    324             base::PLATFORM_FILE_WRITE |
    325             base::PLATFORM_FILE_READ,
    326             cb)) {
    327       delete cb;
    328       SetModelStatus(ERROR_STATUS);
    329     }
    330   } else {
    331     SetModelStatus(ERROR_STATUS);
    332   }
    333 }
    334 
    335 void ClientSideDetectionService::HandlePhishingVerdict(
    336     const URLFetcher* source,
    337     const GURL& url,
    338     const net::URLRequestStatus& status,
    339     int response_code,
    340     const ResponseCookies& cookies,
    341     const std::string& data) {
    342   ClientPhishingResponse response;
    343   scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]);
    344   if (status.is_success() && RC_REQUEST_OK == response_code &&
    345       response.ParseFromString(data)) {
    346     // Cache response, possibly flushing an old one.
    347     cache_[info->phishing_url] =
    348         make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
    349     info->callback->Run(info->phishing_url, response.phishy());
    350   } else {
    351     DLOG(ERROR) << "Unable to get the server verdict for URL: "
    352                 << info->phishing_url << " status: " << status.status() << " "
    353                 << "response_code:" << response_code;
    354     info->callback->Run(info->phishing_url, false);
    355   }
    356   client_phishing_reports_.erase(source);
    357   delete source;
    358 }
    359 
    360 bool ClientSideDetectionService::IsInCache(const GURL& url) {
    361   UpdateCache();
    362 
    363   return cache_.find(url) != cache_.end();
    364 }
    365 
    366 bool ClientSideDetectionService::GetValidCachedResult(const GURL& url,
    367                                                       bool* is_phishing) {
    368   UpdateCache();
    369 
    370   PhishingCache::iterator it = cache_.find(url);
    371   if (it == cache_.end()) {
    372     return false;
    373   }
    374 
    375   // We still need to check if the result is valid.
    376   const CacheState& cache_state = *it->second;
    377   if (cache_state.is_phishing ?
    378       cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval :
    379       cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) {
    380     *is_phishing = cache_state.is_phishing;
    381     return true;
    382   }
    383   return false;
    384 }
    385 
    386 void ClientSideDetectionService::UpdateCache() {
    387   // Since we limit the number of requests but allow pass-through for cache
    388   // refreshes, we don't want to remove elements from the cache if they
    389   // could be used for this purpose even if we will not use the entry to
    390   // satisfy the request from the cache.
    391   base::TimeDelta positive_cache_interval =
    392       std::max(kPositiveCacheInterval, kReportsInterval);
    393   base::TimeDelta negative_cache_interval =
    394       std::max(kNegativeCacheInterval, kReportsInterval);
    395 
    396   // Remove elements from the cache that will no longer be used.
    397   for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) {
    398     const CacheState& cache_state = *it->second;
    399     if (cache_state.is_phishing ?
    400         cache_state.timestamp > base::Time::Now() - positive_cache_interval :
    401         cache_state.timestamp > base::Time::Now() - negative_cache_interval) {
    402       ++it;
    403     } else {
    404       cache_.erase(it++);
    405     }
    406   }
    407 }
    408 
    409 bool ClientSideDetectionService::OverReportLimit() {
    410   return GetNumReports() > kMaxReportsPerInterval;
    411 }
    412 
    413 int ClientSideDetectionService::GetNumReports() {
    414   base::Time cutoff = base::Time::Now() - kReportsInterval;
    415 
    416   // Erase items older than cutoff because we will never care about them again.
    417   while (!phishing_report_times_.empty() &&
    418          phishing_report_times_.front() < cutoff) {
    419     phishing_report_times_.pop();
    420   }
    421 
    422   // Return the number of elements that are above the cutoff.
    423   return phishing_report_times_.size();
    424 }
    425 
    426 bool ClientSideDetectionService::InitializePrivateNetworks() {
    427   static const char* const kPrivateNetworks[] = {
    428     "10.0.0.0/8",
    429     "127.0.0.0/8",
    430     "172.16.0.0/12",
    431     "192.168.0.0/16",
    432     // IPv6 address ranges
    433     "fc00::/7",
    434     "fec0::/10",
    435     "::1/128",
    436   };
    437 
    438   for (size_t i = 0; i < arraysize(kPrivateNetworks); ++i) {
    439     net::IPAddressNumber ip_number;
    440     size_t prefix_length;
    441     if (net::ParseCIDRBlock(kPrivateNetworks[i], &ip_number, &prefix_length)) {
    442       private_networks_.push_back(std::make_pair(ip_number, prefix_length));
    443     } else {
    444       DLOG(FATAL) << "Unable to parse IP address range: "
    445                   << kPrivateNetworks[i];
    446       return false;
    447     }
    448   }
    449   return true;
    450 }
    451 
    452 }  // namespace safe_browsing
    453