// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "extensions/browser/content_hash_fetcher.h"

#include <algorithm>
#include <set>
#include <string>
#include <vector>

#include "base/base64.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_util.h"
#include "base/json/json_reader.h"
#include "base/memory/ref_counted.h"
#include "base/metrics/histogram.h"
#include "base/synchronization/lock.h"
#include "base/task_runner_util.h"
#include "base/timer/elapsed_timer.h"
#include "base/version.h"
#include "content/public/browser/browser_context.h"
#include "content/public/browser/browser_thread.h"
#include "crypto/sha2.h"
#include "extensions/browser/computed_hashes.h"
#include "extensions/browser/content_hash_tree.h"
#include "extensions/browser/content_verifier_delegate.h"
#include "extensions/browser/verified_contents.h"
#include "extensions/common/constants.h"
#include "extensions/common/extension.h"
#include "extensions/common/file_util.h"
#include "net/base/load_flags.h"
#include "net/url_request/url_fetcher.h"
#include "net/url_request/url_fetcher_delegate.h"
#include "net/url_request/url_request_status.h"

namespace {

typedef std::set<base::FilePath> SortedFilePathSet;

}  // namespace

namespace extensions {

// This class takes care of doing the disk and network I/O work to ensure we
// have both verified_contents.json files from the webstore and
// computed_hashes.json files computed over the files in an extension's
// directory.
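//
// The flow of a job is roughly: Start() loads any existing
// verified_contents.json on the blocking pool;
// DoneCheckingForVerifiedContents() fetches one from the network if it wasn't
// already on disk (OnURLFetchComplete() / OnVerifiedContentsWritten());
// DoneFetchingVerifiedContents() then runs MaybeCreateHashes() on the
// blocking pool; and DispatchCallback() reports the result back on the job's
// creation thread.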
class ContentHashFetcherJob
    : public base::RefCountedThreadSafe<ContentHashFetcherJob>,
      public net::URLFetcherDelegate {
 public:
  typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback;
  ContentHashFetcherJob(net::URLRequestContextGetter* request_context,
                        const ContentVerifierKey& key,
                        const std::string& extension_id,
                        const base::FilePath& extension_path,
                        const GURL& fetch_url,
                        bool force,
                        const CompletionCallback& callback);

  void Start();

  // Cancels this job, which will attempt to stop I/O operations sooner than
  // just waiting for the entire job to complete. Safe to call from any thread.
  void Cancel();

  // Checks whether this job has been cancelled. Safe to call from any thread.
  bool IsCancelled();

  // Returns whether this job was successful (we have both verified contents
  // and computed hashes). Even if the job was a success, there might have been
  // files that were found to have contents not matching expectations; these
  // are available by calling hash_mismatch_paths().
  bool success() { return success_; }

  bool force() { return force_; }

  const std::string& extension_id() { return extension_id_; }

  // Returns the set of paths that had a hash mismatch.
  const std::set<base::FilePath>& hash_mismatch_paths() {
    return hash_mismatch_paths_;
  }

 private:
  friend class base::RefCountedThreadSafe<ContentHashFetcherJob>;
  virtual ~ContentHashFetcherJob();

  // Tries to load a verified_contents.json file at |path|. On successfully
  // reading and validating the file, the verified_contents_ member variable
  // will be set and this function will return true. If the file does not
  // exist, or exists but is invalid, it will return false. Also, any invalid
  // file will be deleted from disk.
  bool LoadVerifiedContents(const base::FilePath& path);

  // Callback for when we're done doing file I/O to see if we already have
  // a verified contents file. If we don't, this will kick off a network
  // request to get one.
  void DoneCheckingForVerifiedContents(bool found);

  // URLFetcherDelegate interface
  virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE;

  // Callback for when we're done ensuring we have verified contents, and are
  // ready to move on to MaybeCreateHashes.
  void DoneFetchingVerifiedContents(bool success);

  // Callback for the job to write the verified contents to the filesystem.
  void OnVerifiedContentsWritten(size_t expected_size, int write_result);

  // The verified contents file from the webstore only contains the treehash
  // root hash, but for performance we want to cache the individual block-level
  // hashes. This function will create that cache with block-level hashes for
  // each file in the extension if needed (the treehash root hash for each of
  // these should equal what is in the verified contents file from the
  // webstore).
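  // (For example, with the 4096-byte blocks and 32-byte SHA-256 digests used
  // here, each tree node covers 4096 / 32 = 128 child hashes, so the tree for
  // a typical file is only a level or two deep.)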
  void MaybeCreateHashes();

  // Computes hashes for all files in |extension_path_|, and uses a
  // ComputedHashes::Writer to write that information into
  // |hashes_file|. Returns true on success.
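  // Files with no tree hash root in the verified contents are skipped, and
  // files whose computed root doesn't match are recorded in
  // |hash_mismatch_paths_| rather than failing the whole job.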
  bool CreateHashes(const base::FilePath& hashes_file);

  // Will call the callback, if we haven't been cancelled.
  void DispatchCallback();

  net::URLRequestContextGetter* request_context_;
  std::string extension_id_;
  base::FilePath extension_path_;

  // The url we'll need to use to fetch a verified_contents.json file.
  GURL fetch_url_;

  bool force_;

  CompletionCallback callback_;
  content::BrowserThread::ID creation_thread_;

  // Used for fetching content signatures.
  scoped_ptr<net::URLFetcher> url_fetcher_;

  // The key used to validate verified_contents.json.
  ContentVerifierKey key_;

  // The parsed contents of the verified_contents.json file, either read from
  // disk or fetched from the network and then written to disk.
  scoped_ptr<VerifiedContents> verified_contents_;

  // Whether this job succeeded.
  bool success_;

  // Paths that were found to have a mismatching hash.
  std::set<base::FilePath> hash_mismatch_paths_;

  // The block size to use for hashing.
  int block_size_;

  // Note: this may be accessed from multiple threads, so all access should
  // be protected by |cancelled_lock_|.
  bool cancelled_;

  // A lock for synchronizing access to |cancelled_|.
  base::Lock cancelled_lock_;

  DISALLOW_COPY_AND_ASSIGN(ContentHashFetcherJob);
};

ContentHashFetcherJob::ContentHashFetcherJob(
    net::URLRequestContextGetter* request_context,
    const ContentVerifierKey& key,
    const std::string& extension_id,
    const base::FilePath& extension_path,
    const GURL& fetch_url,
    bool force,
    const CompletionCallback& callback)
    : request_context_(request_context),
      extension_id_(extension_id),
      extension_path_(extension_path),
      fetch_url_(fetch_url),
      force_(force),
      callback_(callback),
      key_(key),
      success_(false),
      // TODO(asargent) - use the value from verified_contents.json for each
      // file, instead of using a constant.
      block_size_(4096),
      cancelled_(false) {
  bool got_id =
      content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_);
  DCHECK(got_id);
}

void ContentHashFetcherJob::Start() {
  base::FilePath verified_contents_path =
      file_util::GetVerifiedContentsPath(extension_path_);
  base::PostTaskAndReplyWithResult(
      content::BrowserThread::GetBlockingPool(),
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::LoadVerifiedContents,
                 this,
                 verified_contents_path),
      base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents,
                 this));
}

void ContentHashFetcherJob::Cancel() {
  base::AutoLock autolock(cancelled_lock_);
  cancelled_ = true;
}

bool ContentHashFetcherJob::IsCancelled() {
  base::AutoLock autolock(cancelled_lock_);
  bool result = cancelled_;
  return result;
}

ContentHashFetcherJob::~ContentHashFetcherJob() {
}

bool ContentHashFetcherJob::LoadVerifiedContents(const base::FilePath& path) {
  if (!base::PathExists(path))
    return false;
  verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
  if (!verified_contents_->InitFrom(path, false)) {
    verified_contents_.reset();
    if (!base::DeleteFile(path, false))
      LOG(WARNING) << "Failed to delete " << path.value();
    return false;
  }
  return true;
}

void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) {
  if (IsCancelled())
    return;
  if (found) {
    VLOG(1) << "Found verified contents for " << extension_id_;
    DoneFetchingVerifiedContents(true);
  } else {
    VLOG(1) << "Missing verified contents for " << extension_id_
            << ", fetching...";
    url_fetcher_.reset(
        net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this));
    url_fetcher_->SetRequestContext(request_context_);
    url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
                               net::LOAD_DO_NOT_SAVE_COOKIES |
                               net::LOAD_DISABLE_CACHE);
    url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3);
    url_fetcher_->Start();
  }
}

// Helper function to let us pass ownership of a string via base::Bind with the
// contents to be written into a file. Also ensures that the directory for
// |path| exists, creating it if needed. Returns the number of bytes written
// (which the caller compares against the expected size), or -1 on failure.
static int WriteFileHelper(const base::FilePath& path,
                           scoped_ptr<std::string> content) {
  base::FilePath dir = path.DirName();
  if (!base::CreateDirectoryAndGetError(dir, NULL))
    return -1;
  return base::WriteFile(path, content->data(), content->size());
}

void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) {
  VLOG(1) << "URLFetchComplete for " << extension_id_
          << " is_success:" << url_fetcher_->GetStatus().is_success() << " "
          << fetch_url_.possibly_invalid_spec();
  if (IsCancelled())
    return;
  scoped_ptr<std::string> response(new std::string);
  if (!url_fetcher_->GetStatus().is_success() ||
      !url_fetcher_->GetResponseAsString(response.get())) {
    DoneFetchingVerifiedContents(false);
    return;
  }

  // Parse the response to make sure it is valid json (on staging sometimes it
  // can be a login redirect html, xml file, etc. if you aren't logged in with
  // the right cookies).  TODO(asargent) - It would be a nice enhancement to
  // move to parsing this in a sandboxed helper (crbug.com/372878).
  scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response));
  if (parsed) {
    VLOG(1) << "JSON parsed ok for " << extension_id_;

    parsed.reset();  // no longer needed
    base::FilePath destination =
        file_util::GetVerifiedContentsPath(extension_path_);
    size_t size = response->size();
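    // |response| is handed off via base::Passed so the string is moved into
    // the file-writing task rather than copied; |size| was grabbed above
    // because |response| can't be used again after this point.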
    base::PostTaskAndReplyWithResult(
        content::BrowserThread::GetBlockingPool(),
        FROM_HERE,
        base::Bind(&WriteFileHelper, destination, base::Passed(&response)),
        base::Bind(
            &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size));
  } else {
    DoneFetchingVerifiedContents(false);
  }
}

void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size,
                                                      int write_result) {
  bool success =
      (write_result >= 0 && static_cast<size_t>(write_result) == expected_size);
  DoneFetchingVerifiedContents(success);
}

void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) {
  if (IsCancelled())
    return;

  if (!success) {
    DispatchCallback();
    return;
  }

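  // Tasks posted with the same sequence token run one at a time, so hash
  // creation for different jobs won't run concurrently in the blocking pool.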
  content::BrowserThread::PostBlockingPoolSequencedTask(
      "ContentHashFetcher",
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this));
}

void ContentHashFetcherJob::MaybeCreateHashes() {
  if (IsCancelled())
    return;
  base::FilePath hashes_file =
      file_util::GetComputedHashesPath(extension_path_);

  if (!force_ && base::PathExists(hashes_file)) {
    success_ = true;
  } else {
    if (force_)
      base::DeleteFile(hashes_file, false /* recursive */);
    success_ = CreateHashes(hashes_file);
  }

  content::BrowserThread::PostTask(
      creation_thread_,
      FROM_HERE,
      base::Bind(&ContentHashFetcherJob::DispatchCallback, this));
}

bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
  base::ElapsedTimer timer;
  if (IsCancelled())
    return false;
  // Make sure the directory exists.
  if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL))
    return false;

  if (!verified_contents_.get()) {
    base::FilePath verified_contents_path =
        file_util::GetVerifiedContentsPath(extension_path_);
    verified_contents_.reset(new VerifiedContents(key_.data, key_.size));
    if (!verified_contents_->InitFrom(verified_contents_path, false)) {
      // Reset only on failure; the contents are dereferenced below.
      verified_contents_.reset();
      return false;
    }
  }

  base::FileEnumerator enumerator(extension_path_,
                                  true, /* recursive */
                                  base::FileEnumerator::FILES);
  // First discover all the file paths and put them in a sorted set.
  SortedFilePathSet paths;
  for (;;) {
    if (IsCancelled())
      return false;

    base::FilePath full_path = enumerator.Next();
    if (full_path.empty())
      break;
    paths.insert(full_path);
  }
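  // (Collecting into a std::set means the paths are visited in sorted order
  // below, so the computed hashes file comes out the same regardless of
  // enumeration order.)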

  // Now iterate over all the paths in sorted order and compute the block
  // hashes for each one.
  ComputedHashes::Writer writer;
  for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) {
    if (IsCancelled())
      return false;
    const base::FilePath& full_path = *i;
    base::FilePath relative_path;
    extension_path_.AppendRelativePath(full_path, &relative_path);
    relative_path = relative_path.NormalizePathSeparatorsTo('/');

    if (!verified_contents_->HasTreeHashRoot(relative_path))
      continue;

    std::string contents;
    if (!base::ReadFileToString(full_path, &contents)) {
      LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
      continue;
    }

    // Hash the file contents in |block_size_| chunks, then combine those
    // block hashes into a tree hash root to compare against the expected
    // root in the verified contents.
    std::vector<std::string> hashes;
    ComputedHashes::ComputeHashesForContent(contents, block_size_, &hashes);
    std::string root =
        ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
    if (!verified_contents_->TreeHashRootEquals(relative_path, root)) {
      VLOG(1) << "content mismatch for " << relative_path.AsUTF8Unsafe();
      hash_mismatch_paths_.insert(relative_path);
      continue;
    }

    writer.AddHashes(relative_path, block_size_, hashes);
  }
  bool result = writer.WriteToFile(hashes_file);
  UMA_HISTOGRAM_TIMES("ExtensionContentHashFetcher.CreateHashesTime",
                      timer.Elapsed());
  return result;
}

void ContentHashFetcherJob::DispatchCallback() {
  {
    base::AutoLock autolock(cancelled_lock_);
    if (cancelled_)
      return;
  }
  callback_.Run(this);
}

// ----

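// A minimal usage sketch (assuming a loaded |extension|, a delegate
// implementation, and a hypothetical OnFetchComplete function matching
// FetchCallback; in practice the fetcher is driven by the content
// verification machinery):
//
//   ContentHashFetcher fetcher(browser_context, verifier_delegate,
//                              base::Bind(&OnFetchComplete));
//   fetcher.ExtensionLoaded(extension);    // starts a fetch/compute job
//   fetcher.ExtensionUnloaded(extension);  // cancels any in-flight job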
ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context,
                                       ContentVerifierDelegate* delegate,
                                       const FetchCallback& callback)
    : context_(context),
      delegate_(delegate),
      fetch_callback_(callback),
      weak_ptr_factory_(this) {
}

ContentHashFetcher::~ContentHashFetcher() {
  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
    i->second->Cancel();
  }
}

void ContentHashFetcher::DoFetch(const Extension* extension, bool force) {
  DCHECK(extension);
  DCHECK(extension->version());

  IdAndVersion key(extension->id(), extension->version()->GetString());
  JobMap::iterator found = jobs_.find(key);
  if (found != jobs_.end()) {
    if (!force || found->second->force()) {
      // Just let the existing job keep running.
      return;
    } else {
      // Kill the existing non-force job, so we can start a new one below.
      found->second->Cancel();
      jobs_.erase(found);
    }
  }

  // TODO(asargent) - we should do something here to remember recent attempts
  // to fetch signatures by extension id, and use exponential backoff to avoid
  // hammering the server when we aren't successful in getting them.
  // crbug.com/373397

  GURL url =
      delegate_->GetSignatureFetchUrl(extension->id(), *extension->version());
  ContentHashFetcherJob* job =
      new ContentHashFetcherJob(context_->GetRequestContext(),
                                delegate_->PublicKey(),
                                extension->id(),
                                extension->path(),
                                url,
                                force,
                                base::Bind(&ContentHashFetcher::JobFinished,
                                           weak_ptr_factory_.GetWeakPtr()));
  jobs_.insert(std::make_pair(key, job));
  job->Start();
}

void ContentHashFetcher::ExtensionLoaded(const Extension* extension) {
  CHECK(extension);
  DoFetch(extension, false);
}

void ContentHashFetcher::ExtensionUnloaded(const Extension* extension) {
  CHECK(extension);
  IdAndVersion key(extension->id(), extension->version()->GetString());
  JobMap::iterator found = jobs_.find(key);
  if (found != jobs_.end()) {
    found->second->Cancel();
    jobs_.erase(found);
  }
}

void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) {
  if (!job->IsCancelled()) {
    fetch_callback_.Run(job->extension_id(),
                        job->success(),
                        job->force(),
                        job->hash_mismatch_paths());
  }

  for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) {
    if (i->second.get() == job) {
      jobs_.erase(i);
      break;
    }
  }
}

}  // namespace extensions