1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "extensions/browser/content_hash_fetcher.h" 6 7 #include <algorithm> 8 9 #include "base/base64.h" 10 #include "base/file_util.h" 11 #include "base/files/file_enumerator.h" 12 #include "base/json/json_reader.h" 13 #include "base/memory/ref_counted.h" 14 #include "base/stl_util.h" 15 #include "base/synchronization/lock.h" 16 #include "base/task_runner_util.h" 17 #include "base/version.h" 18 #include "content/public/browser/browser_context.h" 19 #include "content/public/browser/browser_thread.h" 20 #include "crypto/secure_hash.h" 21 #include "crypto/sha2.h" 22 #include "extensions/browser/computed_hashes.h" 23 #include "extensions/browser/content_hash_tree.h" 24 #include "extensions/browser/extension_registry.h" 25 #include "extensions/browser/verified_contents.h" 26 #include "extensions/common/constants.h" 27 #include "extensions/common/extension.h" 28 #include "extensions/common/file_util.h" 29 #include "net/base/load_flags.h" 30 #include "net/url_request/url_fetcher.h" 31 #include "net/url_request/url_fetcher_delegate.h" 32 #include "net/url_request/url_request_status.h" 33 34 namespace { 35 36 typedef std::set<base::FilePath> SortedFilePathSet; 37 38 } // namespace 39 40 namespace extensions { 41 42 // This class takes care of doing the disk and network I/O work to ensure we 43 // have both verified_contents.json files from the webstore and 44 // computed_hashes.json files computed over the files in an extension's 45 // directory. 46 class ContentHashFetcherJob 47 : public base::RefCountedThreadSafe<ContentHashFetcherJob>, 48 public net::URLFetcherDelegate { 49 public: 50 typedef base::Callback<void(ContentHashFetcherJob*)> CompletionCallback; 51 ContentHashFetcherJob(net::URLRequestContextGetter* request_context, 52 ContentVerifierKey key, 53 const std::string& extension_id, 54 const base::FilePath& extension_path, 55 const GURL& fetch_url, 56 bool force, 57 const CompletionCallback& callback); 58 59 void Start(); 60 61 // Cancels this job, which will attempt to stop I/O operations sooner than 62 // just waiting for the entire job to complete. Safe to call from any thread. 63 void Cancel(); 64 65 // Checks whether this job has been cancelled. Safe to call from any thread. 66 bool IsCancelled(); 67 68 // Returns whether this job was successful (we have both verified contents 69 // and computed hashes). Even if the job was a success, there might have been 70 // files that were found to have contents not matching expectations; these 71 // are available by calling hash_mismatch_paths(). 72 bool success() { return success_; } 73 74 bool force() { return force_; } 75 76 const std::string& extension_id() { return extension_id_; } 77 78 // Returns the set of paths that had a hash mismatch. 79 const std::set<base::FilePath>& hash_mismatch_paths() { 80 return hash_mismatch_paths_; 81 } 82 83 private: 84 friend class base::RefCountedThreadSafe<ContentHashFetcherJob>; 85 virtual ~ContentHashFetcherJob(); 86 87 // Callback for when we're done doing file I/O to see if we already have 88 // a verified contents file. If we don't, this will kick off a network 89 // request to get one. 90 void DoneCheckingForVerifiedContents(bool found); 91 92 // URLFetcherDelegate interface 93 virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE; 94 95 // Callback for when we're done ensuring we have verified contents, and are 96 // ready to move on to MaybeCreateHashes. 97 void DoneFetchingVerifiedContents(bool success); 98 99 // Callback for the job to write the verified contents to the filesystem. 100 void OnVerifiedContentsWritten(size_t expected_size, int write_result); 101 102 // The verified contents file from the webstore only contains the treehash 103 // root hash, but for performance we want to cache the individual block level 104 // hashes. This function will create that cache with block-level hashes for 105 // each file in the extension if needed (the treehash root hash for each of 106 // these should equal what is in the verified contents file from the 107 // webstore). 108 void MaybeCreateHashes(); 109 110 // Computes hashes for all files in |extension_path_|, and uses a 111 // ComputedHashes::Writer to write that information into 112 // |hashes_file|. Returns true on success. 113 bool CreateHashes(const base::FilePath& hashes_file); 114 115 // Will call the callback, if we haven't been cancelled. 116 void DispatchCallback(); 117 118 net::URLRequestContextGetter* request_context_; 119 std::string extension_id_; 120 base::FilePath extension_path_; 121 122 // The url we'll need to use to fetch a verified_contents.json file. 123 GURL fetch_url_; 124 125 bool force_; 126 127 CompletionCallback callback_; 128 content::BrowserThread::ID creation_thread_; 129 130 // Used for fetching content signatures. 131 scoped_ptr<net::URLFetcher> url_fetcher_; 132 133 // The key used to validate verified_contents.json. 134 ContentVerifierKey key_; 135 136 // Whether this job succeeded. 137 bool success_; 138 139 // Paths that were found to have a mismatching hash. 140 std::set<base::FilePath> hash_mismatch_paths_; 141 142 // The block size to use for hashing. 143 int block_size_; 144 145 // Note: this may be accessed from multiple threads, so all access should 146 // be protected by |cancelled_lock_|. 147 bool cancelled_; 148 149 // A lock for synchronizing access to |cancelled_|. 150 base::Lock cancelled_lock_; 151 152 DISALLOW_COPY_AND_ASSIGN(ContentHashFetcherJob); 153 }; 154 155 ContentHashFetcherJob::ContentHashFetcherJob( 156 net::URLRequestContextGetter* request_context, 157 ContentVerifierKey key, 158 const std::string& extension_id, 159 const base::FilePath& extension_path, 160 const GURL& fetch_url, 161 bool force, 162 const CompletionCallback& callback) 163 : request_context_(request_context), 164 extension_id_(extension_id), 165 extension_path_(extension_path), 166 fetch_url_(fetch_url), 167 force_(force), 168 callback_(callback), 169 key_(key), 170 success_(false), 171 // TODO(asargent) - use the value from verified_contents.json for each 172 // file, instead of using a constant. 173 block_size_(4096), 174 cancelled_(false) { 175 bool got_id = 176 content::BrowserThread::GetCurrentThreadIdentifier(&creation_thread_); 177 DCHECK(got_id); 178 } 179 180 void ContentHashFetcherJob::Start() { 181 base::FilePath verified_contents_path = 182 file_util::GetVerifiedContentsPath(extension_path_); 183 base::PostTaskAndReplyWithResult( 184 content::BrowserThread::GetBlockingPool(), 185 FROM_HERE, 186 base::Bind(&base::PathExists, verified_contents_path), 187 base::Bind(&ContentHashFetcherJob::DoneCheckingForVerifiedContents, 188 this)); 189 } 190 191 void ContentHashFetcherJob::Cancel() { 192 base::AutoLock autolock(cancelled_lock_); 193 cancelled_ = true; 194 } 195 196 bool ContentHashFetcherJob::IsCancelled() { 197 base::AutoLock autolock(cancelled_lock_); 198 bool result = cancelled_; 199 return result; 200 } 201 202 ContentHashFetcherJob::~ContentHashFetcherJob() { 203 } 204 205 void ContentHashFetcherJob::DoneCheckingForVerifiedContents(bool found) { 206 if (IsCancelled()) 207 return; 208 if (found) { 209 VLOG(1) << "Found verified contents for " << extension_id_; 210 DoneFetchingVerifiedContents(true); 211 } else { 212 VLOG(1) << "Missing verified contents for " << extension_id_ 213 << ", fetching..."; 214 url_fetcher_.reset( 215 net::URLFetcher::Create(fetch_url_, net::URLFetcher::GET, this)); 216 url_fetcher_->SetRequestContext(request_context_); 217 url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES | 218 net::LOAD_DO_NOT_SAVE_COOKIES | 219 net::LOAD_DISABLE_CACHE); 220 url_fetcher_->SetAutomaticallyRetryOnNetworkChanges(3); 221 url_fetcher_->Start(); 222 } 223 } 224 225 // Helper function to let us pass ownership of a string via base::Bind with the 226 // contents to be written into a file. Also ensures that the directory for 227 // |path| exists, creating it if needed. 228 static int WriteFileHelper(const base::FilePath& path, 229 scoped_ptr<std::string> content) { 230 base::FilePath dir = path.DirName(); 231 return (base::CreateDirectoryAndGetError(dir, NULL) && 232 base::WriteFile(path, content->data(), content->size())); 233 } 234 235 void ContentHashFetcherJob::OnURLFetchComplete(const net::URLFetcher* source) { 236 VLOG(1) << "URLFetchComplete for " << extension_id_ 237 << " is_success:" << url_fetcher_->GetStatus().is_success() << " " 238 << fetch_url_.possibly_invalid_spec(); 239 if (IsCancelled()) 240 return; 241 scoped_ptr<std::string> response(new std::string); 242 if (!url_fetcher_->GetStatus().is_success() || 243 !url_fetcher_->GetResponseAsString(response.get())) { 244 DoneFetchingVerifiedContents(false); 245 return; 246 } 247 248 // Parse the response to make sure it is valid json (on staging sometimes it 249 // can be a login redirect html, xml file, etc. if you aren't logged in with 250 // the right cookies). TODO(asargent) - It would be a nice enhancement to 251 // move to parsing this in a sandboxed helper (crbug.com/372878). 252 scoped_ptr<base::Value> parsed(base::JSONReader::Read(*response)); 253 if (parsed) { 254 VLOG(1) << "JSON parsed ok for " << extension_id_; 255 256 parsed.reset(); // no longer needed 257 base::FilePath destination = 258 file_util::GetVerifiedContentsPath(extension_path_); 259 size_t size = response->size(); 260 base::PostTaskAndReplyWithResult( 261 content::BrowserThread::GetBlockingPool(), 262 FROM_HERE, 263 base::Bind(&WriteFileHelper, destination, base::Passed(&response)), 264 base::Bind( 265 &ContentHashFetcherJob::OnVerifiedContentsWritten, this, size)); 266 } else { 267 DoneFetchingVerifiedContents(false); 268 } 269 } 270 271 void ContentHashFetcherJob::OnVerifiedContentsWritten(size_t expected_size, 272 int write_result) { 273 bool success = 274 (write_result >= 0 && static_cast<size_t>(write_result) == expected_size); 275 DoneFetchingVerifiedContents(success); 276 } 277 278 void ContentHashFetcherJob::DoneFetchingVerifiedContents(bool success) { 279 if (IsCancelled()) 280 return; 281 282 if (!success) { 283 DispatchCallback(); 284 return; 285 } 286 287 content::BrowserThread::PostBlockingPoolSequencedTask( 288 "ContentHashFetcher", 289 FROM_HERE, 290 base::Bind(&ContentHashFetcherJob::MaybeCreateHashes, this)); 291 } 292 293 void ContentHashFetcherJob::MaybeCreateHashes() { 294 if (IsCancelled()) 295 return; 296 base::FilePath hashes_file = 297 file_util::GetComputedHashesPath(extension_path_); 298 299 if (!force_ && base::PathExists(hashes_file)) { 300 success_ = true; 301 } else { 302 if (force_) 303 base::DeleteFile(hashes_file, false /* recursive */); 304 success_ = CreateHashes(hashes_file); 305 } 306 307 content::BrowserThread::PostTask( 308 creation_thread_, 309 FROM_HERE, 310 base::Bind(&ContentHashFetcherJob::DispatchCallback, this)); 311 } 312 313 bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) { 314 if (IsCancelled()) 315 return false; 316 // Make sure the directory exists. 317 if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), NULL)) 318 return false; 319 320 base::FilePath verified_contents_path = 321 file_util::GetVerifiedContentsPath(extension_path_); 322 VerifiedContents verified_contents(key_.data, key_.size); 323 if (!verified_contents.InitFrom(verified_contents_path, false)) 324 return false; 325 326 base::FileEnumerator enumerator(extension_path_, 327 true, /* recursive */ 328 base::FileEnumerator::FILES); 329 // First discover all the file paths and put them in a sorted set. 330 SortedFilePathSet paths; 331 for (;;) { 332 if (IsCancelled()) 333 return false; 334 335 base::FilePath full_path = enumerator.Next(); 336 if (full_path.empty()) 337 break; 338 paths.insert(full_path); 339 } 340 341 // Now iterate over all the paths in sorted order and compute the block hashes 342 // for each one. 343 ComputedHashes::Writer writer; 344 for (SortedFilePathSet::iterator i = paths.begin(); i != paths.end(); ++i) { 345 if (IsCancelled()) 346 return false; 347 const base::FilePath& full_path = *i; 348 base::FilePath relative_path; 349 extension_path_.AppendRelativePath(full_path, &relative_path); 350 351 const std::string* expected_root = 352 verified_contents.GetTreeHashRoot(relative_path); 353 if (!expected_root) 354 continue; 355 356 std::string contents; 357 if (!base::ReadFileToString(full_path, &contents)) { 358 LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII(); 359 continue; 360 } 361 362 // Iterate through taking the hash of each block of size (block_size_) of 363 // the file. 364 std::vector<std::string> hashes; 365 size_t offset = 0; 366 while (offset < contents.size()) { 367 if (IsCancelled()) 368 return false; 369 const char* block_start = contents.data() + offset; 370 size_t bytes_to_read = 371 std::min(contents.size() - offset, static_cast<size_t>(block_size_)); 372 DCHECK(bytes_to_read > 0); 373 scoped_ptr<crypto::SecureHash> hash( 374 crypto::SecureHash::Create(crypto::SecureHash::SHA256)); 375 hash->Update(block_start, bytes_to_read); 376 377 hashes.push_back(std::string()); 378 std::string* buffer = &hashes.back(); 379 buffer->resize(crypto::kSHA256Length); 380 hash->Finish(string_as_array(buffer), buffer->size()); 381 382 // Get ready for next iteration. 383 offset += bytes_to_read; 384 } 385 std::string root = 386 ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length); 387 if (expected_root && *expected_root != root) { 388 VLOG(1) << "content mismatch for " << relative_path.AsUTF8Unsafe(); 389 hash_mismatch_paths_.insert(relative_path); 390 continue; 391 } 392 393 writer.AddHashes(relative_path, block_size_, hashes); 394 } 395 return writer.WriteToFile(hashes_file); 396 } 397 398 void ContentHashFetcherJob::DispatchCallback() { 399 { 400 base::AutoLock autolock(cancelled_lock_); 401 if (cancelled_) 402 return; 403 } 404 callback_.Run(this); 405 } 406 407 // ---- 408 409 ContentHashFetcher::ContentHashFetcher(content::BrowserContext* context, 410 ContentVerifierDelegate* delegate, 411 const FetchCallback& callback) 412 : context_(context), 413 delegate_(delegate), 414 fetch_callback_(callback), 415 observer_(this), 416 weak_ptr_factory_(this) { 417 } 418 419 ContentHashFetcher::~ContentHashFetcher() { 420 for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) { 421 i->second->Cancel(); 422 } 423 } 424 425 void ContentHashFetcher::Start() { 426 ExtensionRegistry* registry = ExtensionRegistry::Get(context_); 427 observer_.Add(registry); 428 } 429 430 void ContentHashFetcher::DoFetch(const Extension* extension, bool force) { 431 if (!extension || !delegate_->ShouldBeVerified(*extension)) 432 return; 433 434 IdAndVersion key(extension->id(), extension->version()->GetString()); 435 JobMap::iterator found = jobs_.find(key); 436 if (found != jobs_.end()) { 437 if (!force || found->second->force()) { 438 // Just let the existing job keep running. 439 return; 440 } else { 441 // Kill the existing non-force job, so we can start a new one below. 442 found->second->Cancel(); 443 jobs_.erase(found); 444 } 445 } 446 447 // TODO(asargent) - we should do something here to remember recent attempts 448 // to fetch signatures by extension id, and use exponential backoff to avoid 449 // hammering the server when we aren't successful in getting them. 450 // crbug.com/373397 451 452 DCHECK(extension->version()); 453 GURL url = 454 delegate_->GetSignatureFetchUrl(extension->id(), *extension->version()); 455 ContentHashFetcherJob* job = 456 new ContentHashFetcherJob(context_->GetRequestContext(), 457 delegate_->PublicKey(), 458 extension->id(), 459 extension->path(), 460 url, 461 force, 462 base::Bind(&ContentHashFetcher::JobFinished, 463 weak_ptr_factory_.GetWeakPtr())); 464 jobs_.insert(std::make_pair(key, job)); 465 job->Start(); 466 } 467 468 void ContentHashFetcher::OnExtensionLoaded( 469 content::BrowserContext* browser_context, 470 const Extension* extension) { 471 CHECK(extension); 472 DoFetch(extension, false); 473 } 474 475 void ContentHashFetcher::OnExtensionUnloaded( 476 content::BrowserContext* browser_context, 477 const Extension* extension, 478 UnloadedExtensionInfo::Reason reason) { 479 CHECK(extension); 480 IdAndVersion key(extension->id(), extension->version()->GetString()); 481 JobMap::iterator found = jobs_.find(key); 482 if (found != jobs_.end()) { 483 found->second->Cancel(); 484 jobs_.erase(found); 485 } 486 } 487 488 void ContentHashFetcher::JobFinished(ContentHashFetcherJob* job) { 489 if (!job->IsCancelled()) { 490 fetch_callback_.Run(job->extension_id(), 491 job->success(), 492 job->force(), 493 job->hash_mismatch_paths()); 494 } 495 496 for (JobMap::iterator i = jobs_.begin(); i != jobs_.end(); ++i) { 497 if (i->second.get() == job) { 498 jobs_.erase(i); 499 break; 500 } 501 } 502 } 503 504 } // namespace extensions 505