1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/browser/storage_partition_impl_map.h" 6 7 #include "base/bind.h" 8 #include "base/callback.h" 9 #include "base/file_util.h" 10 #include "base/files/file_enumerator.h" 11 #include "base/files/file_path.h" 12 #include "base/stl_util.h" 13 #include "base/strings/string_number_conversions.h" 14 #include "base/strings/string_util.h" 15 #include "base/strings/stringprintf.h" 16 #include "base/threading/sequenced_worker_pool.h" 17 #include "content/browser/appcache/chrome_appcache_service.h" 18 #include "content/browser/fileapi/browser_file_system_helper.h" 19 #include "content/browser/fileapi/chrome_blob_storage_context.h" 20 #include "content/browser/loader/resource_request_info_impl.h" 21 #include "content/browser/resource_context_impl.h" 22 #include "content/browser/storage_partition_impl.h" 23 #include "content/browser/streams/stream.h" 24 #include "content/browser/streams/stream_context.h" 25 #include "content/browser/streams/stream_registry.h" 26 #include "content/browser/streams/stream_url_request_job.h" 27 #include "content/browser/webui/url_data_manager_backend.h" 28 #include "content/public/browser/browser_context.h" 29 #include "content/public/browser/browser_thread.h" 30 #include "content/public/browser/content_browser_client.h" 31 #include "content/public/browser/storage_partition.h" 32 #include "content/public/common/content_constants.h" 33 #include "content/public/common/url_constants.h" 34 #include "crypto/sha2.h" 35 #include "net/url_request/url_request_context.h" 36 #include "net/url_request/url_request_context_getter.h" 37 #include "webkit/browser/blob/blob_storage_context.h" 38 #include "webkit/browser/blob/blob_url_request_job_factory.h" 39 #include "webkit/browser/fileapi/file_system_url_request_job_factory.h" 40 #include "webkit/common/blob/blob_data.h" 
41 42 using appcache::AppCacheService; 43 using fileapi::FileSystemContext; 44 using webkit_blob::BlobStorageContext; 45 46 namespace content { 47 48 namespace { 49 50 // A derivative that knows about Streams too. 51 class BlobProtocolHandler : public net::URLRequestJobFactory::ProtocolHandler { 52 public: 53 BlobProtocolHandler(ChromeBlobStorageContext* blob_storage_context, 54 StreamContext* stream_context, 55 fileapi::FileSystemContext* file_system_context) 56 : blob_storage_context_(blob_storage_context), 57 stream_context_(stream_context), 58 file_system_context_(file_system_context) { 59 } 60 61 virtual ~BlobProtocolHandler() { 62 } 63 64 virtual net::URLRequestJob* MaybeCreateJob( 65 net::URLRequest* request, 66 net::NetworkDelegate* network_delegate) const OVERRIDE { 67 scoped_refptr<Stream> stream = 68 stream_context_->registry()->GetStream(request->url()); 69 if (stream.get()) 70 return new StreamURLRequestJob(request, network_delegate, stream); 71 72 if (!blob_protocol_handler_) { 73 // Construction is deferred because 'this' is constructed on 74 // the main thread but we want blob_protocol_handler_ constructed 75 // on the IO thread. 76 blob_protocol_handler_.reset( 77 new webkit_blob::BlobProtocolHandler( 78 blob_storage_context_->context(), 79 file_system_context_, 80 BrowserThread::GetMessageLoopProxyForThread( 81 BrowserThread::FILE).get())); 82 } 83 return blob_protocol_handler_->MaybeCreateJob(request, network_delegate); 84 } 85 86 private: 87 const scoped_refptr<ChromeBlobStorageContext> blob_storage_context_; 88 const scoped_refptr<StreamContext> stream_context_; 89 const scoped_refptr<fileapi::FileSystemContext> file_system_context_; 90 mutable scoped_ptr<webkit_blob::BlobProtocolHandler> blob_protocol_handler_; 91 DISALLOW_COPY_AND_ASSIGN(BlobProtocolHandler); 92 }; 93 94 // These constants are used to create the directory structure under the profile 95 // where renderers with a non-default storage partition keep their persistent 96 // state. 
// This will contain a set of directories that partially mirror the
// directory structure of BrowserContext::GetPath().
//
// The kStoragePartitionDirname contains an extensions directory which is
// further partitioned by extension id, followed by another level of directories
// for the "default" extension storage partition and one directory for each
// persistent partition used by a webview tag.  Example:
//
//   Storage/ext/ABCDEF/def
//   Storage/ext/ABCDEF/hash(partition name)
//
// The code in GetStoragePartitionPath() constructs these path names.
//
// TODO(nasko): Move extension related path code out of content.
const base::FilePath::CharType kStoragePartitionDirname[] =
    FILE_PATH_LITERAL("Storage");
const base::FilePath::CharType kExtensionsDirname[] =
    FILE_PATH_LITERAL("ext");
const base::FilePath::CharType kDefaultPartitionDirname[] =
    FILE_PATH_LITERAL("def");
// Name prefix handed to CreateTemporaryDirInDir() when garbage collection
// creates its per-run trash directory.
const base::FilePath::CharType kTrashDirname[] =
    FILE_PATH_LITERAL("trash");

// Because partition names are user specified, they can be arbitrarily long
// which makes them unsuitable for path names. We use a truncation of a
// SHA256 hash to perform a deterministic shortening of the string. The
// kPartitionNameHashBytes constant controls the length of the truncation.
// We use 6 bytes, which gives us 99.999% reliability against collisions over
// 1 million partition domains.
//
// Analysis:
// We assume that all partition names within one partition domain are
// controlled by the same entity. Thus there is no chance for adversarial
// attack and all we care about is accidental collision.
To get 5 9s over 130 // 1 million domains, we need the probability of a collision in any one domain 131 // to be 132 // 133 // p < nroot(1000000, .99999) ~= 10^-11 134 // 135 // We use the following birthday attack approximation to calculate the max 136 // number of unique names for this probability: 137 // 138 // n(p,H) = sqrt(2*H * ln(1/(1-p))) 139 // 140 // For a 6-byte hash, H = 2^(6*8). n(10^-11, H) ~= 75 141 // 142 // An average partition domain is likely to have less than 10 unique 143 // partition names which is far lower than 75. 144 // 145 // Note, that for 4 9s of reliability, the limit is 237 partition names per 146 // partition domain. 147 const int kPartitionNameHashBytes = 6; 148 149 // Needed for selecting all files in ObliterateOneDirectory() below. 150 #if defined(OS_POSIX) 151 const int kAllFileTypes = base::FileEnumerator::FILES | 152 base::FileEnumerator::DIRECTORIES | 153 base::FileEnumerator::SHOW_SYM_LINKS; 154 #else 155 const int kAllFileTypes = base::FileEnumerator::FILES | 156 base::FileEnumerator::DIRECTORIES; 157 #endif 158 159 base::FilePath GetStoragePartitionDomainPath( 160 const std::string& partition_domain) { 161 CHECK(IsStringUTF8(partition_domain)); 162 163 return base::FilePath(kStoragePartitionDirname).Append(kExtensionsDirname) 164 .Append(base::FilePath::FromUTF8Unsafe(partition_domain)); 165 } 166 167 // Helper function for doing a depth-first deletion of the data on disk. 168 // Examines paths directly in |current_dir| (no recursion) and tries to 169 // delete from disk anything that is in, or isn't a parent of something in 170 // |paths_to_keep|. Paths that need further expansion are added to 171 // |paths_to_consider|. 
172 void ObliterateOneDirectory(const base::FilePath& current_dir, 173 const std::vector<base::FilePath>& paths_to_keep, 174 std::vector<base::FilePath>* paths_to_consider) { 175 CHECK(current_dir.IsAbsolute()); 176 177 base::FileEnumerator enumerator(current_dir, false, kAllFileTypes); 178 for (base::FilePath to_delete = enumerator.Next(); !to_delete.empty(); 179 to_delete = enumerator.Next()) { 180 // Enum tracking which of the 3 possible actions to take for |to_delete|. 181 enum { kSkip, kEnqueue, kDelete } action = kDelete; 182 183 for (std::vector<base::FilePath>::const_iterator to_keep = 184 paths_to_keep.begin(); 185 to_keep != paths_to_keep.end(); 186 ++to_keep) { 187 if (to_delete == *to_keep) { 188 action = kSkip; 189 break; 190 } else if (to_delete.IsParent(*to_keep)) { 191 // |to_delete| contains a path to keep. Add to stack for further 192 // processing. 193 action = kEnqueue; 194 break; 195 } 196 } 197 198 switch (action) { 199 case kDelete: 200 base::DeleteFile(to_delete, true); 201 break; 202 203 case kEnqueue: 204 paths_to_consider->push_back(to_delete); 205 break; 206 207 case kSkip: 208 break; 209 } 210 } 211 } 212 213 // Synchronously attempts to delete |unnormalized_root|, preserving only 214 // entries in |paths_to_keep|. If there are no entries in |paths_to_keep| on 215 // disk, then it completely removes |unnormalized_root|. All paths must be 216 // absolute paths. 217 void BlockingObliteratePath( 218 const base::FilePath& unnormalized_browser_context_root, 219 const base::FilePath& unnormalized_root, 220 const std::vector<base::FilePath>& paths_to_keep, 221 const scoped_refptr<base::TaskRunner>& closure_runner, 222 const base::Closure& on_gc_required) { 223 // Early exit required because MakeAbsoluteFilePath() will fail on POSIX 224 // if |unnormalized_root| does not exist. This is safe because there is 225 // nothing to do in this situation anwyays. 
226 if (!base::PathExists(unnormalized_root)) { 227 return; 228 } 229 230 // Never try to obliterate things outside of the browser context root or the 231 // browser context root itself. Die hard. 232 base::FilePath root = base::MakeAbsoluteFilePath(unnormalized_root); 233 base::FilePath browser_context_root = 234 base::MakeAbsoluteFilePath(unnormalized_browser_context_root); 235 CHECK(!root.empty()); 236 CHECK(!browser_context_root.empty()); 237 CHECK(browser_context_root.IsParent(root) && browser_context_root != root); 238 239 // Reduce |paths_to_keep| set to those under the root and actually on disk. 240 std::vector<base::FilePath> valid_paths_to_keep; 241 for (std::vector<base::FilePath>::const_iterator it = paths_to_keep.begin(); 242 it != paths_to_keep.end(); 243 ++it) { 244 if (root.IsParent(*it) && base::PathExists(*it)) 245 valid_paths_to_keep.push_back(*it); 246 } 247 248 // If none of the |paths_to_keep| are valid anymore then we just whack the 249 // root and be done with it. Otherwise, signal garbage collection and do 250 // a best-effort delete of the on-disk structures. 251 if (valid_paths_to_keep.empty()) { 252 base::DeleteFile(root, true); 253 return; 254 } 255 closure_runner->PostTask(FROM_HERE, on_gc_required); 256 257 // Otherwise, start at the root and delete everything that is not in 258 // |valid_paths_to_keep|. 259 std::vector<base::FilePath> paths_to_consider; 260 paths_to_consider.push_back(root); 261 while(!paths_to_consider.empty()) { 262 base::FilePath path = paths_to_consider.back(); 263 paths_to_consider.pop_back(); 264 ObliterateOneDirectory(path, valid_paths_to_keep, &paths_to_consider); 265 } 266 } 267 268 // Deletes all entries inside the |storage_root| that are not in the 269 // |active_paths|. Deletion is done in 2 steps: 270 // 271 // (1) Moving all garbage collected paths into a trash directory. 272 // (2) Asynchronously deleting the trash directory. 
273 // 274 // The deletion is asynchronous because after (1) completes, calling code can 275 // safely continue to use the paths that had just been garbage collected 276 // without fear of race conditions. 277 // 278 // This code also ignores failed moves rather than attempting a smarter retry. 279 // Moves shouldn't fail here unless there is some out-of-band error (eg., 280 // FS corruption). Retry logic is dangerous in the general case because 281 // there is not necessarily a guaranteed case where the logic may succeed. 282 // 283 // This function is still named BlockingGarbageCollect() because it does 284 // execute a few filesystem operations synchronously. 285 void BlockingGarbageCollect( 286 const base::FilePath& storage_root, 287 const scoped_refptr<base::TaskRunner>& file_access_runner, 288 scoped_ptr<base::hash_set<base::FilePath> > active_paths) { 289 CHECK(storage_root.IsAbsolute()); 290 291 base::FileEnumerator enumerator(storage_root, false, kAllFileTypes); 292 base::FilePath trash_directory; 293 if (!base::CreateTemporaryDirInDir(storage_root, kTrashDirname, 294 &trash_directory)) { 295 // Unable to continue without creating the trash directory so give up. 296 return; 297 } 298 for (base::FilePath path = enumerator.Next(); !path.empty(); 299 path = enumerator.Next()) { 300 if (active_paths->find(path) == active_paths->end() && 301 path != trash_directory) { 302 // Since |trash_directory| is unique for each run of this function there 303 // can be no colllisions on the move. 
304 base::Move(path, trash_directory.Append(path.BaseName())); 305 } 306 } 307 308 file_access_runner->PostTask( 309 FROM_HERE, 310 base::Bind(base::IgnoreResult(&base::DeleteFile), trash_directory, true)); 311 } 312 313 } // namespace 314 315 // static 316 base::FilePath StoragePartitionImplMap::GetStoragePartitionPath( 317 const std::string& partition_domain, 318 const std::string& partition_name) { 319 if (partition_domain.empty()) 320 return base::FilePath(); 321 322 base::FilePath path = GetStoragePartitionDomainPath(partition_domain); 323 324 // TODO(ajwong): Mangle in-memory into this somehow, either by putting 325 // it into the partition_name, or by manually adding another path component 326 // here. Otherwise, it's possible to have an in-memory StoragePartition and 327 // a persistent one that return the same FilePath for GetPath(). 328 if (!partition_name.empty()) { 329 // For analysis of why we can ignore collisions, see the comment above 330 // kPartitionNameHashBytes. 331 char buffer[kPartitionNameHashBytes]; 332 crypto::SHA256HashString(partition_name, &buffer[0], 333 sizeof(buffer)); 334 return path.AppendASCII(base::HexEncode(buffer, sizeof(buffer))); 335 } 336 337 return path.Append(kDefaultPartitionDirname); 338 } 339 340 StoragePartitionImplMap::StoragePartitionImplMap( 341 BrowserContext* browser_context) 342 : browser_context_(browser_context), 343 resource_context_initialized_(false) { 344 // Doing here instead of initializer list cause it's just too ugly to read. 
345 base::SequencedWorkerPool* blocking_pool = BrowserThread::GetBlockingPool(); 346 file_access_runner_ = 347 blocking_pool->GetSequencedTaskRunner(blocking_pool->GetSequenceToken()); 348 } 349 350 StoragePartitionImplMap::~StoragePartitionImplMap() { 351 STLDeleteContainerPairSecondPointers(partitions_.begin(), 352 partitions_.end()); 353 } 354 355 StoragePartitionImpl* StoragePartitionImplMap::Get( 356 const std::string& partition_domain, 357 const std::string& partition_name, 358 bool in_memory) { 359 // Find the previously created partition if it's available. 360 StoragePartitionConfig partition_config( 361 partition_domain, partition_name, in_memory); 362 363 PartitionMap::const_iterator it = partitions_.find(partition_config); 364 if (it != partitions_.end()) 365 return it->second; 366 367 base::FilePath partition_path = 368 browser_context_->GetPath().Append( 369 GetStoragePartitionPath(partition_domain, partition_name)); 370 StoragePartitionImpl* partition = 371 StoragePartitionImpl::Create(browser_context_, in_memory, 372 partition_path); 373 partitions_[partition_config] = partition; 374 375 ChromeBlobStorageContext* blob_storage_context = 376 ChromeBlobStorageContext::GetFor(browser_context_); 377 StreamContext* stream_context = StreamContext::GetFor(browser_context_); 378 ProtocolHandlerMap protocol_handlers; 379 protocol_handlers[chrome::kBlobScheme] = 380 linked_ptr<net::URLRequestJobFactory::ProtocolHandler>( 381 new BlobProtocolHandler(blob_storage_context, 382 stream_context, 383 partition->GetFileSystemContext())); 384 protocol_handlers[chrome::kFileSystemScheme] = 385 linked_ptr<net::URLRequestJobFactory::ProtocolHandler>( 386 CreateFileSystemProtocolHandler(partition->GetFileSystemContext())); 387 protocol_handlers[chrome::kChromeUIScheme] = 388 linked_ptr<net::URLRequestJobFactory::ProtocolHandler>( 389 URLDataManagerBackend::CreateProtocolHandler( 390 browser_context_->GetResourceContext(), 391 browser_context_->IsOffTheRecord(), 392 
partition->GetAppCacheService(), 393 blob_storage_context)); 394 std::vector<std::string> additional_webui_schemes; 395 GetContentClient()->browser()->GetAdditionalWebUISchemes( 396 &additional_webui_schemes); 397 for (std::vector<std::string>::const_iterator it = 398 additional_webui_schemes.begin(); 399 it != additional_webui_schemes.end(); 400 ++it) { 401 protocol_handlers[*it] = 402 linked_ptr<net::URLRequestJobFactory::ProtocolHandler>( 403 URLDataManagerBackend::CreateProtocolHandler( 404 browser_context_->GetResourceContext(), 405 browser_context_->IsOffTheRecord(), 406 partition->GetAppCacheService(), 407 blob_storage_context)); 408 } 409 protocol_handlers[chrome::kChromeDevToolsScheme] = 410 linked_ptr<net::URLRequestJobFactory::ProtocolHandler>( 411 CreateDevToolsProtocolHandler(browser_context_->GetResourceContext(), 412 browser_context_->IsOffTheRecord())); 413 414 // These calls must happen after StoragePartitionImpl::Create(). 415 if (partition_domain.empty()) { 416 partition->SetURLRequestContext( 417 GetContentClient()->browser()->CreateRequestContext( 418 browser_context_, 419 &protocol_handlers)); 420 } else { 421 partition->SetURLRequestContext( 422 GetContentClient()->browser()->CreateRequestContextForStoragePartition( 423 browser_context_, partition->GetPath(), in_memory, 424 &protocol_handlers)); 425 } 426 partition->SetMediaURLRequestContext( 427 partition_domain.empty() ? 428 browser_context_->GetMediaRequestContext() : 429 browser_context_->GetMediaRequestContextForStoragePartition( 430 partition->GetPath(), in_memory)); 431 432 PostCreateInitialization(partition, in_memory); 433 434 return partition; 435 } 436 437 void StoragePartitionImplMap::AsyncObliterate( 438 const GURL& site, 439 const base::Closure& on_gc_required) { 440 // This method should avoid creating any StoragePartition (which would 441 // create more open file handles) so that it can delete as much of the 442 // data off disk as possible. 
443 std::string partition_domain; 444 std::string partition_name; 445 bool in_memory = false; 446 GetContentClient()->browser()->GetStoragePartitionConfigForSite( 447 browser_context_, site, false, &partition_domain, 448 &partition_name, &in_memory); 449 450 // Find the active partitions for the domain. Because these partitions are 451 // active, it is not possible to just delete the directories that contain 452 // the backing data structures without causing the browser to crash. Instead, 453 // of deleteing the directory, we tell each storage context later to 454 // remove any data they have saved. This will leave the directory structure 455 // intact but it will only contain empty databases. 456 std::vector<StoragePartitionImpl*> active_partitions; 457 std::vector<base::FilePath> paths_to_keep; 458 for (PartitionMap::const_iterator it = partitions_.begin(); 459 it != partitions_.end(); 460 ++it) { 461 const StoragePartitionConfig& config = it->first; 462 if (config.partition_domain == partition_domain) { 463 it->second->ClearData( 464 // All except shader cache. 465 StoragePartition::REMOVE_DATA_MASK_ALL & 466 (~StoragePartition::REMOVE_DATA_MASK_SHADER_CACHE), 467 StoragePartition::QUOTA_MANAGED_STORAGE_MASK_ALL, 468 NULL, 469 StoragePartition::OriginMatcherFunction(), 470 base::Time(), base::Time::Max(), 471 base::Bind(&base::DoNothing)); 472 if (!config.in_memory) { 473 paths_to_keep.push_back(it->second->GetPath()); 474 } 475 } 476 } 477 478 // Start a best-effort delete of the on-disk storage excluding paths that are 479 // known to still be in use. This is to delete any previously created 480 // StoragePartition state that just happens to not have been used during this 481 // run of the browser. 
482 base::FilePath domain_root = browser_context_->GetPath().Append( 483 GetStoragePartitionDomainPath(partition_domain)); 484 485 BrowserThread::PostBlockingPoolTask( 486 FROM_HERE, 487 base::Bind(&BlockingObliteratePath, browser_context_->GetPath(), 488 domain_root, paths_to_keep, 489 base::MessageLoopProxy::current(), on_gc_required)); 490 } 491 492 void StoragePartitionImplMap::GarbageCollect( 493 scoped_ptr<base::hash_set<base::FilePath> > active_paths, 494 const base::Closure& done) { 495 // Include all paths for current StoragePartitions in the active_paths since 496 // they cannot be deleted safely. 497 for (PartitionMap::const_iterator it = partitions_.begin(); 498 it != partitions_.end(); 499 ++it) { 500 const StoragePartitionConfig& config = it->first; 501 if (!config.in_memory) 502 active_paths->insert(it->second->GetPath()); 503 } 504 505 // Find the directory holding the StoragePartitions and delete everything in 506 // there that isn't considered active. 507 base::FilePath storage_root = browser_context_->GetPath().Append( 508 GetStoragePartitionDomainPath(std::string())); 509 file_access_runner_->PostTaskAndReply( 510 FROM_HERE, 511 base::Bind(&BlockingGarbageCollect, storage_root, 512 file_access_runner_, 513 base::Passed(&active_paths)), 514 done); 515 } 516 517 void StoragePartitionImplMap::ForEach( 518 const BrowserContext::StoragePartitionCallback& callback) { 519 for (PartitionMap::const_iterator it = partitions_.begin(); 520 it != partitions_.end(); 521 ++it) { 522 callback.Run(it->second); 523 } 524 } 525 526 void StoragePartitionImplMap::PostCreateInitialization( 527 StoragePartitionImpl* partition, 528 bool in_memory) { 529 // TODO(ajwong): ResourceContexts no longer have any storage related state. 530 // We should move this into a place where it is called once per 531 // BrowserContext creation rather than piggybacking off the default context 532 // creation. 
533 // Note: moving this into Get() before partitions_[] is set causes reentrency. 534 if (!resource_context_initialized_) { 535 resource_context_initialized_ = true; 536 InitializeResourceContext(browser_context_); 537 } 538 539 // Check first to avoid memory leak in unittests. 540 if (BrowserThread::IsMessageLoopValid(BrowserThread::IO)) { 541 BrowserThread::PostTask( 542 BrowserThread::IO, FROM_HERE, 543 base::Bind(&ChromeAppCacheService::InitializeOnIOThread, 544 partition->GetAppCacheService(), 545 in_memory ? base::FilePath() : 546 partition->GetPath().Append(kAppCacheDirname), 547 browser_context_->GetResourceContext(), 548 make_scoped_refptr(partition->GetURLRequestContext()), 549 make_scoped_refptr( 550 browser_context_->GetSpecialStoragePolicy()))); 551 552 // We do not call InitializeURLRequestContext() for media contexts because, 553 // other than the HTTP cache, the media contexts share the same backing 554 // objects as their associated "normal" request context. Thus, the previous 555 // call serves to initialize the media request context for this storage 556 // partition as well. 557 } 558 } 559 560 } // namespace content 561