1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/disk_cache/simple/simple_index_file.h" 6 7 #include <vector> 8 9 #include "base/files/file_util.h" 10 #include "base/files/memory_mapped_file.h" 11 #include "base/hash.h" 12 #include "base/logging.h" 13 #include "base/pickle.h" 14 #include "base/single_thread_task_runner.h" 15 #include "base/task_runner_util.h" 16 #include "base/threading/thread_restrictions.h" 17 #include "net/disk_cache/simple/simple_backend_version.h" 18 #include "net/disk_cache/simple/simple_entry_format.h" 19 #include "net/disk_cache/simple/simple_histogram_macros.h" 20 #include "net/disk_cache/simple/simple_index.h" 21 #include "net/disk_cache/simple/simple_synchronous_entry.h" 22 #include "net/disk_cache/simple/simple_util.h" 23 #include "third_party/zlib/zlib.h" 24 25 namespace disk_cache { 26 namespace { 27 28 const int kEntryFilesHashLength = 16; 29 const int kEntryFilesSuffixLength = 2; 30 31 const uint64 kMaxEntiresInIndex = 100000000; 32 33 uint32 CalculatePickleCRC(const Pickle& pickle) { 34 return crc32(crc32(0, Z_NULL, 0), 35 reinterpret_cast<const Bytef*>(pickle.payload()), 36 pickle.payload_size()); 37 } 38 39 // Used in histograms. Please only add new values at the end. 40 enum IndexFileState { 41 INDEX_STATE_CORRUPT = 0, 42 INDEX_STATE_STALE = 1, 43 INDEX_STATE_FRESH = 2, 44 INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3, 45 INDEX_STATE_MAX = 4, 46 }; 47 48 void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type) { 49 SIMPLE_CACHE_UMA(ENUMERATION, 50 "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX); 51 } 52 53 // Used in histograms. Please only add new values at the end. 54 enum IndexInitMethod { 55 INITIALIZE_METHOD_RECOVERED = 0, 56 INITIALIZE_METHOD_LOADED = 1, 57 INITIALIZE_METHOD_NEWCACHE = 2, 58 INITIALIZE_METHOD_MAX = 3, 59 }; 60 61 void UmaRecordIndexInitMethod(IndexInitMethod method, 62 net::CacheType cache_type) { 63 SIMPLE_CACHE_UMA(ENUMERATION, 64 "IndexInitializeMethod", cache_type, 65 method, INITIALIZE_METHOD_MAX); 66 } 67 68 bool WritePickleFile(Pickle* pickle, const base::FilePath& file_name) { 69 int bytes_written = base::WriteFile( 70 file_name, static_cast<const char*>(pickle->data()), pickle->size()); 71 if (bytes_written != implicit_cast<int>(pickle->size())) { 72 base::DeleteFile(file_name, /* recursive = */ false); 73 return false; 74 } 75 return true; 76 } 77 78 // Called for each cache directory traversal iteration. 79 void ProcessEntryFile(SimpleIndex::EntrySet* entries, 80 const base::FilePath& file_path) { 81 static const size_t kEntryFilesLength = 82 kEntryFilesHashLength + kEntryFilesSuffixLength; 83 // Converting to std::string is OK since we never use UTF8 wide chars in our 84 // file names. 85 const base::FilePath::StringType base_name = file_path.BaseName().value(); 86 const std::string file_name(base_name.begin(), base_name.end()); 87 if (file_name.size() != kEntryFilesLength) 88 return; 89 const base::StringPiece hash_string( 90 file_name.begin(), file_name.begin() + kEntryFilesHashLength); 91 uint64 hash_key = 0; 92 if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) { 93 LOG(WARNING) << "Invalid entry hash key filename while restoring index from" 94 << " disk: " << file_name; 95 return; 96 } 97 98 base::File::Info file_info; 99 if (!base::GetFileInfo(file_path, &file_info)) { 100 LOG(ERROR) << "Could not get file info for " << file_path.value(); 101 return; 102 } 103 base::Time last_used_time; 104 #if defined(OS_POSIX) 105 // For POSIX systems, a last access time is available. However, it's not 106 // guaranteed to be more accurate than mtime. It is no worse though. 107 last_used_time = file_info.last_accessed; 108 #endif 109 if (last_used_time.is_null()) 110 last_used_time = file_info.last_modified; 111 112 int64 file_size = file_info.size; 113 SimpleIndex::EntrySet::iterator it = entries->find(hash_key); 114 if (it == entries->end()) { 115 SimpleIndex::InsertInEntrySet( 116 hash_key, 117 EntryMetadata(last_used_time, file_size), 118 entries); 119 } else { 120 // Summing up the total size of the entry through all the *_[0-1] files 121 it->second.SetEntrySize(it->second.GetEntrySize() + file_size); 122 } 123 } 124 125 } // namespace 126 127 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false), 128 flush_required(false) { 129 } 130 131 SimpleIndexLoadResult::~SimpleIndexLoadResult() { 132 } 133 134 void SimpleIndexLoadResult::Reset() { 135 did_load = false; 136 flush_required = false; 137 entries.clear(); 138 } 139 140 // static 141 const char SimpleIndexFile::kIndexFileName[] = "the-real-index"; 142 // static 143 const char SimpleIndexFile::kIndexDirectory[] = "index-dir"; 144 // static 145 const char SimpleIndexFile::kTempIndexFileName[] = "temp-index"; 146 147 SimpleIndexFile::IndexMetadata::IndexMetadata() 148 : magic_number_(kSimpleIndexMagicNumber), 149 version_(kSimpleVersion), 150 number_of_entries_(0), 151 cache_size_(0) {} 152 153 SimpleIndexFile::IndexMetadata::IndexMetadata( 154 uint64 number_of_entries, uint64 cache_size) 155 : magic_number_(kSimpleIndexMagicNumber), 156 version_(kSimpleVersion), 157 number_of_entries_(number_of_entries), 158 cache_size_(cache_size) {} 159 160 void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const { 161 DCHECK(pickle); 162 pickle->WriteUInt64(magic_number_); 163 pickle->WriteUInt32(version_); 164 pickle->WriteUInt64(number_of_entries_); 165 pickle->WriteUInt64(cache_size_); 166 } 167 168 // static 169 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified, 170 Pickle* pickle) { 171 if (!pickle->WriteInt64(cache_modified.ToInternalValue())) 172 return false; 173 SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>(); 174 header_p->crc = CalculatePickleCRC(*pickle); 175 return true; 176 } 177 178 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) { 179 DCHECK(it); 180 return it->ReadUInt64(&magic_number_) && 181 it->ReadUInt32(&version_) && 182 it->ReadUInt64(&number_of_entries_)&& 183 it->ReadUInt64(&cache_size_); 184 } 185 186 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type, 187 const base::FilePath& cache_directory, 188 const base::FilePath& index_filename, 189 const base::FilePath& temp_index_filename, 190 scoped_ptr<Pickle> pickle, 191 const base::TimeTicks& start_time, 192 bool app_on_background) { 193 // There is a chance that the index containing all the necessary data about 194 // newly created entries will appear to be stale. This can happen if on-disk 195 // part of a Create operation does not fit into the time budget for the index 196 // flush delay. This simple approach will be reconsidered if it does not allow 197 // for maintaining freshness. 198 base::Time cache_dir_mtime; 199 if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) { 200 LOG(ERROR) << "Could obtain information about cache age"; 201 return; 202 } 203 SerializeFinalData(cache_dir_mtime, pickle.get()); 204 if (!WritePickleFile(pickle.get(), temp_index_filename)) { 205 if (!base::CreateDirectory(temp_index_filename.DirName())) { 206 LOG(ERROR) << "Could not create a directory to hold the index file"; 207 return; 208 } 209 if (!WritePickleFile(pickle.get(), temp_index_filename)) { 210 LOG(ERROR) << "Failed to write the temporary index file"; 211 return; 212 } 213 } 214 215 // Atomically rename the temporary index file to become the real one. 216 bool result = base::ReplaceFile(temp_index_filename, index_filename, NULL); 217 DCHECK(result); 218 219 if (app_on_background) { 220 SIMPLE_CACHE_UMA(TIMES, 221 "IndexWriteToDiskTime.Background", cache_type, 222 (base::TimeTicks::Now() - start_time)); 223 } else { 224 SIMPLE_CACHE_UMA(TIMES, 225 "IndexWriteToDiskTime.Foreground", cache_type, 226 (base::TimeTicks::Now() - start_time)); 227 } 228 } 229 230 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() { 231 return number_of_entries_ <= kMaxEntiresInIndex && 232 magic_number_ == kSimpleIndexMagicNumber && 233 version_ == kSimpleVersion; 234 } 235 236 SimpleIndexFile::SimpleIndexFile( 237 const scoped_refptr<base::SingleThreadTaskRunner>& cache_thread, 238 const scoped_refptr<base::TaskRunner>& worker_pool, 239 net::CacheType cache_type, 240 const base::FilePath& cache_directory) 241 : cache_thread_(cache_thread), 242 worker_pool_(worker_pool), 243 cache_type_(cache_type), 244 cache_directory_(cache_directory), 245 index_file_(cache_directory_.AppendASCII(kIndexDirectory) 246 .AppendASCII(kIndexFileName)), 247 temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory) 248 .AppendASCII(kTempIndexFileName)) { 249 } 250 251 SimpleIndexFile::~SimpleIndexFile() {} 252 253 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified, 254 const base::Closure& callback, 255 SimpleIndexLoadResult* out_result) { 256 base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries, 257 cache_type_, 258 cache_last_modified, cache_directory_, 259 index_file_, out_result); 260 worker_pool_->PostTaskAndReply(FROM_HERE, task, callback); 261 } 262 263 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set, 264 uint64 cache_size, 265 const base::TimeTicks& start, 266 bool app_on_background) { 267 IndexMetadata index_metadata(entry_set.size(), cache_size); 268 scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set); 269 cache_thread_->PostTask(FROM_HERE, 270 base::Bind(&SimpleIndexFile::SyncWriteToDisk, 271 cache_type_, 272 cache_directory_, 273 index_file_, 274 temp_index_file_, 275 base::Passed(&pickle), 276 base::TimeTicks::Now(), 277 app_on_background)); 278 } 279 280 // static 281 void SimpleIndexFile::SyncLoadIndexEntries( 282 net::CacheType cache_type, 283 base::Time cache_last_modified, 284 const base::FilePath& cache_directory, 285 const base::FilePath& index_file_path, 286 SimpleIndexLoadResult* out_result) { 287 // Load the index and find its age. 288 base::Time last_cache_seen_by_index; 289 SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result); 290 291 // Consider the index loaded if it is fresh. 292 const bool index_file_existed = base::PathExists(index_file_path); 293 if (!out_result->did_load) { 294 if (index_file_existed) 295 UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type); 296 } else { 297 if (cache_last_modified <= last_cache_seen_by_index) { 298 base::Time latest_dir_mtime; 299 simple_util::GetMTime(cache_directory, &latest_dir_mtime); 300 if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) { 301 UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES, 302 cache_type); 303 } else { 304 UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type); 305 } 306 UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED, cache_type); 307 return; 308 } 309 UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type); 310 } 311 312 // Reconstruct the index by scanning the disk for entries. 313 const base::TimeTicks start = base::TimeTicks::Now(); 314 SyncRestoreFromDisk(cache_directory, index_file_path, out_result); 315 SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type, 316 base::TimeTicks::Now() - start); 317 SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type, 318 out_result->entries.size()); 319 if (index_file_existed) { 320 UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED, cache_type); 321 } else { 322 UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE, cache_type); 323 SIMPLE_CACHE_UMA(COUNTS, 324 "IndexCreatedEntryCount", cache_type, 325 out_result->entries.size()); 326 } 327 } 328 329 // static 330 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename, 331 base::Time* out_last_cache_seen_by_index, 332 SimpleIndexLoadResult* out_result) { 333 out_result->Reset(); 334 335 base::MemoryMappedFile index_file_map; 336 if (!index_file_map.Initialize(index_filename)) { 337 LOG(WARNING) << "Could not map Simple Index file."; 338 base::DeleteFile(index_filename, false); 339 return; 340 } 341 342 SimpleIndexFile::Deserialize( 343 reinterpret_cast<const char*>(index_file_map.data()), 344 index_file_map.length(), 345 out_last_cache_seen_by_index, 346 out_result); 347 348 if (!out_result->did_load) 349 base::DeleteFile(index_filename, false); 350 } 351 352 // static 353 scoped_ptr<Pickle> SimpleIndexFile::Serialize( 354 const SimpleIndexFile::IndexMetadata& index_metadata, 355 const SimpleIndex::EntrySet& entries) { 356 scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader))); 357 358 index_metadata.Serialize(pickle.get()); 359 for (SimpleIndex::EntrySet::const_iterator it = entries.begin(); 360 it != entries.end(); ++it) { 361 pickle->WriteUInt64(it->first); 362 it->second.Serialize(pickle.get()); 363 } 364 return pickle.Pass(); 365 } 366 367 // static 368 void SimpleIndexFile::Deserialize(const char* data, int data_len, 369 base::Time* out_cache_last_modified, 370 SimpleIndexLoadResult* out_result) { 371 DCHECK(data); 372 373 out_result->Reset(); 374 SimpleIndex::EntrySet* entries = &out_result->entries; 375 376 Pickle pickle(data, data_len); 377 if (!pickle.data()) { 378 LOG(WARNING) << "Corrupt Simple Index File."; 379 return; 380 } 381 382 PickleIterator pickle_it(pickle); 383 SimpleIndexFile::PickleHeader* header_p = 384 pickle.headerT<SimpleIndexFile::PickleHeader>(); 385 const uint32 crc_read = header_p->crc; 386 const uint32 crc_calculated = CalculatePickleCRC(pickle); 387 388 if (crc_read != crc_calculated) { 389 LOG(WARNING) << "Invalid CRC in Simple Index file."; 390 return; 391 } 392 393 SimpleIndexFile::IndexMetadata index_metadata; 394 if (!index_metadata.Deserialize(&pickle_it)) { 395 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index."; 396 return; 397 } 398 399 if (!index_metadata.CheckIndexMetadata()) { 400 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index."; 401 return; 402 } 403 404 #if !defined(OS_WIN) 405 // TODO(gavinp): Consider using std::unordered_map. 406 entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge); 407 #endif 408 while (entries->size() < index_metadata.GetNumberOfEntries()) { 409 uint64 hash_key; 410 EntryMetadata entry_metadata; 411 if (!pickle_it.ReadUInt64(&hash_key) || 412 !entry_metadata.Deserialize(&pickle_it)) { 413 LOG(WARNING) << "Invalid EntryMetadata in Simple Index file."; 414 entries->clear(); 415 return; 416 } 417 SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries); 418 } 419 420 int64 cache_last_modified; 421 if (!pickle_it.ReadInt64(&cache_last_modified)) { 422 entries->clear(); 423 return; 424 } 425 DCHECK(out_cache_last_modified); 426 *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified); 427 428 out_result->did_load = true; 429 } 430 431 // static 432 void SimpleIndexFile::SyncRestoreFromDisk( 433 const base::FilePath& cache_directory, 434 const base::FilePath& index_file_path, 435 SimpleIndexLoadResult* out_result) { 436 VLOG(1) << "Simple Cache Index is being restored from disk."; 437 base::DeleteFile(index_file_path, /* recursive = */ false); 438 out_result->Reset(); 439 SimpleIndex::EntrySet* entries = &out_result->entries; 440 441 const bool did_succeed = TraverseCacheDirectory( 442 cache_directory, base::Bind(&ProcessEntryFile, entries)); 443 if (!did_succeed) { 444 LOG(ERROR) << "Could not reconstruct index from disk"; 445 return; 446 } 447 out_result->did_load = true; 448 // When we restore from disk we write the merged index file to disk right 449 // away, this might save us from having to restore again next time. 450 out_result->flush_required = true; 451 } 452 453 // static 454 bool SimpleIndexFile::LegacyIsIndexFileStale( 455 base::Time cache_last_modified, 456 const base::FilePath& index_file_path) { 457 base::Time index_mtime; 458 if (!simple_util::GetMTime(index_file_path, &index_mtime)) 459 return true; 460 return index_mtime < cache_last_modified; 461 } 462 463 } // namespace disk_cache 464