1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/disk_cache/simple/simple_index_file.h" 6 7 #include <vector> 8 9 #include "base/file_util.h" 10 #include "base/files/memory_mapped_file.h" 11 #include "base/hash.h" 12 #include "base/logging.h" 13 #include "base/pickle.h" 14 #include "base/single_thread_task_runner.h" 15 #include "base/task_runner_util.h" 16 #include "base/threading/thread_restrictions.h" 17 #include "net/disk_cache/simple/simple_backend_version.h" 18 #include "net/disk_cache/simple/simple_entry_format.h" 19 #include "net/disk_cache/simple/simple_histogram_macros.h" 20 #include "net/disk_cache/simple/simple_index.h" 21 #include "net/disk_cache/simple/simple_synchronous_entry.h" 22 #include "net/disk_cache/simple/simple_util.h" 23 #include "third_party/zlib/zlib.h" 24 25 namespace disk_cache { 26 namespace { 27 28 const int kEntryFilesHashLength = 16; 29 const int kEntryFilesSuffixLength = 2; 30 31 const uint64 kMaxEntiresInIndex = 100000000; 32 33 uint32 CalculatePickleCRC(const Pickle& pickle) { 34 return crc32(crc32(0, Z_NULL, 0), 35 reinterpret_cast<const Bytef*>(pickle.payload()), 36 pickle.payload_size()); 37 } 38 39 // Used in histograms. Please only add new values at the end. 40 enum IndexFileState { 41 INDEX_STATE_CORRUPT = 0, 42 INDEX_STATE_STALE = 1, 43 INDEX_STATE_FRESH = 2, 44 INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3, 45 INDEX_STATE_MAX = 4, 46 }; 47 48 void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type) { 49 SIMPLE_CACHE_UMA(ENUMERATION, 50 "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX); 51 } 52 53 // Used in histograms. Please only add new values at the end. 54 enum IndexInitMethod { 55 INITIALIZE_METHOD_RECOVERED = 0, 56 INITIALIZE_METHOD_LOADED = 1, 57 INITIALIZE_METHOD_NEWCACHE = 2, 58 INITIALIZE_METHOD_MAX = 3, 59 }; 60 61 void UmaRecordIndexInitMethod(IndexInitMethod method, 62 net::CacheType cache_type) { 63 SIMPLE_CACHE_UMA(ENUMERATION, 64 "IndexInitializeMethod", cache_type, 65 method, INITIALIZE_METHOD_MAX); 66 } 67 68 bool WritePickleFile(Pickle* pickle, const base::FilePath& file_name) { 69 int bytes_written = file_util::WriteFile( 70 file_name, static_cast<const char*>(pickle->data()), pickle->size()); 71 if (bytes_written != implicit_cast<int>(pickle->size())) { 72 base::DeleteFile(file_name, /* recursive = */ false); 73 return false; 74 } 75 return true; 76 } 77 78 // Called for each cache directory traversal iteration. 79 void ProcessEntryFile(SimpleIndex::EntrySet* entries, 80 const base::FilePath& file_path) { 81 static const size_t kEntryFilesLength = 82 kEntryFilesHashLength + kEntryFilesSuffixLength; 83 // Converting to std::string is OK since we never use UTF8 wide chars in our 84 // file names. 85 const base::FilePath::StringType base_name = file_path.BaseName().value(); 86 const std::string file_name(base_name.begin(), base_name.end()); 87 if (file_name.size() != kEntryFilesLength) 88 return; 89 const base::StringPiece hash_string( 90 file_name.begin(), file_name.begin() + kEntryFilesHashLength); 91 uint64 hash_key = 0; 92 if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) { 93 LOG(WARNING) << "Invalid entry hash key filename while restoring index from" 94 << " disk: " << file_name; 95 return; 96 } 97 98 base::PlatformFileInfo file_info; 99 if (!base::GetFileInfo(file_path, &file_info)) { 100 LOG(ERROR) << "Could not get file info for " << file_path.value(); 101 return; 102 } 103 base::Time last_used_time; 104 #if defined(OS_POSIX) 105 // For POSIX systems, a last access time is available. However, it's not 106 // guaranteed to be more accurate than mtime. It is no worse though. 107 last_used_time = file_info.last_accessed; 108 #endif 109 if (last_used_time.is_null()) 110 last_used_time = file_info.last_modified; 111 112 int64 file_size = file_info.size; 113 SimpleIndex::EntrySet::iterator it = entries->find(hash_key); 114 if (it == entries->end()) { 115 SimpleIndex::InsertInEntrySet( 116 hash_key, 117 EntryMetadata(last_used_time, file_size), 118 entries); 119 } else { 120 // Summing up the total size of the entry through all the *_[0-1] files 121 it->second.SetEntrySize(it->second.GetEntrySize() + file_size); 122 } 123 } 124 125 } // namespace 126 127 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false), 128 flush_required(false) { 129 } 130 131 SimpleIndexLoadResult::~SimpleIndexLoadResult() { 132 } 133 134 void SimpleIndexLoadResult::Reset() { 135 did_load = false; 136 flush_required = false; 137 entries.clear(); 138 } 139 140 // static 141 const char SimpleIndexFile::kIndexFileName[] = "the-real-index"; 142 // static 143 const char SimpleIndexFile::kIndexDirectory[] = "index-dir"; 144 // static 145 const char SimpleIndexFile::kTempIndexFileName[] = "temp-index"; 146 147 SimpleIndexFile::IndexMetadata::IndexMetadata() 148 : magic_number_(kSimpleIndexMagicNumber), 149 version_(kSimpleVersion), 150 number_of_entries_(0), 151 cache_size_(0) {} 152 153 SimpleIndexFile::IndexMetadata::IndexMetadata( 154 uint64 number_of_entries, uint64 cache_size) 155 : magic_number_(kSimpleIndexMagicNumber), 156 version_(kSimpleVersion), 157 number_of_entries_(number_of_entries), 158 cache_size_(cache_size) {} 159 160 void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const { 161 DCHECK(pickle); 162 pickle->WriteUInt64(magic_number_); 163 pickle->WriteUInt32(version_); 164 pickle->WriteUInt64(number_of_entries_); 165 pickle->WriteUInt64(cache_size_); 166 } 167 168 // static 169 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified, 170 Pickle* pickle) { 171 if (!pickle->WriteInt64(cache_modified.ToInternalValue())) 172 return false; 173 SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>(); 174 header_p->crc = CalculatePickleCRC(*pickle); 175 return true; 176 } 177 178 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) { 179 DCHECK(it); 180 return it->ReadUInt64(&magic_number_) && 181 it->ReadUInt32(&version_) && 182 it->ReadUInt64(&number_of_entries_)&& 183 it->ReadUInt64(&cache_size_); 184 } 185 186 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type, 187 const base::FilePath& cache_directory, 188 const base::FilePath& index_filename, 189 const base::FilePath& temp_index_filename, 190 scoped_ptr<Pickle> pickle, 191 const base::TimeTicks& start_time, 192 bool app_on_background) { 193 // There is a chance that the index containing all the necessary data about 194 // newly created entries will appear to be stale. This can happen if on-disk 195 // part of a Create operation does not fit into the time budget for the index 196 // flush delay. This simple approach will be reconsidered if it does not allow 197 // for maintaining freshness. 198 base::PlatformFileInfo cache_dir_info; 199 base::Time cache_dir_mtime; 200 if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) { 201 LOG(ERROR) << "Could obtain information about cache age"; 202 return; 203 } 204 SerializeFinalData(cache_dir_mtime, pickle.get()); 205 if (!WritePickleFile(pickle.get(), temp_index_filename)) { 206 if (!base::CreateDirectory(temp_index_filename.DirName())) { 207 LOG(ERROR) << "Could not create a directory to hold the index file"; 208 return; 209 } 210 if (!WritePickleFile(pickle.get(), temp_index_filename)) { 211 LOG(ERROR) << "Failed to write the temporary index file"; 212 return; 213 } 214 } 215 216 // Atomically rename the temporary index file to become the real one. 217 bool result = base::ReplaceFile(temp_index_filename, index_filename, NULL); 218 DCHECK(result); 219 220 if (app_on_background) { 221 SIMPLE_CACHE_UMA(TIMES, 222 "IndexWriteToDiskTime.Background", cache_type, 223 (base::TimeTicks::Now() - start_time)); 224 } else { 225 SIMPLE_CACHE_UMA(TIMES, 226 "IndexWriteToDiskTime.Foreground", cache_type, 227 (base::TimeTicks::Now() - start_time)); 228 } 229 } 230 231 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() { 232 return number_of_entries_ <= kMaxEntiresInIndex && 233 magic_number_ == kSimpleIndexMagicNumber && 234 version_ == kSimpleVersion; 235 } 236 237 SimpleIndexFile::SimpleIndexFile( 238 base::SingleThreadTaskRunner* cache_thread, 239 base::TaskRunner* worker_pool, 240 net::CacheType cache_type, 241 const base::FilePath& cache_directory) 242 : cache_thread_(cache_thread), 243 worker_pool_(worker_pool), 244 cache_type_(cache_type), 245 cache_directory_(cache_directory), 246 index_file_(cache_directory_.AppendASCII(kIndexDirectory) 247 .AppendASCII(kIndexFileName)), 248 temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory) 249 .AppendASCII(kTempIndexFileName)) { 250 } 251 252 SimpleIndexFile::~SimpleIndexFile() {} 253 254 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified, 255 const base::Closure& callback, 256 SimpleIndexLoadResult* out_result) { 257 base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries, 258 cache_type_, 259 cache_last_modified, cache_directory_, 260 index_file_, out_result); 261 worker_pool_->PostTaskAndReply(FROM_HERE, task, callback); 262 } 263 264 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set, 265 uint64 cache_size, 266 const base::TimeTicks& start, 267 bool app_on_background) { 268 IndexMetadata index_metadata(entry_set.size(), cache_size); 269 scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set); 270 cache_thread_->PostTask(FROM_HERE, base::Bind( 271 &SimpleIndexFile::SyncWriteToDisk, 272 cache_type_, 273 cache_directory_, 274 index_file_, 275 temp_index_file_, 276 base::Passed(&pickle), 277 base::TimeTicks::Now(), 278 app_on_background)); 279 } 280 281 // static 282 void SimpleIndexFile::SyncLoadIndexEntries( 283 net::CacheType cache_type, 284 base::Time cache_last_modified, 285 const base::FilePath& cache_directory, 286 const base::FilePath& index_file_path, 287 SimpleIndexLoadResult* out_result) { 288 // Load the index and find its age. 289 base::Time last_cache_seen_by_index; 290 SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result); 291 292 // Consider the index loaded if it is fresh. 293 const bool index_file_existed = base::PathExists(index_file_path); 294 if (!out_result->did_load) { 295 if (index_file_existed) 296 UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type); 297 } else { 298 if (cache_last_modified <= last_cache_seen_by_index) { 299 base::Time latest_dir_mtime; 300 simple_util::GetMTime(cache_directory, &latest_dir_mtime); 301 if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) { 302 UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES, 303 cache_type); 304 } else { 305 UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type); 306 } 307 UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED, cache_type); 308 return; 309 } 310 UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type); 311 } 312 313 // Reconstruct the index by scanning the disk for entries. 314 const base::TimeTicks start = base::TimeTicks::Now(); 315 SyncRestoreFromDisk(cache_directory, index_file_path, out_result); 316 SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type, 317 base::TimeTicks::Now() - start); 318 SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type, 319 out_result->entries.size()); 320 if (index_file_existed) { 321 UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED, cache_type); 322 } else { 323 UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE, cache_type); 324 SIMPLE_CACHE_UMA(COUNTS, 325 "IndexCreatedEntryCount", cache_type, 326 out_result->entries.size()); 327 } 328 } 329 330 // static 331 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename, 332 base::Time* out_last_cache_seen_by_index, 333 SimpleIndexLoadResult* out_result) { 334 out_result->Reset(); 335 336 base::MemoryMappedFile index_file_map; 337 if (!index_file_map.Initialize(index_filename)) { 338 LOG(WARNING) << "Could not map Simple Index file."; 339 base::DeleteFile(index_filename, false); 340 return; 341 } 342 343 SimpleIndexFile::Deserialize( 344 reinterpret_cast<const char*>(index_file_map.data()), 345 index_file_map.length(), 346 out_last_cache_seen_by_index, 347 out_result); 348 349 if (!out_result->did_load) 350 base::DeleteFile(index_filename, false); 351 } 352 353 // static 354 scoped_ptr<Pickle> SimpleIndexFile::Serialize( 355 const SimpleIndexFile::IndexMetadata& index_metadata, 356 const SimpleIndex::EntrySet& entries) { 357 scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader))); 358 359 index_metadata.Serialize(pickle.get()); 360 for (SimpleIndex::EntrySet::const_iterator it = entries.begin(); 361 it != entries.end(); ++it) { 362 pickle->WriteUInt64(it->first); 363 it->second.Serialize(pickle.get()); 364 } 365 return pickle.Pass(); 366 } 367 368 // static 369 void SimpleIndexFile::Deserialize(const char* data, int data_len, 370 base::Time* out_cache_last_modified, 371 SimpleIndexLoadResult* out_result) { 372 DCHECK(data); 373 374 out_result->Reset(); 375 SimpleIndex::EntrySet* entries = &out_result->entries; 376 377 Pickle pickle(data, data_len); 378 if (!pickle.data()) { 379 LOG(WARNING) << "Corrupt Simple Index File."; 380 return; 381 } 382 383 PickleIterator pickle_it(pickle); 384 SimpleIndexFile::PickleHeader* header_p = 385 pickle.headerT<SimpleIndexFile::PickleHeader>(); 386 const uint32 crc_read = header_p->crc; 387 const uint32 crc_calculated = CalculatePickleCRC(pickle); 388 389 if (crc_read != crc_calculated) { 390 LOG(WARNING) << "Invalid CRC in Simple Index file."; 391 return; 392 } 393 394 SimpleIndexFile::IndexMetadata index_metadata; 395 if (!index_metadata.Deserialize(&pickle_it)) { 396 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index."; 397 return; 398 } 399 400 if (!index_metadata.CheckIndexMetadata()) { 401 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index."; 402 return; 403 } 404 405 #if !defined(OS_WIN) 406 // TODO(gavinp): Consider using std::unordered_map. 407 entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge); 408 #endif 409 while (entries->size() < index_metadata.GetNumberOfEntries()) { 410 uint64 hash_key; 411 EntryMetadata entry_metadata; 412 if (!pickle_it.ReadUInt64(&hash_key) || 413 !entry_metadata.Deserialize(&pickle_it)) { 414 LOG(WARNING) << "Invalid EntryMetadata in Simple Index file."; 415 entries->clear(); 416 return; 417 } 418 SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries); 419 } 420 421 int64 cache_last_modified; 422 if (!pickle_it.ReadInt64(&cache_last_modified)) { 423 entries->clear(); 424 return; 425 } 426 DCHECK(out_cache_last_modified); 427 *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified); 428 429 out_result->did_load = true; 430 } 431 432 // static 433 void SimpleIndexFile::SyncRestoreFromDisk( 434 const base::FilePath& cache_directory, 435 const base::FilePath& index_file_path, 436 SimpleIndexLoadResult* out_result) { 437 LOG(INFO) << "Simple Cache Index is being restored from disk."; 438 base::DeleteFile(index_file_path, /* recursive = */ false); 439 out_result->Reset(); 440 SimpleIndex::EntrySet* entries = &out_result->entries; 441 442 const bool did_succeed = TraverseCacheDirectory( 443 cache_directory, base::Bind(&ProcessEntryFile, entries)); 444 if (!did_succeed) { 445 LOG(ERROR) << "Could not reconstruct index from disk"; 446 return; 447 } 448 out_result->did_load = true; 449 // When we restore from disk we write the merged index file to disk right 450 // away, this might save us from having to restore again next time. 451 out_result->flush_required = true; 452 } 453 454 // static 455 bool SimpleIndexFile::LegacyIsIndexFileStale( 456 base::Time cache_last_modified, 457 const base::FilePath& index_file_path) { 458 base::Time index_mtime; 459 if (!simple_util::GetMTime(index_file_path, &index_mtime)) 460 return true; 461 return index_mtime < cache_last_modified; 462 } 463 464 } // namespace disk_cache 465