1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/disk_cache/simple/simple_index_file.h" 6 7 #include <vector> 8 9 #include "base/file_util.h" 10 #include "base/files/memory_mapped_file.h" 11 #include "base/hash.h" 12 #include "base/logging.h" 13 #include "base/metrics/histogram.h" 14 #include "base/pickle.h" 15 #include "base/single_thread_task_runner.h" 16 #include "base/task_runner_util.h" 17 #include "base/threading/thread_restrictions.h" 18 #include "net/disk_cache/simple/simple_entry_format.h" 19 #include "net/disk_cache/simple/simple_index.h" 20 #include "net/disk_cache/simple/simple_synchronous_entry.h" 21 #include "net/disk_cache/simple/simple_util.h" 22 #include "third_party/zlib/zlib.h" 23 24 namespace disk_cache { 25 namespace { 26 27 const int kEntryFilesHashLength = 16; 28 const int kEntryFilesSuffixLength = 2; 29 30 const uint64 kMaxEntiresInIndex = 100000000; 31 32 const char kIndexFileName[] = "the-real-index"; 33 const char kTempIndexFileName[] = "temp-index"; 34 35 uint32 CalculatePickleCRC(const Pickle& pickle) { 36 return crc32(crc32(0, Z_NULL, 0), 37 reinterpret_cast<const Bytef*>(pickle.payload()), 38 pickle.payload_size()); 39 } 40 41 void DoomEntrySetReply(const net::CompletionCallback& reply_callback, 42 int result) { 43 reply_callback.Run(result); 44 } 45 46 void WriteToDiskInternal(const base::FilePath& index_filename, 47 const base::FilePath& temp_index_filename, 48 scoped_ptr<Pickle> pickle, 49 const base::TimeTicks& start_time, 50 bool app_on_background) { 51 int bytes_written = file_util::WriteFile( 52 temp_index_filename, 53 reinterpret_cast<const char*>(pickle->data()), 54 pickle->size()); 55 DCHECK_EQ(bytes_written, implicit_cast<int>(pickle->size())); 56 if (bytes_written != static_cast<int>(pickle->size())) { 57 // TODO(felipeg): Add better error handling. 58 LOG(ERROR) << "Could not write Simple Cache index to temporary file: " 59 << temp_index_filename.value(); 60 base::DeleteFile(temp_index_filename, /* recursive = */ false); 61 } else { 62 // Swap temp and index_file. 63 bool result = base::ReplaceFile(temp_index_filename, index_filename, NULL); 64 DCHECK(result); 65 } 66 if (app_on_background) { 67 UMA_HISTOGRAM_TIMES("SimpleCache.IndexWriteToDiskTime.Background", 68 (base::TimeTicks::Now() - start_time)); 69 } else { 70 UMA_HISTOGRAM_TIMES("SimpleCache.IndexWriteToDiskTime.Foreground", 71 (base::TimeTicks::Now() - start_time)); 72 } 73 } 74 75 // Called for each cache directory traversal iteration. 76 void ProcessEntryFile(SimpleIndex::EntrySet* entries, 77 const base::FilePath& file_path) { 78 static const size_t kEntryFilesLength = 79 kEntryFilesHashLength + kEntryFilesSuffixLength; 80 // Converting to std::string is OK since we never use UTF8 wide chars in our 81 // file names. 82 const base::FilePath::StringType base_name = file_path.BaseName().value(); 83 const std::string file_name(base_name.begin(), base_name.end()); 84 if (file_name.size() != kEntryFilesLength) 85 return; 86 const base::StringPiece hash_string( 87 file_name.begin(), file_name.begin() + kEntryFilesHashLength); 88 uint64 hash_key = 0; 89 if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) { 90 LOG(WARNING) << "Invalid entry hash key filename while restoring index from" 91 << " disk: " << file_name; 92 return; 93 } 94 95 base::PlatformFileInfo file_info; 96 if (!file_util::GetFileInfo(file_path, &file_info)) { 97 LOG(ERROR) << "Could not get file info for " << file_path.value(); 98 return; 99 } 100 base::Time last_used_time; 101 #if defined(OS_POSIX) 102 // For POSIX systems, a last access time is available. However, it's not 103 // guaranteed to be more accurate than mtime. It is no worse though. 104 last_used_time = file_info.last_accessed; 105 #endif 106 if (last_used_time.is_null()) 107 last_used_time = file_info.last_modified; 108 109 int64 file_size = file_info.size; 110 SimpleIndex::EntrySet::iterator it = entries->find(hash_key); 111 if (it == entries->end()) { 112 SimpleIndex::InsertInEntrySet( 113 hash_key, 114 EntryMetadata(last_used_time, file_size), 115 entries); 116 } else { 117 // Summing up the total size of the entry through all the *_[0-2] files 118 it->second.SetEntrySize(it->second.GetEntrySize() + file_size); 119 } 120 } 121 122 } // namespace 123 124 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false), 125 flush_required(false) { 126 } 127 128 SimpleIndexLoadResult::~SimpleIndexLoadResult() { 129 } 130 131 void SimpleIndexLoadResult::Reset() { 132 did_load = false; 133 flush_required = false; 134 entries.clear(); 135 } 136 137 SimpleIndexFile::IndexMetadata::IndexMetadata() : 138 magic_number_(kSimpleIndexMagicNumber), 139 version_(kSimpleVersion), 140 number_of_entries_(0), 141 cache_size_(0) {} 142 143 SimpleIndexFile::IndexMetadata::IndexMetadata( 144 uint64 number_of_entries, uint64 cache_size) : 145 magic_number_(kSimpleIndexMagicNumber), 146 version_(kSimpleVersion), 147 number_of_entries_(number_of_entries), 148 cache_size_(cache_size) {} 149 150 void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const { 151 DCHECK(pickle); 152 pickle->WriteUInt64(magic_number_); 153 pickle->WriteUInt32(version_); 154 pickle->WriteUInt64(number_of_entries_); 155 pickle->WriteUInt64(cache_size_); 156 } 157 158 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) { 159 DCHECK(it); 160 return it->ReadUInt64(&magic_number_) && 161 it->ReadUInt32(&version_) && 162 it->ReadUInt64(&number_of_entries_)&& 163 it->ReadUInt64(&cache_size_); 164 } 165 166 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() { 167 return number_of_entries_ <= kMaxEntiresInIndex && 168 magic_number_ == disk_cache::kSimpleIndexMagicNumber && 169 version_ == disk_cache::kSimpleVersion; 170 } 171 172 SimpleIndexFile::SimpleIndexFile( 173 base::SingleThreadTaskRunner* cache_thread, 174 base::TaskRunner* worker_pool, 175 const base::FilePath& cache_directory) 176 : cache_thread_(cache_thread), 177 worker_pool_(worker_pool), 178 cache_directory_(cache_directory), 179 index_file_(cache_directory_.AppendASCII(kIndexFileName)), 180 temp_index_file_(cache_directory_.AppendASCII(kTempIndexFileName)) { 181 } 182 183 SimpleIndexFile::~SimpleIndexFile() {} 184 185 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified, 186 const base::Closure& callback, 187 SimpleIndexLoadResult* out_result) { 188 base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries, 189 cache_last_modified, cache_directory_, 190 index_file_, out_result); 191 worker_pool_->PostTaskAndReply(FROM_HERE, task, callback); 192 } 193 194 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set, 195 uint64 cache_size, 196 const base::TimeTicks& start, 197 bool app_on_background) { 198 IndexMetadata index_metadata(entry_set.size(), cache_size); 199 scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set); 200 cache_thread_->PostTask(FROM_HERE, base::Bind( 201 &WriteToDiskInternal, 202 index_file_, 203 temp_index_file_, 204 base::Passed(&pickle), 205 base::TimeTicks::Now(), 206 app_on_background)); 207 } 208 209 void SimpleIndexFile::DoomEntrySet( 210 scoped_ptr<std::vector<uint64> > entry_hashes, 211 const net::CompletionCallback& reply_callback) { 212 PostTaskAndReplyWithResult( 213 worker_pool_, 214 FROM_HERE, 215 base::Bind(&SimpleSynchronousEntry::DoomEntrySet, 216 base::Passed(entry_hashes.Pass()), cache_directory_), 217 base::Bind(&DoomEntrySetReply, reply_callback)); 218 } 219 220 // static 221 void SimpleIndexFile::SyncLoadIndexEntries( 222 base::Time cache_last_modified, 223 const base::FilePath& cache_directory, 224 const base::FilePath& index_file_path, 225 SimpleIndexLoadResult* out_result) { 226 // TODO(felipeg): probably could load a stale index and use it for something. 227 const SimpleIndex::EntrySet& entries = out_result->entries; 228 229 const bool index_file_exists = base::PathExists(index_file_path); 230 231 // Used in histograms. Please only add new values at the end. 232 enum { 233 INDEX_STATE_CORRUPT = 0, 234 INDEX_STATE_STALE = 1, 235 INDEX_STATE_FRESH = 2, 236 INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3, 237 INDEX_STATE_MAX = 4, 238 } index_file_state; 239 240 // Only load if the index is not stale. 241 if (IsIndexFileStale(cache_last_modified, index_file_path)) { 242 index_file_state = INDEX_STATE_STALE; 243 } else { 244 index_file_state = INDEX_STATE_FRESH; 245 base::Time latest_dir_mtime; 246 if (simple_util::GetMTime(cache_directory, &latest_dir_mtime) && 247 IsIndexFileStale(latest_dir_mtime, index_file_path)) { 248 // A file operation has updated the directory since we last looked at it 249 // during backend initialization. 250 index_file_state = INDEX_STATE_FRESH_CONCURRENT_UPDATES; 251 } 252 253 const base::TimeTicks start = base::TimeTicks::Now(); 254 SyncLoadFromDisk(index_file_path, out_result); 255 UMA_HISTOGRAM_TIMES("SimpleCache.IndexLoadTime", 256 base::TimeTicks::Now() - start); 257 UMA_HISTOGRAM_COUNTS("SimpleCache.IndexEntriesLoaded", 258 out_result->did_load ? entries.size() : 0); 259 if (!out_result->did_load) 260 index_file_state = INDEX_STATE_CORRUPT; 261 } 262 UMA_HISTOGRAM_ENUMERATION("SimpleCache.IndexFileStateOnLoad", 263 index_file_state, 264 INDEX_STATE_MAX); 265 266 if (!out_result->did_load) { 267 const base::TimeTicks start = base::TimeTicks::Now(); 268 SyncRestoreFromDisk(cache_directory, index_file_path, out_result); 269 UMA_HISTOGRAM_MEDIUM_TIMES("SimpleCache.IndexRestoreTime", 270 base::TimeTicks::Now() - start); 271 UMA_HISTOGRAM_COUNTS("SimpleCache.IndexEntriesRestored", 272 entries.size()); 273 } 274 275 // Used in histograms. Please only add new values at the end. 276 enum { 277 INITIALIZE_METHOD_RECOVERED = 0, 278 INITIALIZE_METHOD_LOADED = 1, 279 INITIALIZE_METHOD_NEWCACHE = 2, 280 INITIALIZE_METHOD_MAX = 3, 281 }; 282 int initialize_method; 283 if (index_file_exists) { 284 if (out_result->flush_required) 285 initialize_method = INITIALIZE_METHOD_RECOVERED; 286 else 287 initialize_method = INITIALIZE_METHOD_LOADED; 288 } else { 289 UMA_HISTOGRAM_COUNTS("SimpleCache.IndexCreatedEntryCount", 290 entries.size()); 291 initialize_method = INITIALIZE_METHOD_NEWCACHE; 292 } 293 294 UMA_HISTOGRAM_ENUMERATION("SimpleCache.IndexInitializeMethod", 295 initialize_method, INITIALIZE_METHOD_MAX); 296 } 297 298 // static 299 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename, 300 SimpleIndexLoadResult* out_result) { 301 out_result->Reset(); 302 303 base::MemoryMappedFile index_file_map; 304 if (!index_file_map.Initialize(index_filename)) { 305 LOG(WARNING) << "Could not map Simple Index file."; 306 base::DeleteFile(index_filename, false); 307 return; 308 } 309 310 SimpleIndexFile::Deserialize( 311 reinterpret_cast<const char*>(index_file_map.data()), 312 index_file_map.length(), out_result); 313 314 if (!out_result->did_load) 315 base::DeleteFile(index_filename, false); 316 } 317 318 // static 319 scoped_ptr<Pickle> SimpleIndexFile::Serialize( 320 const SimpleIndexFile::IndexMetadata& index_metadata, 321 const SimpleIndex::EntrySet& entries) { 322 scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader))); 323 324 index_metadata.Serialize(pickle.get()); 325 for (SimpleIndex::EntrySet::const_iterator it = entries.begin(); 326 it != entries.end(); ++it) { 327 pickle->WriteUInt64(it->first); 328 it->second.Serialize(pickle.get()); 329 } 330 SimpleIndexFile::PickleHeader* header_p = 331 pickle->headerT<SimpleIndexFile::PickleHeader>(); 332 header_p->crc = CalculatePickleCRC(*pickle); 333 return pickle.Pass(); 334 } 335 336 // static 337 void SimpleIndexFile::Deserialize(const char* data, int data_len, 338 SimpleIndexLoadResult* out_result) { 339 DCHECK(data); 340 341 out_result->Reset(); 342 SimpleIndex::EntrySet* entries = &out_result->entries; 343 344 Pickle pickle(data, data_len); 345 if (!pickle.data()) { 346 LOG(WARNING) << "Corrupt Simple Index File."; 347 return; 348 } 349 350 PickleIterator pickle_it(pickle); 351 352 SimpleIndexFile::PickleHeader* header_p = 353 pickle.headerT<SimpleIndexFile::PickleHeader>(); 354 const uint32 crc_read = header_p->crc; 355 const uint32 crc_calculated = CalculatePickleCRC(pickle); 356 357 if (crc_read != crc_calculated) { 358 LOG(WARNING) << "Invalid CRC in Simple Index file."; 359 return; 360 } 361 362 SimpleIndexFile::IndexMetadata index_metadata; 363 if (!index_metadata.Deserialize(&pickle_it)) { 364 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index."; 365 return; 366 } 367 368 if (!index_metadata.CheckIndexMetadata()) { 369 LOG(ERROR) << "Invalid index_metadata on Simple Cache Index."; 370 return; 371 } 372 373 #if !defined(OS_WIN) 374 // TODO(gavinp): Consider using std::unordered_map. 375 entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge); 376 #endif 377 while (entries->size() < index_metadata.GetNumberOfEntries()) { 378 uint64 hash_key; 379 EntryMetadata entry_metadata; 380 if (!pickle_it.ReadUInt64(&hash_key) || 381 !entry_metadata.Deserialize(&pickle_it)) { 382 LOG(WARNING) << "Invalid EntryMetadata in Simple Index file."; 383 entries->clear(); 384 return; 385 } 386 SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries); 387 } 388 389 out_result->did_load = true; 390 } 391 392 // static 393 void SimpleIndexFile::SyncRestoreFromDisk( 394 const base::FilePath& cache_directory, 395 const base::FilePath& index_file_path, 396 SimpleIndexLoadResult* out_result) { 397 LOG(INFO) << "Simple Cache Index is being restored from disk."; 398 base::DeleteFile(index_file_path, /* recursive = */ false); 399 out_result->Reset(); 400 SimpleIndex::EntrySet* entries = &out_result->entries; 401 402 // TODO(felipeg,gavinp): Fix this once we have a one-file per entry format. 403 COMPILE_ASSERT(kSimpleEntryFileCount == 3, 404 file_pattern_must_match_file_count); 405 406 const bool did_succeed = TraverseCacheDirectory( 407 cache_directory, base::Bind(&ProcessEntryFile, entries)); 408 if (!did_succeed) { 409 LOG(ERROR) << "Could not reconstruct index from disk"; 410 return; 411 } 412 out_result->did_load = true; 413 // When we restore from disk we write the merged index file to disk right 414 // away, this might save us from having to restore again next time. 415 out_result->flush_required = true; 416 } 417 418 // static 419 bool SimpleIndexFile::IsIndexFileStale(base::Time cache_last_modified, 420 const base::FilePath& index_file_path) { 421 base::Time index_mtime; 422 if (!simple_util::GetMTime(index_file_path, &index_mtime)) 423 return true; 424 return index_mtime < cache_last_modified; 425 } 426 427 } // namespace disk_cache 428