Home | History | Annotate | Download | only in simple
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/disk_cache/simple/simple_index_file.h"
      6 
      7 #include <vector>
      8 
      9 #include "base/files/file_util.h"
     10 #include "base/files/memory_mapped_file.h"
     11 #include "base/hash.h"
     12 #include "base/logging.h"
     13 #include "base/pickle.h"
     14 #include "base/single_thread_task_runner.h"
     15 #include "base/task_runner_util.h"
     16 #include "base/threading/thread_restrictions.h"
     17 #include "net/disk_cache/simple/simple_backend_version.h"
     18 #include "net/disk_cache/simple/simple_entry_format.h"
     19 #include "net/disk_cache/simple/simple_histogram_macros.h"
     20 #include "net/disk_cache/simple/simple_index.h"
     21 #include "net/disk_cache/simple/simple_synchronous_entry.h"
     22 #include "net/disk_cache/simple/simple_util.h"
     23 #include "third_party/zlib/zlib.h"
     24 
     25 namespace disk_cache {
     26 namespace {
     27 
     28 const int kEntryFilesHashLength = 16;
     29 const int kEntryFilesSuffixLength = 2;
     30 
     31 const uint64 kMaxEntiresInIndex = 100000000;
     32 
     33 uint32 CalculatePickleCRC(const Pickle& pickle) {
     34   return crc32(crc32(0, Z_NULL, 0),
     35                reinterpret_cast<const Bytef*>(pickle.payload()),
     36                pickle.payload_size());
     37 }
     38 
     39 // Used in histograms. Please only add new values at the end.
     40 enum IndexFileState {
     41   INDEX_STATE_CORRUPT = 0,
     42   INDEX_STATE_STALE = 1,
     43   INDEX_STATE_FRESH = 2,
     44   INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3,
     45   INDEX_STATE_MAX = 4,
     46 };
     47 
     48 void UmaRecordIndexFileState(IndexFileState state, net::CacheType cache_type) {
     49   SIMPLE_CACHE_UMA(ENUMERATION,
     50                    "IndexFileStateOnLoad", cache_type, state, INDEX_STATE_MAX);
     51 }
     52 
     53 // Used in histograms. Please only add new values at the end.
     54 enum IndexInitMethod {
     55   INITIALIZE_METHOD_RECOVERED = 0,
     56   INITIALIZE_METHOD_LOADED = 1,
     57   INITIALIZE_METHOD_NEWCACHE = 2,
     58   INITIALIZE_METHOD_MAX = 3,
     59 };
     60 
     61 void UmaRecordIndexInitMethod(IndexInitMethod method,
     62                               net::CacheType cache_type) {
     63   SIMPLE_CACHE_UMA(ENUMERATION,
     64                    "IndexInitializeMethod", cache_type,
     65                    method, INITIALIZE_METHOD_MAX);
     66 }
     67 
     68 bool WritePickleFile(Pickle* pickle, const base::FilePath& file_name) {
     69   int bytes_written = base::WriteFile(
     70       file_name, static_cast<const char*>(pickle->data()), pickle->size());
     71   if (bytes_written != implicit_cast<int>(pickle->size())) {
     72     base::DeleteFile(file_name, /* recursive = */ false);
     73     return false;
     74   }
     75   return true;
     76 }
     77 
     78 // Called for each cache directory traversal iteration.
     79 void ProcessEntryFile(SimpleIndex::EntrySet* entries,
     80                       const base::FilePath& file_path) {
     81   static const size_t kEntryFilesLength =
     82       kEntryFilesHashLength + kEntryFilesSuffixLength;
     83   // Converting to std::string is OK since we never use UTF8 wide chars in our
     84   // file names.
     85   const base::FilePath::StringType base_name = file_path.BaseName().value();
     86   const std::string file_name(base_name.begin(), base_name.end());
     87   if (file_name.size() != kEntryFilesLength)
     88     return;
     89   const base::StringPiece hash_string(
     90       file_name.begin(), file_name.begin() + kEntryFilesHashLength);
     91   uint64 hash_key = 0;
     92   if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) {
     93     LOG(WARNING) << "Invalid entry hash key filename while restoring index from"
     94                  << " disk: " << file_name;
     95     return;
     96   }
     97 
     98   base::File::Info file_info;
     99   if (!base::GetFileInfo(file_path, &file_info)) {
    100     LOG(ERROR) << "Could not get file info for " << file_path.value();
    101     return;
    102   }
    103   base::Time last_used_time;
    104 #if defined(OS_POSIX)
    105   // For POSIX systems, a last access time is available. However, it's not
    106   // guaranteed to be more accurate than mtime. It is no worse though.
    107   last_used_time = file_info.last_accessed;
    108 #endif
    109   if (last_used_time.is_null())
    110     last_used_time = file_info.last_modified;
    111 
    112   int64 file_size = file_info.size;
    113   SimpleIndex::EntrySet::iterator it = entries->find(hash_key);
    114   if (it == entries->end()) {
    115     SimpleIndex::InsertInEntrySet(
    116         hash_key,
    117         EntryMetadata(last_used_time, file_size),
    118         entries);
    119   } else {
    120     // Summing up the total size of the entry through all the *_[0-1] files
    121     it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
    122   }
    123 }
    124 
    125 }  // namespace
    126 
    127 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
    128                                                  flush_required(false) {
    129 }
    130 
    131 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
    132 }
    133 
    134 void SimpleIndexLoadResult::Reset() {
    135   did_load = false;
    136   flush_required = false;
    137   entries.clear();
    138 }
    139 
    140 // static
    141 const char SimpleIndexFile::kIndexFileName[] = "the-real-index";
    142 // static
    143 const char SimpleIndexFile::kIndexDirectory[] = "index-dir";
    144 // static
    145 const char SimpleIndexFile::kTempIndexFileName[] = "temp-index";
    146 
    147 SimpleIndexFile::IndexMetadata::IndexMetadata()
    148     : magic_number_(kSimpleIndexMagicNumber),
    149       version_(kSimpleVersion),
    150       number_of_entries_(0),
    151       cache_size_(0) {}
    152 
    153 SimpleIndexFile::IndexMetadata::IndexMetadata(
    154     uint64 number_of_entries, uint64 cache_size)
    155     : magic_number_(kSimpleIndexMagicNumber),
    156       version_(kSimpleVersion),
    157       number_of_entries_(number_of_entries),
    158       cache_size_(cache_size) {}
    159 
    160 void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const {
    161   DCHECK(pickle);
    162   pickle->WriteUInt64(magic_number_);
    163   pickle->WriteUInt32(version_);
    164   pickle->WriteUInt64(number_of_entries_);
    165   pickle->WriteUInt64(cache_size_);
    166 }
    167 
    168 // static
    169 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified,
    170                                          Pickle* pickle) {
    171   if (!pickle->WriteInt64(cache_modified.ToInternalValue()))
    172     return false;
    173   SimpleIndexFile::PickleHeader* header_p = pickle->headerT<PickleHeader>();
    174   header_p->crc = CalculatePickleCRC(*pickle);
    175   return true;
    176 }
    177 
    178 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) {
    179   DCHECK(it);
    180   return it->ReadUInt64(&magic_number_) &&
    181       it->ReadUInt32(&version_) &&
    182       it->ReadUInt64(&number_of_entries_)&&
    183       it->ReadUInt64(&cache_size_);
    184 }
    185 
    186 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type,
    187                                       const base::FilePath& cache_directory,
    188                                       const base::FilePath& index_filename,
    189                                       const base::FilePath& temp_index_filename,
    190                                       scoped_ptr<Pickle> pickle,
    191                                       const base::TimeTicks& start_time,
    192                                       bool app_on_background) {
    193   // There is a chance that the index containing all the necessary data about
    194   // newly created entries will appear to be stale. This can happen if on-disk
    195   // part of a Create operation does not fit into the time budget for the index
    196   // flush delay. This simple approach will be reconsidered if it does not allow
    197   // for maintaining freshness.
    198   base::Time cache_dir_mtime;
    199   if (!simple_util::GetMTime(cache_directory, &cache_dir_mtime)) {
    200     LOG(ERROR) << "Could obtain information about cache age";
    201     return;
    202   }
    203   SerializeFinalData(cache_dir_mtime, pickle.get());
    204   if (!WritePickleFile(pickle.get(), temp_index_filename)) {
    205     if (!base::CreateDirectory(temp_index_filename.DirName())) {
    206       LOG(ERROR) << "Could not create a directory to hold the index file";
    207       return;
    208     }
    209     if (!WritePickleFile(pickle.get(), temp_index_filename)) {
    210       LOG(ERROR) << "Failed to write the temporary index file";
    211       return;
    212     }
    213   }
    214 
    215   // Atomically rename the temporary index file to become the real one.
    216   bool result = base::ReplaceFile(temp_index_filename, index_filename, NULL);
    217   DCHECK(result);
    218 
    219   if (app_on_background) {
    220     SIMPLE_CACHE_UMA(TIMES,
    221                      "IndexWriteToDiskTime.Background", cache_type,
    222                      (base::TimeTicks::Now() - start_time));
    223   } else {
    224     SIMPLE_CACHE_UMA(TIMES,
    225                      "IndexWriteToDiskTime.Foreground", cache_type,
    226                      (base::TimeTicks::Now() - start_time));
    227   }
    228 }
    229 
    230 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
    231   return number_of_entries_ <= kMaxEntiresInIndex &&
    232       magic_number_ == kSimpleIndexMagicNumber &&
    233       version_ == kSimpleVersion;
    234 }
    235 
    236 SimpleIndexFile::SimpleIndexFile(
    237     const scoped_refptr<base::SingleThreadTaskRunner>& cache_thread,
    238     const scoped_refptr<base::TaskRunner>& worker_pool,
    239     net::CacheType cache_type,
    240     const base::FilePath& cache_directory)
    241     : cache_thread_(cache_thread),
    242       worker_pool_(worker_pool),
    243       cache_type_(cache_type),
    244       cache_directory_(cache_directory),
    245       index_file_(cache_directory_.AppendASCII(kIndexDirectory)
    246                       .AppendASCII(kIndexFileName)),
    247       temp_index_file_(cache_directory_.AppendASCII(kIndexDirectory)
    248                            .AppendASCII(kTempIndexFileName)) {
    249 }
    250 
    251 SimpleIndexFile::~SimpleIndexFile() {}
    252 
    253 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified,
    254                                        const base::Closure& callback,
    255                                        SimpleIndexLoadResult* out_result) {
    256   base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries,
    257                                   cache_type_,
    258                                   cache_last_modified, cache_directory_,
    259                                   index_file_, out_result);
    260   worker_pool_->PostTaskAndReply(FROM_HERE, task, callback);
    261 }
    262 
    263 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set,
    264                                   uint64 cache_size,
    265                                   const base::TimeTicks& start,
    266                                   bool app_on_background) {
    267   IndexMetadata index_metadata(entry_set.size(), cache_size);
    268   scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set);
    269   cache_thread_->PostTask(FROM_HERE,
    270                           base::Bind(&SimpleIndexFile::SyncWriteToDisk,
    271                                      cache_type_,
    272                                      cache_directory_,
    273                                      index_file_,
    274                                      temp_index_file_,
    275                                      base::Passed(&pickle),
    276                                      base::TimeTicks::Now(),
    277                                      app_on_background));
    278 }
    279 
    280 // static
    281 void SimpleIndexFile::SyncLoadIndexEntries(
    282     net::CacheType cache_type,
    283     base::Time cache_last_modified,
    284     const base::FilePath& cache_directory,
    285     const base::FilePath& index_file_path,
    286     SimpleIndexLoadResult* out_result) {
    287   // Load the index and find its age.
    288   base::Time last_cache_seen_by_index;
    289   SyncLoadFromDisk(index_file_path, &last_cache_seen_by_index, out_result);
    290 
    291   // Consider the index loaded if it is fresh.
    292   const bool index_file_existed = base::PathExists(index_file_path);
    293   if (!out_result->did_load) {
    294     if (index_file_existed)
    295       UmaRecordIndexFileState(INDEX_STATE_CORRUPT, cache_type);
    296   } else {
    297     if (cache_last_modified <= last_cache_seen_by_index) {
    298       base::Time latest_dir_mtime;
    299       simple_util::GetMTime(cache_directory, &latest_dir_mtime);
    300       if (LegacyIsIndexFileStale(latest_dir_mtime, index_file_path)) {
    301         UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES,
    302                                 cache_type);
    303       } else {
    304         UmaRecordIndexFileState(INDEX_STATE_FRESH, cache_type);
    305       }
    306       UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED, cache_type);
    307       return;
    308     }
    309     UmaRecordIndexFileState(INDEX_STATE_STALE, cache_type);
    310   }
    311 
    312   // Reconstruct the index by scanning the disk for entries.
    313   const base::TimeTicks start = base::TimeTicks::Now();
    314   SyncRestoreFromDisk(cache_directory, index_file_path, out_result);
    315   SIMPLE_CACHE_UMA(MEDIUM_TIMES, "IndexRestoreTime", cache_type,
    316                    base::TimeTicks::Now() - start);
    317   SIMPLE_CACHE_UMA(COUNTS, "IndexEntriesRestored", cache_type,
    318                    out_result->entries.size());
    319   if (index_file_existed) {
    320     UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED, cache_type);
    321   } else {
    322     UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE, cache_type);
    323     SIMPLE_CACHE_UMA(COUNTS,
    324                      "IndexCreatedEntryCount", cache_type,
    325                      out_result->entries.size());
    326   }
    327 }
    328 
    329 // static
    330 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename,
    331                                        base::Time* out_last_cache_seen_by_index,
    332                                        SimpleIndexLoadResult* out_result) {
    333   out_result->Reset();
    334 
    335   base::MemoryMappedFile index_file_map;
    336   if (!index_file_map.Initialize(index_filename)) {
    337     LOG(WARNING) << "Could not map Simple Index file.";
    338     base::DeleteFile(index_filename, false);
    339     return;
    340   }
    341 
    342   SimpleIndexFile::Deserialize(
    343       reinterpret_cast<const char*>(index_file_map.data()),
    344       index_file_map.length(),
    345       out_last_cache_seen_by_index,
    346       out_result);
    347 
    348   if (!out_result->did_load)
    349     base::DeleteFile(index_filename, false);
    350 }
    351 
    352 // static
    353 scoped_ptr<Pickle> SimpleIndexFile::Serialize(
    354     const SimpleIndexFile::IndexMetadata& index_metadata,
    355     const SimpleIndex::EntrySet& entries) {
    356   scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader)));
    357 
    358   index_metadata.Serialize(pickle.get());
    359   for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
    360        it != entries.end(); ++it) {
    361     pickle->WriteUInt64(it->first);
    362     it->second.Serialize(pickle.get());
    363   }
    364   return pickle.Pass();
    365 }
    366 
    367 // static
    368 void SimpleIndexFile::Deserialize(const char* data, int data_len,
    369                                   base::Time* out_cache_last_modified,
    370                                   SimpleIndexLoadResult* out_result) {
    371   DCHECK(data);
    372 
    373   out_result->Reset();
    374   SimpleIndex::EntrySet* entries = &out_result->entries;
    375 
    376   Pickle pickle(data, data_len);
    377   if (!pickle.data()) {
    378     LOG(WARNING) << "Corrupt Simple Index File.";
    379     return;
    380   }
    381 
    382   PickleIterator pickle_it(pickle);
    383   SimpleIndexFile::PickleHeader* header_p =
    384       pickle.headerT<SimpleIndexFile::PickleHeader>();
    385   const uint32 crc_read = header_p->crc;
    386   const uint32 crc_calculated = CalculatePickleCRC(pickle);
    387 
    388   if (crc_read != crc_calculated) {
    389     LOG(WARNING) << "Invalid CRC in Simple Index file.";
    390     return;
    391   }
    392 
    393   SimpleIndexFile::IndexMetadata index_metadata;
    394   if (!index_metadata.Deserialize(&pickle_it)) {
    395     LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
    396     return;
    397   }
    398 
    399   if (!index_metadata.CheckIndexMetadata()) {
    400     LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
    401     return;
    402   }
    403 
    404 #if !defined(OS_WIN)
    405   // TODO(gavinp): Consider using std::unordered_map.
    406   entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge);
    407 #endif
    408   while (entries->size() < index_metadata.GetNumberOfEntries()) {
    409     uint64 hash_key;
    410     EntryMetadata entry_metadata;
    411     if (!pickle_it.ReadUInt64(&hash_key) ||
    412         !entry_metadata.Deserialize(&pickle_it)) {
    413       LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
    414       entries->clear();
    415       return;
    416     }
    417     SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries);
    418   }
    419 
    420   int64 cache_last_modified;
    421   if (!pickle_it.ReadInt64(&cache_last_modified)) {
    422     entries->clear();
    423     return;
    424   }
    425   DCHECK(out_cache_last_modified);
    426   *out_cache_last_modified = base::Time::FromInternalValue(cache_last_modified);
    427 
    428   out_result->did_load = true;
    429 }
    430 
    431 // static
    432 void SimpleIndexFile::SyncRestoreFromDisk(
    433     const base::FilePath& cache_directory,
    434     const base::FilePath& index_file_path,
    435     SimpleIndexLoadResult* out_result) {
    436   VLOG(1) << "Simple Cache Index is being restored from disk.";
    437   base::DeleteFile(index_file_path, /* recursive = */ false);
    438   out_result->Reset();
    439   SimpleIndex::EntrySet* entries = &out_result->entries;
    440 
    441   const bool did_succeed = TraverseCacheDirectory(
    442       cache_directory, base::Bind(&ProcessEntryFile, entries));
    443   if (!did_succeed) {
    444     LOG(ERROR) << "Could not reconstruct index from disk";
    445     return;
    446   }
    447   out_result->did_load = true;
    448   // When we restore from disk we write the merged index file to disk right
    449   // away, this might save us from having to restore again next time.
    450   out_result->flush_required = true;
    451 }
    452 
    453 // static
    454 bool SimpleIndexFile::LegacyIsIndexFileStale(
    455     base::Time cache_last_modified,
    456     const base::FilePath& index_file_path) {
    457   base::Time index_mtime;
    458   if (!simple_util::GetMTime(index_file_path, &index_mtime))
    459     return true;
    460   return index_mtime < cache_last_modified;
    461 }
    462 
    463 }  // namespace disk_cache
    464