Home | History | Annotate | Download | only in simple
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/disk_cache/simple/simple_index_file.h"
      6 
      7 #include <vector>
      8 
      9 #include "base/file_util.h"
     10 #include "base/files/memory_mapped_file.h"
     11 #include "base/hash.h"
     12 #include "base/logging.h"
     13 #include "base/metrics/histogram.h"
     14 #include "base/pickle.h"
     15 #include "base/single_thread_task_runner.h"
     16 #include "base/task_runner_util.h"
     17 #include "base/threading/thread_restrictions.h"
     18 #include "net/disk_cache/simple/simple_entry_format.h"
     19 #include "net/disk_cache/simple/simple_index.h"
     20 #include "net/disk_cache/simple/simple_synchronous_entry.h"
     21 #include "net/disk_cache/simple/simple_util.h"
     22 #include "third_party/zlib/zlib.h"
     23 
     24 namespace disk_cache {
     25 namespace {
     26 
// Number of leading characters of an entry file name that ProcessEntryFile()
// parses as the hexadecimal entry hash key.
const int kEntryFilesHashLength = 16;
// Number of characters that follow the hash in an entry file name.
// ProcessEntryFile() only accepts names whose length is the sum of both.
const int kEntryFilesSuffixLength = 2;

// Largest entry count accepted by IndexMetadata::CheckIndexMetadata().
const uint64 kMaxEntiresInIndex = 100000000;

// Names, relative to the cache directory, of the index file and of the
// temporary file it is written to before being swapped into place.
const char kIndexFileName[] = "the-real-index";
const char kTempIndexFileName[] = "temp-index";
     34 
     35 uint32 CalculatePickleCRC(const Pickle& pickle) {
     36   return crc32(crc32(0, Z_NULL, 0),
     37                reinterpret_cast<const Bytef*>(pickle.payload()),
     38                pickle.payload_size());
     39 }
     40 
     41 void DoomEntrySetReply(const net::CompletionCallback& reply_callback,
     42                        int result) {
     43   reply_callback.Run(result);
     44 }
     45 
     46 void WriteToDiskInternal(const base::FilePath& index_filename,
     47                          const base::FilePath& temp_index_filename,
     48                          scoped_ptr<Pickle> pickle,
     49                          const base::TimeTicks& start_time,
     50                          bool app_on_background) {
     51   int bytes_written = file_util::WriteFile(
     52       temp_index_filename,
     53       reinterpret_cast<const char*>(pickle->data()),
     54       pickle->size());
     55   DCHECK_EQ(bytes_written, implicit_cast<int>(pickle->size()));
     56   if (bytes_written != static_cast<int>(pickle->size())) {
     57     // TODO(felipeg): Add better error handling.
     58     LOG(ERROR) << "Could not write Simple Cache index to temporary file: "
     59                << temp_index_filename.value();
     60     base::DeleteFile(temp_index_filename, /* recursive = */ false);
     61   } else {
     62     // Swap temp and index_file.
     63     bool result = base::ReplaceFile(temp_index_filename, index_filename, NULL);
     64     DCHECK(result);
     65   }
     66   if (app_on_background) {
     67     UMA_HISTOGRAM_TIMES("SimpleCache.IndexWriteToDiskTime.Background",
     68                         (base::TimeTicks::Now() - start_time));
     69   } else {
     70     UMA_HISTOGRAM_TIMES("SimpleCache.IndexWriteToDiskTime.Foreground",
     71                         (base::TimeTicks::Now() - start_time));
     72   }
     73 }
     74 
     75 // Called for each cache directory traversal iteration.
     76 void ProcessEntryFile(SimpleIndex::EntrySet* entries,
     77                       const base::FilePath& file_path) {
     78   static const size_t kEntryFilesLength =
     79       kEntryFilesHashLength + kEntryFilesSuffixLength;
     80   // Converting to std::string is OK since we never use UTF8 wide chars in our
     81   // file names.
     82   const base::FilePath::StringType base_name = file_path.BaseName().value();
     83   const std::string file_name(base_name.begin(), base_name.end());
     84   if (file_name.size() != kEntryFilesLength)
     85     return;
     86   const base::StringPiece hash_string(
     87       file_name.begin(), file_name.begin() + kEntryFilesHashLength);
     88   uint64 hash_key = 0;
     89   if (!simple_util::GetEntryHashKeyFromHexString(hash_string, &hash_key)) {
     90     LOG(WARNING) << "Invalid entry hash key filename while restoring index from"
     91                  << " disk: " << file_name;
     92     return;
     93   }
     94 
     95   base::PlatformFileInfo file_info;
     96   if (!file_util::GetFileInfo(file_path, &file_info)) {
     97     LOG(ERROR) << "Could not get file info for " << file_path.value();
     98     return;
     99   }
    100   base::Time last_used_time;
    101 #if defined(OS_POSIX)
    102   // For POSIX systems, a last access time is available. However, it's not
    103   // guaranteed to be more accurate than mtime. It is no worse though.
    104   last_used_time = file_info.last_accessed;
    105 #endif
    106   if (last_used_time.is_null())
    107     last_used_time = file_info.last_modified;
    108 
    109   int64 file_size = file_info.size;
    110   SimpleIndex::EntrySet::iterator it = entries->find(hash_key);
    111   if (it == entries->end()) {
    112     SimpleIndex::InsertInEntrySet(
    113         hash_key,
    114         EntryMetadata(last_used_time, file_size),
    115         entries);
    116   } else {
    117     // Summing up the total size of the entry through all the *_[0-2] files
    118     it->second.SetEntrySize(it->second.GetEntrySize() + file_size);
    119   }
    120 }
    121 
    122 }  // namespace
    123 
    124 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
    125                                                  flush_required(false) {
    126 }
    127 
    128 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
    129 }
    130 
    131 void SimpleIndexLoadResult::Reset() {
    132   did_load = false;
    133   flush_required = false;
    134   entries.clear();
    135 }
    136 
    137 SimpleIndexFile::IndexMetadata::IndexMetadata() :
    138     magic_number_(kSimpleIndexMagicNumber),
    139     version_(kSimpleVersion),
    140     number_of_entries_(0),
    141     cache_size_(0) {}
    142 
    143 SimpleIndexFile::IndexMetadata::IndexMetadata(
    144     uint64 number_of_entries, uint64 cache_size) :
    145     magic_number_(kSimpleIndexMagicNumber),
    146     version_(kSimpleVersion),
    147     number_of_entries_(number_of_entries),
    148     cache_size_(cache_size) {}
    149 
    150 void SimpleIndexFile::IndexMetadata::Serialize(Pickle* pickle) const {
    151   DCHECK(pickle);
    152   pickle->WriteUInt64(magic_number_);
    153   pickle->WriteUInt32(version_);
    154   pickle->WriteUInt64(number_of_entries_);
    155   pickle->WriteUInt64(cache_size_);
    156 }
    157 
    158 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator* it) {
    159   DCHECK(it);
    160   return it->ReadUInt64(&magic_number_) &&
    161       it->ReadUInt32(&version_) &&
    162       it->ReadUInt64(&number_of_entries_)&&
    163       it->ReadUInt64(&cache_size_);
    164 }
    165 
    166 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
    167   return number_of_entries_ <= kMaxEntiresInIndex &&
    168       magic_number_ == disk_cache::kSimpleIndexMagicNumber &&
    169       version_ == disk_cache::kSimpleVersion;
    170 }
    171 
    172 SimpleIndexFile::SimpleIndexFile(
    173     base::SingleThreadTaskRunner* cache_thread,
    174     base::TaskRunner* worker_pool,
    175     const base::FilePath& cache_directory)
    176     : cache_thread_(cache_thread),
    177       worker_pool_(worker_pool),
    178       cache_directory_(cache_directory),
    179       index_file_(cache_directory_.AppendASCII(kIndexFileName)),
    180       temp_index_file_(cache_directory_.AppendASCII(kTempIndexFileName)) {
    181 }
    182 
    183 SimpleIndexFile::~SimpleIndexFile() {}
    184 
    185 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified,
    186                                        const base::Closure& callback,
    187                                        SimpleIndexLoadResult* out_result) {
    188   base::Closure task = base::Bind(&SimpleIndexFile::SyncLoadIndexEntries,
    189                                   cache_last_modified, cache_directory_,
    190                                   index_file_, out_result);
    191   worker_pool_->PostTaskAndReply(FROM_HERE, task, callback);
    192 }
    193 
    194 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet& entry_set,
    195                                   uint64 cache_size,
    196                                   const base::TimeTicks& start,
    197                                   bool app_on_background) {
    198   IndexMetadata index_metadata(entry_set.size(), cache_size);
    199   scoped_ptr<Pickle> pickle = Serialize(index_metadata, entry_set);
    200   cache_thread_->PostTask(FROM_HERE, base::Bind(
    201       &WriteToDiskInternal,
    202       index_file_,
    203       temp_index_file_,
    204       base::Passed(&pickle),
    205       base::TimeTicks::Now(),
    206       app_on_background));
    207 }
    208 
    209 void SimpleIndexFile::DoomEntrySet(
    210     scoped_ptr<std::vector<uint64> > entry_hashes,
    211     const net::CompletionCallback& reply_callback) {
    212   PostTaskAndReplyWithResult(
    213       worker_pool_,
    214       FROM_HERE,
    215       base::Bind(&SimpleSynchronousEntry::DoomEntrySet,
    216                  base::Passed(entry_hashes.Pass()), cache_directory_),
    217       base::Bind(&DoomEntrySetReply, reply_callback));
    218 }
    219 
    220 // static
    221 void SimpleIndexFile::SyncLoadIndexEntries(
    222     base::Time cache_last_modified,
    223     const base::FilePath& cache_directory,
    224     const base::FilePath& index_file_path,
    225     SimpleIndexLoadResult* out_result) {
    226   // TODO(felipeg): probably could load a stale index and use it for something.
    227   const SimpleIndex::EntrySet& entries = out_result->entries;
    228 
    229   const bool index_file_exists = base::PathExists(index_file_path);
    230 
    231   // Used in histograms. Please only add new values at the end.
    232   enum {
    233     INDEX_STATE_CORRUPT = 0,
    234     INDEX_STATE_STALE = 1,
    235     INDEX_STATE_FRESH = 2,
    236     INDEX_STATE_FRESH_CONCURRENT_UPDATES = 3,
    237     INDEX_STATE_MAX = 4,
    238   } index_file_state;
    239 
    240   // Only load if the index is not stale.
    241   if (IsIndexFileStale(cache_last_modified, index_file_path)) {
    242     index_file_state = INDEX_STATE_STALE;
    243   } else {
    244     index_file_state = INDEX_STATE_FRESH;
    245     base::Time latest_dir_mtime;
    246     if (simple_util::GetMTime(cache_directory, &latest_dir_mtime) &&
    247         IsIndexFileStale(latest_dir_mtime, index_file_path)) {
    248       // A file operation has updated the directory since we last looked at it
    249       // during backend initialization.
    250       index_file_state = INDEX_STATE_FRESH_CONCURRENT_UPDATES;
    251     }
    252 
    253     const base::TimeTicks start = base::TimeTicks::Now();
    254     SyncLoadFromDisk(index_file_path, out_result);
    255     UMA_HISTOGRAM_TIMES("SimpleCache.IndexLoadTime",
    256                         base::TimeTicks::Now() - start);
    257     UMA_HISTOGRAM_COUNTS("SimpleCache.IndexEntriesLoaded",
    258                          out_result->did_load ? entries.size() : 0);
    259     if (!out_result->did_load)
    260       index_file_state = INDEX_STATE_CORRUPT;
    261   }
    262   UMA_HISTOGRAM_ENUMERATION("SimpleCache.IndexFileStateOnLoad",
    263                             index_file_state,
    264                             INDEX_STATE_MAX);
    265 
    266   if (!out_result->did_load) {
    267     const base::TimeTicks start = base::TimeTicks::Now();
    268     SyncRestoreFromDisk(cache_directory, index_file_path, out_result);
    269     UMA_HISTOGRAM_MEDIUM_TIMES("SimpleCache.IndexRestoreTime",
    270                         base::TimeTicks::Now() - start);
    271     UMA_HISTOGRAM_COUNTS("SimpleCache.IndexEntriesRestored",
    272                          entries.size());
    273   }
    274 
    275   // Used in histograms. Please only add new values at the end.
    276   enum {
    277     INITIALIZE_METHOD_RECOVERED = 0,
    278     INITIALIZE_METHOD_LOADED = 1,
    279     INITIALIZE_METHOD_NEWCACHE = 2,
    280     INITIALIZE_METHOD_MAX = 3,
    281   };
    282   int initialize_method;
    283   if (index_file_exists) {
    284     if (out_result->flush_required)
    285       initialize_method = INITIALIZE_METHOD_RECOVERED;
    286     else
    287       initialize_method = INITIALIZE_METHOD_LOADED;
    288   } else {
    289     UMA_HISTOGRAM_COUNTS("SimpleCache.IndexCreatedEntryCount",
    290                          entries.size());
    291     initialize_method = INITIALIZE_METHOD_NEWCACHE;
    292   }
    293 
    294   UMA_HISTOGRAM_ENUMERATION("SimpleCache.IndexInitializeMethod",
    295                             initialize_method, INITIALIZE_METHOD_MAX);
    296 }
    297 
    298 // static
    299 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath& index_filename,
    300                                        SimpleIndexLoadResult* out_result) {
    301   out_result->Reset();
    302 
    303   base::MemoryMappedFile index_file_map;
    304   if (!index_file_map.Initialize(index_filename)) {
    305     LOG(WARNING) << "Could not map Simple Index file.";
    306     base::DeleteFile(index_filename, false);
    307     return;
    308   }
    309 
    310   SimpleIndexFile::Deserialize(
    311       reinterpret_cast<const char*>(index_file_map.data()),
    312       index_file_map.length(), out_result);
    313 
    314   if (!out_result->did_load)
    315     base::DeleteFile(index_filename, false);
    316 }
    317 
    318 // static
    319 scoped_ptr<Pickle> SimpleIndexFile::Serialize(
    320     const SimpleIndexFile::IndexMetadata& index_metadata,
    321     const SimpleIndex::EntrySet& entries) {
    322   scoped_ptr<Pickle> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader)));
    323 
    324   index_metadata.Serialize(pickle.get());
    325   for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
    326        it != entries.end(); ++it) {
    327     pickle->WriteUInt64(it->first);
    328     it->second.Serialize(pickle.get());
    329   }
    330   SimpleIndexFile::PickleHeader* header_p =
    331       pickle->headerT<SimpleIndexFile::PickleHeader>();
    332   header_p->crc = CalculatePickleCRC(*pickle);
    333   return pickle.Pass();
    334 }
    335 
    336 // static
    337 void SimpleIndexFile::Deserialize(const char* data, int data_len,
    338                                   SimpleIndexLoadResult* out_result) {
    339   DCHECK(data);
    340 
    341   out_result->Reset();
    342   SimpleIndex::EntrySet* entries = &out_result->entries;
    343 
    344   Pickle pickle(data, data_len);
    345   if (!pickle.data()) {
    346     LOG(WARNING) << "Corrupt Simple Index File.";
    347     return;
    348   }
    349 
    350   PickleIterator pickle_it(pickle);
    351 
    352   SimpleIndexFile::PickleHeader* header_p =
    353       pickle.headerT<SimpleIndexFile::PickleHeader>();
    354   const uint32 crc_read = header_p->crc;
    355   const uint32 crc_calculated = CalculatePickleCRC(pickle);
    356 
    357   if (crc_read != crc_calculated) {
    358     LOG(WARNING) << "Invalid CRC in Simple Index file.";
    359     return;
    360   }
    361 
    362   SimpleIndexFile::IndexMetadata index_metadata;
    363   if (!index_metadata.Deserialize(&pickle_it)) {
    364     LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
    365     return;
    366   }
    367 
    368   if (!index_metadata.CheckIndexMetadata()) {
    369     LOG(ERROR) << "Invalid index_metadata on Simple Cache Index.";
    370     return;
    371   }
    372 
    373 #if !defined(OS_WIN)
    374   // TODO(gavinp): Consider using std::unordered_map.
    375   entries->resize(index_metadata.GetNumberOfEntries() + kExtraSizeForMerge);
    376 #endif
    377   while (entries->size() < index_metadata.GetNumberOfEntries()) {
    378     uint64 hash_key;
    379     EntryMetadata entry_metadata;
    380     if (!pickle_it.ReadUInt64(&hash_key) ||
    381         !entry_metadata.Deserialize(&pickle_it)) {
    382       LOG(WARNING) << "Invalid EntryMetadata in Simple Index file.";
    383       entries->clear();
    384       return;
    385     }
    386     SimpleIndex::InsertInEntrySet(hash_key, entry_metadata, entries);
    387   }
    388 
    389   out_result->did_load = true;
    390 }
    391 
    392 // static
    393 void SimpleIndexFile::SyncRestoreFromDisk(
    394     const base::FilePath& cache_directory,
    395     const base::FilePath& index_file_path,
    396     SimpleIndexLoadResult* out_result) {
    397   LOG(INFO) << "Simple Cache Index is being restored from disk.";
    398   base::DeleteFile(index_file_path, /* recursive = */ false);
    399   out_result->Reset();
    400   SimpleIndex::EntrySet* entries = &out_result->entries;
    401 
    402   // TODO(felipeg,gavinp): Fix this once we have a one-file per entry format.
    403   COMPILE_ASSERT(kSimpleEntryFileCount == 3,
    404                  file_pattern_must_match_file_count);
    405 
    406   const bool did_succeed = TraverseCacheDirectory(
    407       cache_directory, base::Bind(&ProcessEntryFile, entries));
    408   if (!did_succeed) {
    409     LOG(ERROR) << "Could not reconstruct index from disk";
    410     return;
    411   }
    412   out_result->did_load = true;
    413   // When we restore from disk we write the merged index file to disk right
    414   // away, this might save us from having to restore again next time.
    415   out_result->flush_required = true;
    416 }
    417 
    418 // static
    419 bool SimpleIndexFile::IsIndexFileStale(base::Time cache_last_modified,
    420                                        const base::FilePath& index_file_path) {
    421   base::Time index_mtime;
    422   if (!simple_util::GetMTime(index_file_path, &index_mtime))
    423     return true;
    424   return index_mtime < cache_last_modified;
    425 }
    426 
    427 }  // namespace disk_cache
    428