Home | History | Annotate | Download | only in blockfile
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // The cache is stored on disk as a collection of block-files, plus an index
      6 // file plus a collection of external files.
      7 //
      8 // Any data blob bigger than kMaxBlockSize (disk_cache/addr.h) will be stored in
      9 // a separate file named f_xxx where xxx is a hexadecimal number. Shorter data
     10 // will be stored as a series of blocks on a block-file. In any case, CacheAddr
     11 // represents the address of the data inside the cache.
     12 //
     13 // The index file is just a simple hash table that maps a particular entry to
     14 // a CacheAddr value. Linking for a given hash bucket is handled internally
     15 // by the cache entry.
     16 //
     17 // The last element of the cache is the block-file. A block file is a file
     18 // designed to store blocks of data of a given size. For more details see
     19 // net/disk_cache/blockfile/disk_format_base.h
     20 //
     21 // A new cache is initialized with four block files (named data_0 through
     22 // data_3), each one dedicated to store blocks of a given size. The number at
     23 // the end of the file name is the block file number (in decimal).
     24 //
     25 // There are two "special" types of blocks: an entry and a rankings node. An
     26 // entry keeps track of all the information related to the same cache entry,
     27 // such as the key, hash value, data pointers etc. A rankings node keeps track
     28 // of the information that is updated frequently for a given entry, such as its
     29 // location on the LRU lists, last access time etc.
     30 //
     31 // The files that store internal information for the cache (blocks and index)
     32 // are at least partially memory mapped. They have a location that is signaled
     33 // every time the internal structures are modified, so it is possible to detect
     34 // (most of the time) when the process dies in the middle of an update.
     35 //
     36 // To prevent dirty data from being used as valid (after a crash), every
     37 // cache entry has a dirty identifier. Each running instance of the cache keeps
     38 // a separate identifier (maintained in the "this_id" header field) that is used
     39 // to mark every entry that is created or modified. When the entry is closed,
     40 // and all the data can be trusted, the dirty flag is cleared from the entry.
     41 // When the cache encounters an entry whose identifier is different than the one
     42 // being currently used, it means that the entry was not properly closed on a
     43 // previous run, so it is discarded.
     44 
     45 #ifndef NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_H_
     46 #define NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_H_
     47 
     48 #include "base/basictypes.h"
     49 #include "net/base/net_export.h"
     50 #include "net/disk_cache/blockfile/disk_format_base.h"
     51 
     52 namespace disk_cache {
     53 
     54 const int kIndexTablesize = 0x10000;  // Default number of slots in Index::table (65536).
     55 const uint32 kIndexMagic = 0xC103CAC3;  // NOTE(review): presumably the expected IndexHeader::magic; confirm.
     56 const uint32 kCurrentVersion = 0x20000;  // Version 2.0.
     57 
     58 struct LruData {
          // Eviction control data; embedded in IndexHeader as its |lru| member.
          // NOTE(review): sizes/heads/tails all have five elements, presumably one
          // slot per LRU list -- confirm against the eviction/rankings code.
     59   int32     pad1[2];         // Padding; no meaning is assigned to it here.
     60   int32     filled;          // Flag to tell when we filled the cache.
     61   int32     sizes[5];        // NOTE(review): presumably per-list sizes; confirm.
     62   CacheAddr heads[5];        // NOTE(review): presumably first node of each list; confirm.
     63   CacheAddr tails[5];        // NOTE(review): presumably last node of each list; confirm.
     64   CacheAddr transaction;     // In-flight operation target.
     65   int32     operation;       // Actual in-flight operation.
     66   int32     operation_list;  // In-flight operation list.
     67   int32     pad2[7];         // Padding; no meaning is assigned to it here.
     68 };
     69 
     70 // Header for the master index file.
        // It is the first member of Index, so it sits in front of the hash table.
        // The fixed fields are followed by padding and then the eviction data.
     71 struct NET_EXPORT_PRIVATE IndexHeader {
     72   IndexHeader();             // Declared only; the definition is not in this header.
     73 
     74   uint32      magic;         // NOTE(review): presumably kIndexMagic; confirm.
     75   uint32      version;       // NOTE(review): presumably kCurrentVersion; confirm.
     76   int32       num_entries;   // Number of entries currently stored.
     77   int32       num_bytes;     // Total size of the stored data.
     78   int32       last_file;     // Last external file created.
     79   int32       this_id;       // Id for all entries being changed (dirty flag).
     80   CacheAddr   stats;         // Storage for usage data.
     81   int32       table_len;     // Actual size of the table (0 == kIndexTablesize).
     82   int32       crash;         // Signals a previous crash.
     83   int32       experiment;    // Id of an ongoing test.
     84   uint64      create_time;   // Creation time for this set of files.
     85   int32       pad[52];       // NOTE(review): if CacheAddr is 32 bits this pads the
                                     // fields above to 256 bytes; confirm.
     86   LruData     lru;           // Eviction control data.
     87 };
     88 
     89 // The structure of the whole index file.
        // The header comes first, followed by the hash table that maps an entry to
        // its CacheAddr.
     90 struct Index {
     91   IndexHeader header;
     92   CacheAddr   table[kIndexTablesize];  // Default size. Actual size controlled
     93                                        // by header.table_len.
     94 };
     95 
     96 // Main structure for an entry on the backing storage. If the key is longer than
     97 // what can be stored on this structure, it will be extended on consecutive
     98 // blocks (adding 256 bytes each time), up to 4 blocks (4 * 256 - 96 - 1 = 927
     99 // chars, see kMaxInternalKeyLength). After that point, the whole key will be
    100 // stored as a data block or external file.
    101 struct EntryStore {
    102   uint32      hash;               // Full hash of the key.
    103   CacheAddr   next;               // Next entry with the same hash or bucket.
    104   CacheAddr   rankings_node;      // Rankings node for this entry.
    105   int32       reuse_count;        // How often is this entry used.
    106   int32       refetch_count;      // How often is this fetched from the net.
    107   int32       state;              // Current state (NOTE(review): presumably an
                                          // EntryState value; confirm).
    108   uint64      creation_time;      // NOTE(review): time units are not shown here.
    109   int32       key_len;            // NOTE(review): presumably the key length
                                          // without the terminating null; confirm.
    110   CacheAddr   long_key;           // Optional address of a long key.
    111   int32       data_size[4];       // We can store up to 4 data streams for each
    112   CacheAddr   data_addr[4];       // entry.
    113   uint32      flags;              // Any combination of EntryFlags.
    114   int32       pad[4];             // Padding; no meaning is assigned to it here.
    115   uint32      self_hash;          // The hash of EntryStore up to this point.
    116   char        key[256 - 24 * 4];  // null terminated; 24 * 4 = 96 bytes of fields
                                          // precede it, so the struct totals 256 bytes.
    117 };
    118 
    119 COMPILE_ASSERT(sizeof(EntryStore) == 256, bad_EntyStore);  // One entry == one 256-byte block.
        // Longest key that can live inside the entry itself: four consecutive
        // EntryStore-sized blocks, minus the fields that precede |key|, minus the
        // terminating null.
    120 const int kMaxInternalKeyLength = 4 * sizeof(EntryStore) -
    121                                   offsetof(EntryStore, key) - 1;
    122 
    123 // Possible states for a given entry.
        // NOTE(review): presumably the values stored in EntryStore::state; confirm.
    124 enum EntryState {
    125   ENTRY_NORMAL = 0, // Explicitly 0; the enumerators below follow as 1 and 2.
    126   ENTRY_EVICTED,    // The entry was recently evicted from the cache.
    127   ENTRY_DOOMED      // The entry was doomed.
    128 };
    129 
    130 // Flags that can be applied to an entry.
        // Each value is a distinct bit, so they can be combined in EntryStore::flags.
    131 enum EntryFlags {
    132   PARENT_ENTRY = 1,         // This entry has children (sparse) entries.
    133   CHILD_ENTRY = 1 << 1      // Child entry that stores sparse data.
    134 };
    135 
    135 #pragma pack(push, 4)
    136 // Rankings information for a given entry.
        // Packed to 4-byte alignment: the members add up to 2 * 8 + 5 * 4 = 36 bytes
        // and the assert below requires exactly that size. With natural 8-byte
        // alignment of the uint64 members a compiler could add tail padding.
    137 struct RankingsNode {
    138   uint64      last_used;        // LRU info.
    139   uint64      last_modified;    // LRU info.
    140   CacheAddr   next;             // LRU list.
    141   CacheAddr   prev;             // LRU list.
    142   CacheAddr   contents;         // Address of the EntryStore.
    143   int32       dirty;            // The entry is being modified.
    144   uint32      self_hash;        // RankingsNode's hash.
    145 };
    146 #pragma pack(pop)
    147 
    148 COMPILE_ASSERT(sizeof(RankingsNode) == 36, bad_RankingsNode);
    150 
    151 }  // namespace disk_cache
    152 
    153 #endif  // NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_H_
    154