Home | History | Annotate | Download | only in disk_cache
      1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // The cache is stored on disk as a collection of block-files, plus an index
      6 // file plus a collection of external files.
      7 //
      8 // Any data blob bigger than kMaxBlockSize (net/addr.h) will be stored on a
      9 // separate file named f_xxx where x is a hexadecimal number. Shorter data will
     10 // be stored as a series of blocks on a block-file. In any case, CacheAddr
     11 // represents the address of the data inside the cache.
     12 //
     13 // The index file is just a simple hash table that maps a particular entry to
     14 // a CacheAddr value. Linking for a given hash bucket is handled internally
     15 // by the cache entry.
     16 //
     17 // The last element of the cache is the block-file. A block file is a file
     18 // designed to store blocks of data of a given size. It is able to store data
     19 // that spans from one to four consecutive "blocks", and it grows as needed to
     20 // store up to approximately 65000 blocks. It has a fixed size header used for
     21 // bookkeeping such as tracking free blocks on the file. For example, a
     22 // block-file for 1KB blocks will grow from 8KB when totally empty to about 64MB
     23 // when completely full. At that point, data blocks of 1KB will be stored on a
     24 // second block file that will store the next set of 65000 blocks. The first
     25 // file contains the number of the second file, and the second file contains the
     26 // number of a third file, created when the second file reaches its limit. It is
     27 // important to remember that no matter how long the chain of files is, any
     28 // given block can be located directly by its address, which contains the file
     29 // number and starting block inside the file.
     30 //
     31 // A new cache is initialized with four block files (named data_0 through
     32 // data_3), each one dedicated to store blocks of a given size. The number at
     33 // the end of the file name is the block file number (in decimal).
     34 //
     35 // There are two "special" types of blocks: an entry and a rankings node. An
     36 // entry keeps track of all the information related to the same cache entry,
     37 // such as the key, hash value, data pointers etc. A rankings node keeps track
     38 // of the information that is updated frequently for a given entry, such as its
     39 // location on the LRU lists, last access time etc.
     40 //
     41 // The files that store internal information for the cache (blocks and index)
     42 // are at least partially memory mapped. They have a location that is signaled
     43 // every time the internal structures are modified, so it is possible to detect
     44 // (most of the time) when the process dies in the middle of an update.
     45 //
     46 // In order to keep dirty data from being used as valid (after a crash), every
     47 // cache entry has a dirty identifier. Each running instance of the cache keeps
     48 // a separate identifier (maintained on the "this_id" header field) that is used
     49 // to mark every entry that is created or modified. When the entry is closed,
     50 // and all the data can be trusted, the dirty flag is cleared from the entry.
     51 // When the cache encounters an entry whose identifier is different than the one
     52 // being currently used, it means that the entry was not properly closed on a
     53 // previous run, so it is discarded.
     54 
     55 #ifndef NET_DISK_CACHE_DISK_FORMAT_H_
     56 #define NET_DISK_CACHE_DISK_FORMAT_H_
     57 
     58 #include "base/basictypes.h"
     59 
     60 namespace disk_cache {
     61 
     62 typedef uint32 CacheAddr;  // Address of a piece of data inside the cache.
     63 
     64 const int kIndexTablesize = 0x10000;  // Default number of slots in Index::table.
     65 const uint32 kIndexMagic = 0xC103CAC3;  // Signature written to IndexHeader::magic.
     66 const uint32 kCurrentVersion = 0x20000;  // Version 2.0.
     67 
     68 struct LruData {  // Eviction control data; stored as IndexHeader::lru.
     69   int32     pad1[2];
     70   int32     filled;          // Flag to tell when we filled the cache.
     71   int32     sizes[5];        // NOTE(review): presumably one slot per LRU
     72   CacheAddr heads[5];        // list (size, first and last node) for these
     73   CacheAddr tails[5];        // three arrays -- confirm against the users.
     74   CacheAddr transaction;     // In-flight operation target.
     75   int32     operation;       // Actual in-flight operation.
     76   int32     operation_list;  // In-flight operation list.
     77   int32     pad2[7];         // Pads the struct to 28 32-bit words.
     78 };
     79 
     80 // Header for the master index file.
     81 struct IndexHeader {
     82   uint32      magic;         // Set to kIndexMagic by the constructor.
     83   uint32      version;       // Set to kCurrentVersion by the constructor.
     84   int32       num_entries;   // Number of entries currently stored.
     85   int32       num_bytes;     // Total size of the stored data.
     86   int32       last_file;     // Last external file created.
     87   int32       this_id;       // Id for all entries being changed (dirty flag).
     88   CacheAddr   stats;         // Storage for usage data.
     89   int32       table_len;     // Actual size of the table (0 == kIndexTablesize).
     90   int32       crash;         // Signals a previous crash.
     91   int32       experiment;    // Id of an ongoing test.
     92   uint64      create_time;   // Creation time for this set of files.
     93   int32       pad[52];       // Fields up to and including pad take 64 words.
     94   LruData     lru;           // Eviction control data.
     95   IndexHeader() {  // Zeroes every field, then stamps magic and version.
     96     memset(this, 0, sizeof(*this));  // NOTE(review): no <string.h> include here.
     97     magic = kIndexMagic;
     98     version = kCurrentVersion;
     99   };
    100 };
    101 
    102 // The structure of the whole index file: the header, then the hash table.
    103 struct Index {
    104   IndexHeader header;
    105   CacheAddr   table[kIndexTablesize];  // Default size. Actual size controlled
    106                                        // by header.table_len (0 == default).
    107 };
    108 
    109 // Main structure for an entry on the backing storage. If the key is longer than
    110 // what can be stored on this structure, it will be extended on consecutive
    111 // blocks (adding 256 bytes each time), up to 4 blocks (1024 - 96 - 1 chars).
    112 // After that point, the whole key will be stored as a data block or external
    113 // file.
    114 struct EntryStore {
    115   uint32      hash;               // Full hash of the key.
    116   CacheAddr   next;               // Next entry with the same hash or bucket.
    117   CacheAddr   rankings_node;      // Rankings node for this entry.
    118   int32       reuse_count;        // How often is this entry used.
    119   int32       refetch_count;      // How often is this fetched from the net.
    120   int32       state;              // Current state.
    121   uint64      creation_time;
    122   int32       key_len;
    123   CacheAddr   long_key;           // Optional address of a long key.
    124   int32       data_size[4];       // We can store up to 4 data streams for each
    125   CacheAddr   data_addr[4];       // entry.
    126   uint32      flags;              // Any combination of EntryFlags.
    127   int32       pad[5];
    128   char        key[256 - 24 * 4];  // Null terminated; 24 words precede the key.
    129 };
    130 
    131 COMPILE_ASSERT(sizeof(EntryStore) == 256, bad_EntyStore);  // NOTE(review): "Enty" typo.
    132 const int kMaxInternalKeyLength = 4 * sizeof(EntryStore) -
    133                                   offsetof(EntryStore, key) - 1;  // 4 blocks, minus fields and terminator.
    134 
    135 // Possible states for a given entry.
    136 enum EntryState {  // Presumably the values of EntryStore::state -- confirm.
    137   ENTRY_NORMAL = 0,
    138   ENTRY_EVICTED,    // The entry was recently evicted from the cache.
    139   ENTRY_DOOMED      // The entry was doomed.
    140 };
    141 
    142 // Flags that can be applied to an entry.
    143 enum EntryFlags {  // Bit values; combined in EntryStore::flags.
    144   PARENT_ENTRY = 1,         // This entry has children (sparse) entries.
    145   CHILD_ENTRY = 1 << 1      // Child entry that stores sparse data.
    146 };
    147 
    148 #pragma pack(push, 4)  // 4-byte packing: no padding after the last field.
    149 // Rankings information for a given entry.
    150 struct RankingsNode {
    151   uint64      last_used;        // LRU info.
    152   uint64      last_modified;    // LRU info.
    153   CacheAddr   next;             // LRU list.
    154   CacheAddr   prev;             // LRU list.
    155   CacheAddr   contents;         // Address of the EntryStore.
    156   int32       dirty;            // The entry is being modified.
    157   int32       dummy;            // Old files may have a pointer here.
    158 };
    159 #pragma pack(pop)
    160 
    161 COMPILE_ASSERT(sizeof(RankingsNode) == 36, bad_RankingsNode);  // 2 * 8 + 5 * 4 bytes.
    162 
    163 const uint32 kBlockMagic = 0xC104CAC3;  // Signature written to BlockFileHeader::magic.
    164 const int kBlockHeaderSize = 8192;  // Two pages: almost 64k entries
    165 const int kMaxBlocks = (kBlockHeaderSize - 80) * 8;  // 80 header bytes precede the bitmap.
    166 
    167 // Bitmap to track used blocks on a block-file (one bit per block).
    168 typedef uint32 AllocBitmap[kMaxBlocks / 32];
    169 
    170 // A block-file is the file used to store information in blocks (could be
    171 // EntryStore blocks, RankingsNode blocks or user-data blocks).
    172 // We store entries that can expand for up to 4 consecutive blocks, and keep
    173 // counters of the number of blocks available for each type of entry. For
    174 // instance, an entry of 3 blocks is an entry of type 3. We also keep track of
    175 // where we found the last entry of that type (to avoid searching the bitmap
    176 // from the beginning every time).
    177 // This structure is the header of a block-file:
    178 struct BlockFileHeader {
    179   uint32          magic;        // Set to kBlockMagic by the constructor.
    180   uint32          version;      // Set to kCurrentVersion by the constructor.
    181   int16           this_file;    // Index of this file.
    182   int16           next_file;    // Next file when this one is full.
    183   int32           entry_size;   // Size of the blocks of this file.
    184   int32           num_entries;  // Number of stored entries.
    185   int32           max_entries;  // Current maximum number of entries.
    186   int32           empty[4];     // Counters of empty entries for each type.
    187   int32           hints[4];     // Last used position for each entry type.
    188   volatile int32  updating;     // Keep track of updates to the header.
    189   int32           user[5];
    190   AllocBitmap     allocation_map;
    191   BlockFileHeader() {  // Zeroes every field, then stamps magic and version.
    192     memset(this, 0, sizeof(BlockFileHeader));
    193     magic = kBlockMagic;
    194     version = kCurrentVersion;
    195   };
    196 };
    197 
    198 COMPILE_ASSERT(sizeof(BlockFileHeader) == kBlockHeaderSize, bad_header);  // 80 bytes + bitmap.
    199 
    200 // Sparse data support:
    201 // We keep a two level hierarchy to enable sparse data for an entry: the first
    202 // level consists of using separate "child" entries to store ranges of 1 MB,
    203 // and the second level stores blocks of 1 KB inside each child entry.
    204 //
    205 // Whenever we need to access a particular sparse offset, we first locate the
    206 // child entry that stores that offset, so we discard the 20 least significant
    207 // bits of the offset, and end up with the child id. For instance, the child id
    208 // to store the first megabyte is 0, and the child that should store offset
    209 // 0x410000 has an id of 4.
    210 //
    211 // The child entry is stored the same way as any other entry, so it also has a
    212 // name (key). The key includes a signature to be able to identify children
    213 // created for different generations of the same resource. In other words, given
    214 // that a given sparse entry can have a large number of child entries, and the
    215 // resource can be invalidated and replaced with a new version at any time, it
    216 // is important to be sure that a given child actually belongs to certain entry.
    217 //
    218 // The full name of a child entry is composed with a prefix ("Range_"), and two
    219 // hexadecimal 64-bit numbers at the end, separated by colons. The first
    220 // number is the signature of the parent key, and the second number is the child
    221 // id as described previously. The signature itself is also stored internally by
    222 // the child and the parent entries. For example, a sparse entry with a key of
    223 // "sparse entry name", and a signature of 0x052AF76, may have a child entry
    224 // named "Range_sparse entry name:052af76:4", which stores data in the range
    225 // 0x400000 to 0x4FFFFF.
    226 //
    227 // Each child entry keeps track of all the 1 KB blocks that have been written
    228 // to the entry, but being a regular entry, it will happily return zeros for any
    229 // read that spans data not written before. The actual sparse data is stored in
    230 // one of the data streams of the child entry (at index 1), while the control
    231 // information is stored in another stream (at index 2), both by parents and
    232 // the children.
    233 
    234 // This structure contains the control information for parent and child entries.
    235 // It is stored at offset 0 of the data stream with index 2.
    236 // It is possible to write to a child entry in a way that causes the last block
    237 // to be only partially filled. In that case, last_block and last_block_len will
    238 // keep track of that block.
    239 struct SparseHeader {
    240   int64 signature;          // The parent and children signature.
    241   uint32 magic;             // Structure identifier (equal to kIndexMagic).
    242   int32 parent_key_len;     // Key length for the parent entry.
    243   int32 last_block;         // Index of the last written block.
    244   int32 last_block_len;     // Length of the last written block.
    245   int32 dummy[10];
    246 };
    247 
    248 // The SparseHeader will be followed by a bitmap, as described by this
    249 // structure.
    250 struct SparseData {
    251   SparseHeader header;
    252   uint32 bitmap[32];        // Bitmap representation of known children (if this
    253                             // is a parent entry), or used blocks (for child
    254                             // entries). The size is fixed for child entries but
    255                             // not for parents; it can be as small as 4 bytes
    256                             // and as large as 8 KB.
    257 };
    258 
    259 // The number of blocks stored by a child entry.
    260 const int kNumSparseBits = 1024;
    261 COMPILE_ASSERT(sizeof(SparseData) == sizeof(SparseHeader) + kNumSparseBits / 8,
    262                Invalid_SparseData_bitmap);  // The bitmap holds one bit per block.
    263 
    264 }  // namespace disk_cache
    265 
    266 #endif  // NET_DISK_CACHE_DISK_FORMAT_H_
    267