1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // The cache is stored on disk as a collection of block-files, plus an index 6 // file plus a collection of external files. 7 // 8 // Any data blob bigger than kMaxBlockSize (net/addr.h) will be stored on a 9 // separate file named f_xxx where x is a hexadecimal number. Shorter data will 10 // be stored as a series of blocks on a block-file. In any case, CacheAddr 11 // represents the address of the data inside the cache. 12 // 13 // The index file is just a simple hash table that maps a particular entry to 14 // a CacheAddr value. Linking for a given hash bucket is handled internally 15 // by the cache entry. 16 // 17 // The last element of the cache is the block-file. A block file is a file 18 // designed to store blocks of data of a given size. It is able to store data 19 // that spans from one to four consecutive "blocks", and it grows as needed to 20 // store up to approximately 65000 blocks. It has a fixed size header used for 21 // book keeping such as tracking free of blocks on the file. For example, a 22 // block-file for 1KB blocks will grow from 8KB when totally empty to about 64MB 23 // when completely full. At that point, data blocks of 1KB will be stored on a 24 // second block file that will store the next set of 65000 blocks. The first 25 // file contains the number of the second file, and the second file contains the 26 // number of a third file, created when the second file reaches its limit. It is 27 // important to remember that no matter how long the chain of files is, any 28 // given block can be located directly by its address, which contains the file 29 // number and starting block inside the file. 30 // 31 // A new cache is initialized with four block files (named data_0 through 32 // data_3), each one dedicated to store blocks of a given size. The number at 33 // the end of the file name is the block file number (in decimal). 34 // 35 // There are two "special" types of blocks: an entry and a rankings node. An 36 // entry keeps track of all the information related to the same cache entry, 37 // such as the key, hash value, data pointers etc. A rankings node keeps track 38 // of the information that is updated frequently for a given entry, such as its 39 // location on the LRU lists, last access time etc. 40 // 41 // The files that store internal information for the cache (blocks and index) 42 // are at least partially memory mapped. They have a location that is signaled 43 // every time the internal structures are modified, so it is possible to detect 44 // (most of the time) when the process dies in the middle of an update. 45 // 46 // In order to prevent dirty data to be used as valid (after a crash), every 47 // cache entry has a dirty identifier. Each running instance of the cache keeps 48 // a separate identifier (maintained on the "this_id" header field) that is used 49 // to mark every entry that is created or modified. When the entry is closed, 50 // and all the data can be trusted, the dirty flag is cleared from the entry. 51 // When the cache encounters an entry whose identifier is different than the one 52 // being currently used, it means that the entry was not properly closed on a 53 // previous run, so it is discarded. 54 55 #ifndef NET_DISK_CACHE_DISK_FORMAT_H_ 56 #define NET_DISK_CACHE_DISK_FORMAT_H_ 57 58 #include "base/basictypes.h" 59 60 namespace disk_cache { 61 62 typedef uint32 CacheAddr; 63 64 const int kIndexTablesize = 0x10000; 65 const uint32 kIndexMagic = 0xC103CAC3; 66 const uint32 kCurrentVersion = 0x20000; // Version 2.0. 67 68 struct LruData { 69 int32 pad1[2]; 70 int32 filled; // Flag to tell when we filled the cache. 71 int32 sizes[5]; 72 CacheAddr heads[5]; 73 CacheAddr tails[5]; 74 CacheAddr transaction; // In-flight operation target. 75 int32 operation; // Actual in-flight operation. 76 int32 operation_list; // In-flight operation list. 77 int32 pad2[7]; 78 }; 79 80 // Header for the master index file. 81 struct IndexHeader { 82 uint32 magic; 83 uint32 version; 84 int32 num_entries; // Number of entries currently stored. 85 int32 num_bytes; // Total size of the stored data. 86 int32 last_file; // Last external file created. 87 int32 this_id; // Id for all entries being changed (dirty flag). 88 CacheAddr stats; // Storage for usage data. 89 int32 table_len; // Actual size of the table (0 == kIndexTablesize). 90 int32 crash; // Signals a previous crash. 91 int32 experiment; // Id of an ongoing test. 92 uint64 create_time; // Creation time for this set of files. 93 int32 pad[52]; 94 LruData lru; // Eviction control data. 95 IndexHeader() { 96 memset(this, 0, sizeof(*this)); 97 magic = kIndexMagic; 98 version = kCurrentVersion; 99 }; 100 }; 101 102 // The structure of the whole index file. 103 struct Index { 104 IndexHeader header; 105 CacheAddr table[kIndexTablesize]; // Default size. Actual size controlled 106 // by header.table_len. 107 }; 108 109 // Main structure for an entry on the backing storage. If the key is longer than 110 // what can be stored on this structure, it will be extended on consecutive 111 // blocks (adding 256 bytes each time), up to 4 blocks (1024 - 32 - 1 chars). 112 // After that point, the whole key will be stored as a data block or external 113 // file. 114 struct EntryStore { 115 uint32 hash; // Full hash of the key. 116 CacheAddr next; // Next entry with the same hash or bucket. 117 CacheAddr rankings_node; // Rankings node for this entry. 118 int32 reuse_count; // How often is this entry used. 119 int32 refetch_count; // How often is this fetched from the net. 120 int32 state; // Current state. 121 uint64 creation_time; 122 int32 key_len; 123 CacheAddr long_key; // Optional address of a long key. 124 int32 data_size[4]; // We can store up to 4 data streams for each 125 CacheAddr data_addr[4]; // entry. 126 uint32 flags; // Any combination of EntryFlags. 127 int32 pad[5]; 128 char key[256 - 24 * 4]; // null terminated 129 }; 130 131 COMPILE_ASSERT(sizeof(EntryStore) == 256, bad_EntyStore); 132 const int kMaxInternalKeyLength = 4 * sizeof(EntryStore) - 133 offsetof(EntryStore, key) - 1; 134 135 // Possible states for a given entry. 136 enum EntryState { 137 ENTRY_NORMAL = 0, 138 ENTRY_EVICTED, // The entry was recently evicted from the cache. 139 ENTRY_DOOMED // The entry was doomed. 140 }; 141 142 // Flags that can be applied to an entry. 143 enum EntryFlags { 144 PARENT_ENTRY = 1, // This entry has children (sparse) entries. 145 CHILD_ENTRY = 1 << 1 // Child entry that stores sparse data. 146 }; 147 148 #pragma pack(push, 4) 149 // Rankings information for a given entry. 150 struct RankingsNode { 151 uint64 last_used; // LRU info. 152 uint64 last_modified; // LRU info. 153 CacheAddr next; // LRU list. 154 CacheAddr prev; // LRU list. 155 CacheAddr contents; // Address of the EntryStore. 156 int32 dirty; // The entry is being modifyied. 157 int32 dummy; // Old files may have a pointer here. 158 }; 159 #pragma pack(pop) 160 161 COMPILE_ASSERT(sizeof(RankingsNode) == 36, bad_RankingsNode); 162 163 const uint32 kBlockMagic = 0xC104CAC3; 164 const int kBlockHeaderSize = 8192; // Two pages: almost 64k entries 165 const int kMaxBlocks = (kBlockHeaderSize - 80) * 8; 166 167 // Bitmap to track used blocks on a block-file. 168 typedef uint32 AllocBitmap[kMaxBlocks / 32]; 169 170 // A block-file is the file used to store information in blocks (could be 171 // EntryStore blocks, RankingsNode blocks or user-data blocks). 172 // We store entries that can expand for up to 4 consecutive blocks, and keep 173 // counters of the number of blocks available for each type of entry. For 174 // instance, an entry of 3 blocks is an entry of type 3. We also keep track of 175 // where did we find the last entry of that type (to avoid searching the bitmap 176 // from the beginning every time). 177 // This Structure is the header of a block-file: 178 struct BlockFileHeader { 179 uint32 magic; 180 uint32 version; 181 int16 this_file; // Index of this file. 182 int16 next_file; // Next file when this one is full. 183 int32 entry_size; // Size of the blocks of this file. 184 int32 num_entries; // Number of stored entries. 185 int32 max_entries; // Current maximum number of entries. 186 int32 empty[4]; // Counters of empty entries for each type. 187 int32 hints[4]; // Last used position for each entry type. 188 volatile int32 updating; // Keep track of updates to the header. 189 int32 user[5]; 190 AllocBitmap allocation_map; 191 BlockFileHeader() { 192 memset(this, 0, sizeof(BlockFileHeader)); 193 magic = kBlockMagic; 194 version = kCurrentVersion; 195 }; 196 }; 197 198 COMPILE_ASSERT(sizeof(BlockFileHeader) == kBlockHeaderSize, bad_header); 199 200 // Sparse data support: 201 // We keep a two level hierarchy to enable sparse data for an entry: the first 202 // level consists of using separate "child" entries to store ranges of 1 MB, 203 // and the second level stores blocks of 1 KB inside each child entry. 204 // 205 // Whenever we need to access a particular sparse offset, we first locate the 206 // child entry that stores that offset, so we discard the 20 least significant 207 // bits of the offset, and end up with the child id. For instance, the child id 208 // to store the first megabyte is 0, and the child that should store offset 209 // 0x410000 has an id of 4. 210 // 211 // The child entry is stored the same way as any other entry, so it also has a 212 // name (key). The key includes a signature to be able to identify children 213 // created for different generations of the same resource. In other words, given 214 // that a given sparse entry can have a large number of child entries, and the 215 // resource can be invalidated and replaced with a new version at any time, it 216 // is important to be sure that a given child actually belongs to certain entry. 217 // 218 // The full name of a child entry is composed with a prefix ("Range_"), and two 219 // hexadecimal 64-bit numbers at the end, separated by semicolons. The first 220 // number is the signature of the parent key, and the second number is the child 221 // id as described previously. The signature itself is also stored internally by 222 // the child and the parent entries. For example, a sparse entry with a key of 223 // "sparse entry name", and a signature of 0x052AF76, may have a child entry 224 // named "Range_sparse entry name:052af76:4", which stores data in the range 225 // 0x400000 to 0x4FFFFF. 226 // 227 // Each child entry keeps track of all the 1 KB blocks that have been written 228 // to the entry, but being a regular entry, it will happily return zeros for any 229 // read that spans data not written before. The actual sparse data is stored in 230 // one of the data streams of the child entry (at index 1), while the control 231 // information is stored in another stream (at index 2), both by parents and 232 // the children. 233 234 // This structure contains the control information for parent and child entries. 235 // It is stored at offset 0 of the data stream with index 2. 236 // It is possible to write to a child entry in a way that causes the last block 237 // to be only partialy filled. In that case, last_block and last_block_len will 238 // keep track of that block. 239 struct SparseHeader { 240 int64 signature; // The parent and children signature. 241 uint32 magic; // Structure identifier (equal to kIndexMagic). 242 int32 parent_key_len; // Key length for the parent entry. 243 int32 last_block; // Index of the last written block. 244 int32 last_block_len; // Lenght of the last written block. 245 int32 dummy[10]; 246 }; 247 248 // The SparseHeader will be followed by a bitmap, as described by this 249 // structure. 250 struct SparseData { 251 SparseHeader header; 252 uint32 bitmap[32]; // Bitmap representation of known children (if this 253 // is a parent entry), or used blocks (for child 254 // entries. The size is fixed for child entries but 255 // not for parents; it can be as small as 4 bytes 256 // and as large as 8 KB. 257 }; 258 259 // The number of blocks stored by a child entry. 260 const int kNumSparseBits = 1024; 261 COMPILE_ASSERT(sizeof(SparseData) == sizeof(SparseHeader) + kNumSparseBits / 8, 262 Invalid_SparseData_bitmap); 263 264 } // namespace disk_cache 265 266 #endif // NET_DISK_CACHE_DISK_FORMAT_H_ 267