1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 // 5 // The representation of a DBImpl consists of a set of Versions. The 6 // newest version is called "current". Older versions may be kept 7 // around to provide a consistent view to live iterators. 8 // 9 // Each Version keeps track of a set of Table files per level. The 10 // entire set of versions is maintained in a VersionSet. 11 // 12 // Version,VersionSet are thread-compatible, but require external 13 // synchronization on all accesses. 14 15 #ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_ 16 #define STORAGE_LEVELDB_DB_VERSION_SET_H_ 17 18 #include <map> 19 #include <set> 20 #include <vector> 21 #include "db/dbformat.h" 22 #include "db/version_edit.h" 23 #include "port/port.h" 24 #include "port/thread_annotations.h" 25 26 namespace leveldb { 27 28 namespace log { class Writer; } 29 30 class Compaction; 31 class Iterator; 32 class MemTable; 33 class TableBuilder; 34 class TableCache; 35 class Version; 36 class VersionSet; 37 class WritableFile; 38 39 // Return the smallest index i such that files[i]->largest >= key. 40 // Return files.size() if there is no such file. 41 // REQUIRES: "files" contains a sorted list of non-overlapping files. 42 extern int FindFile(const InternalKeyComparator& icmp, 43 const std::vector<FileMetaData*>& files, 44 const Slice& key); 45 46 // Returns true iff some file in "files" overlaps the user key range 47 // [*smallest,*largest]. 48 // smallest==NULL represents a key smaller than all keys in the DB. 49 // largest==NULL represents a key largest than all keys in the DB. 50 // REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges 51 // in sorted order. 52 extern bool SomeFileOverlapsRange( 53 const InternalKeyComparator& icmp, 54 bool disjoint_sorted_files, 55 const std::vector<FileMetaData*>& files, 56 const Slice* smallest_user_key, 57 const Slice* largest_user_key); 58 59 class Version { 60 public: 61 // Append to *iters a sequence of iterators that will 62 // yield the contents of this Version when merged together. 63 // REQUIRES: This version has been saved (see VersionSet::SaveTo) 64 void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters); 65 66 // Lookup the value for key. If found, store it in *val and 67 // return OK. Else return a non-OK status. Fills *stats. 68 // REQUIRES: lock is not held 69 struct GetStats { 70 FileMetaData* seek_file; 71 int seek_file_level; 72 }; 73 Status Get(const ReadOptions&, const LookupKey& key, std::string* val, 74 GetStats* stats); 75 76 // Adds "stats" into the current state. Returns true if a new 77 // compaction may need to be triggered, false otherwise. 78 // REQUIRES: lock is held 79 bool UpdateStats(const GetStats& stats); 80 81 // Reference count management (so Versions do not disappear out from 82 // under live iterators) 83 void Ref(); 84 void Unref(); 85 86 void GetOverlappingInputs( 87 int level, 88 const InternalKey* begin, // NULL means before all keys 89 const InternalKey* end, // NULL means after all keys 90 std::vector<FileMetaData*>* inputs); 91 92 // Returns true iff some file in the specified level overlaps 93 // some part of [*smallest_user_key,*largest_user_key]. 94 // smallest_user_key==NULL represents a key smaller than all keys in the DB. 95 // largest_user_key==NULL represents a key largest than all keys in the DB. 96 bool OverlapInLevel(int level, 97 const Slice* smallest_user_key, 98 const Slice* largest_user_key); 99 100 // Return the level at which we should place a new memtable compaction 101 // result that covers the range [smallest_user_key,largest_user_key]. 102 int PickLevelForMemTableOutput(const Slice& smallest_user_key, 103 const Slice& largest_user_key); 104 105 int NumFiles(int level) const { return files_[level].size(); } 106 107 // Return a human readable string that describes this version's contents. 108 std::string DebugString() const; 109 110 private: 111 friend class Compaction; 112 friend class VersionSet; 113 114 class LevelFileNumIterator; 115 Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const; 116 117 VersionSet* vset_; // VersionSet to which this Version belongs 118 Version* next_; // Next version in linked list 119 Version* prev_; // Previous version in linked list 120 int refs_; // Number of live refs to this version 121 122 // List of files per level 123 std::vector<FileMetaData*> files_[config::kNumLevels]; 124 125 // Next file to compact based on seek stats. 126 FileMetaData* file_to_compact_; 127 int file_to_compact_level_; 128 129 // Level that should be compacted next and its compaction score. 130 // Score < 1 means compaction is not strictly needed. These fields 131 // are initialized by Finalize(). 132 double compaction_score_; 133 int compaction_level_; 134 135 explicit Version(VersionSet* vset) 136 : vset_(vset), next_(this), prev_(this), refs_(0), 137 file_to_compact_(NULL), 138 file_to_compact_level_(-1), 139 compaction_score_(-1), 140 compaction_level_(-1) { 141 } 142 143 ~Version(); 144 145 // No copying allowed 146 Version(const Version&); 147 void operator=(const Version&); 148 }; 149 150 class VersionSet { 151 public: 152 VersionSet(const std::string& dbname, 153 const Options* options, 154 TableCache* table_cache, 155 const InternalKeyComparator*); 156 ~VersionSet(); 157 158 // Apply *edit to the current version to form a new descriptor that 159 // is both saved to persistent state and installed as the new 160 // current version. Will release *mu while actually writing to the file. 161 // REQUIRES: *mu is held on entry. 162 // REQUIRES: no other thread concurrently calls LogAndApply() 163 Status LogAndApply(VersionEdit* edit, port::Mutex* mu) 164 EXCLUSIVE_LOCKS_REQUIRED(mu); 165 166 // Recover the last saved descriptor from persistent storage. 167 Status Recover(); 168 169 // Return the current version. 170 Version* current() const { return current_; } 171 172 // Return the current manifest file number 173 uint64_t ManifestFileNumber() const { return manifest_file_number_; } 174 175 // Allocate and return a new file number 176 uint64_t NewFileNumber() { return next_file_number_++; } 177 178 // Arrange to reuse "file_number" unless a newer file number has 179 // already been allocated. 180 // REQUIRES: "file_number" was returned by a call to NewFileNumber(). 181 void ReuseFileNumber(uint64_t file_number) { 182 if (next_file_number_ == file_number + 1) { 183 next_file_number_ = file_number; 184 } 185 } 186 187 // Return the number of Table files at the specified level. 188 int NumLevelFiles(int level) const; 189 190 // Return the combined file size of all files at the specified level. 191 int64_t NumLevelBytes(int level) const; 192 193 // Return the last sequence number. 194 uint64_t LastSequence() const { return last_sequence_; } 195 196 // Set the last sequence number to s. 197 void SetLastSequence(uint64_t s) { 198 assert(s >= last_sequence_); 199 last_sequence_ = s; 200 } 201 202 // Mark the specified file number as used. 203 void MarkFileNumberUsed(uint64_t number); 204 205 // Return the current log file number. 206 uint64_t LogNumber() const { return log_number_; } 207 208 // Return the log file number for the log file that is currently 209 // being compacted, or zero if there is no such log file. 210 uint64_t PrevLogNumber() const { return prev_log_number_; } 211 212 // Pick level and inputs for a new compaction. 213 // Returns NULL if there is no compaction to be done. 214 // Otherwise returns a pointer to a heap-allocated object that 215 // describes the compaction. Caller should delete the result. 216 Compaction* PickCompaction(); 217 218 // Return a compaction object for compacting the range [begin,end] in 219 // the specified level. Returns NULL if there is nothing in that 220 // level that overlaps the specified range. Caller should delete 221 // the result. 222 Compaction* CompactRange( 223 int level, 224 const InternalKey* begin, 225 const InternalKey* end); 226 227 // Return the maximum overlapping data (in bytes) at next level for any 228 // file at a level >= 1. 229 int64_t MaxNextLevelOverlappingBytes(); 230 231 // Create an iterator that reads over the compaction inputs for "*c". 232 // The caller should delete the iterator when no longer needed. 233 Iterator* MakeInputIterator(Compaction* c); 234 235 // Returns true iff some level needs a compaction. 236 bool NeedsCompaction() const { 237 Version* v = current_; 238 return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL); 239 } 240 241 // Add all files listed in any live version to *live. 242 // May also mutate some internal state. 243 void AddLiveFiles(std::set<uint64_t>* live); 244 245 // Return the approximate offset in the database of the data for 246 // "key" as of version "v". 247 uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key); 248 249 // Return a human-readable short (single-line) summary of the number 250 // of files per level. Uses *scratch as backing store. 251 struct LevelSummaryStorage { 252 char buffer[100]; 253 }; 254 const char* LevelSummary(LevelSummaryStorage* scratch) const; 255 256 private: 257 class Builder; 258 259 friend class Compaction; 260 friend class Version; 261 262 void Finalize(Version* v); 263 264 void GetRange(const std::vector<FileMetaData*>& inputs, 265 InternalKey* smallest, 266 InternalKey* largest); 267 268 void GetRange2(const std::vector<FileMetaData*>& inputs1, 269 const std::vector<FileMetaData*>& inputs2, 270 InternalKey* smallest, 271 InternalKey* largest); 272 273 void SetupOtherInputs(Compaction* c); 274 275 // Save current contents to *log 276 Status WriteSnapshot(log::Writer* log); 277 278 void AppendVersion(Version* v); 279 280 bool ManifestContains(const std::string& record) const; 281 282 Env* const env_; 283 const std::string dbname_; 284 const Options* const options_; 285 TableCache* const table_cache_; 286 const InternalKeyComparator icmp_; 287 uint64_t next_file_number_; 288 uint64_t manifest_file_number_; 289 uint64_t last_sequence_; 290 uint64_t log_number_; 291 uint64_t prev_log_number_; // 0 or backing store for memtable being compacted 292 293 // Opened lazily 294 WritableFile* descriptor_file_; 295 log::Writer* descriptor_log_; 296 Version dummy_versions_; // Head of circular doubly-linked list of versions. 297 Version* current_; // == dummy_versions_.prev_ 298 299 // Per-level key at which the next compaction at that level should start. 300 // Either an empty string, or a valid InternalKey. 301 std::string compact_pointer_[config::kNumLevels]; 302 303 // No copying allowed 304 VersionSet(const VersionSet&); 305 void operator=(const VersionSet&); 306 }; 307 308 // A Compaction encapsulates information about a compaction. 309 class Compaction { 310 public: 311 ~Compaction(); 312 313 // Return the level that is being compacted. Inputs from "level" 314 // and "level+1" will be merged to produce a set of "level+1" files. 315 int level() const { return level_; } 316 317 // Return the object that holds the edits to the descriptor done 318 // by this compaction. 319 VersionEdit* edit() { return &edit_; } 320 321 // "which" must be either 0 or 1 322 int num_input_files(int which) const { return inputs_[which].size(); } 323 324 // Return the ith input file at "level()+which" ("which" must be 0 or 1). 325 FileMetaData* input(int which, int i) const { return inputs_[which][i]; } 326 327 // Maximum size of files to build during this compaction. 328 uint64_t MaxOutputFileSize() const { return max_output_file_size_; } 329 330 // Is this a trivial compaction that can be implemented by just 331 // moving a single input file to the next level (no merging or splitting) 332 bool IsTrivialMove() const; 333 334 // Add all inputs to this compaction as delete operations to *edit. 335 void AddInputDeletions(VersionEdit* edit); 336 337 // Returns true if the information we have available guarantees that 338 // the compaction is producing data in "level+1" for which no data exists 339 // in levels greater than "level+1". 340 bool IsBaseLevelForKey(const Slice& user_key); 341 342 // Returns true iff we should stop building the current output 343 // before processing "internal_key". 344 bool ShouldStopBefore(const Slice& internal_key); 345 346 // Release the input version for the compaction, once the compaction 347 // is successful. 348 void ReleaseInputs(); 349 350 private: 351 friend class Version; 352 friend class VersionSet; 353 354 explicit Compaction(int level); 355 356 int level_; 357 uint64_t max_output_file_size_; 358 Version* input_version_; 359 VersionEdit edit_; 360 361 // Each compaction reads inputs from "level_" and "level_+1" 362 std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs 363 364 // State used to check for number of of overlapping grandparent files 365 // (parent == level_ + 1, grandparent == level_ + 2) 366 std::vector<FileMetaData*> grandparents_; 367 size_t grandparent_index_; // Index in grandparent_starts_ 368 bool seen_key_; // Some output key has been seen 369 int64_t overlapped_bytes_; // Bytes of overlap between current output 370 // and grandparent files 371 372 // State for implementing IsBaseLevelForKey 373 374 // level_ptrs_ holds indices into input_version_->levels_: our state 375 // is that we are positioned at one of the file ranges for each 376 // higher level than the ones involved in this compaction (i.e. for 377 // all L >= level_ + 2). 378 size_t level_ptrs_[config::kNumLevels]; 379 }; 380 381 } // namespace leveldb 382 383 #endif // STORAGE_LEVELDB_DB_VERSION_SET_H_ 384