// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_
#define STORAGE_LEVELDB_DB_DB_IMPL_H_

#include <deque>
#include <set>
#include "db/dbformat.h"
#include "db/log_writer.h"
#include "db/snapshot.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "port/port.h"
#include "port/thread_annotations.h"

namespace leveldb {

class MemTable;
class TableCache;
class Version;
class VersionEdit;
class VersionSet;

class DBImpl : public DB {
 public:
  DBImpl(const Options& options, const std::string& dbname);
  virtual ~DBImpl();

  // Implementations of the DB interface
  virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value);
  virtual Status Delete(const WriteOptions&, const Slice& key);
  virtual Status Write(const WriteOptions& options, WriteBatch* updates);
  virtual Status Get(const ReadOptions& options,
                     const Slice& key,
                     std::string* value);
  virtual Iterator* NewIterator(const ReadOptions&);
  virtual const Snapshot* GetSnapshot();
  virtual void ReleaseSnapshot(const Snapshot* snapshot);
  virtual bool GetProperty(const Slice& property, std::string* value);
  virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
  virtual void CompactRange(const Slice* begin, const Slice* end);

  // Extra methods (for testing) that are not in the public DB interface

  // Compact any files in the named level that overlap [*begin,*end]
  void TEST_CompactRange(int level, const Slice* begin, const Slice* end);

  // Force current memtable contents to be compacted.
  Status TEST_CompactMemTable();

  // Return an internal iterator over the current state of the database.
  // The keys of this iterator are internal keys (see format.h).
  // The returned iterator should be deleted when no longer needed.
  Iterator* TEST_NewInternalIterator();

  // Return the maximum overlapping data (in bytes) at next level for any
  // file at a level >= 1.
  int64_t TEST_MaxNextLevelOverlappingBytes();

  // Record a sample of bytes read at the specified internal key.
  // Samples are taken approximately once every config::kReadBytesPeriod
  // bytes.
  void RecordReadSample(Slice key);
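
  // Illustrative usage sketch (documentation only, not part of this header's
  // contract): clients obtain a DBImpl through DB::Open rather than
  // constructing one directly. The path "/tmp/testdb" is an arbitrary
  // example:
  //
  //    leveldb::DB* db;
  //    leveldb::Options options;
  //    options.create_if_missing = true;
  //    leveldb::Status s = leveldb::DB::Open(options, "/tmp/testdb", &db);
  //    if (s.ok()) s = db->Put(leveldb::WriteOptions(), "key", "value");
  //    std::string value;
  //    if (s.ok()) s = db->Get(leveldb::ReadOptions(), "key", &value);
  //    delete db;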

 private:
  friend class DB;
  struct CompactionState;
  struct Writer;

  Iterator* NewInternalIterator(const ReadOptions&,
                                SequenceNumber* latest_snapshot,
                                uint32_t* seed);

  Status NewDB();

  // Recover the descriptor from persistent storage. May do a significant
  // amount of work to recover recently logged updates. Any changes to
  // be made to the descriptor are added to *edit.
  Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  void MaybeIgnoreError(Status* s) const;

  // Delete any unneeded files and stale in-memory entries.
  void DeleteObsoleteFiles();

  // Compact the in-memory write buffer to disk. Switches to a new
  // log-file/memtable and writes a new descriptor iff successful.
  // Errors are recorded in bg_error_.
  void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  Status RecoverLogFile(uint64_t log_number,
                        VersionEdit* edit,
                        SequenceNumber* max_sequence)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  Status MakeRoomForWrite(bool force /* compact even if there is room? */)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  WriteBatch* BuildBatchGroup(Writer** last_writer);

  void RecordBackgroundError(const Status& s);

  void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  static void BGWork(void* db);
  void BackgroundCall();
  void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  void CleanupCompaction(CompactionState* compact)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  Status DoCompactionWork(CompactionState* compact)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  Status OpenCompactionOutputFile(CompactionState* compact);
  Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
  Status InstallCompactionResults(CompactionState* compact)
      EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Constant after construction
  Env* const env_;
  const InternalKeyComparator internal_comparator_;
  const InternalFilterPolicy internal_filter_policy_;
  const Options options_;  // options_.comparator == &internal_comparator_
  bool owns_info_log_;
  bool owns_cache_;
  const std::string dbname_;

  // table_cache_ provides its own synchronization
  TableCache* table_cache_;

  // Lock over the persistent DB state. Non-NULL iff successfully acquired.
  FileLock* db_lock_;

  // State below is protected by mutex_
  port::Mutex mutex_;
  port::AtomicPointer shutting_down_;
  port::CondVar bg_cv_;          // Signalled when background work finishes
  MemTable* mem_;
  MemTable* imm_;                // Memtable being compacted
  port::AtomicPointer has_imm_;  // So bg thread can detect non-NULL imm_
  WritableFile* logfile_;
  uint64_t logfile_number_;
  log::Writer* log_;
  uint32_t seed_;                // For sampling.

  // Queue of writers.
  std::deque<Writer*> writers_;
  WriteBatch* tmp_batch_;

  SnapshotList snapshots_;

  // Set of table files to protect from deletion because they are
  // part of ongoing compactions.
  std::set<uint64_t> pending_outputs_;

  // Has a background compaction been scheduled or is running?
  bool bg_compaction_scheduled_;

  // Information for a manual compaction
  struct ManualCompaction {
    int level;
    bool done;
    const InternalKey* begin;   // NULL means beginning of key range
    const InternalKey* end;     // NULL means end of key range
    InternalKey tmp_storage;    // Used to keep track of compaction progress
  };
  ManualCompaction* manual_compaction_;

  VersionSet* versions_;

  // Have we encountered a background error in paranoid mode?
  Status bg_error_;
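
  // The per-level stats below back the "leveldb.stats" property. A sketch of
  // reading them through the public DB interface (illustrative only):
  //
  //    std::string stats;
  //    if (db->GetProperty("leveldb.stats", &stats)) {
  //      fprintf(stderr, "%s\n", stats.c_str());
  //    }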

  // Per level compaction stats. stats_[level] stores the stats for
  // compactions that produced data for the specified "level".
  struct CompactionStats {
    int64_t micros;
    int64_t bytes_read;
    int64_t bytes_written;

    CompactionStats() : micros(0), bytes_read(0), bytes_written(0) { }

    void Add(const CompactionStats& c) {
      this->micros += c.micros;
      this->bytes_read += c.bytes_read;
      this->bytes_written += c.bytes_written;
    }
  };
  CompactionStats stats_[config::kNumLevels];

  // No copying allowed
  DBImpl(const DBImpl&);
  void operator=(const DBImpl&);

  const Comparator* user_comparator() const {
    return internal_comparator_.user_comparator();
  }
};

// Sanitize db options. The caller should delete result.info_log if
// it is not equal to src.info_log.
extern Options SanitizeOptions(const std::string& db,
                               const InternalKeyComparator* icmp,
                               const InternalFilterPolicy* ipolicy,
                               const Options& src);

}  // namespace leveldb

#endif  // STORAGE_LEVELDB_DB_DB_IMPL_H_
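
// An illustrative sketch of the SanitizeOptions ownership contract described
// above. DBImpl's constructor is the real caller; the names below are
// hypothetical:
//
//    leveldb::Options sanitized =
//        leveldb::SanitizeOptions(dbname, &icmp, &ipolicy, user_options);
//    ...
//    if (sanitized.info_log != user_options.info_log) {
//      delete sanitized.info_log;  // SanitizeOptions created it, so we own it
//    }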