1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 // 5 // BlockBuilder generates blocks where keys are prefix-compressed: 6 // 7 // When we store a key, we drop the prefix shared with the previous 8 // string. This helps reduce the space requirement significantly. 9 // Furthermore, once every K keys, we do not apply the prefix 10 // compression and store the entire key. We call this a "restart 11 // point". The tail end of the block stores the offsets of all of the 12 // restart points, and can be used to do a binary search when looking 13 // for a particular key. Values are stored as-is (without compression) 14 // immediately following the corresponding key. 15 // 16 // An entry for a particular key-value pair has the form: 17 // shared_bytes: varint32 18 // unshared_bytes: varint32 19 // value_length: varint32 20 // key_delta: char[unshared_bytes] 21 // value: char[value_length] 22 // shared_bytes == 0 for restart points. 23 // 24 // The trailer of the block has the form: 25 // restarts: uint32[num_restarts] 26 // num_restarts: uint32 27 // restarts[i] contains the offset within the block of the ith restart point. 28 29 #include "table/block_builder.h" 30 31 #include <algorithm> 32 #include <assert.h> 33 #include "leveldb/comparator.h" 34 #include "leveldb/table_builder.h" 35 #include "util/coding.h" 36 37 namespace leveldb { 38 39 BlockBuilder::BlockBuilder(const Options* options) 40 : options_(options), 41 restarts_(), 42 counter_(0), 43 finished_(false) { 44 assert(options->block_restart_interval >= 1); 45 restarts_.push_back(0); // First restart point is at offset 0 46 } 47 48 void BlockBuilder::Reset() { 49 buffer_.clear(); 50 restarts_.clear(); 51 restarts_.push_back(0); // First restart point is at offset 0 52 counter_ = 0; 53 finished_ = false; 54 last_key_.clear(); 55 } 56 57 size_t BlockBuilder::CurrentSizeEstimate() const { 58 return (buffer_.size() + // Raw data buffer 59 restarts_.size() * sizeof(uint32_t) + // Restart array 60 sizeof(uint32_t)); // Restart array length 61 } 62 63 Slice BlockBuilder::Finish() { 64 // Append restart array 65 for (size_t i = 0; i < restarts_.size(); i++) { 66 PutFixed32(&buffer_, restarts_[i]); 67 } 68 PutFixed32(&buffer_, restarts_.size()); 69 finished_ = true; 70 return Slice(buffer_); 71 } 72 73 void BlockBuilder::Add(const Slice& key, const Slice& value) { 74 Slice last_key_piece(last_key_); 75 assert(!finished_); 76 assert(counter_ <= options_->block_restart_interval); 77 assert(buffer_.empty() // No values yet? 78 || options_->comparator->Compare(key, last_key_piece) > 0); 79 size_t shared = 0; 80 if (counter_ < options_->block_restart_interval) { 81 // See how much sharing to do with previous string 82 const size_t min_length = std::min(last_key_piece.size(), key.size()); 83 while ((shared < min_length) && (last_key_piece[shared] == key[shared])) { 84 shared++; 85 } 86 } else { 87 // Restart compression 88 restarts_.push_back(buffer_.size()); 89 counter_ = 0; 90 } 91 const size_t non_shared = key.size() - shared; 92 93 // Add "<shared><non_shared><value_size>" to buffer_ 94 PutVarint32(&buffer_, shared); 95 PutVarint32(&buffer_, non_shared); 96 PutVarint32(&buffer_, value.size()); 97 98 // Add string delta to buffer_ followed by value 99 buffer_.append(key.data() + shared, non_shared); 100 buffer_.append(value.data(), value.size()); 101 102 // Update state 103 last_key_.resize(shared); 104 last_key_.append(key.data() + shared, non_shared); 105 assert(Slice(last_key_) == key); 106 counter_++; 107 } 108 109 } // namespace leveldb 110