Home | History | Annotate | Download | only in table
      1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
      4 //
      5 // Decodes the blocks generated by block_builder.cc.
      6 
      7 #include "table/block.h"
      8 
      9 #include <vector>
     10 #include <algorithm>
     11 #include "leveldb/comparator.h"
     12 #include "table/format.h"
     13 #include "util/coding.h"
     14 #include "util/logging.h"
     15 
     16 namespace leveldb {
     17 
     18 inline uint32_t Block::NumRestarts() const {
     19   assert(size_ >= sizeof(uint32_t));
     20   return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
     21 }
     22 
     23 Block::Block(const BlockContents& contents)
     24     : data_(contents.data.data()),
     25       size_(contents.data.size()),
     26       owned_(contents.heap_allocated) {
     27   if (size_ < sizeof(uint32_t)) {
     28     size_ = 0;  // Error marker
     29   } else {
     30     size_t max_restarts_allowed = (size_-sizeof(uint32_t)) / sizeof(uint32_t);
     31     if (NumRestarts() > max_restarts_allowed) {
     32       // The size is too small for NumRestarts()
     33       size_ = 0;
     34     } else {
     35       restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t);
     36     }
     37   }
     38 }
     39 
     40 Block::~Block() {
     41   if (owned_) {
     42     delete[] data_;
     43   }
     44 }
     45 
     46 // Helper routine: decode the next block entry starting at "p",
     47 // storing the number of shared key bytes, non_shared key bytes,
     48 // and the length of the value in "*shared", "*non_shared", and
     49 // "*value_length", respectively.  Will not derefence past "limit".
     50 //
     51 // If any errors are detected, returns NULL.  Otherwise, returns a
     52 // pointer to the key delta (just past the three decoded values).
     53 static inline const char* DecodeEntry(const char* p, const char* limit,
     54                                       uint32_t* shared,
     55                                       uint32_t* non_shared,
     56                                       uint32_t* value_length) {
     57   if (limit - p < 3) return NULL;
     58   *shared = reinterpret_cast<const unsigned char*>(p)[0];
     59   *non_shared = reinterpret_cast<const unsigned char*>(p)[1];
     60   *value_length = reinterpret_cast<const unsigned char*>(p)[2];
     61   if ((*shared | *non_shared | *value_length) < 128) {
     62     // Fast path: all three values are encoded in one byte each
     63     p += 3;
     64   } else {
     65     if ((p = GetVarint32Ptr(p, limit, shared)) == NULL) return NULL;
     66     if ((p = GetVarint32Ptr(p, limit, non_shared)) == NULL) return NULL;
     67     if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL;
     68   }
     69 
     70   if (static_cast<uint32_t>(limit - p) < (*non_shared + *value_length)) {
     71     return NULL;
     72   }
     73   return p;
     74 }
     75 
     76 class Block::Iter : public Iterator {
     77  private:
     78   const Comparator* const comparator_;
     79   const char* const data_;      // underlying block contents
     80   uint32_t const restarts_;     // Offset of restart array (list of fixed32)
     81   uint32_t const num_restarts_; // Number of uint32_t entries in restart array
     82 
     83   // current_ is offset in data_ of current entry.  >= restarts_ if !Valid
     84   uint32_t current_;
     85   uint32_t restart_index_;  // Index of restart block in which current_ falls
     86   std::string key_;
     87   Slice value_;
     88   Status status_;
     89 
     90   inline int Compare(const Slice& a, const Slice& b) const {
     91     return comparator_->Compare(a, b);
     92   }
     93 
     94   // Return the offset in data_ just past the end of the current entry.
     95   inline uint32_t NextEntryOffset() const {
     96     return (value_.data() + value_.size()) - data_;
     97   }
     98 
     99   uint32_t GetRestartPoint(uint32_t index) {
    100     assert(index < num_restarts_);
    101     return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
    102   }
    103 
    104   void SeekToRestartPoint(uint32_t index) {
    105     key_.clear();
    106     restart_index_ = index;
    107     // current_ will be fixed by ParseNextKey();
    108 
    109     // ParseNextKey() starts at the end of value_, so set value_ accordingly
    110     uint32_t offset = GetRestartPoint(index);
    111     value_ = Slice(data_ + offset, 0);
    112   }
    113 
    114  public:
    115   Iter(const Comparator* comparator,
    116        const char* data,
    117        uint32_t restarts,
    118        uint32_t num_restarts)
    119       : comparator_(comparator),
    120         data_(data),
    121         restarts_(restarts),
    122         num_restarts_(num_restarts),
    123         current_(restarts_),
    124         restart_index_(num_restarts_) {
    125     assert(num_restarts_ > 0);
    126   }
    127 
    128   virtual bool Valid() const { return current_ < restarts_; }
    129   virtual Status status() const { return status_; }
    130   virtual Slice key() const {
    131     assert(Valid());
    132     return key_;
    133   }
    134   virtual Slice value() const {
    135     assert(Valid());
    136     return value_;
    137   }
    138 
    139   virtual void Next() {
    140     assert(Valid());
    141     ParseNextKey();
    142   }
    143 
    144   virtual void Prev() {
    145     assert(Valid());
    146 
    147     // Scan backwards to a restart point before current_
    148     const uint32_t original = current_;
    149     while (GetRestartPoint(restart_index_) >= original) {
    150       if (restart_index_ == 0) {
    151         // No more entries
    152         current_ = restarts_;
    153         restart_index_ = num_restarts_;
    154         return;
    155       }
    156       restart_index_--;
    157     }
    158 
    159     SeekToRestartPoint(restart_index_);
    160     do {
    161       // Loop until end of current entry hits the start of original entry
    162     } while (ParseNextKey() && NextEntryOffset() < original);
    163   }
    164 
    165   virtual void Seek(const Slice& target) {
    166     // Binary search in restart array to find the last restart point
    167     // with a key < target
    168     uint32_t left = 0;
    169     uint32_t right = num_restarts_ - 1;
    170     while (left < right) {
    171       uint32_t mid = (left + right + 1) / 2;
    172       uint32_t region_offset = GetRestartPoint(mid);
    173       uint32_t shared, non_shared, value_length;
    174       const char* key_ptr = DecodeEntry(data_ + region_offset,
    175                                         data_ + restarts_,
    176                                         &shared, &non_shared, &value_length);
    177       if (key_ptr == NULL || (shared != 0)) {
    178         CorruptionError();
    179         return;
    180       }
    181       Slice mid_key(key_ptr, non_shared);
    182       if (Compare(mid_key, target) < 0) {
    183         // Key at "mid" is smaller than "target".  Therefore all
    184         // blocks before "mid" are uninteresting.
    185         left = mid;
    186       } else {
    187         // Key at "mid" is >= "target".  Therefore all blocks at or
    188         // after "mid" are uninteresting.
    189         right = mid - 1;
    190       }
    191     }
    192 
    193     // Linear search (within restart block) for first key >= target
    194     SeekToRestartPoint(left);
    195     while (true) {
    196       if (!ParseNextKey()) {
    197         return;
    198       }
    199       if (Compare(key_, target) >= 0) {
    200         return;
    201       }
    202     }
    203   }
    204 
    205   virtual void SeekToFirst() {
    206     SeekToRestartPoint(0);
    207     ParseNextKey();
    208   }
    209 
    210   virtual void SeekToLast() {
    211     SeekToRestartPoint(num_restarts_ - 1);
    212     while (ParseNextKey() && NextEntryOffset() < restarts_) {
    213       // Keep skipping
    214     }
    215   }
    216 
    217  private:
    218   void CorruptionError() {
    219     current_ = restarts_;
    220     restart_index_ = num_restarts_;
    221     status_ = Status::Corruption("bad entry in block");
    222     key_.clear();
    223     value_.clear();
    224   }
    225 
    226   bool ParseNextKey() {
    227     current_ = NextEntryOffset();
    228     const char* p = data_ + current_;
    229     const char* limit = data_ + restarts_;  // Restarts come right after data
    230     if (p >= limit) {
    231       // No more entries to return.  Mark as invalid.
    232       current_ = restarts_;
    233       restart_index_ = num_restarts_;
    234       return false;
    235     }
    236 
    237     // Decode next entry
    238     uint32_t shared, non_shared, value_length;
    239     p = DecodeEntry(p, limit, &shared, &non_shared, &value_length);
    240     if (p == NULL || key_.size() < shared) {
    241       CorruptionError();
    242       return false;
    243     } else {
    244       key_.resize(shared);
    245       key_.append(p, non_shared);
    246       value_ = Slice(p + non_shared, value_length);
    247       while (restart_index_ + 1 < num_restarts_ &&
    248              GetRestartPoint(restart_index_ + 1) < current_) {
    249         ++restart_index_;
    250       }
    251       return true;
    252     }
    253   }
    254 };
    255 
    256 Iterator* Block::NewIterator(const Comparator* cmp) {
    257   if (size_ < sizeof(uint32_t)) {
    258     return NewErrorIterator(Status::Corruption("bad block contents"));
    259   }
    260   const uint32_t num_restarts = NumRestarts();
    261   if (num_restarts == 0) {
    262     return NewEmptyIterator();
    263   } else {
    264     return new Iter(cmp, data_, restart_offset_, num_restarts);
    265   }
    266 }
    267 
    268 }  // namespace leveldb
    269