Home | History | Annotate | Download | only in table
      1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
      4 
      5 #include "leveldb/table.h"
      6 
      7 #include "leveldb/cache.h"
      8 #include "leveldb/comparator.h"
      9 #include "leveldb/env.h"
     10 #include "leveldb/filter_policy.h"
     11 #include "leveldb/options.h"
     12 #include "table/block.h"
     13 #include "table/filter_block.h"
     14 #include "table/format.h"
     15 #include "table/two_level_iterator.h"
     16 #include "util/coding.h"
     17 
     18 namespace leveldb {
     19 
     20 struct Table::Rep {
     21   ~Rep() {
     22     delete filter;
     23     delete [] filter_data;
     24     delete index_block;
     25   }
     26 
     27   Options options;
     28   Status status;
     29   RandomAccessFile* file;
     30   uint64_t cache_id;
     31   FilterBlockReader* filter;
     32   const char* filter_data;
     33 
     34   BlockHandle metaindex_handle;  // Handle to metaindex_block: saved from footer
     35   Block* index_block;
     36 };
     37 
     38 Status Table::Open(const Options& options,
     39                    RandomAccessFile* file,
     40                    uint64_t size,
     41                    Table** table) {
     42   *table = NULL;
     43   if (size < Footer::kEncodedLength) {
     44     return Status::InvalidArgument("file is too short to be an sstable");
     45   }
     46 
     47   char footer_space[Footer::kEncodedLength];
     48   Slice footer_input;
     49   Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
     50                         &footer_input, footer_space);
     51   if (!s.ok()) return s;
     52 
     53   Footer footer;
     54   s = footer.DecodeFrom(&footer_input);
     55   if (!s.ok()) return s;
     56 
     57   // Read the index block
     58   BlockContents contents;
     59   Block* index_block = NULL;
     60   if (s.ok()) {
     61     s = ReadBlock(file, ReadOptions(), footer.index_handle(), &contents);
     62     if (s.ok()) {
     63       index_block = new Block(contents);
     64     }
     65   }
     66 
     67   if (s.ok()) {
     68     // We've successfully read the footer and the index block: we're
     69     // ready to serve requests.
     70     Rep* rep = new Table::Rep;
     71     rep->options = options;
     72     rep->file = file;
     73     rep->metaindex_handle = footer.metaindex_handle();
     74     rep->index_block = index_block;
     75     rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
     76     rep->filter_data = NULL;
     77     rep->filter = NULL;
     78     *table = new Table(rep);
     79     (*table)->ReadMeta(footer);
     80   } else {
     81     if (index_block) delete index_block;
     82   }
     83 
     84   return s;
     85 }
     86 
     87 void Table::ReadMeta(const Footer& footer) {
     88   if (rep_->options.filter_policy == NULL) {
     89     return;  // Do not need any metadata
     90   }
     91 
     92   // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
     93   // it is an empty block.
     94   ReadOptions opt;
     95   BlockContents contents;
     96   if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
     97     // Do not propagate errors since meta info is not needed for operation
     98     return;
     99   }
    100   Block* meta = new Block(contents);
    101 
    102   Iterator* iter = meta->NewIterator(BytewiseComparator());
    103   std::string key = "filter.";
    104   key.append(rep_->options.filter_policy->Name());
    105   iter->Seek(key);
    106   if (iter->Valid() && iter->key() == Slice(key)) {
    107     ReadFilter(iter->value());
    108   }
    109   delete iter;
    110   delete meta;
    111 }
    112 
    113 void Table::ReadFilter(const Slice& filter_handle_value) {
    114   Slice v = filter_handle_value;
    115   BlockHandle filter_handle;
    116   if (!filter_handle.DecodeFrom(&v).ok()) {
    117     return;
    118   }
    119 
    120   // We might want to unify with ReadBlock() if we start
    121   // requiring checksum verification in Table::Open.
    122   ReadOptions opt;
    123   BlockContents block;
    124   if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
    125     return;
    126   }
    127   if (block.heap_allocated) {
    128     rep_->filter_data = block.data.data();     // Will need to delete later
    129   }
    130   rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
    131 }
    132 
// Destroys the table's private state.  Rep's destructor in turn deletes
// the filter reader, the filter data buffer and the index block.
Table::~Table() {
  delete rep_;
}
    136 
    137 static void DeleteBlock(void* arg, void* ignored) {
    138   delete reinterpret_cast<Block*>(arg);
    139 }
    140 
    141 static void DeleteCachedBlock(const Slice& key, void* value) {
    142   Block* block = reinterpret_cast<Block*>(value);
    143   delete block;
    144 }
    145 
    146 static void ReleaseBlock(void* arg, void* h) {
    147   Cache* cache = reinterpret_cast<Cache*>(arg);
    148   Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
    149   cache->Release(handle);
    150 }
    151 
    152 // Convert an index iterator value (i.e., an encoded BlockHandle)
    153 // into an iterator over the contents of the corresponding block.
    154 Iterator* Table::BlockReader(void* arg,
    155                              const ReadOptions& options,
    156                              const Slice& index_value) {
    157   Table* table = reinterpret_cast<Table*>(arg);
    158   Cache* block_cache = table->rep_->options.block_cache;
    159   Block* block = NULL;
    160   Cache::Handle* cache_handle = NULL;
    161 
    162   BlockHandle handle;
    163   Slice input = index_value;
    164   Status s = handle.DecodeFrom(&input);
    165   // We intentionally allow extra stuff in index_value so that we
    166   // can add more features in the future.
    167 
    168   if (s.ok()) {
    169     BlockContents contents;
    170     if (block_cache != NULL) {
    171       char cache_key_buffer[16];
    172       EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
    173       EncodeFixed64(cache_key_buffer+8, handle.offset());
    174       Slice key(cache_key_buffer, sizeof(cache_key_buffer));
    175       cache_handle = block_cache->Lookup(key);
    176       if (cache_handle != NULL) {
    177         block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
    178       } else {
    179         s = ReadBlock(table->rep_->file, options, handle, &contents);
    180         if (s.ok()) {
    181           block = new Block(contents);
    182           if (contents.cachable && options.fill_cache) {
    183             cache_handle = block_cache->Insert(
    184                 key, block, block->size(), &DeleteCachedBlock);
    185           }
    186         }
    187       }
    188     } else {
    189       s = ReadBlock(table->rep_->file, options, handle, &contents);
    190       if (s.ok()) {
    191         block = new Block(contents);
    192       }
    193     }
    194   }
    195 
    196   Iterator* iter;
    197   if (block != NULL) {
    198     iter = block->NewIterator(table->rep_->options.comparator);
    199     if (cache_handle == NULL) {
    200       iter->RegisterCleanup(&DeleteBlock, block, NULL);
    201     } else {
    202       iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
    203     }
    204   } else {
    205     iter = NewErrorIterator(s);
    206   }
    207   return iter;
    208 }
    209 
    210 Iterator* Table::NewIterator(const ReadOptions& options) const {
    211   return NewTwoLevelIterator(
    212       rep_->index_block->NewIterator(rep_->options.comparator),
    213       &Table::BlockReader, const_cast<Table*>(this), options);
    214 }
    215 
    216 Status Table::InternalGet(const ReadOptions& options, const Slice& k,
    217                           void* arg,
    218                           void (*saver)(void*, const Slice&, const Slice&)) {
    219   Status s;
    220   Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
    221   iiter->Seek(k);
    222   if (iiter->Valid()) {
    223     Slice handle_value = iiter->value();
    224     FilterBlockReader* filter = rep_->filter;
    225     BlockHandle handle;
    226     if (filter != NULL &&
    227         handle.DecodeFrom(&handle_value).ok() &&
    228         !filter->KeyMayMatch(handle.offset(), k)) {
    229       // Not found
    230     } else {
    231       Iterator* block_iter = BlockReader(this, options, iiter->value());
    232       block_iter->Seek(k);
    233       if (block_iter->Valid()) {
    234         (*saver)(arg, block_iter->key(), block_iter->value());
    235       }
    236       s = block_iter->status();
    237       delete block_iter;
    238     }
    239   }
    240   if (s.ok()) {
    241     s = iiter->status();
    242   }
    243   delete iiter;
    244   return s;
    245 }
    246 
    247 
    248 uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
    249   Iterator* index_iter =
    250       rep_->index_block->NewIterator(rep_->options.comparator);
    251   index_iter->Seek(key);
    252   uint64_t result;
    253   if (index_iter->Valid()) {
    254     BlockHandle handle;
    255     Slice input = index_iter->value();
    256     Status s = handle.DecodeFrom(&input);
    257     if (s.ok()) {
    258       result = handle.offset();
    259     } else {
    260       // Strange: we can't decode the block handle in the index block.
    261       // We'll just return the offset of the metaindex block, which is
    262       // close to the whole file size for this case.
    263       result = rep_->metaindex_handle.offset();
    264     }
    265   } else {
    266     // key is past the last key in the file.  Approximate the offset
    267     // by returning the offset of the metaindex block (which is
    268     // right near the end of the file).
    269     result = rep_->metaindex_handle.offset();
    270   }
    271   delete index_iter;
    272   return result;
    273 }
    274 
    275 }  // namespace leveldb
    276