Home | History | Annotate | Download | only in bench
      1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
      4 
      5 #include <stdio.h>
      6 #include <stdlib.h>
      7 #include <kcpolydb.h>
      8 #include "util/histogram.h"
      9 #include "util/random.h"
     10 #include "util/testutil.h"
     11 
     12 // Comma-separated list of operations to run in the specified order
     13 //   Actual benchmarks:
     14 //
     15 //   fillseq       -- write N values in sequential key order in async mode
     16 //   fillrandom    -- write N values in random key order in async mode
     17 //   overwrite     -- overwrite N values in random key order in async mode
     18 //   fillseqsync   -- write N/100 values in sequential key order in sync mode
     19 //   fillrandsync  -- write N/100 values in random key order in sync mode
     20 //   fillrand100K  -- write N/1000 100K values in random order in async mode
     21 //   fillseq100K   -- write N/1000 100K values in seq order in async mode
     22 //   readseq       -- read N times sequentially
     23 //   readseq100K   -- read N/1000 100K values in sequential order in async mode
     24 //   readrand100K  -- read N/1000 100K values in random order in async mode
     25 //   readrandom    -- read N times in random order
     26 static const char* FLAGS_benchmarks =
     27     "fillseq,"
     28     "fillseqsync,"
     29     "fillrandsync,"
     30     "fillrandom,"
     31     "overwrite,"
     32     "readrandom,"
     33     "readseq,"
     34     "fillrand100K,"
     35     "fillseq100K,"
     36     "readseq100K,"
     37     "readrand100K,"
     38     ;
     39 
     40 // Number of key/values to place in database
     41 static int FLAGS_num = 1000000;
     42 
     43 // Number of read operations to do.  If negative, do FLAGS_num reads.
     44 static int FLAGS_reads = -1;
     45 
     46 // Size of each value
     47 static int FLAGS_value_size = 100;
     48 
     49 // Arrange to generate values that shrink to this fraction of
     50 // their original size after compression
     51 static double FLAGS_compression_ratio = 0.5;
     52 
     53 // Print histogram of operation timings
     54 static bool FLAGS_histogram = false;
     55 
     56 // Cache size. Default 4 MB
     57 static int FLAGS_cache_size = 4194304;
     58 
     59 // Page size. Default 1 KB
     60 static int FLAGS_page_size = 1024;
     61 
     62 // If true, do not destroy the existing database.  If you set this
     63 // flag and also specify a benchmark that wants a fresh database, that
     64 // benchmark will fail.
     65 static bool FLAGS_use_existing_db = false;
     66 
     67 // Compression flag. If true, compression is on. If false, compression
     68 // is off.
     69 static bool FLAGS_compression = true;
     70 
     71 // Use the db with the following name.
     72 static const char* FLAGS_db = NULL;
     73 
     74 inline
     75 static void DBSynchronize(kyotocabinet::TreeDB* db_)
     76 {
     77   // Synchronize will flush writes to disk
     78   if (!db_->synchronize()) {
     79     fprintf(stderr, "synchronize error: %s\n", db_->error().name());
     80   }
     81 }
     82 
     83 namespace leveldb {
     84 
     85 // Helper for quickly generating random data.
     86 namespace {
     87 class RandomGenerator {
     88  private:
     89   std::string data_;
     90   int pos_;
     91 
     92  public:
     93   RandomGenerator() {
     94     // We use a limited amount of data over and over again and ensure
     95     // that it is larger than the compression window (32KB), and also
     96     // large enough to serve all typical value sizes we want to write.
     97     Random rnd(301);
     98     std::string piece;
     99     while (data_.size() < 1048576) {
    100       // Add a short fragment that is as compressible as specified
    101       // by FLAGS_compression_ratio.
    102       test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece);
    103       data_.append(piece);
    104     }
    105     pos_ = 0;
    106   }
    107 
    108   Slice Generate(int len) {
    109     if (pos_ + len > data_.size()) {
    110       pos_ = 0;
    111       assert(len < data_.size());
    112     }
    113     pos_ += len;
    114     return Slice(data_.data() + pos_ - len, len);
    115   }
    116 };
    117 
    118 static Slice TrimSpace(Slice s) {
    119   int start = 0;
    120   while (start < s.size() && isspace(s[start])) {
    121     start++;
    122   }
    123   int limit = s.size();
    124   while (limit > start && isspace(s[limit-1])) {
    125     limit--;
    126   }
    127   return Slice(s.data() + start, limit - start);
    128 }
    129 
    130 }  // namespace
    131 
    132 class Benchmark {
    133  private:
    134   kyotocabinet::TreeDB* db_;
    135   int db_num_;
    136   int num_;
    137   int reads_;
    138   double start_;
    139   double last_op_finish_;
    140   int64_t bytes_;
    141   std::string message_;
    142   Histogram hist_;
    143   RandomGenerator gen_;
    144   Random rand_;
    145   kyotocabinet::LZOCompressor<kyotocabinet::LZO::RAW> comp_;
    146 
    147   // State kept for progress messages
    148   int done_;
    149   int next_report_;     // When to report next
    150 
    151   void PrintHeader() {
    152     const int kKeySize = 16;
    153     PrintEnvironment();
    154     fprintf(stdout, "Keys:       %d bytes each\n", kKeySize);
    155     fprintf(stdout, "Values:     %d bytes each (%d bytes after compression)\n",
    156             FLAGS_value_size,
    157             static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
    158     fprintf(stdout, "Entries:    %d\n", num_);
    159     fprintf(stdout, "RawSize:    %.1f MB (estimated)\n",
    160             ((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_)
    161              / 1048576.0));
    162     fprintf(stdout, "FileSize:   %.1f MB (estimated)\n",
    163             (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_)
    164              / 1048576.0));
    165     PrintWarnings();
    166     fprintf(stdout, "------------------------------------------------\n");
    167   }
    168 
    169   void PrintWarnings() {
    170 #if defined(__GNUC__) && !defined(__OPTIMIZE__)
    171     fprintf(stdout,
    172             "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"
    173             );
    174 #endif
    175 #ifndef NDEBUG
    176     fprintf(stdout,
    177             "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
    178 #endif
    179   }
    180 
    181   void PrintEnvironment() {
    182     fprintf(stderr, "Kyoto Cabinet:    version %s, lib ver %d, lib rev %d\n",
    183             kyotocabinet::VERSION, kyotocabinet::LIBVER, kyotocabinet::LIBREV);
    184 
    185 #if defined(__linux)
    186     time_t now = time(NULL);
    187     fprintf(stderr, "Date:           %s", ctime(&now));  // ctime() adds newline
    188 
    189     FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
    190     if (cpuinfo != NULL) {
    191       char line[1000];
    192       int num_cpus = 0;
    193       std::string cpu_type;
    194       std::string cache_size;
    195       while (fgets(line, sizeof(line), cpuinfo) != NULL) {
    196         const char* sep = strchr(line, ':');
    197         if (sep == NULL) {
    198           continue;
    199         }
    200         Slice key = TrimSpace(Slice(line, sep - 1 - line));
    201         Slice val = TrimSpace(Slice(sep + 1));
    202         if (key == "model name") {
    203           ++num_cpus;
    204           cpu_type = val.ToString();
    205         } else if (key == "cache size") {
    206           cache_size = val.ToString();
    207         }
    208       }
    209       fclose(cpuinfo);
    210       fprintf(stderr, "CPU:            %d * %s\n", num_cpus, cpu_type.c_str());
    211       fprintf(stderr, "CPUCache:       %s\n", cache_size.c_str());
    212     }
    213 #endif
    214   }
    215 
    216   void Start() {
    217     start_ = Env::Default()->NowMicros() * 1e-6;
    218     bytes_ = 0;
    219     message_.clear();
    220     last_op_finish_ = start_;
    221     hist_.Clear();
    222     done_ = 0;
    223     next_report_ = 100;
    224   }
    225 
    226   void FinishedSingleOp() {
    227     if (FLAGS_histogram) {
    228       double now = Env::Default()->NowMicros() * 1e-6;
    229       double micros = (now - last_op_finish_) * 1e6;
    230       hist_.Add(micros);
    231       if (micros > 20000) {
    232         fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
    233         fflush(stderr);
    234       }
    235       last_op_finish_ = now;
    236     }
    237 
    238     done_++;
    239     if (done_ >= next_report_) {
    240       if      (next_report_ < 1000)   next_report_ += 100;
    241       else if (next_report_ < 5000)   next_report_ += 500;
    242       else if (next_report_ < 10000)  next_report_ += 1000;
    243       else if (next_report_ < 50000)  next_report_ += 5000;
    244       else if (next_report_ < 100000) next_report_ += 10000;
    245       else if (next_report_ < 500000) next_report_ += 50000;
    246       else                            next_report_ += 100000;
    247       fprintf(stderr, "... finished %d ops%30s\r", done_, "");
    248       fflush(stderr);
    249     }
    250   }
    251 
    252   void Stop(const Slice& name) {
    253     double finish = Env::Default()->NowMicros() * 1e-6;
    254 
    255     // Pretend at least one op was done in case we are running a benchmark
    256     // that does not call FinishedSingleOp().
    257     if (done_ < 1) done_ = 1;
    258 
    259     if (bytes_ > 0) {
    260       char rate[100];
    261       snprintf(rate, sizeof(rate), "%6.1f MB/s",
    262                (bytes_ / 1048576.0) / (finish - start_));
    263       if (!message_.empty()) {
    264         message_  = std::string(rate) + " " + message_;
    265       } else {
    266         message_ = rate;
    267       }
    268     }
    269 
    270     fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
    271             name.ToString().c_str(),
    272             (finish - start_) * 1e6 / done_,
    273             (message_.empty() ? "" : " "),
    274             message_.c_str());
    275     if (FLAGS_histogram) {
    276       fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str());
    277     }
    278     fflush(stdout);
    279   }
    280 
    281  public:
    282   enum Order {
    283     SEQUENTIAL,
    284     RANDOM
    285   };
    286   enum DBState {
    287     FRESH,
    288     EXISTING
    289   };
    290 
    291   Benchmark()
    292   : db_(NULL),
    293     num_(FLAGS_num),
    294     reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
    295     bytes_(0),
    296     rand_(301) {
    297     std::vector<std::string> files;
    298     std::string test_dir;
    299     Env::Default()->GetTestDirectory(&test_dir);
    300     Env::Default()->GetChildren(test_dir.c_str(), &files);
    301     if (!FLAGS_use_existing_db) {
    302       for (int i = 0; i < files.size(); i++) {
    303         if (Slice(files[i]).starts_with("dbbench_polyDB")) {
    304           std::string file_name(test_dir);
    305           file_name += "/";
    306           file_name += files[i];
    307           Env::Default()->DeleteFile(file_name.c_str());
    308         }
    309       }
    310     }
    311   }
    312 
    313   ~Benchmark() {
    314     if (!db_->close()) {
    315       fprintf(stderr, "close error: %s\n", db_->error().name());
    316     }
    317   }
    318 
    319   void Run() {
    320     PrintHeader();
    321     Open(false);
    322 
    323     const char* benchmarks = FLAGS_benchmarks;
    324     while (benchmarks != NULL) {
    325       const char* sep = strchr(benchmarks, ',');
    326       Slice name;
    327       if (sep == NULL) {
    328         name = benchmarks;
    329         benchmarks = NULL;
    330       } else {
    331         name = Slice(benchmarks, sep - benchmarks);
    332         benchmarks = sep + 1;
    333       }
    334 
    335       Start();
    336 
    337       bool known = true;
    338       bool write_sync = false;
    339       if (name == Slice("fillseq")) {
    340         Write(write_sync, SEQUENTIAL, FRESH, num_, FLAGS_value_size, 1);
    341 
    342       } else if (name == Slice("fillrandom")) {
    343         Write(write_sync, RANDOM, FRESH, num_, FLAGS_value_size, 1);
    344         DBSynchronize(db_);
    345       } else if (name == Slice("overwrite")) {
    346         Write(write_sync, RANDOM, EXISTING, num_, FLAGS_value_size, 1);
    347         DBSynchronize(db_);
    348       } else if (name == Slice("fillrandsync")) {
    349         write_sync = true;
    350         Write(write_sync, RANDOM, FRESH, num_ / 100, FLAGS_value_size, 1);
    351         DBSynchronize(db_);
    352       } else if (name == Slice("fillseqsync")) {
    353         write_sync = true;
    354         Write(write_sync, SEQUENTIAL, FRESH, num_ / 100, FLAGS_value_size, 1);
    355         DBSynchronize(db_);
    356       } else if (name == Slice("fillrand100K")) {
    357         Write(write_sync, RANDOM, FRESH, num_ / 1000, 100 * 1000, 1);
    358         DBSynchronize(db_);
    359       } else if (name == Slice("fillseq100K")) {
    360         Write(write_sync, SEQUENTIAL, FRESH, num_ / 1000, 100 * 1000, 1);
    361         DBSynchronize(db_);
    362       } else if (name == Slice("readseq")) {
    363         ReadSequential();
    364       } else if (name == Slice("readrandom")) {
    365         ReadRandom();
    366       } else if (name == Slice("readrand100K")) {
    367         int n = reads_;
    368         reads_ /= 1000;
    369         ReadRandom();
    370         reads_ = n;
    371       } else if (name == Slice("readseq100K")) {
    372         int n = reads_;
    373         reads_ /= 1000;
    374         ReadSequential();
    375         reads_ = n;
    376       } else {
    377         known = false;
    378         if (name != Slice()) {  // No error message for empty name
    379           fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
    380         }
    381       }
    382       if (known) {
    383         Stop(name);
    384       }
    385     }
    386   }
    387 
    388  private:
    389     void Open(bool sync) {
    390     assert(db_ == NULL);
    391 
    392     // Initialize db_
    393     db_ = new kyotocabinet::TreeDB();
    394     char file_name[100];
    395     db_num_++;
    396     std::string test_dir;
    397     Env::Default()->GetTestDirectory(&test_dir);
    398     snprintf(file_name, sizeof(file_name),
    399              "%s/dbbench_polyDB-%d.kct",
    400              test_dir.c_str(),
    401              db_num_);
    402 
    403     // Create tuning options and open the database
    404     int open_options = kyotocabinet::PolyDB::OWRITER |
    405                        kyotocabinet::PolyDB::OCREATE;
    406     int tune_options = kyotocabinet::TreeDB::TSMALL |
    407         kyotocabinet::TreeDB::TLINEAR;
    408     if (FLAGS_compression) {
    409       tune_options |= kyotocabinet::TreeDB::TCOMPRESS;
    410       db_->tune_compressor(&comp_);
    411     }
    412     db_->tune_options(tune_options);
    413     db_->tune_page_cache(FLAGS_cache_size);
    414     db_->tune_page(FLAGS_page_size);
    415     db_->tune_map(256LL<<20);
    416     if (sync) {
    417       open_options |= kyotocabinet::PolyDB::OAUTOSYNC;
    418     }
    419     if (!db_->open(file_name, open_options)) {
    420       fprintf(stderr, "open error: %s\n", db_->error().name());
    421     }
    422   }
    423 
    424   void Write(bool sync, Order order, DBState state,
    425              int num_entries, int value_size, int entries_per_batch) {
    426     // Create new database if state == FRESH
    427     if (state == FRESH) {
    428       if (FLAGS_use_existing_db) {
    429         message_ = "skipping (--use_existing_db is true)";
    430         return;
    431       }
    432       delete db_;
    433       db_ = NULL;
    434       Open(sync);
    435       Start();  // Do not count time taken to destroy/open
    436     }
    437 
    438     if (num_entries != num_) {
    439       char msg[100];
    440       snprintf(msg, sizeof(msg), "(%d ops)", num_entries);
    441       message_ = msg;
    442     }
    443 
    444     // Write to database
    445     for (int i = 0; i < num_entries; i++)
    446     {
    447       const int k = (order == SEQUENTIAL) ? i : (rand_.Next() % num_entries);
    448       char key[100];
    449       snprintf(key, sizeof(key), "%016d", k);
    450       bytes_ += value_size + strlen(key);
    451       std::string cpp_key = key;
    452       if (!db_->set(cpp_key, gen_.Generate(value_size).ToString())) {
    453         fprintf(stderr, "set error: %s\n", db_->error().name());
    454       }
    455       FinishedSingleOp();
    456     }
    457   }
    458 
    459   void ReadSequential() {
    460     kyotocabinet::DB::Cursor* cur = db_->cursor();
    461     cur->jump();
    462     std::string ckey, cvalue;
    463     while (cur->get(&ckey, &cvalue, true)) {
    464       bytes_ += ckey.size() + cvalue.size();
    465       FinishedSingleOp();
    466     }
    467     delete cur;
    468   }
    469 
    470   void ReadRandom() {
    471     std::string value;
    472     for (int i = 0; i < reads_; i++) {
    473       char key[100];
    474       const int k = rand_.Next() % reads_;
    475       snprintf(key, sizeof(key), "%016d", k);
    476       db_->get(key, &value);
    477       FinishedSingleOp();
    478     }
    479   }
    480 };
    481 
    482 }  // namespace leveldb
    483 
    484 int main(int argc, char** argv) {
    485   std::string default_db_path;
    486   for (int i = 1; i < argc; i++) {
    487     double d;
    488     int n;
    489     char junk;
    490     if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) {
    491       FLAGS_benchmarks = argv[i] + strlen("--benchmarks=");
    492     } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) {
    493       FLAGS_compression_ratio = d;
    494     } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 &&
    495                (n == 0 || n == 1)) {
    496       FLAGS_histogram = n;
    497     } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
    498       FLAGS_num = n;
    499     } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
    500       FLAGS_reads = n;
    501     } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) {
    502       FLAGS_value_size = n;
    503     } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
    504       FLAGS_cache_size = n;
    505     } else if (sscanf(argv[i], "--page_size=%d%c", &n, &junk) == 1) {
    506       FLAGS_page_size = n;
    507     } else if (sscanf(argv[i], "--compression=%d%c", &n, &junk) == 1 &&
    508                (n == 0 || n == 1)) {
    509       FLAGS_compression = (n == 1) ? true : false;
    510     } else if (strncmp(argv[i], "--db=", 5) == 0) {
    511       FLAGS_db = argv[i] + 5;
    512     } else {
    513       fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
    514       exit(1);
    515     }
    516   }
    517 
    518   // Choose a location for the test database if none given with --db=<path>
    519   if (FLAGS_db == NULL) {
    520       leveldb::Env::Default()->GetTestDirectory(&default_db_path);
    521       default_db_path += "/dbbench";
    522       FLAGS_db = default_db_path.c_str();
    523   }
    524 
    525   leveldb::Benchmark benchmark;
    526   benchmark.Run();
    527   return 0;
    528 }
    529