1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 5 #include "leveldb/db.h" 6 #include "leveldb/filter_policy.h" 7 #include "db/db_impl.h" 8 #include "db/filename.h" 9 #include "db/version_set.h" 10 #include "db/write_batch_internal.h" 11 #include "leveldb/cache.h" 12 #include "leveldb/env.h" 13 #include "leveldb/table.h" 14 #include "util/hash.h" 15 #include "util/logging.h" 16 #include "util/mutexlock.h" 17 #include "util/testharness.h" 18 #include "util/testutil.h" 19 20 namespace leveldb { 21 22 static std::string RandomString(Random* rnd, int len) { 23 std::string r; 24 test::RandomString(rnd, len, &r); 25 return r; 26 } 27 28 namespace { 29 class AtomicCounter { 30 private: 31 port::Mutex mu_; 32 int count_; 33 public: 34 AtomicCounter() : count_(0) { } 35 void Increment() { 36 IncrementBy(1); 37 } 38 void IncrementBy(int count) { 39 MutexLock l(&mu_); 40 count_ += count; 41 } 42 int Read() { 43 MutexLock l(&mu_); 44 return count_; 45 } 46 void Reset() { 47 MutexLock l(&mu_); 48 count_ = 0; 49 } 50 }; 51 52 void DelayMilliseconds(int millis) { 53 Env::Default()->SleepForMicroseconds(millis * 1000); 54 } 55 } 56 57 // Special Env used to delay background operations 58 class SpecialEnv : public EnvWrapper { 59 public: 60 // sstable/log Sync() calls are blocked while this pointer is non-NULL. 61 port::AtomicPointer delay_data_sync_; 62 63 // sstable/log Sync() calls return an error. 64 port::AtomicPointer data_sync_error_; 65 66 // Simulate no-space errors while this pointer is non-NULL. 67 port::AtomicPointer no_space_; 68 69 // Simulate non-writable file system while this pointer is non-NULL 70 port::AtomicPointer non_writable_; 71 72 // Force sync of manifest files to fail while this pointer is non-NULL 73 port::AtomicPointer manifest_sync_error_; 74 75 // Force write to manifest files to fail while this pointer is non-NULL 76 port::AtomicPointer manifest_write_error_; 77 78 bool count_random_reads_; 79 AtomicCounter random_read_counter_; 80 81 explicit SpecialEnv(Env* base) : EnvWrapper(base) { 82 delay_data_sync_.Release_Store(NULL); 83 data_sync_error_.Release_Store(NULL); 84 no_space_.Release_Store(NULL); 85 non_writable_.Release_Store(NULL); 86 count_random_reads_ = false; 87 manifest_sync_error_.Release_Store(NULL); 88 manifest_write_error_.Release_Store(NULL); 89 } 90 91 Status NewWritableFile(const std::string& f, WritableFile** r) { 92 class DataFile : public WritableFile { 93 private: 94 SpecialEnv* env_; 95 WritableFile* base_; 96 97 public: 98 DataFile(SpecialEnv* env, WritableFile* base) 99 : env_(env), 100 base_(base) { 101 } 102 ~DataFile() { delete base_; } 103 Status Append(const Slice& data) { 104 if (env_->no_space_.Acquire_Load() != NULL) { 105 // Drop writes on the floor 106 return Status::OK(); 107 } else { 108 return base_->Append(data); 109 } 110 } 111 Status Close() { return base_->Close(); } 112 Status Flush() { return base_->Flush(); } 113 Status Sync() { 114 if (env_->data_sync_error_.Acquire_Load() != NULL) { 115 return Status::IOError("simulated data sync error"); 116 } 117 while (env_->delay_data_sync_.Acquire_Load() != NULL) { 118 DelayMilliseconds(100); 119 } 120 return base_->Sync(); 121 } 122 }; 123 class ManifestFile : public WritableFile { 124 private: 125 SpecialEnv* env_; 126 WritableFile* base_; 127 public: 128 ManifestFile(SpecialEnv* env, WritableFile* b) : env_(env), base_(b) { } 129 ~ManifestFile() { delete base_; } 130 Status Append(const Slice& data) { 131 if (env_->manifest_write_error_.Acquire_Load() != NULL) { 132 return Status::IOError("simulated writer error"); 133 } else { 134 return base_->Append(data); 135 } 136 } 137 Status Close() { return base_->Close(); } 138 Status Flush() { return base_->Flush(); } 139 Status Sync() { 140 if (env_->manifest_sync_error_.Acquire_Load() != NULL) { 141 return Status::IOError("simulated sync error"); 142 } else { 143 return base_->Sync(); 144 } 145 } 146 }; 147 148 if (non_writable_.Acquire_Load() != NULL) { 149 return Status::IOError("simulated write error"); 150 } 151 152 Status s = target()->NewWritableFile(f, r); 153 if (s.ok()) { 154 if (strstr(f.c_str(), ".ldb") != NULL || 155 strstr(f.c_str(), ".log") != NULL) { 156 *r = new DataFile(this, *r); 157 } else if (strstr(f.c_str(), "MANIFEST") != NULL) { 158 *r = new ManifestFile(this, *r); 159 } 160 } 161 return s; 162 } 163 164 Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) { 165 class CountingFile : public RandomAccessFile { 166 private: 167 RandomAccessFile* target_; 168 AtomicCounter* counter_; 169 public: 170 CountingFile(RandomAccessFile* target, AtomicCounter* counter) 171 : target_(target), counter_(counter) { 172 } 173 virtual ~CountingFile() { delete target_; } 174 virtual Status Read(uint64_t offset, size_t n, Slice* result, 175 char* scratch) const { 176 counter_->Increment(); 177 return target_->Read(offset, n, result, scratch); 178 } 179 }; 180 181 Status s = target()->NewRandomAccessFile(f, r); 182 if (s.ok() && count_random_reads_) { 183 *r = new CountingFile(*r, &random_read_counter_); 184 } 185 return s; 186 } 187 }; 188 189 class DBTest { 190 private: 191 const FilterPolicy* filter_policy_; 192 193 // Sequence of option configurations to try 194 enum OptionConfig { 195 kDefault, 196 kFilter, 197 kUncompressed, 198 kEnd 199 }; 200 int option_config_; 201 202 public: 203 std::string dbname_; 204 SpecialEnv* env_; 205 DB* db_; 206 207 Options last_options_; 208 209 DBTest() : option_config_(kDefault), 210 env_(new SpecialEnv(Env::Default())) { 211 filter_policy_ = NewBloomFilterPolicy(10); 212 dbname_ = test::TmpDir() + "/db_test"; 213 DestroyDB(dbname_, Options()); 214 db_ = NULL; 215 Reopen(); 216 } 217 218 ~DBTest() { 219 delete db_; 220 DestroyDB(dbname_, Options()); 221 delete env_; 222 delete filter_policy_; 223 } 224 225 // Switch to a fresh database with the next option configuration to 226 // test. Return false if there are no more configurations to test. 227 bool ChangeOptions() { 228 option_config_++; 229 if (option_config_ >= kEnd) { 230 return false; 231 } else { 232 DestroyAndReopen(); 233 return true; 234 } 235 } 236 237 // Return the current option configuration. 238 Options CurrentOptions() { 239 Options options; 240 switch (option_config_) { 241 case kFilter: 242 options.filter_policy = filter_policy_; 243 break; 244 case kUncompressed: 245 options.compression = kNoCompression; 246 break; 247 default: 248 break; 249 } 250 return options; 251 } 252 253 DBImpl* dbfull() { 254 return reinterpret_cast<DBImpl*>(db_); 255 } 256 257 void Reopen(Options* options = NULL) { 258 ASSERT_OK(TryReopen(options)); 259 } 260 261 void Close() { 262 delete db_; 263 db_ = NULL; 264 } 265 266 void DestroyAndReopen(Options* options = NULL) { 267 delete db_; 268 db_ = NULL; 269 DestroyDB(dbname_, Options()); 270 ASSERT_OK(TryReopen(options)); 271 } 272 273 Status TryReopen(Options* options) { 274 delete db_; 275 db_ = NULL; 276 Options opts; 277 if (options != NULL) { 278 opts = *options; 279 } else { 280 opts = CurrentOptions(); 281 opts.create_if_missing = true; 282 } 283 last_options_ = opts; 284 285 return DB::Open(opts, dbname_, &db_); 286 } 287 288 Status Put(const std::string& k, const std::string& v) { 289 return db_->Put(WriteOptions(), k, v); 290 } 291 292 Status Delete(const std::string& k) { 293 return db_->Delete(WriteOptions(), k); 294 } 295 296 std::string Get(const std::string& k, const Snapshot* snapshot = NULL) { 297 ReadOptions options; 298 options.snapshot = snapshot; 299 std::string result; 300 Status s = db_->Get(options, k, &result); 301 if (s.IsNotFound()) { 302 result = "NOT_FOUND"; 303 } else if (!s.ok()) { 304 result = s.ToString(); 305 } 306 return result; 307 } 308 309 // Return a string that contains all key,value pairs in order, 310 // formatted like "(k1->v1)(k2->v2)". 311 std::string Contents() { 312 std::vector<std::string> forward; 313 std::string result; 314 Iterator* iter = db_->NewIterator(ReadOptions()); 315 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { 316 std::string s = IterStatus(iter); 317 result.push_back('('); 318 result.append(s); 319 result.push_back(')'); 320 forward.push_back(s); 321 } 322 323 // Check reverse iteration results are the reverse of forward results 324 size_t matched = 0; 325 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { 326 ASSERT_LT(matched, forward.size()); 327 ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]); 328 matched++; 329 } 330 ASSERT_EQ(matched, forward.size()); 331 332 delete iter; 333 return result; 334 } 335 336 std::string AllEntriesFor(const Slice& user_key) { 337 Iterator* iter = dbfull()->TEST_NewInternalIterator(); 338 InternalKey target(user_key, kMaxSequenceNumber, kTypeValue); 339 iter->Seek(target.Encode()); 340 std::string result; 341 if (!iter->status().ok()) { 342 result = iter->status().ToString(); 343 } else { 344 result = "[ "; 345 bool first = true; 346 while (iter->Valid()) { 347 ParsedInternalKey ikey; 348 if (!ParseInternalKey(iter->key(), &ikey)) { 349 result += "CORRUPTED"; 350 } else { 351 if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) { 352 break; 353 } 354 if (!first) { 355 result += ", "; 356 } 357 first = false; 358 switch (ikey.type) { 359 case kTypeValue: 360 result += iter->value().ToString(); 361 break; 362 case kTypeDeletion: 363 result += "DEL"; 364 break; 365 } 366 } 367 iter->Next(); 368 } 369 if (!first) { 370 result += " "; 371 } 372 result += "]"; 373 } 374 delete iter; 375 return result; 376 } 377 378 int NumTableFilesAtLevel(int level) { 379 std::string property; 380 ASSERT_TRUE( 381 db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level), 382 &property)); 383 return atoi(property.c_str()); 384 } 385 386 int TotalTableFiles() { 387 int result = 0; 388 for (int level = 0; level < config::kNumLevels; level++) { 389 result += NumTableFilesAtLevel(level); 390 } 391 return result; 392 } 393 394 // Return spread of files per level 395 std::string FilesPerLevel() { 396 std::string result; 397 int last_non_zero_offset = 0; 398 for (int level = 0; level < config::kNumLevels; level++) { 399 int f = NumTableFilesAtLevel(level); 400 char buf[100]; 401 snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f); 402 result += buf; 403 if (f > 0) { 404 last_non_zero_offset = result.size(); 405 } 406 } 407 result.resize(last_non_zero_offset); 408 return result; 409 } 410 411 int CountFiles() { 412 std::vector<std::string> files; 413 env_->GetChildren(dbname_, &files); 414 return static_cast<int>(files.size()); 415 } 416 417 uint64_t Size(const Slice& start, const Slice& limit) { 418 Range r(start, limit); 419 uint64_t size; 420 db_->GetApproximateSizes(&r, 1, &size); 421 return size; 422 } 423 424 void Compact(const Slice& start, const Slice& limit) { 425 db_->CompactRange(&start, &limit); 426 } 427 428 // Do n memtable compactions, each of which produces an sstable 429 // covering the range [small,large]. 430 void MakeTables(int n, const std::string& small, const std::string& large) { 431 for (int i = 0; i < n; i++) { 432 Put(small, "begin"); 433 Put(large, "end"); 434 dbfull()->TEST_CompactMemTable(); 435 } 436 } 437 438 // Prevent pushing of new sstables into deeper levels by adding 439 // tables that cover a specified range to all levels. 440 void FillLevels(const std::string& smallest, const std::string& largest) { 441 MakeTables(config::kNumLevels, smallest, largest); 442 } 443 444 void DumpFileCounts(const char* label) { 445 fprintf(stderr, "---\n%s:\n", label); 446 fprintf(stderr, "maxoverlap: %lld\n", 447 static_cast<long long>( 448 dbfull()->TEST_MaxNextLevelOverlappingBytes())); 449 for (int level = 0; level < config::kNumLevels; level++) { 450 int num = NumTableFilesAtLevel(level); 451 if (num > 0) { 452 fprintf(stderr, " level %3d : %d files\n", level, num); 453 } 454 } 455 } 456 457 std::string DumpSSTableList() { 458 std::string property; 459 db_->GetProperty("leveldb.sstables", &property); 460 return property; 461 } 462 463 std::string IterStatus(Iterator* iter) { 464 std::string result; 465 if (iter->Valid()) { 466 result = iter->key().ToString() + "->" + iter->value().ToString(); 467 } else { 468 result = "(invalid)"; 469 } 470 return result; 471 } 472 473 bool DeleteAnSSTFile() { 474 std::vector<std::string> filenames; 475 ASSERT_OK(env_->GetChildren(dbname_, &filenames)); 476 uint64_t number; 477 FileType type; 478 for (size_t i = 0; i < filenames.size(); i++) { 479 if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) { 480 ASSERT_OK(env_->DeleteFile(TableFileName(dbname_, number))); 481 return true; 482 } 483 } 484 return false; 485 } 486 487 // Returns number of files renamed. 488 int RenameLDBToSST() { 489 std::vector<std::string> filenames; 490 ASSERT_OK(env_->GetChildren(dbname_, &filenames)); 491 uint64_t number; 492 FileType type; 493 int files_renamed = 0; 494 for (size_t i = 0; i < filenames.size(); i++) { 495 if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) { 496 const std::string from = TableFileName(dbname_, number); 497 const std::string to = SSTTableFileName(dbname_, number); 498 ASSERT_OK(env_->RenameFile(from, to)); 499 files_renamed++; 500 } 501 } 502 return files_renamed; 503 } 504 }; 505 506 TEST(DBTest, Empty) { 507 do { 508 ASSERT_TRUE(db_ != NULL); 509 ASSERT_EQ("NOT_FOUND", Get("foo")); 510 } while (ChangeOptions()); 511 } 512 513 TEST(DBTest, ReadWrite) { 514 do { 515 ASSERT_OK(Put("foo", "v1")); 516 ASSERT_EQ("v1", Get("foo")); 517 ASSERT_OK(Put("bar", "v2")); 518 ASSERT_OK(Put("foo", "v3")); 519 ASSERT_EQ("v3", Get("foo")); 520 ASSERT_EQ("v2", Get("bar")); 521 } while (ChangeOptions()); 522 } 523 524 TEST(DBTest, PutDeleteGet) { 525 do { 526 ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1")); 527 ASSERT_EQ("v1", Get("foo")); 528 ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2")); 529 ASSERT_EQ("v2", Get("foo")); 530 ASSERT_OK(db_->Delete(WriteOptions(), "foo")); 531 ASSERT_EQ("NOT_FOUND", Get("foo")); 532 } while (ChangeOptions()); 533 } 534 535 TEST(DBTest, GetFromImmutableLayer) { 536 do { 537 Options options = CurrentOptions(); 538 options.env = env_; 539 options.write_buffer_size = 100000; // Small write buffer 540 Reopen(&options); 541 542 ASSERT_OK(Put("foo", "v1")); 543 ASSERT_EQ("v1", Get("foo")); 544 545 env_->delay_data_sync_.Release_Store(env_); // Block sync calls 546 Put("k1", std::string(100000, 'x')); // Fill memtable 547 Put("k2", std::string(100000, 'y')); // Trigger compaction 548 ASSERT_EQ("v1", Get("foo")); 549 env_->delay_data_sync_.Release_Store(NULL); // Release sync calls 550 } while (ChangeOptions()); 551 } 552 553 TEST(DBTest, GetFromVersions) { 554 do { 555 ASSERT_OK(Put("foo", "v1")); 556 dbfull()->TEST_CompactMemTable(); 557 ASSERT_EQ("v1", Get("foo")); 558 } while (ChangeOptions()); 559 } 560 561 TEST(DBTest, GetSnapshot) { 562 do { 563 // Try with both a short key and a long key 564 for (int i = 0; i < 2; i++) { 565 std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x'); 566 ASSERT_OK(Put(key, "v1")); 567 const Snapshot* s1 = db_->GetSnapshot(); 568 ASSERT_OK(Put(key, "v2")); 569 ASSERT_EQ("v2", Get(key)); 570 ASSERT_EQ("v1", Get(key, s1)); 571 dbfull()->TEST_CompactMemTable(); 572 ASSERT_EQ("v2", Get(key)); 573 ASSERT_EQ("v1", Get(key, s1)); 574 db_->ReleaseSnapshot(s1); 575 } 576 } while (ChangeOptions()); 577 } 578 579 TEST(DBTest, GetLevel0Ordering) { 580 do { 581 // Check that we process level-0 files in correct order. The code 582 // below generates two level-0 files where the earlier one comes 583 // before the later one in the level-0 file list since the earlier 584 // one has a smaller "smallest" key. 585 ASSERT_OK(Put("bar", "b")); 586 ASSERT_OK(Put("foo", "v1")); 587 dbfull()->TEST_CompactMemTable(); 588 ASSERT_OK(Put("foo", "v2")); 589 dbfull()->TEST_CompactMemTable(); 590 ASSERT_EQ("v2", Get("foo")); 591 } while (ChangeOptions()); 592 } 593 594 TEST(DBTest, GetOrderedByLevels) { 595 do { 596 ASSERT_OK(Put("foo", "v1")); 597 Compact("a", "z"); 598 ASSERT_EQ("v1", Get("foo")); 599 ASSERT_OK(Put("foo", "v2")); 600 ASSERT_EQ("v2", Get("foo")); 601 dbfull()->TEST_CompactMemTable(); 602 ASSERT_EQ("v2", Get("foo")); 603 } while (ChangeOptions()); 604 } 605 606 TEST(DBTest, GetPicksCorrectFile) { 607 do { 608 // Arrange to have multiple files in a non-level-0 level. 609 ASSERT_OK(Put("a", "va")); 610 Compact("a", "b"); 611 ASSERT_OK(Put("x", "vx")); 612 Compact("x", "y"); 613 ASSERT_OK(Put("f", "vf")); 614 Compact("f", "g"); 615 ASSERT_EQ("va", Get("a")); 616 ASSERT_EQ("vf", Get("f")); 617 ASSERT_EQ("vx", Get("x")); 618 } while (ChangeOptions()); 619 } 620 621 TEST(DBTest, GetEncountersEmptyLevel) { 622 do { 623 // Arrange for the following to happen: 624 // * sstable A in level 0 625 // * nothing in level 1 626 // * sstable B in level 2 627 // Then do enough Get() calls to arrange for an automatic compaction 628 // of sstable A. A bug would cause the compaction to be marked as 629 // occuring at level 1 (instead of the correct level 0). 630 631 // Step 1: First place sstables in levels 0 and 2 632 int compaction_count = 0; 633 while (NumTableFilesAtLevel(0) == 0 || 634 NumTableFilesAtLevel(2) == 0) { 635 ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2"; 636 compaction_count++; 637 Put("a", "begin"); 638 Put("z", "end"); 639 dbfull()->TEST_CompactMemTable(); 640 } 641 642 // Step 2: clear level 1 if necessary. 643 dbfull()->TEST_CompactRange(1, NULL, NULL); 644 ASSERT_EQ(NumTableFilesAtLevel(0), 1); 645 ASSERT_EQ(NumTableFilesAtLevel(1), 0); 646 ASSERT_EQ(NumTableFilesAtLevel(2), 1); 647 648 // Step 3: read a bunch of times 649 for (int i = 0; i < 1000; i++) { 650 ASSERT_EQ("NOT_FOUND", Get("missing")); 651 } 652 653 // Step 4: Wait for compaction to finish 654 DelayMilliseconds(1000); 655 656 ASSERT_EQ(NumTableFilesAtLevel(0), 0); 657 } while (ChangeOptions()); 658 } 659 660 TEST(DBTest, IterEmpty) { 661 Iterator* iter = db_->NewIterator(ReadOptions()); 662 663 iter->SeekToFirst(); 664 ASSERT_EQ(IterStatus(iter), "(invalid)"); 665 666 iter->SeekToLast(); 667 ASSERT_EQ(IterStatus(iter), "(invalid)"); 668 669 iter->Seek("foo"); 670 ASSERT_EQ(IterStatus(iter), "(invalid)"); 671 672 delete iter; 673 } 674 675 TEST(DBTest, IterSingle) { 676 ASSERT_OK(Put("a", "va")); 677 Iterator* iter = db_->NewIterator(ReadOptions()); 678 679 iter->SeekToFirst(); 680 ASSERT_EQ(IterStatus(iter), "a->va"); 681 iter->Next(); 682 ASSERT_EQ(IterStatus(iter), "(invalid)"); 683 iter->SeekToFirst(); 684 ASSERT_EQ(IterStatus(iter), "a->va"); 685 iter->Prev(); 686 ASSERT_EQ(IterStatus(iter), "(invalid)"); 687 688 iter->SeekToLast(); 689 ASSERT_EQ(IterStatus(iter), "a->va"); 690 iter->Next(); 691 ASSERT_EQ(IterStatus(iter), "(invalid)"); 692 iter->SeekToLast(); 693 ASSERT_EQ(IterStatus(iter), "a->va"); 694 iter->Prev(); 695 ASSERT_EQ(IterStatus(iter), "(invalid)"); 696 697 iter->Seek(""); 698 ASSERT_EQ(IterStatus(iter), "a->va"); 699 iter->Next(); 700 ASSERT_EQ(IterStatus(iter), "(invalid)"); 701 702 iter->Seek("a"); 703 ASSERT_EQ(IterStatus(iter), "a->va"); 704 iter->Next(); 705 ASSERT_EQ(IterStatus(iter), "(invalid)"); 706 707 iter->Seek("b"); 708 ASSERT_EQ(IterStatus(iter), "(invalid)"); 709 710 delete iter; 711 } 712 713 TEST(DBTest, IterMulti) { 714 ASSERT_OK(Put("a", "va")); 715 ASSERT_OK(Put("b", "vb")); 716 ASSERT_OK(Put("c", "vc")); 717 Iterator* iter = db_->NewIterator(ReadOptions()); 718 719 iter->SeekToFirst(); 720 ASSERT_EQ(IterStatus(iter), "a->va"); 721 iter->Next(); 722 ASSERT_EQ(IterStatus(iter), "b->vb"); 723 iter->Next(); 724 ASSERT_EQ(IterStatus(iter), "c->vc"); 725 iter->Next(); 726 ASSERT_EQ(IterStatus(iter), "(invalid)"); 727 iter->SeekToFirst(); 728 ASSERT_EQ(IterStatus(iter), "a->va"); 729 iter->Prev(); 730 ASSERT_EQ(IterStatus(iter), "(invalid)"); 731 732 iter->SeekToLast(); 733 ASSERT_EQ(IterStatus(iter), "c->vc"); 734 iter->Prev(); 735 ASSERT_EQ(IterStatus(iter), "b->vb"); 736 iter->Prev(); 737 ASSERT_EQ(IterStatus(iter), "a->va"); 738 iter->Prev(); 739 ASSERT_EQ(IterStatus(iter), "(invalid)"); 740 iter->SeekToLast(); 741 ASSERT_EQ(IterStatus(iter), "c->vc"); 742 iter->Next(); 743 ASSERT_EQ(IterStatus(iter), "(invalid)"); 744 745 iter->Seek(""); 746 ASSERT_EQ(IterStatus(iter), "a->va"); 747 iter->Seek("a"); 748 ASSERT_EQ(IterStatus(iter), "a->va"); 749 iter->Seek("ax"); 750 ASSERT_EQ(IterStatus(iter), "b->vb"); 751 iter->Seek("b"); 752 ASSERT_EQ(IterStatus(iter), "b->vb"); 753 iter->Seek("z"); 754 ASSERT_EQ(IterStatus(iter), "(invalid)"); 755 756 // Switch from reverse to forward 757 iter->SeekToLast(); 758 iter->Prev(); 759 iter->Prev(); 760 iter->Next(); 761 ASSERT_EQ(IterStatus(iter), "b->vb"); 762 763 // Switch from forward to reverse 764 iter->SeekToFirst(); 765 iter->Next(); 766 iter->Next(); 767 iter->Prev(); 768 ASSERT_EQ(IterStatus(iter), "b->vb"); 769 770 // Make sure iter stays at snapshot 771 ASSERT_OK(Put("a", "va2")); 772 ASSERT_OK(Put("a2", "va3")); 773 ASSERT_OK(Put("b", "vb2")); 774 ASSERT_OK(Put("c", "vc2")); 775 ASSERT_OK(Delete("b")); 776 iter->SeekToFirst(); 777 ASSERT_EQ(IterStatus(iter), "a->va"); 778 iter->Next(); 779 ASSERT_EQ(IterStatus(iter), "b->vb"); 780 iter->Next(); 781 ASSERT_EQ(IterStatus(iter), "c->vc"); 782 iter->Next(); 783 ASSERT_EQ(IterStatus(iter), "(invalid)"); 784 iter->SeekToLast(); 785 ASSERT_EQ(IterStatus(iter), "c->vc"); 786 iter->Prev(); 787 ASSERT_EQ(IterStatus(iter), "b->vb"); 788 iter->Prev(); 789 ASSERT_EQ(IterStatus(iter), "a->va"); 790 iter->Prev(); 791 ASSERT_EQ(IterStatus(iter), "(invalid)"); 792 793 delete iter; 794 } 795 796 TEST(DBTest, IterSmallAndLargeMix) { 797 ASSERT_OK(Put("a", "va")); 798 ASSERT_OK(Put("b", std::string(100000, 'b'))); 799 ASSERT_OK(Put("c", "vc")); 800 ASSERT_OK(Put("d", std::string(100000, 'd'))); 801 ASSERT_OK(Put("e", std::string(100000, 'e'))); 802 803 Iterator* iter = db_->NewIterator(ReadOptions()); 804 805 iter->SeekToFirst(); 806 ASSERT_EQ(IterStatus(iter), "a->va"); 807 iter->Next(); 808 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); 809 iter->Next(); 810 ASSERT_EQ(IterStatus(iter), "c->vc"); 811 iter->Next(); 812 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); 813 iter->Next(); 814 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); 815 iter->Next(); 816 ASSERT_EQ(IterStatus(iter), "(invalid)"); 817 818 iter->SeekToLast(); 819 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); 820 iter->Prev(); 821 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); 822 iter->Prev(); 823 ASSERT_EQ(IterStatus(iter), "c->vc"); 824 iter->Prev(); 825 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); 826 iter->Prev(); 827 ASSERT_EQ(IterStatus(iter), "a->va"); 828 iter->Prev(); 829 ASSERT_EQ(IterStatus(iter), "(invalid)"); 830 831 delete iter; 832 } 833 834 TEST(DBTest, IterMultiWithDelete) { 835 do { 836 ASSERT_OK(Put("a", "va")); 837 ASSERT_OK(Put("b", "vb")); 838 ASSERT_OK(Put("c", "vc")); 839 ASSERT_OK(Delete("b")); 840 ASSERT_EQ("NOT_FOUND", Get("b")); 841 842 Iterator* iter = db_->NewIterator(ReadOptions()); 843 iter->Seek("c"); 844 ASSERT_EQ(IterStatus(iter), "c->vc"); 845 iter->Prev(); 846 ASSERT_EQ(IterStatus(iter), "a->va"); 847 delete iter; 848 } while (ChangeOptions()); 849 } 850 851 TEST(DBTest, Recover) { 852 do { 853 ASSERT_OK(Put("foo", "v1")); 854 ASSERT_OK(Put("baz", "v5")); 855 856 Reopen(); 857 ASSERT_EQ("v1", Get("foo")); 858 859 ASSERT_EQ("v1", Get("foo")); 860 ASSERT_EQ("v5", Get("baz")); 861 ASSERT_OK(Put("bar", "v2")); 862 ASSERT_OK(Put("foo", "v3")); 863 864 Reopen(); 865 ASSERT_EQ("v3", Get("foo")); 866 ASSERT_OK(Put("foo", "v4")); 867 ASSERT_EQ("v4", Get("foo")); 868 ASSERT_EQ("v2", Get("bar")); 869 ASSERT_EQ("v5", Get("baz")); 870 } while (ChangeOptions()); 871 } 872 873 TEST(DBTest, RecoveryWithEmptyLog) { 874 do { 875 ASSERT_OK(Put("foo", "v1")); 876 ASSERT_OK(Put("foo", "v2")); 877 Reopen(); 878 Reopen(); 879 ASSERT_OK(Put("foo", "v3")); 880 Reopen(); 881 ASSERT_EQ("v3", Get("foo")); 882 } while (ChangeOptions()); 883 } 884 885 // Check that writes done during a memtable compaction are recovered 886 // if the database is shutdown during the memtable compaction. 887 TEST(DBTest, RecoverDuringMemtableCompaction) { 888 do { 889 Options options = CurrentOptions(); 890 options.env = env_; 891 options.write_buffer_size = 1000000; 892 Reopen(&options); 893 894 // Trigger a long memtable compaction and reopen the database during it 895 ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file 896 ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable 897 ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction 898 ASSERT_OK(Put("bar", "v2")); // Goes to new log file 899 900 Reopen(&options); 901 ASSERT_EQ("v1", Get("foo")); 902 ASSERT_EQ("v2", Get("bar")); 903 ASSERT_EQ(std::string(10000000, 'x'), Get("big1")); 904 ASSERT_EQ(std::string(1000, 'y'), Get("big2")); 905 } while (ChangeOptions()); 906 } 907 908 static std::string Key(int i) { 909 char buf[100]; 910 snprintf(buf, sizeof(buf), "key%06d", i); 911 return std::string(buf); 912 } 913 914 TEST(DBTest, MinorCompactionsHappen) { 915 Options options = CurrentOptions(); 916 options.write_buffer_size = 10000; 917 Reopen(&options); 918 919 const int N = 500; 920 921 int starting_num_tables = TotalTableFiles(); 922 for (int i = 0; i < N; i++) { 923 ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v'))); 924 } 925 int ending_num_tables = TotalTableFiles(); 926 ASSERT_GT(ending_num_tables, starting_num_tables); 927 928 for (int i = 0; i < N; i++) { 929 ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i))); 930 } 931 932 Reopen(); 933 934 for (int i = 0; i < N; i++) { 935 ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i))); 936 } 937 } 938 939 TEST(DBTest, RecoverWithLargeLog) { 940 { 941 Options options = CurrentOptions(); 942 Reopen(&options); 943 ASSERT_OK(Put("big1", std::string(200000, '1'))); 944 ASSERT_OK(Put("big2", std::string(200000, '2'))); 945 ASSERT_OK(Put("small3", std::string(10, '3'))); 946 ASSERT_OK(Put("small4", std::string(10, '4'))); 947 ASSERT_EQ(NumTableFilesAtLevel(0), 0); 948 } 949 950 // Make sure that if we re-open with a small write buffer size that 951 // we flush table files in the middle of a large log file. 952 Options options = CurrentOptions(); 953 options.write_buffer_size = 100000; 954 Reopen(&options); 955 ASSERT_EQ(NumTableFilesAtLevel(0), 3); 956 ASSERT_EQ(std::string(200000, '1'), Get("big1")); 957 ASSERT_EQ(std::string(200000, '2'), Get("big2")); 958 ASSERT_EQ(std::string(10, '3'), Get("small3")); 959 ASSERT_EQ(std::string(10, '4'), Get("small4")); 960 ASSERT_GT(NumTableFilesAtLevel(0), 1); 961 } 962 963 TEST(DBTest, CompactionsGenerateMultipleFiles) { 964 Options options = CurrentOptions(); 965 options.write_buffer_size = 100000000; // Large write buffer 966 Reopen(&options); 967 968 Random rnd(301); 969 970 // Write 8MB (80 values, each 100K) 971 ASSERT_EQ(NumTableFilesAtLevel(0), 0); 972 std::vector<std::string> values; 973 for (int i = 0; i < 80; i++) { 974 values.push_back(RandomString(&rnd, 100000)); 975 ASSERT_OK(Put(Key(i), values[i])); 976 } 977 978 // Reopening moves updates to level-0 979 Reopen(&options); 980 dbfull()->TEST_CompactRange(0, NULL, NULL); 981 982 ASSERT_EQ(NumTableFilesAtLevel(0), 0); 983 ASSERT_GT(NumTableFilesAtLevel(1), 1); 984 for (int i = 0; i < 80; i++) { 985 ASSERT_EQ(Get(Key(i)), values[i]); 986 } 987 } 988 989 TEST(DBTest, RepeatedWritesToSameKey) { 990 Options options = CurrentOptions(); 991 options.env = env_; 992 options.write_buffer_size = 100000; // Small write buffer 993 Reopen(&options); 994 995 // We must have at most one file per level except for level-0, 996 // which may have up to kL0_StopWritesTrigger files. 997 const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger; 998 999 Random rnd(301); 1000 std::string value = RandomString(&rnd, 2 * options.write_buffer_size); 1001 for (int i = 0; i < 5 * kMaxFiles; i++) { 1002 Put("key", value); 1003 ASSERT_LE(TotalTableFiles(), kMaxFiles); 1004 fprintf(stderr, "after %d: %d files\n", int(i+1), TotalTableFiles()); 1005 } 1006 } 1007 1008 TEST(DBTest, SparseMerge) { 1009 Options options = CurrentOptions(); 1010 options.compression = kNoCompression; 1011 Reopen(&options); 1012 1013 FillLevels("A", "Z"); 1014 1015 // Suppose there is: 1016 // small amount of data with prefix A 1017 // large amount of data with prefix B 1018 // small amount of data with prefix C 1019 // and that recent updates have made small changes to all three prefixes. 1020 // Check that we do not do a compaction that merges all of B in one shot. 1021 const std::string value(1000, 'x'); 1022 Put("A", "va"); 1023 // Write approximately 100MB of "B" values 1024 for (int i = 0; i < 100000; i++) { 1025 char key[100]; 1026 snprintf(key, sizeof(key), "B%010d", i); 1027 Put(key, value); 1028 } 1029 Put("C", "vc"); 1030 dbfull()->TEST_CompactMemTable(); 1031 dbfull()->TEST_CompactRange(0, NULL, NULL); 1032 1033 // Make sparse update 1034 Put("A", "va2"); 1035 Put("B100", "bvalue2"); 1036 Put("C", "vc2"); 1037 dbfull()->TEST_CompactMemTable(); 1038 1039 // Compactions should not cause us to create a situation where 1040 // a file overlaps too much data at the next level. 1041 ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); 1042 dbfull()->TEST_CompactRange(0, NULL, NULL); 1043 ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); 1044 dbfull()->TEST_CompactRange(1, NULL, NULL); 1045 ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); 1046 } 1047 1048 static bool Between(uint64_t val, uint64_t low, uint64_t high) { 1049 bool result = (val >= low) && (val <= high); 1050 if (!result) { 1051 fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", 1052 (unsigned long long)(val), 1053 (unsigned long long)(low), 1054 (unsigned long long)(high)); 1055 } 1056 return result; 1057 } 1058 1059 TEST(DBTest, ApproximateSizes) { 1060 do { 1061 Options options = CurrentOptions(); 1062 options.write_buffer_size = 100000000; // Large write buffer 1063 options.compression = kNoCompression; 1064 DestroyAndReopen(); 1065 1066 ASSERT_TRUE(Between(Size("", "xyz"), 0, 0)); 1067 Reopen(&options); 1068 ASSERT_TRUE(Between(Size("", "xyz"), 0, 0)); 1069 1070 // Write 8MB (80 values, each 100K) 1071 ASSERT_EQ(NumTableFilesAtLevel(0), 0); 1072 const int N = 80; 1073 static const int S1 = 100000; 1074 static const int S2 = 105000; // Allow some expansion from metadata 1075 Random rnd(301); 1076 for (int i = 0; i < N; i++) { 1077 ASSERT_OK(Put(Key(i), RandomString(&rnd, S1))); 1078 } 1079 1080 // 0 because GetApproximateSizes() does not account for memtable space 1081 ASSERT_TRUE(Between(Size("", Key(50)), 0, 0)); 1082 1083 // Check sizes across recovery by reopening a few times 1084 for (int run = 0; run < 3; run++) { 1085 Reopen(&options); 1086 1087 for (int compact_start = 0; compact_start < N; compact_start += 10) { 1088 for (int i = 0; i < N; i += 10) { 1089 ASSERT_TRUE(Between(Size("", Key(i)), S1*i, S2*i)); 1090 ASSERT_TRUE(Between(Size("", Key(i)+".suffix"), S1*(i+1), S2*(i+1))); 1091 ASSERT_TRUE(Between(Size(Key(i), Key(i+10)), S1*10, S2*10)); 1092 } 1093 ASSERT_TRUE(Between(Size("", Key(50)), S1*50, S2*50)); 1094 ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), S1*50, S2*50)); 1095 1096 std::string cstart_str = Key(compact_start); 1097 std::string cend_str = Key(compact_start + 9); 1098 Slice cstart = cstart_str; 1099 Slice cend = cend_str; 1100 dbfull()->TEST_CompactRange(0, &cstart, &cend); 1101 } 1102 1103 ASSERT_EQ(NumTableFilesAtLevel(0), 0); 1104 ASSERT_GT(NumTableFilesAtLevel(1), 0); 1105 } 1106 } while (ChangeOptions()); 1107 } 1108 1109 TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) { 1110 do { 1111 Options options = CurrentOptions(); 1112 options.compression = kNoCompression; 1113 Reopen(); 1114 1115 Random rnd(301); 1116 std::string big1 = RandomString(&rnd, 100000); 1117 ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000))); 1118 ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000))); 1119 ASSERT_OK(Put(Key(2), big1)); 1120 ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000))); 1121 ASSERT_OK(Put(Key(4), big1)); 1122 ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000))); 1123 ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000))); 1124 ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000))); 1125 1126 // Check sizes across recovery by reopening a few times 1127 for (int run = 0; run < 3; run++) { 1128 Reopen(&options); 1129 1130 ASSERT_TRUE(Between(Size("", Key(0)), 0, 0)); 1131 ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000)); 1132 ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000)); 1133 ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000)); 1134 ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000)); 1135 ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000)); 1136 ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000)); 1137 ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000)); 1138 ASSERT_TRUE(Between(Size("", Key(8)), 550000, 560000)); 1139 1140 ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000)); 1141 1142 dbfull()->TEST_CompactRange(0, NULL, NULL); 1143 } 1144 } while (ChangeOptions()); 1145 } 1146 1147 TEST(DBTest, IteratorPinsRef) { 1148 Put("foo", "hello"); 1149 1150 // Get iterator that will yield the current contents of the DB. 1151 Iterator* iter = db_->NewIterator(ReadOptions()); 1152 1153 // Write to force compactions 1154 Put("foo", "newvalue1"); 1155 for (int i = 0; i < 100; i++) { 1156 ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values 1157 } 1158 Put("foo", "newvalue2"); 1159 1160 iter->SeekToFirst(); 1161 ASSERT_TRUE(iter->Valid()); 1162 ASSERT_EQ("foo", iter->key().ToString()); 1163 ASSERT_EQ("hello", iter->value().ToString()); 1164 iter->Next(); 1165 ASSERT_TRUE(!iter->Valid()); 1166 delete iter; 1167 } 1168 1169 TEST(DBTest, Snapshot) { 1170 do { 1171 Put("foo", "v1"); 1172 const Snapshot* s1 = db_->GetSnapshot(); 1173 Put("foo", "v2"); 1174 const Snapshot* s2 = db_->GetSnapshot(); 1175 Put("foo", "v3"); 1176 const Snapshot* s3 = db_->GetSnapshot(); 1177 1178 Put("foo", "v4"); 1179 ASSERT_EQ("v1", Get("foo", s1)); 1180 ASSERT_EQ("v2", Get("foo", s2)); 1181 ASSERT_EQ("v3", Get("foo", s3)); 1182 ASSERT_EQ("v4", Get("foo")); 1183 1184 db_->ReleaseSnapshot(s3); 1185 ASSERT_EQ("v1", Get("foo", s1)); 1186 ASSERT_EQ("v2", Get("foo", s2)); 1187 ASSERT_EQ("v4", Get("foo")); 1188 1189 db_->ReleaseSnapshot(s1); 1190 ASSERT_EQ("v2", Get("foo", s2)); 1191 ASSERT_EQ("v4", Get("foo")); 1192 1193 db_->ReleaseSnapshot(s2); 1194 ASSERT_EQ("v4", Get("foo")); 1195 } while (ChangeOptions()); 1196 } 1197 1198 TEST(DBTest, HiddenValuesAreRemoved) { 1199 do { 1200 Random rnd(301); 1201 FillLevels("a", "z"); 1202 1203 std::string big = RandomString(&rnd, 50000); 1204 Put("foo", big); 1205 Put("pastfoo", "v"); 1206 const Snapshot* snapshot = db_->GetSnapshot(); 1207 Put("foo", "tiny"); 1208 Put("pastfoo2", "v2"); // Advance sequence number one more 1209 1210 ASSERT_OK(dbfull()->TEST_CompactMemTable()); 1211 ASSERT_GT(NumTableFilesAtLevel(0), 0); 1212 1213 ASSERT_EQ(big, Get("foo", snapshot)); 1214 ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000)); 1215 db_->ReleaseSnapshot(snapshot); 1216 ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]"); 1217 Slice x("x"); 1218 dbfull()->TEST_CompactRange(0, NULL, &x); 1219 ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]"); 1220 ASSERT_EQ(NumTableFilesAtLevel(0), 0); 1221 ASSERT_GE(NumTableFilesAtLevel(1), 1); 1222 dbfull()->TEST_CompactRange(1, NULL, &x); 1223 ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]"); 1224 1225 ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000)); 1226 } while (ChangeOptions()); 1227 } 1228 1229 TEST(DBTest, DeletionMarkers1) { 1230 Put("foo", "v1"); 1231 ASSERT_OK(dbfull()->TEST_CompactMemTable()); 1232 const int last = config::kMaxMemCompactLevel; 1233 ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level 1234 1235 // Place a table at level last-1 to prevent merging with preceding mutation 1236 Put("a", "begin"); 1237 Put("z", "end"); 1238 dbfull()->TEST_CompactMemTable(); 1239 ASSERT_EQ(NumTableFilesAtLevel(last), 1); 1240 ASSERT_EQ(NumTableFilesAtLevel(last-1), 1); 1241 1242 Delete("foo"); 1243 Put("foo", "v2"); 1244 ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]"); 1245 ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2 1246 ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]"); 1247 Slice z("z"); 1248 dbfull()->TEST_CompactRange(last-2, NULL, &z); 1249 // DEL eliminated, but v1 remains because we aren't compacting that level 1250 // (DEL can be eliminated because v2 hides v1). 1251 ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]"); 1252 dbfull()->TEST_CompactRange(last-1, NULL, NULL); 1253 // Merging last-1 w/ last, so we are the base level for "foo", so 1254 // DEL is removed. (as is v1). 1255 ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]"); 1256 } 1257 1258 TEST(DBTest, DeletionMarkers2) { 1259 Put("foo", "v1"); 1260 ASSERT_OK(dbfull()->TEST_CompactMemTable()); 1261 const int last = config::kMaxMemCompactLevel; 1262 ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level 1263 1264 // Place a table at level last-1 to prevent merging with preceding mutation 1265 Put("a", "begin"); 1266 Put("z", "end"); 1267 dbfull()->TEST_CompactMemTable(); 1268 ASSERT_EQ(NumTableFilesAtLevel(last), 1); 1269 ASSERT_EQ(NumTableFilesAtLevel(last-1), 1); 1270 1271 Delete("foo"); 1272 ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); 1273 ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2 1274 ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); 1275 dbfull()->TEST_CompactRange(last-2, NULL, NULL); 1276 // DEL kept: "last" file overlaps 1277 ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); 1278 dbfull()->TEST_CompactRange(last-1, NULL, NULL); 1279 // Merging last-1 w/ last, so we are the base level for "foo", so 1280 // DEL is removed. (as is v1). 1281 ASSERT_EQ(AllEntriesFor("foo"), "[ ]"); 1282 } 1283 1284 TEST(DBTest, OverlapInLevel0) { 1285 do { 1286 ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config"; 1287 1288 // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0. 1289 ASSERT_OK(Put("100", "v100")); 1290 ASSERT_OK(Put("999", "v999")); 1291 dbfull()->TEST_CompactMemTable(); 1292 ASSERT_OK(Delete("100")); 1293 ASSERT_OK(Delete("999")); 1294 dbfull()->TEST_CompactMemTable(); 1295 ASSERT_EQ("0,1,1", FilesPerLevel()); 1296 1297 // Make files spanning the following ranges in level-0: 1298 // files[0] 200 .. 900 1299 // files[1] 300 .. 500 1300 // Note that files are sorted by smallest key. 1301 ASSERT_OK(Put("300", "v300")); 1302 ASSERT_OK(Put("500", "v500")); 1303 dbfull()->TEST_CompactMemTable(); 1304 ASSERT_OK(Put("200", "v200")); 1305 ASSERT_OK(Put("600", "v600")); 1306 ASSERT_OK(Put("900", "v900")); 1307 dbfull()->TEST_CompactMemTable(); 1308 ASSERT_EQ("2,1,1", FilesPerLevel()); 1309 1310 // Compact away the placeholder files we created initially 1311 dbfull()->TEST_CompactRange(1, NULL, NULL); 1312 dbfull()->TEST_CompactRange(2, NULL, NULL); 1313 ASSERT_EQ("2", FilesPerLevel()); 1314 1315 // Do a memtable compaction. Before bug-fix, the compaction would 1316 // not detect the overlap with level-0 files and would incorrectly place 1317 // the deletion in a deeper level. 1318 ASSERT_OK(Delete("600")); 1319 dbfull()->TEST_CompactMemTable(); 1320 ASSERT_EQ("3", FilesPerLevel()); 1321 ASSERT_EQ("NOT_FOUND", Get("600")); 1322 } while (ChangeOptions()); 1323 } 1324 1325 TEST(DBTest, L0_CompactionBug_Issue44_a) { 1326 Reopen(); 1327 ASSERT_OK(Put("b", "v")); 1328 Reopen(); 1329 ASSERT_OK(Delete("b")); 1330 ASSERT_OK(Delete("a")); 1331 Reopen(); 1332 ASSERT_OK(Delete("a")); 1333 Reopen(); 1334 ASSERT_OK(Put("a", "v")); 1335 Reopen(); 1336 Reopen(); 1337 ASSERT_EQ("(a->v)", Contents()); 1338 DelayMilliseconds(1000); // Wait for compaction to finish 1339 ASSERT_EQ("(a->v)", Contents()); 1340 } 1341 1342 TEST(DBTest, L0_CompactionBug_Issue44_b) { 1343 Reopen(); 1344 Put("",""); 1345 Reopen(); 1346 Delete("e"); 1347 Put("",""); 1348 Reopen(); 1349 Put("c", "cv"); 1350 Reopen(); 1351 Put("",""); 1352 Reopen(); 1353 Put("",""); 1354 DelayMilliseconds(1000); // Wait for compaction to finish 1355 Reopen(); 1356 Put("d","dv"); 1357 Reopen(); 1358 Put("",""); 1359 Reopen(); 1360 Delete("d"); 1361 Delete("b"); 1362 Reopen(); 1363 ASSERT_EQ("(->)(c->cv)", Contents()); 1364 DelayMilliseconds(1000); // Wait for compaction to finish 1365 ASSERT_EQ("(->)(c->cv)", Contents()); 1366 } 1367 1368 TEST(DBTest, ComparatorCheck) { 1369 class NewComparator : public Comparator { 1370 public: 1371 virtual const char* Name() const { return "leveldb.NewComparator"; } 1372 virtual int Compare(const Slice& a, const Slice& b) const { 1373 return BytewiseComparator()->Compare(a, b); 1374 } 1375 virtual void FindShortestSeparator(std::string* s, const Slice& l) const { 1376 BytewiseComparator()->FindShortestSeparator(s, l); 1377 } 1378 virtual void FindShortSuccessor(std::string* key) const { 1379 BytewiseComparator()->FindShortSuccessor(key); 1380 } 1381 }; 1382 NewComparator cmp; 1383 Options new_options = CurrentOptions(); 1384 new_options.comparator = &cmp; 1385 Status s = TryReopen(&new_options); 1386 ASSERT_TRUE(!s.ok()); 1387 ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos) 1388 << s.ToString(); 1389 } 1390 1391 TEST(DBTest, CustomComparator) { 1392 class NumberComparator : public Comparator { 1393 public: 1394 virtual const char* Name() const { return "test.NumberComparator"; } 1395 virtual int Compare(const Slice& a, const Slice& b) const { 1396 return ToNumber(a) - ToNumber(b); 1397 } 1398 virtual void FindShortestSeparator(std::string* s, const Slice& l) const { 1399 ToNumber(*s); // Check format 1400 ToNumber(l); // Check format 1401 } 1402 virtual void FindShortSuccessor(std::string* key) const { 1403 ToNumber(*key); // Check format 1404 } 1405 private: 1406 static int ToNumber(const Slice& x) { 1407 // Check that there are no extra characters. 1408 ASSERT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size()-1] == ']') 1409 << EscapeString(x); 1410 int val; 1411 char ignored; 1412 ASSERT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1) 1413 << EscapeString(x); 1414 return val; 1415 } 1416 }; 1417 NumberComparator cmp; 1418 Options new_options = CurrentOptions(); 1419 new_options.create_if_missing = true; 1420 new_options.comparator = &cmp; 1421 new_options.filter_policy = NULL; // Cannot use bloom filters 1422 new_options.write_buffer_size = 1000; // Compact more often 1423 DestroyAndReopen(&new_options); 1424 ASSERT_OK(Put("[10]", "ten")); 1425 ASSERT_OK(Put("[0x14]", "twenty")); 1426 for (int i = 0; i < 2; i++) { 1427 ASSERT_EQ("ten", Get("[10]")); 1428 ASSERT_EQ("ten", Get("[0xa]")); 1429 ASSERT_EQ("twenty", Get("[20]")); 1430 ASSERT_EQ("twenty", Get("[0x14]")); 1431 ASSERT_EQ("NOT_FOUND", Get("[15]")); 1432 ASSERT_EQ("NOT_FOUND", Get("[0xf]")); 1433 Compact("[0]", "[9999]"); 1434 } 1435 1436 for (int run = 0; run < 2; run++) { 1437 for (int i = 0; i < 1000; i++) { 1438 char buf[100]; 1439 snprintf(buf, sizeof(buf), "[%d]", i*10); 1440 ASSERT_OK(Put(buf, buf)); 1441 } 1442 Compact("[0]", "[1000000]"); 1443 } 1444 } 1445 1446 TEST(DBTest, ManualCompaction) { 1447 ASSERT_EQ(config::kMaxMemCompactLevel, 2) 1448 << "Need to update this test to match kMaxMemCompactLevel"; 1449 1450 MakeTables(3, "p", "q"); 1451 ASSERT_EQ("1,1,1", FilesPerLevel()); 1452 1453 // Compaction range falls before files 1454 Compact("", "c"); 1455 ASSERT_EQ("1,1,1", FilesPerLevel()); 1456 1457 // Compaction range falls after files 1458 Compact("r", "z"); 1459 ASSERT_EQ("1,1,1", FilesPerLevel()); 1460 1461 // Compaction range overlaps files 1462 Compact("p1", "p9"); 1463 ASSERT_EQ("0,0,1", FilesPerLevel()); 1464 1465 // Populate a different range 1466 MakeTables(3, "c", "e"); 1467 ASSERT_EQ("1,1,2", FilesPerLevel()); 1468 1469 // Compact just the new range 1470 Compact("b", "f"); 1471 ASSERT_EQ("0,0,2", FilesPerLevel()); 1472 1473 // Compact all 1474 MakeTables(1, "a", "z"); 1475 ASSERT_EQ("0,1,2", FilesPerLevel()); 1476 db_->CompactRange(NULL, NULL); 1477 ASSERT_EQ("0,0,1", FilesPerLevel()); 1478 } 1479 1480 TEST(DBTest, DBOpen_Options) { 1481 std::string dbname = test::TmpDir() + "/db_options_test"; 1482 DestroyDB(dbname, Options()); 1483 1484 // Does not exist, and create_if_missing == false: error 1485 DB* db = NULL; 1486 Options opts; 1487 opts.create_if_missing = false; 1488 Status s = DB::Open(opts, dbname, &db); 1489 ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != NULL); 1490 ASSERT_TRUE(db == NULL); 1491 1492 // Does not exist, and create_if_missing == true: OK 1493 opts.create_if_missing = true; 1494 s = DB::Open(opts, dbname, &db); 1495 ASSERT_OK(s); 1496 ASSERT_TRUE(db != NULL); 1497 1498 delete db; 1499 db = NULL; 1500 1501 // Does exist, and error_if_exists == true: error 1502 opts.create_if_missing = false; 1503 opts.error_if_exists = true; 1504 s = DB::Open(opts, dbname, &db); 1505 ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != NULL); 1506 ASSERT_TRUE(db == NULL); 1507 1508 // Does exist, and error_if_exists == false: OK 1509 opts.create_if_missing = true; 1510 opts.error_if_exists = false; 1511 s = DB::Open(opts, dbname, &db); 1512 ASSERT_OK(s); 1513 ASSERT_TRUE(db != NULL); 1514 1515 delete db; 1516 db = NULL; 1517 } 1518 1519 TEST(DBTest, Locking) { 1520 DB* db2 = NULL; 1521 Status s = DB::Open(CurrentOptions(), dbname_, &db2); 1522 ASSERT_TRUE(!s.ok()) << "Locking did not prevent re-opening db"; 1523 } 1524 1525 // Check that number of files does not grow when we are out of space 1526 TEST(DBTest, NoSpace) { 1527 Options options = CurrentOptions(); 1528 options.env = env_; 1529 Reopen(&options); 1530 1531 ASSERT_OK(Put("foo", "v1")); 1532 ASSERT_EQ("v1", Get("foo")); 1533 Compact("a", "z"); 1534 const int num_files = CountFiles(); 1535 env_->no_space_.Release_Store(env_); // Force out-of-space errors 1536 for (int i = 0; i < 10; i++) { 1537 for (int level = 0; level < config::kNumLevels-1; level++) { 1538 dbfull()->TEST_CompactRange(level, NULL, NULL); 1539 } 1540 } 1541 env_->no_space_.Release_Store(NULL); 1542 ASSERT_LT(CountFiles(), num_files + 3); 1543 } 1544 1545 TEST(DBTest, NonWritableFileSystem) { 1546 Options options = CurrentOptions(); 1547 options.write_buffer_size = 1000; 1548 options.env = env_; 1549 Reopen(&options); 1550 ASSERT_OK(Put("foo", "v1")); 1551 env_->non_writable_.Release_Store(env_); // Force errors for new files 1552 std::string big(100000, 'x'); 1553 int errors = 0; 1554 for (int i = 0; i < 20; i++) { 1555 fprintf(stderr, "iter %d; errors %d\n", i, errors); 1556 if (!Put("foo", big).ok()) { 1557 errors++; 1558 DelayMilliseconds(100); 1559 } 1560 } 1561 ASSERT_GT(errors, 0); 1562 env_->non_writable_.Release_Store(NULL); 1563 } 1564 1565 TEST(DBTest, WriteSyncError) { 1566 // Check that log sync errors cause the DB to disallow future writes. 1567 1568 // (a) Cause log sync calls to fail 1569 Options options = CurrentOptions(); 1570 options.env = env_; 1571 Reopen(&options); 1572 env_->data_sync_error_.Release_Store(env_); 1573 1574 // (b) Normal write should succeed 1575 WriteOptions w; 1576 ASSERT_OK(db_->Put(w, "k1", "v1")); 1577 ASSERT_EQ("v1", Get("k1")); 1578 1579 // (c) Do a sync write; should fail 1580 w.sync = true; 1581 ASSERT_TRUE(!db_->Put(w, "k2", "v2").ok()); 1582 ASSERT_EQ("v1", Get("k1")); 1583 ASSERT_EQ("NOT_FOUND", Get("k2")); 1584 1585 // (d) make sync behave normally 1586 env_->data_sync_error_.Release_Store(NULL); 1587 1588 // (e) Do a non-sync write; should fail 1589 w.sync = false; 1590 ASSERT_TRUE(!db_->Put(w, "k3", "v3").ok()); 1591 ASSERT_EQ("v1", Get("k1")); 1592 ASSERT_EQ("NOT_FOUND", Get("k2")); 1593 ASSERT_EQ("NOT_FOUND", Get("k3")); 1594 } 1595 1596 TEST(DBTest, ManifestWriteError) { 1597 // Test for the following problem: 1598 // (a) Compaction produces file F 1599 // (b) Log record containing F is written to MANIFEST file, but Sync() fails 1600 // (c) GC deletes F 1601 // (d) After reopening DB, reads fail since deleted F is named in log record 1602 1603 // We iterate twice. In the second iteration, everything is the 1604 // same except the log record never makes it to the MANIFEST file. 1605 for (int iter = 0; iter < 2; iter++) { 1606 port::AtomicPointer* error_type = (iter == 0) 1607 ? &env_->manifest_sync_error_ 1608 : &env_->manifest_write_error_; 1609 1610 // Insert foo=>bar mapping 1611 Options options = CurrentOptions(); 1612 options.env = env_; 1613 options.create_if_missing = true; 1614 options.error_if_exists = false; 1615 DestroyAndReopen(&options); 1616 ASSERT_OK(Put("foo", "bar")); 1617 ASSERT_EQ("bar", Get("foo")); 1618 1619 // Memtable compaction (will succeed) 1620 dbfull()->TEST_CompactMemTable(); 1621 ASSERT_EQ("bar", Get("foo")); 1622 const int last = config::kMaxMemCompactLevel; 1623 ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo=>bar is now in last level 1624 1625 // Merging compaction (will fail) 1626 error_type->Release_Store(env_); 1627 dbfull()->TEST_CompactRange(last, NULL, NULL); // Should fail 1628 ASSERT_EQ("bar", Get("foo")); 1629 1630 // Recovery: should not lose data 1631 error_type->Release_Store(NULL); 1632 Reopen(&options); 1633 ASSERT_EQ("bar", Get("foo")); 1634 } 1635 } 1636 1637 TEST(DBTest, MissingSSTFile) { 1638 ASSERT_OK(Put("foo", "bar")); 1639 ASSERT_EQ("bar", Get("foo")); 1640 1641 // Dump the memtable to disk. 1642 dbfull()->TEST_CompactMemTable(); 1643 ASSERT_EQ("bar", Get("foo")); 1644 1645 Close(); 1646 ASSERT_TRUE(DeleteAnSSTFile()); 1647 Options options = CurrentOptions(); 1648 options.paranoid_checks = true; 1649 Status s = TryReopen(&options); 1650 ASSERT_TRUE(!s.ok()); 1651 ASSERT_TRUE(s.ToString().find("issing") != std::string::npos) 1652 << s.ToString(); 1653 } 1654 1655 TEST(DBTest, StillReadSST) { 1656 ASSERT_OK(Put("foo", "bar")); 1657 ASSERT_EQ("bar", Get("foo")); 1658 1659 // Dump the memtable to disk. 1660 dbfull()->TEST_CompactMemTable(); 1661 ASSERT_EQ("bar", Get("foo")); 1662 Close(); 1663 ASSERT_GT(RenameLDBToSST(), 0); 1664 Options options = CurrentOptions(); 1665 options.paranoid_checks = true; 1666 Status s = TryReopen(&options); 1667 ASSERT_TRUE(s.ok()); 1668 ASSERT_EQ("bar", Get("foo")); 1669 } 1670 1671 TEST(DBTest, FilesDeletedAfterCompaction) { 1672 ASSERT_OK(Put("foo", "v2")); 1673 Compact("a", "z"); 1674 const int num_files = CountFiles(); 1675 for (int i = 0; i < 10; i++) { 1676 ASSERT_OK(Put("foo", "v2")); 1677 Compact("a", "z"); 1678 } 1679 ASSERT_EQ(CountFiles(), num_files); 1680 } 1681 1682 TEST(DBTest, BloomFilter) { 1683 env_->count_random_reads_ = true; 1684 Options options = CurrentOptions(); 1685 options.env = env_; 1686 options.block_cache = NewLRUCache(0); // Prevent cache hits 1687 options.filter_policy = NewBloomFilterPolicy(10); 1688 Reopen(&options); 1689 1690 // Populate multiple layers 1691 const int N = 10000; 1692 for (int i = 0; i < N; i++) { 1693 ASSERT_OK(Put(Key(i), Key(i))); 1694 } 1695 Compact("a", "z"); 1696 for (int i = 0; i < N; i += 100) { 1697 ASSERT_OK(Put(Key(i), Key(i))); 1698 } 1699 dbfull()->TEST_CompactMemTable(); 1700 1701 // Prevent auto compactions triggered by seeks 1702 env_->delay_data_sync_.Release_Store(env_); 1703 1704 // Lookup present keys. Should rarely read from small sstable. 1705 env_->random_read_counter_.Reset(); 1706 for (int i = 0; i < N; i++) { 1707 ASSERT_EQ(Key(i), Get(Key(i))); 1708 } 1709 int reads = env_->random_read_counter_.Read(); 1710 fprintf(stderr, "%d present => %d reads\n", N, reads); 1711 ASSERT_GE(reads, N); 1712 ASSERT_LE(reads, N + 2*N/100); 1713 1714 // Lookup present keys. Should rarely read from either sstable. 1715 env_->random_read_counter_.Reset(); 1716 for (int i = 0; i < N; i++) { 1717 ASSERT_EQ("NOT_FOUND", Get(Key(i) + ".missing")); 1718 } 1719 reads = env_->random_read_counter_.Read(); 1720 fprintf(stderr, "%d missing => %d reads\n", N, reads); 1721 ASSERT_LE(reads, 3*N/100); 1722 1723 env_->delay_data_sync_.Release_Store(NULL); 1724 Close(); 1725 delete options.block_cache; 1726 delete options.filter_policy; 1727 } 1728 1729 // Multi-threaded test: 1730 namespace { 1731 1732 static const int kNumThreads = 4; 1733 static const int kTestSeconds = 10; 1734 static const int kNumKeys = 1000; 1735 1736 struct MTState { 1737 DBTest* test; 1738 port::AtomicPointer stop; 1739 port::AtomicPointer counter[kNumThreads]; 1740 port::AtomicPointer thread_done[kNumThreads]; 1741 }; 1742 1743 struct MTThread { 1744 MTState* state; 1745 int id; 1746 }; 1747 1748 static void MTThreadBody(void* arg) { 1749 MTThread* t = reinterpret_cast<MTThread*>(arg); 1750 int id = t->id; 1751 DB* db = t->state->test->db_; 1752 uintptr_t counter = 0; 1753 fprintf(stderr, "... starting thread %d\n", id); 1754 Random rnd(1000 + id); 1755 std::string value; 1756 char valbuf[1500]; 1757 while (t->state->stop.Acquire_Load() == NULL) { 1758 t->state->counter[id].Release_Store(reinterpret_cast<void*>(counter)); 1759 1760 int key = rnd.Uniform(kNumKeys); 1761 char keybuf[20]; 1762 snprintf(keybuf, sizeof(keybuf), "%016d", key); 1763 1764 if (rnd.OneIn(2)) { 1765 // Write values of the form <key, my id, counter>. 1766 // We add some padding for force compactions. 1767 snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d", 1768 key, id, static_cast<int>(counter)); 1769 ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf))); 1770 } else { 1771 // Read a value and verify that it matches the pattern written above. 1772 Status s = db->Get(ReadOptions(), Slice(keybuf), &value); 1773 if (s.IsNotFound()) { 1774 // Key has not yet been written 1775 } else { 1776 // Check that the writer thread counter is >= the counter in the value 1777 ASSERT_OK(s); 1778 int k, w, c; 1779 ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value; 1780 ASSERT_EQ(k, key); 1781 ASSERT_GE(w, 0); 1782 ASSERT_LT(w, kNumThreads); 1783 ASSERT_LE(static_cast<uintptr_t>(c), reinterpret_cast<uintptr_t>( 1784 t->state->counter[w].Acquire_Load())); 1785 } 1786 } 1787 counter++; 1788 } 1789 t->state->thread_done[id].Release_Store(t); 1790 fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter)); 1791 } 1792 1793 } // namespace 1794 1795 TEST(DBTest, MultiThreaded) { 1796 do { 1797 // Initialize state 1798 MTState mt; 1799 mt.test = this; 1800 mt.stop.Release_Store(0); 1801 for (int id = 0; id < kNumThreads; id++) { 1802 mt.counter[id].Release_Store(0); 1803 mt.thread_done[id].Release_Store(0); 1804 } 1805 1806 // Start threads 1807 MTThread thread[kNumThreads]; 1808 for (int id = 0; id < kNumThreads; id++) { 1809 thread[id].state = &mt; 1810 thread[id].id = id; 1811 env_->StartThread(MTThreadBody, &thread[id]); 1812 } 1813 1814 // Let them run for a while 1815 DelayMilliseconds(kTestSeconds * 1000); 1816 1817 // Stop the threads and wait for them to finish 1818 mt.stop.Release_Store(&mt); 1819 for (int id = 0; id < kNumThreads; id++) { 1820 while (mt.thread_done[id].Acquire_Load() == NULL) { 1821 DelayMilliseconds(100); 1822 } 1823 } 1824 } while (ChangeOptions()); 1825 } 1826 1827 namespace { 1828 typedef std::map<std::string, std::string> KVMap; 1829 } 1830 1831 class ModelDB: public DB { 1832 public: 1833 class ModelSnapshot : public Snapshot { 1834 public: 1835 KVMap map_; 1836 }; 1837 1838 explicit ModelDB(const Options& options): options_(options) { } 1839 ~ModelDB() { } 1840 virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) { 1841 return DB::Put(o, k, v); 1842 } 1843 virtual Status Delete(const WriteOptions& o, const Slice& key) { 1844 return DB::Delete(o, key); 1845 } 1846 virtual Status Get(const ReadOptions& options, 1847 const Slice& key, std::string* value) { 1848 assert(false); // Not implemented 1849 return Status::NotFound(key); 1850 } 1851 virtual Iterator* NewIterator(const ReadOptions& options) { 1852 if (options.snapshot == NULL) { 1853 KVMap* saved = new KVMap; 1854 *saved = map_; 1855 return new ModelIter(saved, true); 1856 } else { 1857 const KVMap* snapshot_state = 1858 &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_); 1859 return new ModelIter(snapshot_state, false); 1860 } 1861 } 1862 virtual const Snapshot* GetSnapshot() { 1863 ModelSnapshot* snapshot = new ModelSnapshot; 1864 snapshot->map_ = map_; 1865 return snapshot; 1866 } 1867 1868 virtual void ReleaseSnapshot(const Snapshot* snapshot) { 1869 delete reinterpret_cast<const ModelSnapshot*>(snapshot); 1870 } 1871 virtual Status Write(const WriteOptions& options, WriteBatch* batch) { 1872 class Handler : public WriteBatch::Handler { 1873 public: 1874 KVMap* map_; 1875 virtual void Put(const Slice& key, const Slice& value) { 1876 (*map_)[key.ToString()] = value.ToString(); 1877 } 1878 virtual void Delete(const Slice& key) { 1879 map_->erase(key.ToString()); 1880 } 1881 }; 1882 Handler handler; 1883 handler.map_ = &map_; 1884 return batch->Iterate(&handler); 1885 } 1886 1887 virtual bool GetProperty(const Slice& property, std::string* value) { 1888 return false; 1889 } 1890 virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) { 1891 for (int i = 0; i < n; i++) { 1892 sizes[i] = 0; 1893 } 1894 } 1895 virtual void CompactRange(const Slice* start, const Slice* end) { 1896 } 1897 1898 private: 1899 class ModelIter: public Iterator { 1900 public: 1901 ModelIter(const KVMap* map, bool owned) 1902 : map_(map), owned_(owned), iter_(map_->end()) { 1903 } 1904 ~ModelIter() { 1905 if (owned_) delete map_; 1906 } 1907 virtual bool Valid() const { return iter_ != map_->end(); } 1908 virtual void SeekToFirst() { iter_ = map_->begin(); } 1909 virtual void SeekToLast() { 1910 if (map_->empty()) { 1911 iter_ = map_->end(); 1912 } else { 1913 iter_ = map_->find(map_->rbegin()->first); 1914 } 1915 } 1916 virtual void Seek(const Slice& k) { 1917 iter_ = map_->lower_bound(k.ToString()); 1918 } 1919 virtual void Next() { ++iter_; } 1920 virtual void Prev() { --iter_; } 1921 virtual Slice key() const { return iter_->first; } 1922 virtual Slice value() const { return iter_->second; } 1923 virtual Status status() const { return Status::OK(); } 1924 private: 1925 const KVMap* const map_; 1926 const bool owned_; // Do we own map_ 1927 KVMap::const_iterator iter_; 1928 }; 1929 const Options options_; 1930 KVMap map_; 1931 }; 1932 1933 static std::string RandomKey(Random* rnd) { 1934 int len = (rnd->OneIn(3) 1935 ? 1 // Short sometimes to encourage collisions 1936 : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10))); 1937 return test::RandomKey(rnd, len); 1938 } 1939 1940 static bool CompareIterators(int step, 1941 DB* model, 1942 DB* db, 1943 const Snapshot* model_snap, 1944 const Snapshot* db_snap) { 1945 ReadOptions options; 1946 options.snapshot = model_snap; 1947 Iterator* miter = model->NewIterator(options); 1948 options.snapshot = db_snap; 1949 Iterator* dbiter = db->NewIterator(options); 1950 bool ok = true; 1951 int count = 0; 1952 for (miter->SeekToFirst(), dbiter->SeekToFirst(); 1953 ok && miter->Valid() && dbiter->Valid(); 1954 miter->Next(), dbiter->Next()) { 1955 count++; 1956 if (miter->key().compare(dbiter->key()) != 0) { 1957 fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", 1958 step, 1959 EscapeString(miter->key()).c_str(), 1960 EscapeString(dbiter->key()).c_str()); 1961 ok = false; 1962 break; 1963 } 1964 1965 if (miter->value().compare(dbiter->value()) != 0) { 1966 fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n", 1967 step, 1968 EscapeString(miter->key()).c_str(), 1969 EscapeString(miter->value()).c_str(), 1970 EscapeString(miter->value()).c_str()); 1971 ok = false; 1972 } 1973 } 1974 1975 if (ok) { 1976 if (miter->Valid() != dbiter->Valid()) { 1977 fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n", 1978 step, miter->Valid(), dbiter->Valid()); 1979 ok = false; 1980 } 1981 } 1982 fprintf(stderr, "%d entries compared: ok=%d\n", count, ok); 1983 delete miter; 1984 delete dbiter; 1985 return ok; 1986 } 1987 1988 TEST(DBTest, Randomized) { 1989 Random rnd(test::RandomSeed()); 1990 do { 1991 ModelDB model(CurrentOptions()); 1992 const int N = 10000; 1993 const Snapshot* model_snap = NULL; 1994 const Snapshot* db_snap = NULL; 1995 std::string k, v; 1996 for (int step = 0; step < N; step++) { 1997 if (step % 100 == 0) { 1998 fprintf(stderr, "Step %d of %d\n", step, N); 1999 } 2000 // TODO(sanjay): Test Get() works 2001 int p = rnd.Uniform(100); 2002 if (p < 45) { // Put 2003 k = RandomKey(&rnd); 2004 v = RandomString(&rnd, 2005 rnd.OneIn(20) 2006 ? 100 + rnd.Uniform(100) 2007 : rnd.Uniform(8)); 2008 ASSERT_OK(model.Put(WriteOptions(), k, v)); 2009 ASSERT_OK(db_->Put(WriteOptions(), k, v)); 2010 2011 } else if (p < 90) { // Delete 2012 k = RandomKey(&rnd); 2013 ASSERT_OK(model.Delete(WriteOptions(), k)); 2014 ASSERT_OK(db_->Delete(WriteOptions(), k)); 2015 2016 2017 } else { // Multi-element batch 2018 WriteBatch b; 2019 const int num = rnd.Uniform(8); 2020 for (int i = 0; i < num; i++) { 2021 if (i == 0 || !rnd.OneIn(10)) { 2022 k = RandomKey(&rnd); 2023 } else { 2024 // Periodically re-use the same key from the previous iter, so 2025 // we have multiple entries in the write batch for the same key 2026 } 2027 if (rnd.OneIn(2)) { 2028 v = RandomString(&rnd, rnd.Uniform(10)); 2029 b.Put(k, v); 2030 } else { 2031 b.Delete(k); 2032 } 2033 } 2034 ASSERT_OK(model.Write(WriteOptions(), &b)); 2035 ASSERT_OK(db_->Write(WriteOptions(), &b)); 2036 } 2037 2038 if ((step % 100) == 0) { 2039 ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL)); 2040 ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap)); 2041 // Save a snapshot from each DB this time that we'll use next 2042 // time we compare things, to make sure the current state is 2043 // preserved with the snapshot 2044 if (model_snap != NULL) model.ReleaseSnapshot(model_snap); 2045 if (db_snap != NULL) db_->ReleaseSnapshot(db_snap); 2046 2047 Reopen(); 2048 ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL)); 2049 2050 model_snap = model.GetSnapshot(); 2051 db_snap = db_->GetSnapshot(); 2052 } 2053 } 2054 if (model_snap != NULL) model.ReleaseSnapshot(model_snap); 2055 if (db_snap != NULL) db_->ReleaseSnapshot(db_snap); 2056 } while (ChangeOptions()); 2057 } 2058 2059 std::string MakeKey(unsigned int num) { 2060 char buf[30]; 2061 snprintf(buf, sizeof(buf), "%016u", num); 2062 return std::string(buf); 2063 } 2064 2065 void BM_LogAndApply(int iters, int num_base_files) { 2066 std::string dbname = test::TmpDir() + "/leveldb_test_benchmark"; 2067 DestroyDB(dbname, Options()); 2068 2069 DB* db = NULL; 2070 Options opts; 2071 opts.create_if_missing = true; 2072 Status s = DB::Open(opts, dbname, &db); 2073 ASSERT_OK(s); 2074 ASSERT_TRUE(db != NULL); 2075 2076 delete db; 2077 db = NULL; 2078 2079 Env* env = Env::Default(); 2080 2081 port::Mutex mu; 2082 MutexLock l(&mu); 2083 2084 InternalKeyComparator cmp(BytewiseComparator()); 2085 Options options; 2086 VersionSet vset(dbname, &options, NULL, &cmp); 2087 ASSERT_OK(vset.Recover()); 2088 VersionEdit vbase; 2089 uint64_t fnum = 1; 2090 for (int i = 0; i < num_base_files; i++) { 2091 InternalKey start(MakeKey(2*fnum), 1, kTypeValue); 2092 InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion); 2093 vbase.AddFile(2, fnum++, 1 /* file size */, start, limit); 2094 } 2095 ASSERT_OK(vset.LogAndApply(&vbase, &mu)); 2096 2097 uint64_t start_micros = env->NowMicros(); 2098 2099 for (int i = 0; i < iters; i++) { 2100 VersionEdit vedit; 2101 vedit.DeleteFile(2, fnum); 2102 InternalKey start(MakeKey(2*fnum), 1, kTypeValue); 2103 InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion); 2104 vedit.AddFile(2, fnum++, 1 /* file size */, start, limit); 2105 vset.LogAndApply(&vedit, &mu); 2106 } 2107 uint64_t stop_micros = env->NowMicros(); 2108 unsigned int us = stop_micros - start_micros; 2109 char buf[16]; 2110 snprintf(buf, sizeof(buf), "%d", num_base_files); 2111 fprintf(stderr, 2112 "BM_LogAndApply/%-6s %8d iters : %9u us (%7.0f us / iter)\n", 2113 buf, iters, us, ((float)us) / iters); 2114 } 2115 2116 } // namespace leveldb 2117 2118 int main(int argc, char** argv) { 2119 if (argc > 1 && std::string(argv[1]) == "--benchmark") { 2120 leveldb::BM_LogAndApply(1000, 1); 2121 leveldb::BM_LogAndApply(1000, 100); 2122 leveldb::BM_LogAndApply(1000, 10000); 2123 leveldb::BM_LogAndApply(100, 100000); 2124 return 0; 2125 } 2126 2127 return leveldb::test::RunAllTests(); 2128 } 2129