1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/visitedlink/visitedlink_master.h" 6 7 #if defined(OS_WIN) 8 #include <windows.h> 9 #include <io.h> 10 #include <shlobj.h> 11 #endif // defined(OS_WIN) 12 #include <stdio.h> 13 14 #include <algorithm> 15 16 #include "base/file_util.h" 17 #include "base/logging.h" 18 #include "base/message_loop.h" 19 #include "base/path_service.h" 20 #include "base/process_util.h" 21 #include "base/rand_util.h" 22 #include "base/stack_container.h" 23 #include "base/string_util.h" 24 #include "base/threading/thread_restrictions.h" 25 #include "content/browser/browser_thread.h" 26 #include "chrome/browser/history/history.h" 27 #include "chrome/browser/profiles/profile.h" 28 29 using file_util::ScopedFILE; 30 using file_util::OpenFile; 31 using file_util::TruncateFile; 32 33 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0; 34 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4; 35 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8; 36 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12; 37 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16; 38 39 const int32 VisitedLinkMaster::kFileCurrentVersion = 2; 40 41 // the signature at the beginning of the URL table = "VLnk" (visited links) 42 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56; 43 const size_t VisitedLinkMaster::kFileHeaderSize = 44 kFileHeaderSaltOffset + LINK_SALT_LENGTH; 45 46 // This value should also be the same as the smallest size in the lookup 47 // table in NewTableSizeForCount (prime number). 48 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381; 49 50 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64; 51 52 namespace { 53 54 // Fills the given salt structure with some quasi-random values 55 // It is not necessary to generate a cryptographically strong random string, 56 // only that it be reasonably different for different users. 57 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { 58 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; 59 uint64 randval = base::RandUint64(); 60 memcpy(salt, &randval, 8); 61 } 62 63 // AsyncWriter ---------------------------------------------------------------- 64 65 // This task executes on a background thread and executes a write. This 66 // prevents us from blocking the UI thread doing I/O. 67 class AsyncWriter : public Task { 68 public: 69 AsyncWriter(FILE* file, int32 offset, const void* data, size_t data_len) 70 : file_(file), 71 offset_(offset) { 72 data_->resize(data_len); 73 memcpy(&*data_->begin(), data, data_len); 74 } 75 76 virtual void Run() { 77 WriteToFile(file_, offset_, 78 &*data_->begin(), static_cast<int32>(data_->size())); 79 } 80 81 // Exposed as a static so it can be called directly from the Master to 82 // reduce the number of platform-specific I/O sites we have. Returns true if 83 // the write was complete. 84 static bool WriteToFile(FILE* file, 85 off_t offset, 86 const void* data, 87 size_t data_len) { 88 if (fseek(file, offset, SEEK_SET) != 0) 89 return false; // Don't write to an invalid part of the file. 90 91 size_t num_written = fwrite(data, 1, data_len, file); 92 93 // The write may not make it to the kernel (stdlib may buffer the write) 94 // until the next fseek/fclose call. If we crash, it's easy for our used 95 // item count to be out of sync with the number of hashes we write. 96 // Protect against this by calling fflush. 97 int ret = fflush(file); 98 DCHECK_EQ(0, ret); 99 return num_written == data_len; 100 } 101 102 private: 103 // The data to write and where to write it. 104 FILE* file_; 105 int32 offset_; // Offset from the beginning of the file. 106 107 // Most writes are just a single fingerprint, so we reserve that much in this 108 // object to avoid mallocs in that case. 109 StackVector<char, sizeof(VisitedLinkCommon::Fingerprint)> data_; 110 111 DISALLOW_COPY_AND_ASSIGN(AsyncWriter); 112 }; 113 114 // Used to asynchronously set the end of the file. This must be done on the 115 // same thread as the writing to keep things synchronized. 116 class AsyncSetEndOfFile : public Task { 117 public: 118 explicit AsyncSetEndOfFile(FILE* file) : file_(file) {} 119 120 virtual void Run() { 121 TruncateFile(file_); 122 } 123 124 private: 125 FILE* file_; 126 DISALLOW_COPY_AND_ASSIGN(AsyncSetEndOfFile); 127 }; 128 129 // Used to asynchronously close a file. This must be done on the same thread as 130 // the writing to keep things synchronized. 131 class AsyncCloseHandle : public Task { 132 public: 133 explicit AsyncCloseHandle(FILE* file) : file_(file) {} 134 135 virtual void Run() { 136 fclose(file_); 137 } 138 139 private: 140 FILE* file_; 141 DISALLOW_COPY_AND_ASSIGN(AsyncCloseHandle); 142 }; 143 144 } // namespace 145 146 // TableBuilder --------------------------------------------------------------- 147 148 // How rebuilding from history works 149 // --------------------------------- 150 // 151 // We mark that we're rebuilding from history by setting the table_builder_ 152 // member in VisitedLinkMaster to the TableBuilder we create. This builder 153 // will be called on the history thread by the history system for every URL 154 // in the database. 155 // 156 // The builder will store the fingerprints for those URLs, and then marshalls 157 // back to the main thread where the VisitedLinkMaster will be notified. The 158 // master then replaces its table with a new table containing the computed 159 // fingerprints. 160 // 161 // The builder must remain active while the history system is using it. 162 // Sometimes, the master will be deleted before the rebuild is complete, in 163 // which case it notifies the builder via DisownMaster(). The builder will 164 // delete itself once rebuilding is complete, and not execute any callback. 165 class VisitedLinkMaster::TableBuilder 166 : public HistoryService::URLEnumerator, 167 public base::RefCountedThreadSafe<TableBuilder> { 168 public: 169 TableBuilder(VisitedLinkMaster* master, 170 const uint8 salt[LINK_SALT_LENGTH]); 171 172 // Called on the main thread when the master is being destroyed. This will 173 // prevent a crash when the query completes and the master is no longer 174 // around. We can not actually do anything but mark this fact, since the 175 // table will be being rebuilt simultaneously on the other thread. 176 void DisownMaster(); 177 178 // HistoryService::URLEnumerator 179 virtual void OnURL(const GURL& url); 180 virtual void OnComplete(bool succeed); 181 182 private: 183 friend class base::RefCountedThreadSafe<TableBuilder>; 184 185 ~TableBuilder() {} 186 187 // OnComplete mashals to this function on the main thread to do the 188 // notification. 189 void OnCompleteMainThread(); 190 191 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD! 192 VisitedLinkMaster* master_; 193 194 // Indicates whether the operation has failed or not. 195 bool success_; 196 197 // Salt for this new table. 198 uint8 salt_[LINK_SALT_LENGTH]; 199 200 // Stores the fingerprints we computed on the background thread. 201 VisitedLinkCommon::Fingerprints fingerprints_; 202 }; 203 204 // VisitedLinkMaster ---------------------------------------------------------- 205 206 VisitedLinkMaster::VisitedLinkMaster(Listener* listener, 207 Profile* profile) { 208 InitMembers(listener, profile); 209 } 210 211 VisitedLinkMaster::VisitedLinkMaster(Listener* listener, 212 HistoryService* history_service, 213 bool suppress_rebuild, 214 const FilePath& filename, 215 int32 default_table_size) { 216 InitMembers(listener, NULL); 217 218 database_name_override_ = filename; 219 table_size_override_ = default_table_size; 220 history_service_override_ = history_service; 221 suppress_rebuild_ = suppress_rebuild; 222 } 223 224 VisitedLinkMaster::~VisitedLinkMaster() { 225 if (table_builder_.get()) { 226 // Prevent the table builder from calling us back now that we're being 227 // destroyed. Note that we DON'T delete the object, since the history 228 // system is still writing into it. When that is complete, the table 229 // builder will destroy itself when it finds we are gone. 230 table_builder_->DisownMaster(); 231 } 232 FreeURLTable(); 233 } 234 235 void VisitedLinkMaster::InitMembers(Listener* listener, Profile* profile) { 236 DCHECK(listener); 237 238 listener_ = listener; 239 file_ = NULL; 240 shared_memory_ = NULL; 241 shared_memory_serial_ = 0; 242 used_items_ = 0; 243 table_size_override_ = 0; 244 history_service_override_ = NULL; 245 suppress_rebuild_ = false; 246 profile_ = profile; 247 248 #ifndef NDEBUG 249 posted_asynchronous_operation_ = false; 250 #endif 251 } 252 253 bool VisitedLinkMaster::Init() { 254 // We probably shouldn't be loading this from the UI thread, 255 // but it does need to happen early on in startup. 256 // http://code.google.com/p/chromium/issues/detail?id=24163 257 base::ThreadRestrictions::ScopedAllowIO allow_io; 258 if (!InitFromFile()) 259 return InitFromScratch(suppress_rebuild_); 260 return true; 261 } 262 263 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) { 264 // Extra check that we are not incognito. This should not happen. 265 if (profile_ && profile_->IsOffTheRecord()) { 266 NOTREACHED(); 267 return null_hash_; 268 } 269 270 if (!url.is_valid()) 271 return null_hash_; // Don't add invalid URLs. 272 273 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(), 274 url.spec().size(), 275 salt_); 276 if (table_builder_) { 277 // If we have a pending delete for this fingerprint, cancel it. 278 std::set<Fingerprint>::iterator found = 279 deleted_since_rebuild_.find(fingerprint); 280 if (found != deleted_since_rebuild_.end()) 281 deleted_since_rebuild_.erase(found); 282 283 // A rebuild is in progress, save this addition in the temporary list so 284 // it can be added once rebuild is complete. 285 added_since_rebuild_.insert(fingerprint); 286 } 287 288 // If the table is "full", we don't add URLs and just drop them on the floor. 289 // This can happen if we get thousands of new URLs and something causes 290 // the table resizing to fail. This check prevents a hang in that case. Note 291 // that this is *not* the resize limit, this is just a sanity check. 292 if (used_items_ / 8 > table_length_ / 10) 293 return null_hash_; // Table is more than 80% full. 294 295 return AddFingerprint(fingerprint, true); 296 } 297 298 void VisitedLinkMaster::AddURL(const GURL& url) { 299 Hash index = TryToAddURL(url); 300 if (!table_builder_ && index != null_hash_) { 301 // Not rebuilding, so we want to keep the file on disk up-to-date. 302 WriteUsedItemCountToFile(); 303 WriteHashRangeToFile(index, index); 304 ResizeTableIfNecessary(); 305 } 306 } 307 308 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { 309 for (std::vector<GURL>::const_iterator i = url.begin(); 310 i != url.end(); ++i) { 311 Hash index = TryToAddURL(*i); 312 if (!table_builder_ && index != null_hash_) 313 ResizeTableIfNecessary(); 314 } 315 316 // Keeps the file on disk up-to-date. 317 if (!table_builder_) 318 WriteFullTable(); 319 } 320 321 void VisitedLinkMaster::DeleteAllURLs() { 322 // Any pending modifications are invalid. 323 added_since_rebuild_.clear(); 324 deleted_since_rebuild_.clear(); 325 326 // Clear the hash table. 327 used_items_ = 0; 328 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); 329 330 // Resize it if it is now too empty. Resize may write the new table out for 331 // us, otherwise, schedule writing the new table to disk ourselves. 332 if (!ResizeTableIfNecessary()) 333 WriteFullTable(); 334 335 listener_->Reset(); 336 } 337 338 void VisitedLinkMaster::DeleteURLs(const std::set<GURL>& urls) { 339 typedef std::set<GURL>::const_iterator SetIterator; 340 341 if (urls.empty()) 342 return; 343 344 listener_->Reset(); 345 346 if (table_builder_) { 347 // A rebuild is in progress, save this deletion in the temporary list so 348 // it can be added once rebuild is complete. 349 for (SetIterator i = urls.begin(); i != urls.end(); ++i) { 350 if (!i->is_valid()) 351 continue; 352 353 Fingerprint fingerprint = 354 ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_); 355 deleted_since_rebuild_.insert(fingerprint); 356 357 // If the URL was just added and now we're deleting it, it may be in the 358 // list of things added since the last rebuild. Delete it from that list. 359 std::set<Fingerprint>::iterator found = 360 added_since_rebuild_.find(fingerprint); 361 if (found != added_since_rebuild_.end()) 362 added_since_rebuild_.erase(found); 363 364 // Delete the URLs from the in-memory table, but don't bother writing 365 // to disk since it will be replaced soon. 366 DeleteFingerprint(fingerprint, false); 367 } 368 return; 369 } 370 371 // Compute the deleted URLs' fingerprints and delete them 372 std::set<Fingerprint> deleted_fingerprints; 373 for (SetIterator i = urls.begin(); i != urls.end(); ++i) { 374 if (!i->is_valid()) 375 continue; 376 deleted_fingerprints.insert( 377 ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_)); 378 } 379 DeleteFingerprintsFromCurrentTable(deleted_fingerprints); 380 } 381 382 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm 383 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint( 384 Fingerprint fingerprint, 385 bool send_notifications) { 386 if (!hash_table_ || table_length_ == 0) { 387 NOTREACHED(); // Not initialized. 388 return null_hash_; 389 } 390 391 Hash cur_hash = HashFingerprint(fingerprint); 392 Hash first_hash = cur_hash; 393 while (true) { 394 Fingerprint cur_fingerprint = FingerprintAt(cur_hash); 395 if (cur_fingerprint == fingerprint) 396 return null_hash_; // This fingerprint is already in there, do nothing. 397 398 if (cur_fingerprint == null_fingerprint_) { 399 // End of probe sequence found, insert here. 400 hash_table_[cur_hash] = fingerprint; 401 used_items_++; 402 // If allowed, notify listener that a new visited link was added. 403 if (send_notifications) 404 listener_->Add(fingerprint); 405 return cur_hash; 406 } 407 408 // Advance in the probe sequence. 409 cur_hash = IncrementHash(cur_hash); 410 if (cur_hash == first_hash) { 411 // This means that we've wrapped around and are about to go into an 412 // infinite loop. Something was wrong with the hashtable resizing 413 // logic, so stop here. 414 NOTREACHED(); 415 return null_hash_; 416 } 417 } 418 } 419 420 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( 421 const std::set<Fingerprint>& fingerprints) { 422 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); 423 424 // Delete the URLs from the table. 425 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); 426 i != fingerprints.end(); ++i) 427 DeleteFingerprint(*i, !bulk_write); 428 429 // These deleted fingerprints may make us shrink the table. 430 if (ResizeTableIfNecessary()) 431 return; // The resize function wrote the new table to disk for us. 432 433 // Nobody wrote this out for us, write the full file to disk. 434 if (bulk_write) 435 WriteFullTable(); 436 } 437 438 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint, 439 bool update_file) { 440 if (!hash_table_ || table_length_ == 0) { 441 NOTREACHED(); // Not initialized. 442 return false; 443 } 444 if (!IsVisited(fingerprint)) 445 return false; // Not in the database to delete. 446 447 // First update the header used count. 448 used_items_--; 449 if (update_file) 450 WriteUsedItemCountToFile(); 451 452 Hash deleted_hash = HashFingerprint(fingerprint); 453 454 // Find the range of "stuff" in the hash table that is adjacent to this 455 // fingerprint. These are things that could be affected by the change in 456 // the hash table. Since we use linear probing, anything after the deleted 457 // item up until an empty item could be affected. 458 Hash end_range = deleted_hash; 459 while (true) { 460 Hash next_hash = IncrementHash(end_range); 461 if (next_hash == deleted_hash) 462 break; // We wrapped around and the whole table is full. 463 if (!hash_table_[next_hash]) 464 break; // Found the last spot. 465 end_range = next_hash; 466 } 467 468 // We could get all fancy and move the affected fingerprints around, but 469 // instead we just remove them all and re-add them (minus our deleted one). 470 // This will mean there's a small window of time where the affected links 471 // won't be marked visited. 472 StackVector<Fingerprint, 32> shuffled_fingerprints; 473 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive. 474 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) { 475 if (hash_table_[i] != fingerprint) { 476 // Don't save the one we're deleting! 477 shuffled_fingerprints->push_back(hash_table_[i]); 478 479 // This will balance the increment of this value in AddFingerprint below 480 // so there is no net change. 481 used_items_--; 482 } 483 hash_table_[i] = null_fingerprint_; 484 } 485 486 if (!shuffled_fingerprints->empty()) { 487 // Need to add the new items back. 488 for (size_t i = 0; i < shuffled_fingerprints->size(); i++) 489 AddFingerprint(shuffled_fingerprints[i], false); 490 } 491 492 // Write the affected range to disk [deleted_hash, end_range]. 493 if (update_file) 494 WriteHashRangeToFile(deleted_hash, end_range); 495 496 return true; 497 } 498 499 bool VisitedLinkMaster::WriteFullTable() { 500 // This function can get called when the file is open, for example, when we 501 // resize the table. We must handle this case and not try to reopen the file, 502 // since there may be write operations pending on the file I/O thread. 503 // 504 // Note that once we start writing, we do not delete on error. This means 505 // there can be a partial file, but the short file will be detected next time 506 // we start, and will be replaced. 507 // 508 // This might possibly get corrupted if we crash in the middle of writing. 509 // We should pick up the most common types of these failures when we notice 510 // that the file size is different when we load it back in, and then we will 511 // regenerate the table. 512 if (!file_) { 513 FilePath filename; 514 GetDatabaseFileName(&filename); 515 file_ = OpenFile(filename, "wb+"); 516 if (!file_) { 517 DLOG(ERROR) << "Failed to open file " << filename.value(); 518 return false; 519 } 520 } 521 522 // Write the new header. 523 int32 header[4]; 524 header[0] = kFileSignature; 525 header[1] = kFileCurrentVersion; 526 header[2] = table_length_; 527 header[3] = used_items_; 528 WriteToFile(file_, 0, header, sizeof(header)); 529 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH); 530 531 // Write the hash data. 532 WriteToFile(file_, kFileHeaderSize, 533 hash_table_, table_length_ * sizeof(Fingerprint)); 534 535 // The hash table may have shrunk, so make sure this is the end. 536 BrowserThread::PostTask( 537 BrowserThread::FILE, FROM_HERE, new AsyncSetEndOfFile(file_)); 538 return true; 539 } 540 541 bool VisitedLinkMaster::InitFromFile() { 542 DCHECK(file_ == NULL); 543 544 FilePath filename; 545 GetDatabaseFileName(&filename); 546 ScopedFILE file_closer(OpenFile(filename, "rb+")); 547 if (!file_closer.get()) 548 return false; 549 550 int32 num_entries, used_count; 551 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_)) 552 return false; // Header isn't valid. 553 554 // Allocate and read the table. 555 if (!CreateURLTable(num_entries, false)) 556 return false; 557 if (!ReadFromFile(file_closer.get(), kFileHeaderSize, 558 hash_table_, num_entries * sizeof(Fingerprint))) { 559 FreeURLTable(); 560 return false; 561 } 562 used_items_ = used_count; 563 564 #ifndef NDEBUG 565 DebugValidate(); 566 #endif 567 568 file_ = file_closer.release(); 569 return true; 570 } 571 572 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { 573 int32 table_size = kDefaultTableSize; 574 if (table_size_override_) 575 table_size = table_size_override_; 576 577 // The salt must be generated before the table so that it can be copied to 578 // the shared memory. 579 GenerateSalt(salt_); 580 if (!CreateURLTable(table_size, true)) 581 return false; 582 583 #ifndef NDEBUG 584 DebugValidate(); 585 #endif 586 587 if (suppress_rebuild) { 588 // When we disallow rebuilds (normally just unit tests), just use the 589 // current empty table. 590 return WriteFullTable(); 591 } 592 593 // This will build the table from history. On the first run, history will 594 // be empty, so this will be correct. This will also write the new table 595 // to disk. We don't want to save explicitly here, since the rebuild may 596 // not complete, leaving us with an empty but valid visited link database. 597 // In the future, we won't know we need to try rebuilding again. 598 return RebuildTableFromHistory(); 599 } 600 601 bool VisitedLinkMaster::ReadFileHeader(FILE* file, 602 int32* num_entries, 603 int32* used_count, 604 uint8 salt[LINK_SALT_LENGTH]) { 605 // Get file size. 606 // Note that there is no need to seek back to the original location in the 607 // file since ReadFromFile() [which is the next call accessing the file] 608 // seeks before reading. 609 if (fseek(file, 0, SEEK_END) == -1) 610 return false; 611 size_t file_size = ftell(file); 612 613 if (file_size <= kFileHeaderSize) 614 return false; 615 616 uint8 header[kFileHeaderSize]; 617 if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) 618 return false; 619 620 // Verify the signature. 621 int32 signature; 622 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); 623 if (signature != kFileSignature) 624 return false; 625 626 // Verify the version is up-to-date. As with other read errors, a version 627 // mistmatch will trigger a rebuild of the database from history, which will 628 // have the effect of migrating the database. 629 int32 version; 630 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); 631 if (version != kFileCurrentVersion) 632 return false; // Bad version. 633 634 // Read the table size and make sure it matches the file size. 635 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); 636 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) 637 return false; // Bad size. 638 639 // Read the used item count. 640 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); 641 if (*used_count > *num_entries) 642 return false; // Bad used item count; 643 644 // Read the salt. 645 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); 646 647 // This file looks OK from the header's perspective. 648 return true; 649 } 650 651 bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) { 652 if (!database_name_override_.empty()) { 653 // use this filename, the directory must exist 654 *filename = database_name_override_; 655 return true; 656 } 657 658 if (!profile_ || profile_->GetPath().empty()) 659 return false; 660 661 FilePath profile_dir = profile_->GetPath(); 662 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); 663 return true; 664 } 665 666 // Initializes the shared memory structure. The salt should already be filled 667 // in so that it can be written to the shared memory 668 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { 669 // The table is the size of the table followed by the entries. 670 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); 671 672 // Create the shared memory object. 673 shared_memory_ = new base::SharedMemory(); 674 if (!shared_memory_) 675 return false; 676 677 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) { 678 delete shared_memory_; 679 shared_memory_ = NULL; 680 return false; 681 } 682 683 if (init_to_empty) { 684 memset(shared_memory_->memory(), 0, alloc_size); 685 used_items_ = 0; 686 } 687 table_length_ = num_entries; 688 689 // Save the header for other processes to read. 690 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); 691 header->length = table_length_; 692 memcpy(header->salt, salt_, LINK_SALT_LENGTH); 693 694 // Our table pointer is just the data immediately following the size. 695 hash_table_ = reinterpret_cast<Fingerprint*>( 696 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); 697 698 return true; 699 } 700 701 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { 702 base::SharedMemory *old_shared_memory = shared_memory_; 703 Fingerprint* old_hash_table = hash_table_; 704 int32 old_table_length = table_length_; 705 if (!CreateURLTable(num_entries, true)) { 706 // Try to put back the old state. 707 shared_memory_ = old_shared_memory; 708 hash_table_ = old_hash_table; 709 table_length_ = old_table_length; 710 return false; 711 } 712 713 #ifndef NDEBUG 714 DebugValidate(); 715 #endif 716 717 return true; 718 } 719 720 void VisitedLinkMaster::FreeURLTable() { 721 if (shared_memory_) { 722 delete shared_memory_; 723 shared_memory_ = NULL; 724 } 725 if (!file_) 726 return; 727 728 BrowserThread::PostTask( 729 BrowserThread::FILE, FROM_HERE, new AsyncCloseHandle(file_)); 730 } 731 732 bool VisitedLinkMaster::ResizeTableIfNecessary() { 733 DCHECK(table_length_ > 0) << "Must have a table"; 734 735 // Load limits for good performance/space. We are pretty conservative about 736 // keeping the table not very full. This is because we use linear probing 737 // which increases the likelihood of clumps of entries which will reduce 738 // performance. 739 const float max_table_load = 0.5f; // Grow when we're > this full. 740 const float min_table_load = 0.2f; // Shrink when we're < this full. 741 742 float load = ComputeTableLoad(); 743 if (load < max_table_load && 744 (table_length_ <= static_cast<float>(kDefaultTableSize) || 745 load > min_table_load)) 746 return false; 747 748 // Table needs to grow or shrink. 749 int new_size = NewTableSizeForCount(used_items_); 750 DCHECK(new_size > used_items_); 751 DCHECK(load <= min_table_load || new_size > table_length_); 752 ResizeTable(new_size); 753 return true; 754 } 755 756 void VisitedLinkMaster::ResizeTable(int32 new_size) { 757 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_); 758 shared_memory_serial_++; 759 760 #ifndef NDEBUG 761 DebugValidate(); 762 #endif 763 764 base::SharedMemory* old_shared_memory = shared_memory_; 765 Fingerprint* old_hash_table = hash_table_; 766 int32 old_table_length = table_length_; 767 if (!BeginReplaceURLTable(new_size)) 768 return; 769 770 // Now we have two tables, our local copy which is the old one, and the new 771 // one loaded into this object where we need to copy the data. 772 for (int32 i = 0; i < old_table_length; i++) { 773 Fingerprint cur = old_hash_table[i]; 774 if (cur) 775 AddFingerprint(cur, false); 776 } 777 778 // On error unmapping, just forget about it since we can't do anything 779 // else to release it. 780 delete old_shared_memory; 781 782 // Send an update notification to all child processes so they read the new 783 // table. 784 listener_->NewTable(shared_memory_); 785 786 #ifndef NDEBUG 787 DebugValidate(); 788 #endif 789 790 // The new table needs to be written to disk. 791 WriteFullTable(); 792 } 793 794 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { 795 // These table sizes are selected to be the maximum prime number less than 796 // a "convenient" multiple of 1K. 797 static const int table_sizes[] = { 798 16381, // 16K = 16384 <- don't shrink below this table size 799 // (should be == default_table_size) 800 32767, // 32K = 32768 801 65521, // 64K = 65536 802 130051, // 128K = 131072 803 262127, // 256K = 262144 804 524269, // 512K = 524288 805 1048549, // 1M = 1048576 806 2097143, // 2M = 2097152 807 4194301, // 4M = 4194304 808 8388571, // 8M = 8388608 809 16777199, // 16M = 16777216 810 33554347}; // 32M = 33554432 811 812 // Try to leave the table 33% full. 813 int desired = item_count * 3; 814 815 // Find the closest prime. 816 for (size_t i = 0; i < arraysize(table_sizes); i ++) { 817 if (table_sizes[i] > desired) 818 return table_sizes[i]; 819 } 820 821 // Growing very big, just approximate a "good" number, not growing as much 822 // as normal. 823 return item_count * 2 - 1; 824 } 825 826 // See the TableBuilder definition in the header file for how this works. 827 bool VisitedLinkMaster::RebuildTableFromHistory() { 828 DCHECK(!table_builder_); 829 if (table_builder_) 830 return false; 831 832 HistoryService* history_service = history_service_override_; 833 if (!history_service && profile_) { 834 history_service = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); 835 } 836 837 if (!history_service) { 838 DLOG(WARNING) << "Attempted to rebuild visited link table, but couldn't " 839 "obtain a HistoryService."; 840 return false; 841 } 842 843 // TODO(brettw) make sure we have reasonable salt! 844 table_builder_ = new TableBuilder(this, salt_); 845 846 // Make sure the table builder stays live during the call, even if the 847 // master is deleted. This is balanced in TableBuilder::OnCompleteMainThread. 848 table_builder_->AddRef(); 849 history_service->IterateURLs(table_builder_); 850 return true; 851 } 852 853 // See the TableBuilder declaration above for how this works. 854 void VisitedLinkMaster::OnTableRebuildComplete( 855 bool success, 856 const std::vector<Fingerprint>& fingerprints) { 857 if (success) { 858 // Replace the old table with a new blank one. 859 shared_memory_serial_++; 860 861 // We are responsible for freeing it AFTER it has been replaced if 862 // replacement succeeds. 863 base::SharedMemory* old_shared_memory = shared_memory_; 864 865 int new_table_size = NewTableSizeForCount( 866 static_cast<int>(fingerprints.size() + added_since_rebuild_.size())); 867 if (BeginReplaceURLTable(new_table_size)) { 868 // Free the old table. 869 delete old_shared_memory; 870 871 // Add the stored fingerprints to the hash table. 872 for (size_t i = 0; i < fingerprints.size(); i++) 873 AddFingerprint(fingerprints[i], false); 874 875 // Also add anything that was added while we were asynchronously 876 // generating the new table. 877 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin(); 878 i != added_since_rebuild_.end(); ++i) 879 AddFingerprint(*i, false); 880 added_since_rebuild_.clear(); 881 882 // Now handle deletions. 883 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_); 884 deleted_since_rebuild_.clear(); 885 886 // Send an update notification to all child processes. 887 listener_->NewTable(shared_memory_); 888 889 // We shouldn't be writing the table from the main thread! 890 // http://code.google.com/p/chromium/issues/detail?id=24163 891 base::ThreadRestrictions::ScopedAllowIO allow_io; 892 WriteFullTable(); 893 } 894 } 895 table_builder_ = NULL; // Will release our reference to the builder. 896 897 // Notify the unit test that the rebuild is complete (will be NULL in prod.) 898 if (rebuild_complete_task_.get()) { 899 rebuild_complete_task_->Run(); 900 rebuild_complete_task_.reset(NULL); 901 } 902 } 903 904 void VisitedLinkMaster::WriteToFile(FILE* file, 905 off_t offset, 906 void* data, 907 int32 data_size) { 908 #ifndef NDEBUG 909 posted_asynchronous_operation_ = true; 910 #endif 911 912 BrowserThread::PostTask( 913 BrowserThread::FILE, FROM_HERE, 914 new AsyncWriter(file, offset, data, data_size)); 915 } 916 917 void VisitedLinkMaster::WriteUsedItemCountToFile() { 918 if (!file_) 919 return; // See comment on the file_ variable for why this might happen. 920 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); 921 } 922 923 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { 924 if (!file_) 925 return; // See comment on the file_ variable for why this might happen. 926 if (last_hash < first_hash) { 927 // Handle wraparound at 0. This first write is first_hash->EOF 928 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, 929 &hash_table_[first_hash], 930 (table_length_ - first_hash + 1) * sizeof(Fingerprint)); 931 932 // Now do 0->last_lash. 933 WriteToFile(file_, kFileHeaderSize, hash_table_, 934 (last_hash + 1) * sizeof(Fingerprint)); 935 } else { 936 // Normal case, just write the range. 937 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, 938 &hash_table_[first_hash], 939 (last_hash - first_hash + 1) * sizeof(Fingerprint)); 940 } 941 } 942 943 bool VisitedLinkMaster::ReadFromFile(FILE* file, 944 off_t offset, 945 void* data, 946 size_t data_size) { 947 #ifndef NDEBUG 948 // Since this function is synchronous, we require that no asynchronous 949 // operations could possibly be pending. 950 DCHECK(!posted_asynchronous_operation_); 951 #endif 952 953 if (fseek(file, offset, SEEK_SET) != 0) 954 return false; 955 956 size_t num_read = fread(data, 1, data_size, file); 957 return num_read == data_size; 958 } 959 960 // VisitedLinkTableBuilder ---------------------------------------------------- 961 962 VisitedLinkMaster::TableBuilder::TableBuilder( 963 VisitedLinkMaster* master, 964 const uint8 salt[LINK_SALT_LENGTH]) 965 : master_(master), 966 success_(true) { 967 fingerprints_.reserve(4096); 968 memcpy(salt_, salt, sizeof(salt)); 969 } 970 971 // TODO(brettw): Do we want to try to cancel the request if this happens? It 972 // could delay shutdown if there are a lot of URLs. 973 void VisitedLinkMaster::TableBuilder::DisownMaster() { 974 master_ = NULL; 975 } 976 977 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { 978 if (!url.is_empty()) { 979 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( 980 url.spec().data(), url.spec().length(), salt_)); 981 } 982 } 983 984 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { 985 success_ = success; 986 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; 987 988 // Marshal to the main thread to notify the VisitedLinkMaster that the 989 // rebuild is complete. 990 BrowserThread::PostTask( 991 BrowserThread::UI, FROM_HERE, 992 NewRunnableMethod(this, &TableBuilder::OnCompleteMainThread)); 993 } 994 995 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() { 996 if (master_) 997 master_->OnTableRebuildComplete(success_, fingerprints_); 998 999 // WILL (generally) DELETE THIS! This balances the AddRef in 1000 // VisitedLinkMaster::RebuildTableFromHistory. 1001 Release(); 1002 } 1003