1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "components/visitedlink/browser/visitedlink_master.h" 6 7 #if defined(OS_WIN) 8 #include <windows.h> 9 #include <io.h> 10 #include <shlobj.h> 11 #endif // defined(OS_WIN) 12 #include <stdio.h> 13 14 #include <algorithm> 15 16 #include "base/bind.h" 17 #include "base/bind_helpers.h" 18 #include "base/containers/stack_container.h" 19 #include "base/file_util.h" 20 #include "base/logging.h" 21 #include "base/message_loop/message_loop.h" 22 #include "base/path_service.h" 23 #include "base/rand_util.h" 24 #include "base/strings/string_util.h" 25 #include "base/threading/thread_restrictions.h" 26 #include "components/visitedlink/browser/visitedlink_delegate.h" 27 #include "components/visitedlink/browser/visitedlink_event_listener.h" 28 #include "content/public/browser/browser_context.h" 29 #include "content/public/browser/browser_thread.h" 30 #include "url/gurl.h" 31 32 using content::BrowserThread; 33 using file_util::ScopedFILE; 34 using file_util::OpenFile; 35 using file_util::TruncateFile; 36 37 namespace visitedlink { 38 39 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0; 40 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4; 41 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8; 42 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12; 43 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16; 44 45 const int32 VisitedLinkMaster::kFileCurrentVersion = 3; 46 47 // the signature at the beginning of the URL table = "VLnk" (visited links) 48 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56; 49 const size_t VisitedLinkMaster::kFileHeaderSize = 50 kFileHeaderSaltOffset + LINK_SALT_LENGTH; 51 52 // This value should also be the same as the smallest size in the lookup 53 // table in NewTableSizeForCount (prime number). 54 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381; 55 56 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64; 57 58 namespace { 59 60 // Fills the given salt structure with some quasi-random values 61 // It is not necessary to generate a cryptographically strong random string, 62 // only that it be reasonably different for different users. 63 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { 64 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; 65 uint64 randval = base::RandUint64(); 66 memcpy(salt, &randval, 8); 67 } 68 69 // Opens file on a background thread to not block UI thread. 70 void AsyncOpen(FILE** file, const base::FilePath& filename) { 71 *file = OpenFile(filename, "wb+"); 72 DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value(); 73 } 74 75 // Returns true if the write was complete. 76 static bool WriteToFile(FILE* file, 77 off_t offset, 78 const void* data, 79 size_t data_len) { 80 if (fseek(file, offset, SEEK_SET) != 0) 81 return false; // Don't write to an invalid part of the file. 82 83 size_t num_written = fwrite(data, 1, data_len, file); 84 85 // The write may not make it to the kernel (stdlib may buffer the write) 86 // until the next fseek/fclose call. If we crash, it's easy for our used 87 // item count to be out of sync with the number of hashes we write. 88 // Protect against this by calling fflush. 89 int ret = fflush(file); 90 DCHECK_EQ(0, ret); 91 return num_written == data_len; 92 } 93 94 // This task executes on a background thread and executes a write. This 95 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE 96 // is used because file may still not be opened by the time of scheduling 97 // the task for execution. 98 void AsyncWrite(FILE** file, int32 offset, const std::string& data) { 99 if (*file) 100 WriteToFile(*file, offset, data.data(), data.size()); 101 } 102 103 // Truncates the file to the current position asynchronously on a background 104 // thread. Double pointer to FILE is used because file may still not be opened 105 // by the time of scheduling the task for execution. 106 void AsyncTruncate(FILE** file) { 107 if (*file) 108 base::IgnoreResult(TruncateFile(*file)); 109 } 110 111 // Closes the file on a background thread and releases memory used for storage 112 // of FILE* value. Double pointer to FILE is used because file may still not 113 // be opened by the time of scheduling the task for execution. 114 void AsyncClose(FILE** file) { 115 if (*file) 116 base::IgnoreResult(fclose(*file)); 117 free(file); 118 } 119 120 } // namespace 121 122 // TableBuilder --------------------------------------------------------------- 123 124 // How rebuilding from history works 125 // --------------------------------- 126 // 127 // We mark that we're rebuilding from history by setting the table_builder_ 128 // member in VisitedLinkMaster to the TableBuilder we create. This builder 129 // will be called on the history thread by the history system for every URL 130 // in the database. 131 // 132 // The builder will store the fingerprints for those URLs, and then marshalls 133 // back to the main thread where the VisitedLinkMaster will be notified. The 134 // master then replaces its table with a new table containing the computed 135 // fingerprints. 136 // 137 // The builder must remain active while the history system is using it. 138 // Sometimes, the master will be deleted before the rebuild is complete, in 139 // which case it notifies the builder via DisownMaster(). The builder will 140 // delete itself once rebuilding is complete, and not execute any callback. 141 class VisitedLinkMaster::TableBuilder 142 : public VisitedLinkDelegate::URLEnumerator { 143 public: 144 TableBuilder(VisitedLinkMaster* master, 145 const uint8 salt[LINK_SALT_LENGTH]); 146 147 // Called on the main thread when the master is being destroyed. This will 148 // prevent a crash when the query completes and the master is no longer 149 // around. We can not actually do anything but mark this fact, since the 150 // table will be being rebuilt simultaneously on the other thread. 151 void DisownMaster(); 152 153 // VisitedLinkDelegate::URLEnumerator 154 virtual void OnURL(const GURL& url) OVERRIDE; 155 virtual void OnComplete(bool succeed) OVERRIDE; 156 157 private: 158 virtual ~TableBuilder() {} 159 160 // OnComplete mashals to this function on the main thread to do the 161 // notification. 162 void OnCompleteMainThread(); 163 164 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD! 165 VisitedLinkMaster* master_; 166 167 // Indicates whether the operation has failed or not. 168 bool success_; 169 170 // Salt for this new table. 171 uint8 salt_[LINK_SALT_LENGTH]; 172 173 // Stores the fingerprints we computed on the background thread. 174 VisitedLinkCommon::Fingerprints fingerprints_; 175 176 DISALLOW_COPY_AND_ASSIGN(TableBuilder); 177 }; 178 179 // VisitedLinkMaster ---------------------------------------------------------- 180 181 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context, 182 VisitedLinkDelegate* delegate, 183 bool persist_to_disk) 184 : browser_context_(browser_context), 185 delegate_(delegate), 186 listener_(new VisitedLinkEventListener(this, browser_context)), 187 persist_to_disk_(persist_to_disk) { 188 InitMembers(); 189 } 190 191 VisitedLinkMaster::VisitedLinkMaster(Listener* listener, 192 VisitedLinkDelegate* delegate, 193 bool persist_to_disk, 194 bool suppress_rebuild, 195 const base::FilePath& filename, 196 int32 default_table_size) 197 : browser_context_(NULL), 198 delegate_(delegate), 199 persist_to_disk_(persist_to_disk) { 200 listener_.reset(listener); 201 DCHECK(listener_.get()); 202 InitMembers(); 203 204 database_name_override_ = filename; 205 table_size_override_ = default_table_size; 206 suppress_rebuild_ = suppress_rebuild; 207 } 208 209 VisitedLinkMaster::~VisitedLinkMaster() { 210 if (table_builder_.get()) { 211 // Prevent the table builder from calling us back now that we're being 212 // destroyed. Note that we DON'T delete the object, since the history 213 // system is still writing into it. When that is complete, the table 214 // builder will destroy itself when it finds we are gone. 215 table_builder_->DisownMaster(); 216 } 217 FreeURLTable(); 218 // FreeURLTable() will schedule closing of the file and deletion of |file_|. 219 // So nothing should be done here. 220 } 221 222 void VisitedLinkMaster::InitMembers() { 223 file_ = NULL; 224 shared_memory_ = NULL; 225 shared_memory_serial_ = 0; 226 used_items_ = 0; 227 table_size_override_ = 0; 228 suppress_rebuild_ = false; 229 sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken(); 230 231 #ifndef NDEBUG 232 posted_asynchronous_operation_ = false; 233 #endif 234 } 235 236 bool VisitedLinkMaster::Init() { 237 // We probably shouldn't be loading this from the UI thread, 238 // but it does need to happen early on in startup. 239 // http://code.google.com/p/chromium/issues/detail?id=24163 240 base::ThreadRestrictions::ScopedAllowIO allow_io; 241 242 if (persist_to_disk_) { 243 if (InitFromFile()) 244 return true; 245 } 246 return InitFromScratch(suppress_rebuild_); 247 } 248 249 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) { 250 // Extra check that we are not incognito. This should not happen. 251 // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is 252 // removed from BrowserContext. 253 if (browser_context_ && browser_context_->IsOffTheRecord()) { 254 NOTREACHED(); 255 return null_hash_; 256 } 257 258 if (!url.is_valid()) 259 return null_hash_; // Don't add invalid URLs. 260 261 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(), 262 url.spec().size(), 263 salt_); 264 if (table_builder_.get()) { 265 // If we have a pending delete for this fingerprint, cancel it. 266 std::set<Fingerprint>::iterator found = 267 deleted_since_rebuild_.find(fingerprint); 268 if (found != deleted_since_rebuild_.end()) 269 deleted_since_rebuild_.erase(found); 270 271 // A rebuild is in progress, save this addition in the temporary list so 272 // it can be added once rebuild is complete. 273 added_since_rebuild_.insert(fingerprint); 274 } 275 276 // If the table is "full", we don't add URLs and just drop them on the floor. 277 // This can happen if we get thousands of new URLs and something causes 278 // the table resizing to fail. This check prevents a hang in that case. Note 279 // that this is *not* the resize limit, this is just a sanity check. 280 if (used_items_ / 8 > table_length_ / 10) 281 return null_hash_; // Table is more than 80% full. 282 283 return AddFingerprint(fingerprint, true); 284 } 285 286 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here, 287 const base::Closure& task) { 288 DCHECK(persist_to_disk_); 289 BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_, 290 from_here, task); 291 } 292 293 void VisitedLinkMaster::AddURL(const GURL& url) { 294 Hash index = TryToAddURL(url); 295 if (!table_builder_.get() && index != null_hash_) { 296 // Not rebuilding, so we want to keep the file on disk up-to-date. 297 if (persist_to_disk_) { 298 WriteUsedItemCountToFile(); 299 WriteHashRangeToFile(index, index); 300 } 301 ResizeTableIfNecessary(); 302 } 303 } 304 305 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { 306 for (std::vector<GURL>::const_iterator i = url.begin(); 307 i != url.end(); ++i) { 308 Hash index = TryToAddURL(*i); 309 if (!table_builder_.get() && index != null_hash_) 310 ResizeTableIfNecessary(); 311 } 312 313 // Keeps the file on disk up-to-date. 314 if (!table_builder_.get() && persist_to_disk_) 315 WriteFullTable(); 316 } 317 318 void VisitedLinkMaster::DeleteAllURLs() { 319 // Any pending modifications are invalid. 320 added_since_rebuild_.clear(); 321 deleted_since_rebuild_.clear(); 322 323 // Clear the hash table. 324 used_items_ = 0; 325 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); 326 327 // Resize it if it is now too empty. Resize may write the new table out for 328 // us, otherwise, schedule writing the new table to disk ourselves. 329 if (!ResizeTableIfNecessary() && persist_to_disk_) 330 WriteFullTable(); 331 332 listener_->Reset(); 333 } 334 335 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() { 336 return delegate_; 337 } 338 339 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) { 340 if (!urls->HasNextURL()) 341 return; 342 343 listener_->Reset(); 344 345 if (table_builder_.get()) { 346 // A rebuild is in progress, save this deletion in the temporary list so 347 // it can be added once rebuild is complete. 348 while (urls->HasNextURL()) { 349 const GURL& url(urls->NextURL()); 350 if (!url.is_valid()) 351 continue; 352 353 Fingerprint fingerprint = 354 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_); 355 deleted_since_rebuild_.insert(fingerprint); 356 357 // If the URL was just added and now we're deleting it, it may be in the 358 // list of things added since the last rebuild. Delete it from that list. 359 std::set<Fingerprint>::iterator found = 360 added_since_rebuild_.find(fingerprint); 361 if (found != added_since_rebuild_.end()) 362 added_since_rebuild_.erase(found); 363 364 // Delete the URLs from the in-memory table, but don't bother writing 365 // to disk since it will be replaced soon. 366 DeleteFingerprint(fingerprint, false); 367 } 368 return; 369 } 370 371 // Compute the deleted URLs' fingerprints and delete them 372 std::set<Fingerprint> deleted_fingerprints; 373 while (urls->HasNextURL()) { 374 const GURL& url(urls->NextURL()); 375 if (!url.is_valid()) 376 continue; 377 deleted_fingerprints.insert( 378 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_)); 379 } 380 DeleteFingerprintsFromCurrentTable(deleted_fingerprints); 381 } 382 383 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm 384 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint( 385 Fingerprint fingerprint, 386 bool send_notifications) { 387 if (!hash_table_ || table_length_ == 0) { 388 NOTREACHED(); // Not initialized. 389 return null_hash_; 390 } 391 392 Hash cur_hash = HashFingerprint(fingerprint); 393 Hash first_hash = cur_hash; 394 while (true) { 395 Fingerprint cur_fingerprint = FingerprintAt(cur_hash); 396 if (cur_fingerprint == fingerprint) 397 return null_hash_; // This fingerprint is already in there, do nothing. 398 399 if (cur_fingerprint == null_fingerprint_) { 400 // End of probe sequence found, insert here. 401 hash_table_[cur_hash] = fingerprint; 402 used_items_++; 403 // If allowed, notify listener that a new visited link was added. 404 if (send_notifications) 405 listener_->Add(fingerprint); 406 return cur_hash; 407 } 408 409 // Advance in the probe sequence. 410 cur_hash = IncrementHash(cur_hash); 411 if (cur_hash == first_hash) { 412 // This means that we've wrapped around and are about to go into an 413 // infinite loop. Something was wrong with the hashtable resizing 414 // logic, so stop here. 415 NOTREACHED(); 416 return null_hash_; 417 } 418 } 419 } 420 421 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( 422 const std::set<Fingerprint>& fingerprints) { 423 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); 424 425 // Delete the URLs from the table. 426 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); 427 i != fingerprints.end(); ++i) 428 DeleteFingerprint(*i, !bulk_write); 429 430 // These deleted fingerprints may make us shrink the table. 431 if (ResizeTableIfNecessary()) 432 return; // The resize function wrote the new table to disk for us. 433 434 // Nobody wrote this out for us, write the full file to disk. 435 if (bulk_write && persist_to_disk_) 436 WriteFullTable(); 437 } 438 439 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint, 440 bool update_file) { 441 if (!hash_table_ || table_length_ == 0) { 442 NOTREACHED(); // Not initialized. 443 return false; 444 } 445 if (!IsVisited(fingerprint)) 446 return false; // Not in the database to delete. 447 448 // First update the header used count. 449 used_items_--; 450 if (update_file && persist_to_disk_) 451 WriteUsedItemCountToFile(); 452 453 Hash deleted_hash = HashFingerprint(fingerprint); 454 455 // Find the range of "stuff" in the hash table that is adjacent to this 456 // fingerprint. These are things that could be affected by the change in 457 // the hash table. Since we use linear probing, anything after the deleted 458 // item up until an empty item could be affected. 459 Hash end_range = deleted_hash; 460 while (true) { 461 Hash next_hash = IncrementHash(end_range); 462 if (next_hash == deleted_hash) 463 break; // We wrapped around and the whole table is full. 464 if (!hash_table_[next_hash]) 465 break; // Found the last spot. 466 end_range = next_hash; 467 } 468 469 // We could get all fancy and move the affected fingerprints around, but 470 // instead we just remove them all and re-add them (minus our deleted one). 471 // This will mean there's a small window of time where the affected links 472 // won't be marked visited. 473 base::StackVector<Fingerprint, 32> shuffled_fingerprints; 474 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive. 475 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) { 476 if (hash_table_[i] != fingerprint) { 477 // Don't save the one we're deleting! 478 shuffled_fingerprints->push_back(hash_table_[i]); 479 480 // This will balance the increment of this value in AddFingerprint below 481 // so there is no net change. 482 used_items_--; 483 } 484 hash_table_[i] = null_fingerprint_; 485 } 486 487 if (!shuffled_fingerprints->empty()) { 488 // Need to add the new items back. 489 for (size_t i = 0; i < shuffled_fingerprints->size(); i++) 490 AddFingerprint(shuffled_fingerprints[i], false); 491 } 492 493 // Write the affected range to disk [deleted_hash, end_range]. 494 if (update_file && persist_to_disk_) 495 WriteHashRangeToFile(deleted_hash, end_range); 496 497 return true; 498 } 499 500 void VisitedLinkMaster::WriteFullTable() { 501 // This function can get called when the file is open, for example, when we 502 // resize the table. We must handle this case and not try to reopen the file, 503 // since there may be write operations pending on the file I/O thread. 504 // 505 // Note that once we start writing, we do not delete on error. This means 506 // there can be a partial file, but the short file will be detected next time 507 // we start, and will be replaced. 508 // 509 // This might possibly get corrupted if we crash in the middle of writing. 510 // We should pick up the most common types of these failures when we notice 511 // that the file size is different when we load it back in, and then we will 512 // regenerate the table. 513 DCHECK(persist_to_disk_); 514 515 if (!file_) { 516 file_ = static_cast<FILE**>(calloc(1, sizeof(*file_))); 517 base::FilePath filename; 518 GetDatabaseFileName(&filename); 519 PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename)); 520 } 521 522 // Write the new header. 523 int32 header[4]; 524 header[0] = kFileSignature; 525 header[1] = kFileCurrentVersion; 526 header[2] = table_length_; 527 header[3] = used_items_; 528 WriteToFile(file_, 0, header, sizeof(header)); 529 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH); 530 531 // Write the hash data. 532 WriteToFile(file_, kFileHeaderSize, 533 hash_table_, table_length_ * sizeof(Fingerprint)); 534 535 // The hash table may have shrunk, so make sure this is the end. 536 PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_)); 537 } 538 539 bool VisitedLinkMaster::InitFromFile() { 540 DCHECK(file_ == NULL); 541 DCHECK(persist_to_disk_); 542 543 base::FilePath filename; 544 GetDatabaseFileName(&filename); 545 ScopedFILE file_closer(OpenFile(filename, "rb+")); 546 if (!file_closer.get()) 547 return false; 548 549 int32 num_entries, used_count; 550 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_)) 551 return false; // Header isn't valid. 552 553 // Allocate and read the table. 554 if (!CreateURLTable(num_entries, false)) 555 return false; 556 if (!ReadFromFile(file_closer.get(), kFileHeaderSize, 557 hash_table_, num_entries * sizeof(Fingerprint))) { 558 FreeURLTable(); 559 return false; 560 } 561 used_items_ = used_count; 562 563 #ifndef NDEBUG 564 DebugValidate(); 565 #endif 566 567 file_ = static_cast<FILE**>(malloc(sizeof(*file_))); 568 *file_ = file_closer.release(); 569 return true; 570 } 571 572 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { 573 int32 table_size = kDefaultTableSize; 574 if (table_size_override_) 575 table_size = table_size_override_; 576 577 // The salt must be generated before the table so that it can be copied to 578 // the shared memory. 579 GenerateSalt(salt_); 580 if (!CreateURLTable(table_size, true)) 581 return false; 582 583 #ifndef NDEBUG 584 DebugValidate(); 585 #endif 586 587 if (suppress_rebuild && persist_to_disk_) { 588 // When we disallow rebuilds (normally just unit tests), just use the 589 // current empty table. 590 WriteFullTable(); 591 return true; 592 } 593 594 // This will build the table from history. On the first run, history will 595 // be empty, so this will be correct. This will also write the new table 596 // to disk. We don't want to save explicitly here, since the rebuild may 597 // not complete, leaving us with an empty but valid visited link database. 598 // In the future, we won't know we need to try rebuilding again. 599 return RebuildTableFromDelegate(); 600 } 601 602 bool VisitedLinkMaster::ReadFileHeader(FILE* file, 603 int32* num_entries, 604 int32* used_count, 605 uint8 salt[LINK_SALT_LENGTH]) { 606 DCHECK(persist_to_disk_); 607 608 // Get file size. 609 // Note that there is no need to seek back to the original location in the 610 // file since ReadFromFile() [which is the next call accessing the file] 611 // seeks before reading. 612 if (fseek(file, 0, SEEK_END) == -1) 613 return false; 614 size_t file_size = ftell(file); 615 616 if (file_size <= kFileHeaderSize) 617 return false; 618 619 uint8 header[kFileHeaderSize]; 620 if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) 621 return false; 622 623 // Verify the signature. 624 int32 signature; 625 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); 626 if (signature != kFileSignature) 627 return false; 628 629 // Verify the version is up-to-date. As with other read errors, a version 630 // mistmatch will trigger a rebuild of the database from history, which will 631 // have the effect of migrating the database. 632 int32 version; 633 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); 634 if (version != kFileCurrentVersion) 635 return false; // Bad version. 636 637 // Read the table size and make sure it matches the file size. 638 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); 639 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) 640 return false; // Bad size. 641 642 // Read the used item count. 643 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); 644 if (*used_count > *num_entries) 645 return false; // Bad used item count; 646 647 // Read the salt. 648 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); 649 650 // This file looks OK from the header's perspective. 651 return true; 652 } 653 654 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) { 655 if (!database_name_override_.empty()) { 656 // use this filename, the directory must exist 657 *filename = database_name_override_; 658 return true; 659 } 660 661 if (!browser_context_ || browser_context_->GetPath().empty()) 662 return false; 663 664 base::FilePath profile_dir = browser_context_->GetPath(); 665 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); 666 return true; 667 } 668 669 // Initializes the shared memory structure. The salt should already be filled 670 // in so that it can be written to the shared memory 671 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { 672 // The table is the size of the table followed by the entries. 673 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); 674 675 // Create the shared memory object. 676 shared_memory_ = new base::SharedMemory(); 677 if (!shared_memory_) 678 return false; 679 680 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) { 681 delete shared_memory_; 682 shared_memory_ = NULL; 683 return false; 684 } 685 686 if (init_to_empty) { 687 memset(shared_memory_->memory(), 0, alloc_size); 688 used_items_ = 0; 689 } 690 table_length_ = num_entries; 691 692 // Save the header for other processes to read. 693 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); 694 header->length = table_length_; 695 memcpy(header->salt, salt_, LINK_SALT_LENGTH); 696 697 // Our table pointer is just the data immediately following the size. 698 hash_table_ = reinterpret_cast<Fingerprint*>( 699 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); 700 701 return true; 702 } 703 704 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { 705 base::SharedMemory *old_shared_memory = shared_memory_; 706 Fingerprint* old_hash_table = hash_table_; 707 int32 old_table_length = table_length_; 708 if (!CreateURLTable(num_entries, true)) { 709 // Try to put back the old state. 710 shared_memory_ = old_shared_memory; 711 hash_table_ = old_hash_table; 712 table_length_ = old_table_length; 713 return false; 714 } 715 716 #ifndef NDEBUG 717 DebugValidate(); 718 #endif 719 720 return true; 721 } 722 723 void VisitedLinkMaster::FreeURLTable() { 724 if (shared_memory_) { 725 delete shared_memory_; 726 shared_memory_ = NULL; 727 } 728 if (!persist_to_disk_ || !file_) 729 return; 730 PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_)); 731 // AsyncClose() will close the file and free the memory pointed by |file_|. 732 file_ = NULL; 733 } 734 735 bool VisitedLinkMaster::ResizeTableIfNecessary() { 736 DCHECK(table_length_ > 0) << "Must have a table"; 737 738 // Load limits for good performance/space. We are pretty conservative about 739 // keeping the table not very full. This is because we use linear probing 740 // which increases the likelihood of clumps of entries which will reduce 741 // performance. 742 const float max_table_load = 0.5f; // Grow when we're > this full. 743 const float min_table_load = 0.2f; // Shrink when we're < this full. 744 745 float load = ComputeTableLoad(); 746 if (load < max_table_load && 747 (table_length_ <= static_cast<float>(kDefaultTableSize) || 748 load > min_table_load)) 749 return false; 750 751 // Table needs to grow or shrink. 752 int new_size = NewTableSizeForCount(used_items_); 753 DCHECK(new_size > used_items_); 754 DCHECK(load <= min_table_load || new_size > table_length_); 755 ResizeTable(new_size); 756 return true; 757 } 758 759 void VisitedLinkMaster::ResizeTable(int32 new_size) { 760 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_); 761 shared_memory_serial_++; 762 763 #ifndef NDEBUG 764 DebugValidate(); 765 #endif 766 767 base::SharedMemory* old_shared_memory = shared_memory_; 768 Fingerprint* old_hash_table = hash_table_; 769 int32 old_table_length = table_length_; 770 if (!BeginReplaceURLTable(new_size)) 771 return; 772 773 // Now we have two tables, our local copy which is the old one, and the new 774 // one loaded into this object where we need to copy the data. 775 for (int32 i = 0; i < old_table_length; i++) { 776 Fingerprint cur = old_hash_table[i]; 777 if (cur) 778 AddFingerprint(cur, false); 779 } 780 781 // On error unmapping, just forget about it since we can't do anything 782 // else to release it. 783 delete old_shared_memory; 784 785 // Send an update notification to all child processes so they read the new 786 // table. 787 listener_->NewTable(shared_memory_); 788 789 #ifndef NDEBUG 790 DebugValidate(); 791 #endif 792 793 // The new table needs to be written to disk. 794 if (persist_to_disk_) 795 WriteFullTable(); 796 } 797 798 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { 799 // These table sizes are selected to be the maximum prime number less than 800 // a "convenient" multiple of 1K. 801 static const int table_sizes[] = { 802 16381, // 16K = 16384 <- don't shrink below this table size 803 // (should be == default_table_size) 804 32767, // 32K = 32768 805 65521, // 64K = 65536 806 130051, // 128K = 131072 807 262127, // 256K = 262144 808 524269, // 512K = 524288 809 1048549, // 1M = 1048576 810 2097143, // 2M = 2097152 811 4194301, // 4M = 4194304 812 8388571, // 8M = 8388608 813 16777199, // 16M = 16777216 814 33554347}; // 32M = 33554432 815 816 // Try to leave the table 33% full. 817 int desired = item_count * 3; 818 819 // Find the closest prime. 820 for (size_t i = 0; i < arraysize(table_sizes); i ++) { 821 if (table_sizes[i] > desired) 822 return table_sizes[i]; 823 } 824 825 // Growing very big, just approximate a "good" number, not growing as much 826 // as normal. 827 return item_count * 2 - 1; 828 } 829 830 // See the TableBuilder definition in the header file for how this works. 831 bool VisitedLinkMaster::RebuildTableFromDelegate() { 832 DCHECK(!table_builder_.get()); 833 834 // TODO(brettw) make sure we have reasonable salt! 835 table_builder_ = new TableBuilder(this, salt_); 836 delegate_->RebuildTable(table_builder_); 837 return true; 838 } 839 840 // See the TableBuilder declaration above for how this works. 841 void VisitedLinkMaster::OnTableRebuildComplete( 842 bool success, 843 const std::vector<Fingerprint>& fingerprints) { 844 if (success) { 845 // Replace the old table with a new blank one. 846 shared_memory_serial_++; 847 848 // We are responsible for freeing it AFTER it has been replaced if 849 // replacement succeeds. 850 base::SharedMemory* old_shared_memory = shared_memory_; 851 852 int new_table_size = NewTableSizeForCount( 853 static_cast<int>(fingerprints.size() + added_since_rebuild_.size())); 854 if (BeginReplaceURLTable(new_table_size)) { 855 // Free the old table. 856 delete old_shared_memory; 857 858 // Add the stored fingerprints to the hash table. 859 for (size_t i = 0; i < fingerprints.size(); i++) 860 AddFingerprint(fingerprints[i], false); 861 862 // Also add anything that was added while we were asynchronously 863 // generating the new table. 864 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin(); 865 i != added_since_rebuild_.end(); ++i) 866 AddFingerprint(*i, false); 867 added_since_rebuild_.clear(); 868 869 // Now handle deletions. 870 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_); 871 deleted_since_rebuild_.clear(); 872 873 // Send an update notification to all child processes. 874 listener_->NewTable(shared_memory_); 875 876 if (persist_to_disk_) 877 WriteFullTable(); 878 } 879 } 880 table_builder_ = NULL; // Will release our reference to the builder. 881 882 // Notify the unit test that the rebuild is complete (will be NULL in prod.) 883 if (!rebuild_complete_task_.is_null()) { 884 rebuild_complete_task_.Run(); 885 rebuild_complete_task_.Reset(); 886 } 887 } 888 889 void VisitedLinkMaster::WriteToFile(FILE** file, 890 off_t offset, 891 void* data, 892 int32 data_size) { 893 DCHECK(persist_to_disk_); 894 #ifndef NDEBUG 895 posted_asynchronous_operation_ = true; 896 #endif 897 PostIOTask(FROM_HERE, 898 base::Bind(&AsyncWrite, file, offset, 899 std::string(static_cast<const char*>(data), data_size))); 900 } 901 902 void VisitedLinkMaster::WriteUsedItemCountToFile() { 903 DCHECK(persist_to_disk_); 904 if (!file_) 905 return; // See comment on the file_ variable for why this might happen. 906 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); 907 } 908 909 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { 910 DCHECK(persist_to_disk_); 911 912 if (!file_) 913 return; // See comment on the file_ variable for why this might happen. 914 if (last_hash < first_hash) { 915 // Handle wraparound at 0. This first write is first_hash->EOF 916 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, 917 &hash_table_[first_hash], 918 (table_length_ - first_hash + 1) * sizeof(Fingerprint)); 919 920 // Now do 0->last_lash. 921 WriteToFile(file_, kFileHeaderSize, hash_table_, 922 (last_hash + 1) * sizeof(Fingerprint)); 923 } else { 924 // Normal case, just write the range. 925 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, 926 &hash_table_[first_hash], 927 (last_hash - first_hash + 1) * sizeof(Fingerprint)); 928 } 929 } 930 931 bool VisitedLinkMaster::ReadFromFile(FILE* file, 932 off_t offset, 933 void* data, 934 size_t data_size) { 935 DCHECK(persist_to_disk_); 936 #ifndef NDEBUG 937 // Since this function is synchronous, we require that no asynchronous 938 // operations could possibly be pending. 939 DCHECK(!posted_asynchronous_operation_); 940 #endif 941 942 if (fseek(file, offset, SEEK_SET) != 0) 943 return false; 944 945 size_t num_read = fread(data, 1, data_size, file); 946 return num_read == data_size; 947 } 948 949 // VisitedLinkTableBuilder ---------------------------------------------------- 950 951 VisitedLinkMaster::TableBuilder::TableBuilder( 952 VisitedLinkMaster* master, 953 const uint8 salt[LINK_SALT_LENGTH]) 954 : master_(master), 955 success_(true) { 956 fingerprints_.reserve(4096); 957 memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8)); 958 } 959 960 // TODO(brettw): Do we want to try to cancel the request if this happens? It 961 // could delay shutdown if there are a lot of URLs. 962 void VisitedLinkMaster::TableBuilder::DisownMaster() { 963 master_ = NULL; 964 } 965 966 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { 967 if (!url.is_empty()) { 968 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( 969 url.spec().data(), url.spec().length(), salt_)); 970 } 971 } 972 973 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { 974 success_ = success; 975 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; 976 977 // Marshal to the main thread to notify the VisitedLinkMaster that the 978 // rebuild is complete. 979 BrowserThread::PostTask( 980 BrowserThread::UI, FROM_HERE, 981 base::Bind(&TableBuilder::OnCompleteMainThread, this)); 982 } 983 984 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() { 985 if (master_) 986 master_->OnTableRebuildComplete(success_, fingerprints_); 987 } 988 989 } // namespace visitedlink 990