1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "components/visitedlink/browser/visitedlink_master.h" 6 7 #if defined(OS_WIN) 8 #include <windows.h> 9 #include <io.h> 10 #include <shlobj.h> 11 #endif // defined(OS_WIN) 12 #include <stdio.h> 13 14 #include <algorithm> 15 16 #include "base/bind.h" 17 #include "base/bind_helpers.h" 18 #include "base/containers/stack_container.h" 19 #include "base/file_util.h" 20 #include "base/logging.h" 21 #include "base/message_loop/message_loop.h" 22 #include "base/path_service.h" 23 #include "base/rand_util.h" 24 #include "base/strings/string_util.h" 25 #include "base/threading/thread_restrictions.h" 26 #include "components/visitedlink/browser/visitedlink_delegate.h" 27 #include "components/visitedlink/browser/visitedlink_event_listener.h" 28 #include "content/public/browser/browser_context.h" 29 #include "content/public/browser/browser_thread.h" 30 #include "url/gurl.h" 31 32 using content::BrowserThread; 33 using file_util::ScopedFILE; 34 35 namespace visitedlink { 36 37 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0; 38 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4; 39 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8; 40 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12; 41 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16; 42 43 const int32 VisitedLinkMaster::kFileCurrentVersion = 3; 44 45 // the signature at the beginning of the URL table = "VLnk" (visited links) 46 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56; 47 const size_t VisitedLinkMaster::kFileHeaderSize = 48 kFileHeaderSaltOffset + LINK_SALT_LENGTH; 49 50 // This value should also be the same as the smallest size in the lookup 51 // table in NewTableSizeForCount (prime number). 52 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381; 53 54 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64; 55 56 namespace { 57 58 // Fills the given salt structure with some quasi-random values 59 // It is not necessary to generate a cryptographically strong random string, 60 // only that it be reasonably different for different users. 61 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { 62 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; 63 uint64 randval = base::RandUint64(); 64 memcpy(salt, &randval, 8); 65 } 66 67 // Opens file on a background thread to not block UI thread. 68 void AsyncOpen(FILE** file, const base::FilePath& filename) { 69 *file = base::OpenFile(filename, "wb+"); 70 DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value(); 71 } 72 73 // Returns true if the write was complete. 74 static bool WriteToFile(FILE* file, 75 off_t offset, 76 const void* data, 77 size_t data_len) { 78 if (fseek(file, offset, SEEK_SET) != 0) 79 return false; // Don't write to an invalid part of the file. 80 81 size_t num_written = fwrite(data, 1, data_len, file); 82 83 // The write may not make it to the kernel (stdlib may buffer the write) 84 // until the next fseek/fclose call. If we crash, it's easy for our used 85 // item count to be out of sync with the number of hashes we write. 86 // Protect against this by calling fflush. 87 int ret = fflush(file); 88 DCHECK_EQ(0, ret); 89 return num_written == data_len; 90 } 91 92 // This task executes on a background thread and executes a write. This 93 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE 94 // is used because file may still not be opened by the time of scheduling 95 // the task for execution. 96 void AsyncWrite(FILE** file, int32 offset, const std::string& data) { 97 if (*file) 98 WriteToFile(*file, offset, data.data(), data.size()); 99 } 100 101 // Truncates the file to the current position asynchronously on a background 102 // thread. Double pointer to FILE is used because file may still not be opened 103 // by the time of scheduling the task for execution. 104 void AsyncTruncate(FILE** file) { 105 if (*file) 106 base::IgnoreResult(base::TruncateFile(*file)); 107 } 108 109 // Closes the file on a background thread and releases memory used for storage 110 // of FILE* value. Double pointer to FILE is used because file may still not 111 // be opened by the time of scheduling the task for execution. 112 void AsyncClose(FILE** file) { 113 if (*file) 114 base::IgnoreResult(fclose(*file)); 115 free(file); 116 } 117 118 } // namespace 119 120 // TableBuilder --------------------------------------------------------------- 121 122 // How rebuilding from history works 123 // --------------------------------- 124 // 125 // We mark that we're rebuilding from history by setting the table_builder_ 126 // member in VisitedLinkMaster to the TableBuilder we create. This builder 127 // will be called on the history thread by the history system for every URL 128 // in the database. 129 // 130 // The builder will store the fingerprints for those URLs, and then marshalls 131 // back to the main thread where the VisitedLinkMaster will be notified. The 132 // master then replaces its table with a new table containing the computed 133 // fingerprints. 134 // 135 // The builder must remain active while the history system is using it. 136 // Sometimes, the master will be deleted before the rebuild is complete, in 137 // which case it notifies the builder via DisownMaster(). The builder will 138 // delete itself once rebuilding is complete, and not execute any callback. 139 class VisitedLinkMaster::TableBuilder 140 : public VisitedLinkDelegate::URLEnumerator { 141 public: 142 TableBuilder(VisitedLinkMaster* master, 143 const uint8 salt[LINK_SALT_LENGTH]); 144 145 // Called on the main thread when the master is being destroyed. This will 146 // prevent a crash when the query completes and the master is no longer 147 // around. We can not actually do anything but mark this fact, since the 148 // table will be being rebuilt simultaneously on the other thread. 149 void DisownMaster(); 150 151 // VisitedLinkDelegate::URLEnumerator 152 virtual void OnURL(const GURL& url) OVERRIDE; 153 virtual void OnComplete(bool succeed) OVERRIDE; 154 155 private: 156 virtual ~TableBuilder() {} 157 158 // OnComplete mashals to this function on the main thread to do the 159 // notification. 160 void OnCompleteMainThread(); 161 162 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD! 163 VisitedLinkMaster* master_; 164 165 // Indicates whether the operation has failed or not. 166 bool success_; 167 168 // Salt for this new table. 169 uint8 salt_[LINK_SALT_LENGTH]; 170 171 // Stores the fingerprints we computed on the background thread. 172 VisitedLinkCommon::Fingerprints fingerprints_; 173 174 DISALLOW_COPY_AND_ASSIGN(TableBuilder); 175 }; 176 177 // VisitedLinkMaster ---------------------------------------------------------- 178 179 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context, 180 VisitedLinkDelegate* delegate, 181 bool persist_to_disk) 182 : browser_context_(browser_context), 183 delegate_(delegate), 184 listener_(new VisitedLinkEventListener(this, browser_context)), 185 persist_to_disk_(persist_to_disk) { 186 InitMembers(); 187 } 188 189 VisitedLinkMaster::VisitedLinkMaster(Listener* listener, 190 VisitedLinkDelegate* delegate, 191 bool persist_to_disk, 192 bool suppress_rebuild, 193 const base::FilePath& filename, 194 int32 default_table_size) 195 : browser_context_(NULL), 196 delegate_(delegate), 197 persist_to_disk_(persist_to_disk) { 198 listener_.reset(listener); 199 DCHECK(listener_.get()); 200 InitMembers(); 201 202 database_name_override_ = filename; 203 table_size_override_ = default_table_size; 204 suppress_rebuild_ = suppress_rebuild; 205 } 206 207 VisitedLinkMaster::~VisitedLinkMaster() { 208 if (table_builder_.get()) { 209 // Prevent the table builder from calling us back now that we're being 210 // destroyed. Note that we DON'T delete the object, since the history 211 // system is still writing into it. When that is complete, the table 212 // builder will destroy itself when it finds we are gone. 213 table_builder_->DisownMaster(); 214 } 215 FreeURLTable(); 216 // FreeURLTable() will schedule closing of the file and deletion of |file_|. 217 // So nothing should be done here. 218 } 219 220 void VisitedLinkMaster::InitMembers() { 221 file_ = NULL; 222 shared_memory_ = NULL; 223 shared_memory_serial_ = 0; 224 used_items_ = 0; 225 table_size_override_ = 0; 226 suppress_rebuild_ = false; 227 sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken(); 228 229 #ifndef NDEBUG 230 posted_asynchronous_operation_ = false; 231 #endif 232 } 233 234 bool VisitedLinkMaster::Init() { 235 // We probably shouldn't be loading this from the UI thread, 236 // but it does need to happen early on in startup. 237 // http://code.google.com/p/chromium/issues/detail?id=24163 238 base::ThreadRestrictions::ScopedAllowIO allow_io; 239 240 if (persist_to_disk_) { 241 if (InitFromFile()) 242 return true; 243 } 244 return InitFromScratch(suppress_rebuild_); 245 } 246 247 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) { 248 // Extra check that we are not incognito. This should not happen. 249 // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is 250 // removed from BrowserContext. 251 if (browser_context_ && browser_context_->IsOffTheRecord()) { 252 NOTREACHED(); 253 return null_hash_; 254 } 255 256 if (!url.is_valid()) 257 return null_hash_; // Don't add invalid URLs. 258 259 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(), 260 url.spec().size(), 261 salt_); 262 if (table_builder_.get()) { 263 // If we have a pending delete for this fingerprint, cancel it. 264 std::set<Fingerprint>::iterator found = 265 deleted_since_rebuild_.find(fingerprint); 266 if (found != deleted_since_rebuild_.end()) 267 deleted_since_rebuild_.erase(found); 268 269 // A rebuild is in progress, save this addition in the temporary list so 270 // it can be added once rebuild is complete. 271 added_since_rebuild_.insert(fingerprint); 272 } 273 274 // If the table is "full", we don't add URLs and just drop them on the floor. 275 // This can happen if we get thousands of new URLs and something causes 276 // the table resizing to fail. This check prevents a hang in that case. Note 277 // that this is *not* the resize limit, this is just a sanity check. 278 if (used_items_ / 8 > table_length_ / 10) 279 return null_hash_; // Table is more than 80% full. 280 281 return AddFingerprint(fingerprint, true); 282 } 283 284 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here, 285 const base::Closure& task) { 286 DCHECK(persist_to_disk_); 287 BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_, 288 from_here, task); 289 } 290 291 void VisitedLinkMaster::AddURL(const GURL& url) { 292 Hash index = TryToAddURL(url); 293 if (!table_builder_.get() && index != null_hash_) { 294 // Not rebuilding, so we want to keep the file on disk up-to-date. 295 if (persist_to_disk_) { 296 WriteUsedItemCountToFile(); 297 WriteHashRangeToFile(index, index); 298 } 299 ResizeTableIfNecessary(); 300 } 301 } 302 303 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { 304 for (std::vector<GURL>::const_iterator i = url.begin(); 305 i != url.end(); ++i) { 306 Hash index = TryToAddURL(*i); 307 if (!table_builder_.get() && index != null_hash_) 308 ResizeTableIfNecessary(); 309 } 310 311 // Keeps the file on disk up-to-date. 312 if (!table_builder_.get() && persist_to_disk_) 313 WriteFullTable(); 314 } 315 316 void VisitedLinkMaster::DeleteAllURLs() { 317 // Any pending modifications are invalid. 318 added_since_rebuild_.clear(); 319 deleted_since_rebuild_.clear(); 320 321 // Clear the hash table. 322 used_items_ = 0; 323 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); 324 325 // Resize it if it is now too empty. Resize may write the new table out for 326 // us, otherwise, schedule writing the new table to disk ourselves. 327 if (!ResizeTableIfNecessary() && persist_to_disk_) 328 WriteFullTable(); 329 330 listener_->Reset(); 331 } 332 333 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() { 334 return delegate_; 335 } 336 337 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) { 338 if (!urls->HasNextURL()) 339 return; 340 341 listener_->Reset(); 342 343 if (table_builder_.get()) { 344 // A rebuild is in progress, save this deletion in the temporary list so 345 // it can be added once rebuild is complete. 346 while (urls->HasNextURL()) { 347 const GURL& url(urls->NextURL()); 348 if (!url.is_valid()) 349 continue; 350 351 Fingerprint fingerprint = 352 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_); 353 deleted_since_rebuild_.insert(fingerprint); 354 355 // If the URL was just added and now we're deleting it, it may be in the 356 // list of things added since the last rebuild. Delete it from that list. 357 std::set<Fingerprint>::iterator found = 358 added_since_rebuild_.find(fingerprint); 359 if (found != added_since_rebuild_.end()) 360 added_since_rebuild_.erase(found); 361 362 // Delete the URLs from the in-memory table, but don't bother writing 363 // to disk since it will be replaced soon. 364 DeleteFingerprint(fingerprint, false); 365 } 366 return; 367 } 368 369 // Compute the deleted URLs' fingerprints and delete them 370 std::set<Fingerprint> deleted_fingerprints; 371 while (urls->HasNextURL()) { 372 const GURL& url(urls->NextURL()); 373 if (!url.is_valid()) 374 continue; 375 deleted_fingerprints.insert( 376 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_)); 377 } 378 DeleteFingerprintsFromCurrentTable(deleted_fingerprints); 379 } 380 381 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm 382 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint( 383 Fingerprint fingerprint, 384 bool send_notifications) { 385 if (!hash_table_ || table_length_ == 0) { 386 NOTREACHED(); // Not initialized. 387 return null_hash_; 388 } 389 390 Hash cur_hash = HashFingerprint(fingerprint); 391 Hash first_hash = cur_hash; 392 while (true) { 393 Fingerprint cur_fingerprint = FingerprintAt(cur_hash); 394 if (cur_fingerprint == fingerprint) 395 return null_hash_; // This fingerprint is already in there, do nothing. 396 397 if (cur_fingerprint == null_fingerprint_) { 398 // End of probe sequence found, insert here. 399 hash_table_[cur_hash] = fingerprint; 400 used_items_++; 401 // If allowed, notify listener that a new visited link was added. 402 if (send_notifications) 403 listener_->Add(fingerprint); 404 return cur_hash; 405 } 406 407 // Advance in the probe sequence. 408 cur_hash = IncrementHash(cur_hash); 409 if (cur_hash == first_hash) { 410 // This means that we've wrapped around and are about to go into an 411 // infinite loop. Something was wrong with the hashtable resizing 412 // logic, so stop here. 413 NOTREACHED(); 414 return null_hash_; 415 } 416 } 417 } 418 419 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( 420 const std::set<Fingerprint>& fingerprints) { 421 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); 422 423 // Delete the URLs from the table. 424 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); 425 i != fingerprints.end(); ++i) 426 DeleteFingerprint(*i, !bulk_write); 427 428 // These deleted fingerprints may make us shrink the table. 429 if (ResizeTableIfNecessary()) 430 return; // The resize function wrote the new table to disk for us. 431 432 // Nobody wrote this out for us, write the full file to disk. 433 if (bulk_write && persist_to_disk_) 434 WriteFullTable(); 435 } 436 437 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint, 438 bool update_file) { 439 if (!hash_table_ || table_length_ == 0) { 440 NOTREACHED(); // Not initialized. 441 return false; 442 } 443 if (!IsVisited(fingerprint)) 444 return false; // Not in the database to delete. 445 446 // First update the header used count. 447 used_items_--; 448 if (update_file && persist_to_disk_) 449 WriteUsedItemCountToFile(); 450 451 Hash deleted_hash = HashFingerprint(fingerprint); 452 453 // Find the range of "stuff" in the hash table that is adjacent to this 454 // fingerprint. These are things that could be affected by the change in 455 // the hash table. Since we use linear probing, anything after the deleted 456 // item up until an empty item could be affected. 457 Hash end_range = deleted_hash; 458 while (true) { 459 Hash next_hash = IncrementHash(end_range); 460 if (next_hash == deleted_hash) 461 break; // We wrapped around and the whole table is full. 462 if (!hash_table_[next_hash]) 463 break; // Found the last spot. 464 end_range = next_hash; 465 } 466 467 // We could get all fancy and move the affected fingerprints around, but 468 // instead we just remove them all and re-add them (minus our deleted one). 469 // This will mean there's a small window of time where the affected links 470 // won't be marked visited. 471 base::StackVector<Fingerprint, 32> shuffled_fingerprints; 472 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive. 473 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) { 474 if (hash_table_[i] != fingerprint) { 475 // Don't save the one we're deleting! 476 shuffled_fingerprints->push_back(hash_table_[i]); 477 478 // This will balance the increment of this value in AddFingerprint below 479 // so there is no net change. 480 used_items_--; 481 } 482 hash_table_[i] = null_fingerprint_; 483 } 484 485 if (!shuffled_fingerprints->empty()) { 486 // Need to add the new items back. 487 for (size_t i = 0; i < shuffled_fingerprints->size(); i++) 488 AddFingerprint(shuffled_fingerprints[i], false); 489 } 490 491 // Write the affected range to disk [deleted_hash, end_range]. 492 if (update_file && persist_to_disk_) 493 WriteHashRangeToFile(deleted_hash, end_range); 494 495 return true; 496 } 497 498 void VisitedLinkMaster::WriteFullTable() { 499 // This function can get called when the file is open, for example, when we 500 // resize the table. We must handle this case and not try to reopen the file, 501 // since there may be write operations pending on the file I/O thread. 502 // 503 // Note that once we start writing, we do not delete on error. This means 504 // there can be a partial file, but the short file will be detected next time 505 // we start, and will be replaced. 506 // 507 // This might possibly get corrupted if we crash in the middle of writing. 508 // We should pick up the most common types of these failures when we notice 509 // that the file size is different when we load it back in, and then we will 510 // regenerate the table. 511 DCHECK(persist_to_disk_); 512 513 if (!file_) { 514 file_ = static_cast<FILE**>(calloc(1, sizeof(*file_))); 515 base::FilePath filename; 516 GetDatabaseFileName(&filename); 517 PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename)); 518 } 519 520 // Write the new header. 521 int32 header[4]; 522 header[0] = kFileSignature; 523 header[1] = kFileCurrentVersion; 524 header[2] = table_length_; 525 header[3] = used_items_; 526 WriteToFile(file_, 0, header, sizeof(header)); 527 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH); 528 529 // Write the hash data. 530 WriteToFile(file_, kFileHeaderSize, 531 hash_table_, table_length_ * sizeof(Fingerprint)); 532 533 // The hash table may have shrunk, so make sure this is the end. 534 PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_)); 535 } 536 537 bool VisitedLinkMaster::InitFromFile() { 538 DCHECK(file_ == NULL); 539 DCHECK(persist_to_disk_); 540 541 base::FilePath filename; 542 GetDatabaseFileName(&filename); 543 ScopedFILE file_closer(base::OpenFile(filename, "rb+")); 544 if (!file_closer.get()) 545 return false; 546 547 int32 num_entries, used_count; 548 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_)) 549 return false; // Header isn't valid. 550 551 // Allocate and read the table. 552 if (!CreateURLTable(num_entries, false)) 553 return false; 554 if (!ReadFromFile(file_closer.get(), kFileHeaderSize, 555 hash_table_, num_entries * sizeof(Fingerprint))) { 556 FreeURLTable(); 557 return false; 558 } 559 used_items_ = used_count; 560 561 #ifndef NDEBUG 562 DebugValidate(); 563 #endif 564 565 file_ = static_cast<FILE**>(malloc(sizeof(*file_))); 566 *file_ = file_closer.release(); 567 return true; 568 } 569 570 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { 571 int32 table_size = kDefaultTableSize; 572 if (table_size_override_) 573 table_size = table_size_override_; 574 575 // The salt must be generated before the table so that it can be copied to 576 // the shared memory. 577 GenerateSalt(salt_); 578 if (!CreateURLTable(table_size, true)) 579 return false; 580 581 #ifndef NDEBUG 582 DebugValidate(); 583 #endif 584 585 if (suppress_rebuild && persist_to_disk_) { 586 // When we disallow rebuilds (normally just unit tests), just use the 587 // current empty table. 588 WriteFullTable(); 589 return true; 590 } 591 592 // This will build the table from history. On the first run, history will 593 // be empty, so this will be correct. This will also write the new table 594 // to disk. We don't want to save explicitly here, since the rebuild may 595 // not complete, leaving us with an empty but valid visited link database. 596 // In the future, we won't know we need to try rebuilding again. 597 return RebuildTableFromDelegate(); 598 } 599 600 bool VisitedLinkMaster::ReadFileHeader(FILE* file, 601 int32* num_entries, 602 int32* used_count, 603 uint8 salt[LINK_SALT_LENGTH]) { 604 DCHECK(persist_to_disk_); 605 606 // Get file size. 607 // Note that there is no need to seek back to the original location in the 608 // file since ReadFromFile() [which is the next call accessing the file] 609 // seeks before reading. 610 if (fseek(file, 0, SEEK_END) == -1) 611 return false; 612 size_t file_size = ftell(file); 613 614 if (file_size <= kFileHeaderSize) 615 return false; 616 617 uint8 header[kFileHeaderSize]; 618 if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) 619 return false; 620 621 // Verify the signature. 622 int32 signature; 623 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); 624 if (signature != kFileSignature) 625 return false; 626 627 // Verify the version is up-to-date. As with other read errors, a version 628 // mistmatch will trigger a rebuild of the database from history, which will 629 // have the effect of migrating the database. 630 int32 version; 631 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); 632 if (version != kFileCurrentVersion) 633 return false; // Bad version. 634 635 // Read the table size and make sure it matches the file size. 636 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); 637 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) 638 return false; // Bad size. 639 640 // Read the used item count. 641 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); 642 if (*used_count > *num_entries) 643 return false; // Bad used item count; 644 645 // Read the salt. 646 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); 647 648 // This file looks OK from the header's perspective. 649 return true; 650 } 651 652 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) { 653 if (!database_name_override_.empty()) { 654 // use this filename, the directory must exist 655 *filename = database_name_override_; 656 return true; 657 } 658 659 if (!browser_context_ || browser_context_->GetPath().empty()) 660 return false; 661 662 base::FilePath profile_dir = browser_context_->GetPath(); 663 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); 664 return true; 665 } 666 667 // Initializes the shared memory structure. The salt should already be filled 668 // in so that it can be written to the shared memory 669 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { 670 // The table is the size of the table followed by the entries. 671 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); 672 673 // Create the shared memory object. 674 shared_memory_ = new base::SharedMemory(); 675 if (!shared_memory_) 676 return false; 677 678 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) { 679 delete shared_memory_; 680 shared_memory_ = NULL; 681 return false; 682 } 683 684 if (init_to_empty) { 685 memset(shared_memory_->memory(), 0, alloc_size); 686 used_items_ = 0; 687 } 688 table_length_ = num_entries; 689 690 // Save the header for other processes to read. 691 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); 692 header->length = table_length_; 693 memcpy(header->salt, salt_, LINK_SALT_LENGTH); 694 695 // Our table pointer is just the data immediately following the size. 696 hash_table_ = reinterpret_cast<Fingerprint*>( 697 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); 698 699 return true; 700 } 701 702 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { 703 base::SharedMemory *old_shared_memory = shared_memory_; 704 Fingerprint* old_hash_table = hash_table_; 705 int32 old_table_length = table_length_; 706 if (!CreateURLTable(num_entries, true)) { 707 // Try to put back the old state. 708 shared_memory_ = old_shared_memory; 709 hash_table_ = old_hash_table; 710 table_length_ = old_table_length; 711 return false; 712 } 713 714 #ifndef NDEBUG 715 DebugValidate(); 716 #endif 717 718 return true; 719 } 720 721 void VisitedLinkMaster::FreeURLTable() { 722 if (shared_memory_) { 723 delete shared_memory_; 724 shared_memory_ = NULL; 725 } 726 if (!persist_to_disk_ || !file_) 727 return; 728 PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_)); 729 // AsyncClose() will close the file and free the memory pointed by |file_|. 730 file_ = NULL; 731 } 732 733 bool VisitedLinkMaster::ResizeTableIfNecessary() { 734 DCHECK(table_length_ > 0) << "Must have a table"; 735 736 // Load limits for good performance/space. We are pretty conservative about 737 // keeping the table not very full. This is because we use linear probing 738 // which increases the likelihood of clumps of entries which will reduce 739 // performance. 740 const float max_table_load = 0.5f; // Grow when we're > this full. 741 const float min_table_load = 0.2f; // Shrink when we're < this full. 742 743 float load = ComputeTableLoad(); 744 if (load < max_table_load && 745 (table_length_ <= static_cast<float>(kDefaultTableSize) || 746 load > min_table_load)) 747 return false; 748 749 // Table needs to grow or shrink. 750 int new_size = NewTableSizeForCount(used_items_); 751 DCHECK(new_size > used_items_); 752 DCHECK(load <= min_table_load || new_size > table_length_); 753 ResizeTable(new_size); 754 return true; 755 } 756 757 void VisitedLinkMaster::ResizeTable(int32 new_size) { 758 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_); 759 shared_memory_serial_++; 760 761 #ifndef NDEBUG 762 DebugValidate(); 763 #endif 764 765 base::SharedMemory* old_shared_memory = shared_memory_; 766 Fingerprint* old_hash_table = hash_table_; 767 int32 old_table_length = table_length_; 768 if (!BeginReplaceURLTable(new_size)) 769 return; 770 771 // Now we have two tables, our local copy which is the old one, and the new 772 // one loaded into this object where we need to copy the data. 773 for (int32 i = 0; i < old_table_length; i++) { 774 Fingerprint cur = old_hash_table[i]; 775 if (cur) 776 AddFingerprint(cur, false); 777 } 778 779 // On error unmapping, just forget about it since we can't do anything 780 // else to release it. 781 delete old_shared_memory; 782 783 // Send an update notification to all child processes so they read the new 784 // table. 785 listener_->NewTable(shared_memory_); 786 787 #ifndef NDEBUG 788 DebugValidate(); 789 #endif 790 791 // The new table needs to be written to disk. 792 if (persist_to_disk_) 793 WriteFullTable(); 794 } 795 796 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { 797 // These table sizes are selected to be the maximum prime number less than 798 // a "convenient" multiple of 1K. 799 static const int table_sizes[] = { 800 16381, // 16K = 16384 <- don't shrink below this table size 801 // (should be == default_table_size) 802 32767, // 32K = 32768 803 65521, // 64K = 65536 804 130051, // 128K = 131072 805 262127, // 256K = 262144 806 524269, // 512K = 524288 807 1048549, // 1M = 1048576 808 2097143, // 2M = 2097152 809 4194301, // 4M = 4194304 810 8388571, // 8M = 8388608 811 16777199, // 16M = 16777216 812 33554347}; // 32M = 33554432 813 814 // Try to leave the table 33% full. 815 int desired = item_count * 3; 816 817 // Find the closest prime. 818 for (size_t i = 0; i < arraysize(table_sizes); i ++) { 819 if (table_sizes[i] > desired) 820 return table_sizes[i]; 821 } 822 823 // Growing very big, just approximate a "good" number, not growing as much 824 // as normal. 825 return item_count * 2 - 1; 826 } 827 828 // See the TableBuilder definition in the header file for how this works. 829 bool VisitedLinkMaster::RebuildTableFromDelegate() { 830 DCHECK(!table_builder_.get()); 831 832 // TODO(brettw) make sure we have reasonable salt! 833 table_builder_ = new TableBuilder(this, salt_); 834 delegate_->RebuildTable(table_builder_); 835 return true; 836 } 837 838 // See the TableBuilder declaration above for how this works. 839 void VisitedLinkMaster::OnTableRebuildComplete( 840 bool success, 841 const std::vector<Fingerprint>& fingerprints) { 842 if (success) { 843 // Replace the old table with a new blank one. 844 shared_memory_serial_++; 845 846 // We are responsible for freeing it AFTER it has been replaced if 847 // replacement succeeds. 848 base::SharedMemory* old_shared_memory = shared_memory_; 849 850 int new_table_size = NewTableSizeForCount( 851 static_cast<int>(fingerprints.size() + added_since_rebuild_.size())); 852 if (BeginReplaceURLTable(new_table_size)) { 853 // Free the old table. 854 delete old_shared_memory; 855 856 // Add the stored fingerprints to the hash table. 857 for (size_t i = 0; i < fingerprints.size(); i++) 858 AddFingerprint(fingerprints[i], false); 859 860 // Also add anything that was added while we were asynchronously 861 // generating the new table. 862 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin(); 863 i != added_since_rebuild_.end(); ++i) 864 AddFingerprint(*i, false); 865 added_since_rebuild_.clear(); 866 867 // Now handle deletions. 868 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_); 869 deleted_since_rebuild_.clear(); 870 871 // Send an update notification to all child processes. 872 listener_->NewTable(shared_memory_); 873 874 if (persist_to_disk_) 875 WriteFullTable(); 876 } 877 } 878 table_builder_ = NULL; // Will release our reference to the builder. 879 880 // Notify the unit test that the rebuild is complete (will be NULL in prod.) 881 if (!rebuild_complete_task_.is_null()) { 882 rebuild_complete_task_.Run(); 883 rebuild_complete_task_.Reset(); 884 } 885 } 886 887 void VisitedLinkMaster::WriteToFile(FILE** file, 888 off_t offset, 889 void* data, 890 int32 data_size) { 891 DCHECK(persist_to_disk_); 892 #ifndef NDEBUG 893 posted_asynchronous_operation_ = true; 894 #endif 895 PostIOTask(FROM_HERE, 896 base::Bind(&AsyncWrite, file, offset, 897 std::string(static_cast<const char*>(data), data_size))); 898 } 899 900 void VisitedLinkMaster::WriteUsedItemCountToFile() { 901 DCHECK(persist_to_disk_); 902 if (!file_) 903 return; // See comment on the file_ variable for why this might happen. 904 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); 905 } 906 907 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { 908 DCHECK(persist_to_disk_); 909 910 if (!file_) 911 return; // See comment on the file_ variable for why this might happen. 912 if (last_hash < first_hash) { 913 // Handle wraparound at 0. This first write is first_hash->EOF 914 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, 915 &hash_table_[first_hash], 916 (table_length_ - first_hash + 1) * sizeof(Fingerprint)); 917 918 // Now do 0->last_lash. 919 WriteToFile(file_, kFileHeaderSize, hash_table_, 920 (last_hash + 1) * sizeof(Fingerprint)); 921 } else { 922 // Normal case, just write the range. 923 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, 924 &hash_table_[first_hash], 925 (last_hash - first_hash + 1) * sizeof(Fingerprint)); 926 } 927 } 928 929 bool VisitedLinkMaster::ReadFromFile(FILE* file, 930 off_t offset, 931 void* data, 932 size_t data_size) { 933 DCHECK(persist_to_disk_); 934 #ifndef NDEBUG 935 // Since this function is synchronous, we require that no asynchronous 936 // operations could possibly be pending. 937 DCHECK(!posted_asynchronous_operation_); 938 #endif 939 940 if (fseek(file, offset, SEEK_SET) != 0) 941 return false; 942 943 size_t num_read = fread(data, 1, data_size, file); 944 return num_read == data_size; 945 } 946 947 // VisitedLinkTableBuilder ---------------------------------------------------- 948 949 VisitedLinkMaster::TableBuilder::TableBuilder( 950 VisitedLinkMaster* master, 951 const uint8 salt[LINK_SALT_LENGTH]) 952 : master_(master), 953 success_(true) { 954 fingerprints_.reserve(4096); 955 memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8)); 956 } 957 958 // TODO(brettw): Do we want to try to cancel the request if this happens? It 959 // could delay shutdown if there are a lot of URLs. 960 void VisitedLinkMaster::TableBuilder::DisownMaster() { 961 master_ = NULL; 962 } 963 964 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { 965 if (!url.is_empty()) { 966 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( 967 url.spec().data(), url.spec().length(), salt_)); 968 } 969 } 970 971 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { 972 success_ = success; 973 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; 974 975 // Marshal to the main thread to notify the VisitedLinkMaster that the 976 // rebuild is complete. 977 BrowserThread::PostTask( 978 BrowserThread::UI, FROM_HERE, 979 base::Bind(&TableBuilder::OnCompleteMainThread, this)); 980 } 981 982 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() { 983 if (master_) 984 master_->OnTableRebuildComplete(success_, fingerprints_); 985 } 986 987 } // namespace visitedlink 988