1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "components/visitedlink/browser/visitedlink_master.h" 6 7 #if defined(OS_WIN) 8 #include <windows.h> 9 #include <io.h> 10 #include <shlobj.h> 11 #endif // defined(OS_WIN) 12 #include <stdio.h> 13 14 #include <algorithm> 15 16 #include "base/bind.h" 17 #include "base/bind_helpers.h" 18 #include "base/containers/stack_container.h" 19 #include "base/files/file_util.h" 20 #include "base/files/scoped_file.h" 21 #include "base/logging.h" 22 #include "base/message_loop/message_loop.h" 23 #include "base/path_service.h" 24 #include "base/rand_util.h" 25 #include "base/strings/string_util.h" 26 #include "base/threading/thread_restrictions.h" 27 #include "components/visitedlink/browser/visitedlink_delegate.h" 28 #include "components/visitedlink/browser/visitedlink_event_listener.h" 29 #include "content/public/browser/browser_context.h" 30 #include "content/public/browser/browser_thread.h" 31 #include "url/gurl.h" 32 33 using content::BrowserThread; 34 35 namespace visitedlink { 36 37 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0; 38 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4; 39 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8; 40 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12; 41 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16; 42 43 const int32 VisitedLinkMaster::kFileCurrentVersion = 3; 44 45 // the signature at the beginning of the URL table = "VLnk" (visited links) 46 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56; 47 const size_t VisitedLinkMaster::kFileHeaderSize = 48 kFileHeaderSaltOffset + LINK_SALT_LENGTH; 49 50 // This value should also be the same as the smallest size in the lookup 51 // table in NewTableSizeForCount (prime number). 52 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381; 53 54 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64; 55 56 namespace { 57 58 // Fills the given salt structure with some quasi-random values 59 // It is not necessary to generate a cryptographically strong random string, 60 // only that it be reasonably different for different users. 61 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { 62 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; 63 uint64 randval = base::RandUint64(); 64 memcpy(salt, &randval, 8); 65 } 66 67 // Opens file on a background thread to not block UI thread. 68 void AsyncOpen(FILE** file, const base::FilePath& filename) { 69 *file = base::OpenFile(filename, "wb+"); 70 DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value(); 71 } 72 73 // Returns true if the write was complete. 74 static bool WriteToFile(FILE* file, 75 off_t offset, 76 const void* data, 77 size_t data_len) { 78 if (fseek(file, offset, SEEK_SET) != 0) 79 return false; // Don't write to an invalid part of the file. 80 81 size_t num_written = fwrite(data, 1, data_len, file); 82 83 // The write may not make it to the kernel (stdlib may buffer the write) 84 // until the next fseek/fclose call. If we crash, it's easy for our used 85 // item count to be out of sync with the number of hashes we write. 86 // Protect against this by calling fflush. 87 int ret = fflush(file); 88 DCHECK_EQ(0, ret); 89 return num_written == data_len; 90 } 91 92 // This task executes on a background thread and executes a write. This 93 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE 94 // is used because file may still not be opened by the time of scheduling 95 // the task for execution. 96 void AsyncWrite(FILE** file, int32 offset, const std::string& data) { 97 if (*file) 98 WriteToFile(*file, offset, data.data(), data.size()); 99 } 100 101 // Truncates the file to the current position asynchronously on a background 102 // thread. Double pointer to FILE is used because file may still not be opened 103 // by the time of scheduling the task for execution. 104 void AsyncTruncate(FILE** file) { 105 if (*file) 106 base::IgnoreResult(base::TruncateFile(*file)); 107 } 108 109 // Closes the file on a background thread and releases memory used for storage 110 // of FILE* value. Double pointer to FILE is used because file may still not 111 // be opened by the time of scheduling the task for execution. 112 void AsyncClose(FILE** file) { 113 if (*file) 114 base::IgnoreResult(fclose(*file)); 115 free(file); 116 } 117 118 } // namespace 119 120 // TableBuilder --------------------------------------------------------------- 121 122 // How rebuilding from history works 123 // --------------------------------- 124 // 125 // We mark that we're rebuilding from history by setting the table_builder_ 126 // member in VisitedLinkMaster to the TableBuilder we create. This builder 127 // will be called on the history thread by the history system for every URL 128 // in the database. 129 // 130 // The builder will store the fingerprints for those URLs, and then marshalls 131 // back to the main thread where the VisitedLinkMaster will be notified. The 132 // master then replaces its table with a new table containing the computed 133 // fingerprints. 134 // 135 // The builder must remain active while the history system is using it. 136 // Sometimes, the master will be deleted before the rebuild is complete, in 137 // which case it notifies the builder via DisownMaster(). The builder will 138 // delete itself once rebuilding is complete, and not execute any callback. 139 class VisitedLinkMaster::TableBuilder 140 : public VisitedLinkDelegate::URLEnumerator { 141 public: 142 TableBuilder(VisitedLinkMaster* master, 143 const uint8 salt[LINK_SALT_LENGTH]); 144 145 // Called on the main thread when the master is being destroyed. This will 146 // prevent a crash when the query completes and the master is no longer 147 // around. We can not actually do anything but mark this fact, since the 148 // table will be being rebuilt simultaneously on the other thread. 149 void DisownMaster(); 150 151 // VisitedLinkDelegate::URLEnumerator 152 virtual void OnURL(const GURL& url) OVERRIDE; 153 virtual void OnComplete(bool succeed) OVERRIDE; 154 155 private: 156 virtual ~TableBuilder() {} 157 158 // OnComplete mashals to this function on the main thread to do the 159 // notification. 160 void OnCompleteMainThread(); 161 162 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD! 163 VisitedLinkMaster* master_; 164 165 // Indicates whether the operation has failed or not. 166 bool success_; 167 168 // Salt for this new table. 169 uint8 salt_[LINK_SALT_LENGTH]; 170 171 // Stores the fingerprints we computed on the background thread. 172 VisitedLinkCommon::Fingerprints fingerprints_; 173 174 DISALLOW_COPY_AND_ASSIGN(TableBuilder); 175 }; 176 177 // VisitedLinkMaster ---------------------------------------------------------- 178 179 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context, 180 VisitedLinkDelegate* delegate, 181 bool persist_to_disk) 182 : browser_context_(browser_context), 183 delegate_(delegate), 184 listener_(new VisitedLinkEventListener(this, browser_context)), 185 persist_to_disk_(persist_to_disk) { 186 InitMembers(); 187 } 188 189 VisitedLinkMaster::VisitedLinkMaster(Listener* listener, 190 VisitedLinkDelegate* delegate, 191 bool persist_to_disk, 192 bool suppress_rebuild, 193 const base::FilePath& filename, 194 int32 default_table_size) 195 : browser_context_(NULL), 196 delegate_(delegate), 197 persist_to_disk_(persist_to_disk) { 198 listener_.reset(listener); 199 DCHECK(listener_.get()); 200 InitMembers(); 201 202 database_name_override_ = filename; 203 table_size_override_ = default_table_size; 204 suppress_rebuild_ = suppress_rebuild; 205 } 206 207 VisitedLinkMaster::~VisitedLinkMaster() { 208 if (table_builder_.get()) { 209 // Prevent the table builder from calling us back now that we're being 210 // destroyed. Note that we DON'T delete the object, since the history 211 // system is still writing into it. When that is complete, the table 212 // builder will destroy itself when it finds we are gone. 213 table_builder_->DisownMaster(); 214 } 215 FreeURLTable(); 216 // FreeURLTable() will schedule closing of the file and deletion of |file_|. 217 // So nothing should be done here. 218 } 219 220 void VisitedLinkMaster::InitMembers() { 221 file_ = NULL; 222 shared_memory_ = NULL; 223 shared_memory_serial_ = 0; 224 used_items_ = 0; 225 table_size_override_ = 0; 226 suppress_rebuild_ = false; 227 sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken(); 228 229 #ifndef NDEBUG 230 posted_asynchronous_operation_ = false; 231 #endif 232 } 233 234 bool VisitedLinkMaster::Init() { 235 // We probably shouldn't be loading this from the UI thread, 236 // but it does need to happen early on in startup. 237 // http://code.google.com/p/chromium/issues/detail?id=24163 238 base::ThreadRestrictions::ScopedAllowIO allow_io; 239 240 if (persist_to_disk_) { 241 if (InitFromFile()) 242 return true; 243 } 244 return InitFromScratch(suppress_rebuild_); 245 } 246 247 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) { 248 // Extra check that we are not incognito. This should not happen. 249 // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is 250 // removed from BrowserContext. 251 if (browser_context_ && browser_context_->IsOffTheRecord()) { 252 NOTREACHED(); 253 return null_hash_; 254 } 255 256 if (!url.is_valid()) 257 return null_hash_; // Don't add invalid URLs. 258 259 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(), 260 url.spec().size(), 261 salt_); 262 if (table_builder_.get()) { 263 // If we have a pending delete for this fingerprint, cancel it. 264 std::set<Fingerprint>::iterator found = 265 deleted_since_rebuild_.find(fingerprint); 266 if (found != deleted_since_rebuild_.end()) 267 deleted_since_rebuild_.erase(found); 268 269 // A rebuild is in progress, save this addition in the temporary list so 270 // it can be added once rebuild is complete. 271 added_since_rebuild_.insert(fingerprint); 272 } 273 274 // If the table is "full", we don't add URLs and just drop them on the floor. 275 // This can happen if we get thousands of new URLs and something causes 276 // the table resizing to fail. This check prevents a hang in that case. Note 277 // that this is *not* the resize limit, this is just a sanity check. 278 if (used_items_ / 8 > table_length_ / 10) 279 return null_hash_; // Table is more than 80% full. 280 281 return AddFingerprint(fingerprint, true); 282 } 283 284 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here, 285 const base::Closure& task) { 286 DCHECK(persist_to_disk_); 287 BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_, 288 from_here, task); 289 } 290 291 void VisitedLinkMaster::AddURL(const GURL& url) { 292 Hash index = TryToAddURL(url); 293 if (!table_builder_.get() && index != null_hash_) { 294 // Not rebuilding, so we want to keep the file on disk up-to-date. 295 if (persist_to_disk_) { 296 WriteUsedItemCountToFile(); 297 WriteHashRangeToFile(index, index); 298 } 299 ResizeTableIfNecessary(); 300 } 301 } 302 303 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { 304 for (std::vector<GURL>::const_iterator i = url.begin(); 305 i != url.end(); ++i) { 306 Hash index = TryToAddURL(*i); 307 if (!table_builder_.get() && index != null_hash_) 308 ResizeTableIfNecessary(); 309 } 310 311 // Keeps the file on disk up-to-date. 312 if (!table_builder_.get() && persist_to_disk_) 313 WriteFullTable(); 314 } 315 316 void VisitedLinkMaster::DeleteAllURLs() { 317 // Any pending modifications are invalid. 318 added_since_rebuild_.clear(); 319 deleted_since_rebuild_.clear(); 320 321 // Clear the hash table. 322 used_items_ = 0; 323 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); 324 325 // Resize it if it is now too empty. Resize may write the new table out for 326 // us, otherwise, schedule writing the new table to disk ourselves. 327 if (!ResizeTableIfNecessary() && persist_to_disk_) 328 WriteFullTable(); 329 330 listener_->Reset(); 331 } 332 333 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() { 334 return delegate_; 335 } 336 337 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) { 338 if (!urls->HasNextURL()) 339 return; 340 341 listener_->Reset(); 342 343 if (table_builder_.get()) { 344 // A rebuild is in progress, save this deletion in the temporary list so 345 // it can be added once rebuild is complete. 346 while (urls->HasNextURL()) { 347 const GURL& url(urls->NextURL()); 348 if (!url.is_valid()) 349 continue; 350 351 Fingerprint fingerprint = 352 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_); 353 deleted_since_rebuild_.insert(fingerprint); 354 355 // If the URL was just added and now we're deleting it, it may be in the 356 // list of things added since the last rebuild. Delete it from that list. 357 std::set<Fingerprint>::iterator found = 358 added_since_rebuild_.find(fingerprint); 359 if (found != added_since_rebuild_.end()) 360 added_since_rebuild_.erase(found); 361 362 // Delete the URLs from the in-memory table, but don't bother writing 363 // to disk since it will be replaced soon. 364 DeleteFingerprint(fingerprint, false); 365 } 366 return; 367 } 368 369 // Compute the deleted URLs' fingerprints and delete them 370 std::set<Fingerprint> deleted_fingerprints; 371 while (urls->HasNextURL()) { 372 const GURL& url(urls->NextURL()); 373 if (!url.is_valid()) 374 continue; 375 deleted_fingerprints.insert( 376 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_)); 377 } 378 DeleteFingerprintsFromCurrentTable(deleted_fingerprints); 379 } 380 381 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm 382 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint( 383 Fingerprint fingerprint, 384 bool send_notifications) { 385 if (!hash_table_ || table_length_ == 0) { 386 NOTREACHED(); // Not initialized. 387 return null_hash_; 388 } 389 390 Hash cur_hash = HashFingerprint(fingerprint); 391 Hash first_hash = cur_hash; 392 while (true) { 393 Fingerprint cur_fingerprint = FingerprintAt(cur_hash); 394 if (cur_fingerprint == fingerprint) 395 return null_hash_; // This fingerprint is already in there, do nothing. 396 397 if (cur_fingerprint == null_fingerprint_) { 398 // End of probe sequence found, insert here. 399 hash_table_[cur_hash] = fingerprint; 400 used_items_++; 401 // If allowed, notify listener that a new visited link was added. 402 if (send_notifications) 403 listener_->Add(fingerprint); 404 return cur_hash; 405 } 406 407 // Advance in the probe sequence. 408 cur_hash = IncrementHash(cur_hash); 409 if (cur_hash == first_hash) { 410 // This means that we've wrapped around and are about to go into an 411 // infinite loop. Something was wrong with the hashtable resizing 412 // logic, so stop here. 413 NOTREACHED(); 414 return null_hash_; 415 } 416 } 417 } 418 419 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( 420 const std::set<Fingerprint>& fingerprints) { 421 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); 422 423 // Delete the URLs from the table. 424 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); 425 i != fingerprints.end(); ++i) 426 DeleteFingerprint(*i, !bulk_write); 427 428 // These deleted fingerprints may make us shrink the table. 429 if (ResizeTableIfNecessary()) 430 return; // The resize function wrote the new table to disk for us. 431 432 // Nobody wrote this out for us, write the full file to disk. 433 if (bulk_write && persist_to_disk_) 434 WriteFullTable(); 435 } 436 437 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint, 438 bool update_file) { 439 if (!hash_table_ || table_length_ == 0) { 440 NOTREACHED(); // Not initialized. 441 return false; 442 } 443 if (!IsVisited(fingerprint)) 444 return false; // Not in the database to delete. 445 446 // First update the header used count. 447 used_items_--; 448 if (update_file && persist_to_disk_) 449 WriteUsedItemCountToFile(); 450 451 Hash deleted_hash = HashFingerprint(fingerprint); 452 453 // Find the range of "stuff" in the hash table that is adjacent to this 454 // fingerprint. These are things that could be affected by the change in 455 // the hash table. Since we use linear probing, anything after the deleted 456 // item up until an empty item could be affected. 457 Hash end_range = deleted_hash; 458 while (true) { 459 Hash next_hash = IncrementHash(end_range); 460 if (next_hash == deleted_hash) 461 break; // We wrapped around and the whole table is full. 462 if (!hash_table_[next_hash]) 463 break; // Found the last spot. 464 end_range = next_hash; 465 } 466 467 // We could get all fancy and move the affected fingerprints around, but 468 // instead we just remove them all and re-add them (minus our deleted one). 469 // This will mean there's a small window of time where the affected links 470 // won't be marked visited. 471 base::StackVector<Fingerprint, 32> shuffled_fingerprints; 472 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive. 473 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) { 474 if (hash_table_[i] != fingerprint) { 475 // Don't save the one we're deleting! 476 shuffled_fingerprints->push_back(hash_table_[i]); 477 478 // This will balance the increment of this value in AddFingerprint below 479 // so there is no net change. 480 used_items_--; 481 } 482 hash_table_[i] = null_fingerprint_; 483 } 484 485 if (!shuffled_fingerprints->empty()) { 486 // Need to add the new items back. 487 for (size_t i = 0; i < shuffled_fingerprints->size(); i++) 488 AddFingerprint(shuffled_fingerprints[i], false); 489 } 490 491 // Write the affected range to disk [deleted_hash, end_range]. 492 if (update_file && persist_to_disk_) 493 WriteHashRangeToFile(deleted_hash, end_range); 494 495 return true; 496 } 497 498 void VisitedLinkMaster::WriteFullTable() { 499 // This function can get called when the file is open, for example, when we 500 // resize the table. We must handle this case and not try to reopen the file, 501 // since there may be write operations pending on the file I/O thread. 502 // 503 // Note that once we start writing, we do not delete on error. This means 504 // there can be a partial file, but the short file will be detected next time 505 // we start, and will be replaced. 506 // 507 // This might possibly get corrupted if we crash in the middle of writing. 508 // We should pick up the most common types of these failures when we notice 509 // that the file size is different when we load it back in, and then we will 510 // regenerate the table. 511 DCHECK(persist_to_disk_); 512 513 if (!file_) { 514 file_ = static_cast<FILE**>(calloc(1, sizeof(*file_))); 515 base::FilePath filename; 516 GetDatabaseFileName(&filename); 517 PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename)); 518 } 519 520 // Write the new header. 521 int32 header[4]; 522 header[0] = kFileSignature; 523 header[1] = kFileCurrentVersion; 524 header[2] = table_length_; 525 header[3] = used_items_; 526 WriteToFile(file_, 0, header, sizeof(header)); 527 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH); 528 529 // Write the hash data. 530 WriteToFile(file_, kFileHeaderSize, 531 hash_table_, table_length_ * sizeof(Fingerprint)); 532 533 // The hash table may have shrunk, so make sure this is the end. 534 PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_)); 535 } 536 537 bool VisitedLinkMaster::InitFromFile() { 538 DCHECK(file_ == NULL); 539 DCHECK(persist_to_disk_); 540 541 base::FilePath filename; 542 GetDatabaseFileName(&filename); 543 base::ScopedFILE file_closer(base::OpenFile(filename, "rb+")); 544 if (!file_closer.get()) 545 return false; 546 547 int32 num_entries, used_count; 548 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_)) 549 return false; // Header isn't valid. 550 551 // Allocate and read the table. 552 if (!CreateURLTable(num_entries, false)) 553 return false; 554 if (!ReadFromFile(file_closer.get(), kFileHeaderSize, 555 hash_table_, num_entries * sizeof(Fingerprint))) { 556 FreeURLTable(); 557 return false; 558 } 559 used_items_ = used_count; 560 561 #ifndef NDEBUG 562 DebugValidate(); 563 #endif 564 565 file_ = static_cast<FILE**>(malloc(sizeof(*file_))); 566 *file_ = file_closer.release(); 567 return true; 568 } 569 570 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { 571 int32 table_size = kDefaultTableSize; 572 if (table_size_override_) 573 table_size = table_size_override_; 574 575 // The salt must be generated before the table so that it can be copied to 576 // the shared memory. 577 GenerateSalt(salt_); 578 if (!CreateURLTable(table_size, true)) 579 return false; 580 581 #ifndef NDEBUG 582 DebugValidate(); 583 #endif 584 585 if (suppress_rebuild && persist_to_disk_) { 586 // When we disallow rebuilds (normally just unit tests), just use the 587 // current empty table. 588 WriteFullTable(); 589 return true; 590 } 591 592 // This will build the table from history. On the first run, history will 593 // be empty, so this will be correct. This will also write the new table 594 // to disk. We don't want to save explicitly here, since the rebuild may 595 // not complete, leaving us with an empty but valid visited link database. 596 // In the future, we won't know we need to try rebuilding again. 597 return RebuildTableFromDelegate(); 598 } 599 600 bool VisitedLinkMaster::ReadFileHeader(FILE* file, 601 int32* num_entries, 602 int32* used_count, 603 uint8 salt[LINK_SALT_LENGTH]) { 604 DCHECK(persist_to_disk_); 605 606 // Get file size. 607 // Note that there is no need to seek back to the original location in the 608 // file since ReadFromFile() [which is the next call accessing the file] 609 // seeks before reading. 610 if (fseek(file, 0, SEEK_END) == -1) 611 return false; 612 size_t file_size = ftell(file); 613 614 if (file_size <= kFileHeaderSize) 615 return false; 616 617 uint8 header[kFileHeaderSize]; 618 if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) 619 return false; 620 621 // Verify the signature. 622 int32 signature; 623 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); 624 if (signature != kFileSignature) 625 return false; 626 627 // Verify the version is up-to-date. As with other read errors, a version 628 // mistmatch will trigger a rebuild of the database from history, which will 629 // have the effect of migrating the database. 630 int32 version; 631 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); 632 if (version != kFileCurrentVersion) 633 return false; // Bad version. 634 635 // Read the table size and make sure it matches the file size. 636 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); 637 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) 638 return false; // Bad size. 639 640 // Read the used item count. 641 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); 642 if (*used_count > *num_entries) 643 return false; // Bad used item count; 644 645 // Read the salt. 646 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); 647 648 // This file looks OK from the header's perspective. 649 return true; 650 } 651 652 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) { 653 if (!database_name_override_.empty()) { 654 // use this filename, the directory must exist 655 *filename = database_name_override_; 656 return true; 657 } 658 659 if (!browser_context_ || browser_context_->GetPath().empty()) 660 return false; 661 662 base::FilePath profile_dir = browser_context_->GetPath(); 663 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); 664 return true; 665 } 666 667 // Initializes the shared memory structure. The salt should already be filled 668 // in so that it can be written to the shared memory 669 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { 670 // The table is the size of the table followed by the entries. 671 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); 672 673 // Create the shared memory object. 674 shared_memory_ = new base::SharedMemory(); 675 if (!shared_memory_) 676 return false; 677 678 base::SharedMemoryCreateOptions options; 679 options.size = alloc_size; 680 options.share_read_only = true; 681 682 if (!shared_memory_->Create(options) || !shared_memory_->Map(alloc_size)) { 683 delete shared_memory_; 684 shared_memory_ = NULL; 685 return false; 686 } 687 688 if (init_to_empty) { 689 memset(shared_memory_->memory(), 0, alloc_size); 690 used_items_ = 0; 691 } 692 table_length_ = num_entries; 693 694 // Save the header for other processes to read. 695 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); 696 header->length = table_length_; 697 memcpy(header->salt, salt_, LINK_SALT_LENGTH); 698 699 // Our table pointer is just the data immediately following the size. 700 hash_table_ = reinterpret_cast<Fingerprint*>( 701 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); 702 703 return true; 704 } 705 706 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { 707 base::SharedMemory *old_shared_memory = shared_memory_; 708 Fingerprint* old_hash_table = hash_table_; 709 int32 old_table_length = table_length_; 710 if (!CreateURLTable(num_entries, true)) { 711 // Try to put back the old state. 712 shared_memory_ = old_shared_memory; 713 hash_table_ = old_hash_table; 714 table_length_ = old_table_length; 715 return false; 716 } 717 718 #ifndef NDEBUG 719 DebugValidate(); 720 #endif 721 722 return true; 723 } 724 725 void VisitedLinkMaster::FreeURLTable() { 726 if (shared_memory_) { 727 delete shared_memory_; 728 shared_memory_ = NULL; 729 } 730 if (!persist_to_disk_ || !file_) 731 return; 732 PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_)); 733 // AsyncClose() will close the file and free the memory pointed by |file_|. 734 file_ = NULL; 735 } 736 737 bool VisitedLinkMaster::ResizeTableIfNecessary() { 738 DCHECK(table_length_ > 0) << "Must have a table"; 739 740 // Load limits for good performance/space. We are pretty conservative about 741 // keeping the table not very full. This is because we use linear probing 742 // which increases the likelihood of clumps of entries which will reduce 743 // performance. 744 const float max_table_load = 0.5f; // Grow when we're > this full. 745 const float min_table_load = 0.2f; // Shrink when we're < this full. 746 747 float load = ComputeTableLoad(); 748 if (load < max_table_load && 749 (table_length_ <= static_cast<float>(kDefaultTableSize) || 750 load > min_table_load)) 751 return false; 752 753 // Table needs to grow or shrink. 754 int new_size = NewTableSizeForCount(used_items_); 755 DCHECK(new_size > used_items_); 756 DCHECK(load <= min_table_load || new_size > table_length_); 757 ResizeTable(new_size); 758 return true; 759 } 760 761 void VisitedLinkMaster::ResizeTable(int32 new_size) { 762 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_); 763 shared_memory_serial_++; 764 765 #ifndef NDEBUG 766 DebugValidate(); 767 #endif 768 769 base::SharedMemory* old_shared_memory = shared_memory_; 770 Fingerprint* old_hash_table = hash_table_; 771 int32 old_table_length = table_length_; 772 if (!BeginReplaceURLTable(new_size)) 773 return; 774 775 // Now we have two tables, our local copy which is the old one, and the new 776 // one loaded into this object where we need to copy the data. 777 for (int32 i = 0; i < old_table_length; i++) { 778 Fingerprint cur = old_hash_table[i]; 779 if (cur) 780 AddFingerprint(cur, false); 781 } 782 783 // On error unmapping, just forget about it since we can't do anything 784 // else to release it. 785 delete old_shared_memory; 786 787 // Send an update notification to all child processes so they read the new 788 // table. 789 listener_->NewTable(shared_memory_); 790 791 #ifndef NDEBUG 792 DebugValidate(); 793 #endif 794 795 // The new table needs to be written to disk. 796 if (persist_to_disk_) 797 WriteFullTable(); 798 } 799 800 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { 801 // These table sizes are selected to be the maximum prime number less than 802 // a "convenient" multiple of 1K. 803 static const int table_sizes[] = { 804 16381, // 16K = 16384 <- don't shrink below this table size 805 // (should be == default_table_size) 806 32767, // 32K = 32768 807 65521, // 64K = 65536 808 130051, // 128K = 131072 809 262127, // 256K = 262144 810 524269, // 512K = 524288 811 1048549, // 1M = 1048576 812 2097143, // 2M = 2097152 813 4194301, // 4M = 4194304 814 8388571, // 8M = 8388608 815 16777199, // 16M = 16777216 816 33554347}; // 32M = 33554432 817 818 // Try to leave the table 33% full. 819 int desired = item_count * 3; 820 821 // Find the closest prime. 822 for (size_t i = 0; i < arraysize(table_sizes); i ++) { 823 if (table_sizes[i] > desired) 824 return table_sizes[i]; 825 } 826 827 // Growing very big, just approximate a "good" number, not growing as much 828 // as normal. 829 return item_count * 2 - 1; 830 } 831 832 // See the TableBuilder definition in the header file for how this works. 833 bool VisitedLinkMaster::RebuildTableFromDelegate() { 834 DCHECK(!table_builder_.get()); 835 836 // TODO(brettw) make sure we have reasonable salt! 837 table_builder_ = new TableBuilder(this, salt_); 838 delegate_->RebuildTable(table_builder_); 839 return true; 840 } 841 842 // See the TableBuilder declaration above for how this works. 843 void VisitedLinkMaster::OnTableRebuildComplete( 844 bool success, 845 const std::vector<Fingerprint>& fingerprints) { 846 if (success) { 847 // Replace the old table with a new blank one. 848 shared_memory_serial_++; 849 850 // We are responsible for freeing it AFTER it has been replaced if 851 // replacement succeeds. 852 base::SharedMemory* old_shared_memory = shared_memory_; 853 854 int new_table_size = NewTableSizeForCount( 855 static_cast<int>(fingerprints.size() + added_since_rebuild_.size())); 856 if (BeginReplaceURLTable(new_table_size)) { 857 // Free the old table. 858 delete old_shared_memory; 859 860 // Add the stored fingerprints to the hash table. 861 for (size_t i = 0; i < fingerprints.size(); i++) 862 AddFingerprint(fingerprints[i], false); 863 864 // Also add anything that was added while we were asynchronously 865 // generating the new table. 866 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin(); 867 i != added_since_rebuild_.end(); ++i) 868 AddFingerprint(*i, false); 869 added_since_rebuild_.clear(); 870 871 // Now handle deletions. 872 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_); 873 deleted_since_rebuild_.clear(); 874 875 // Send an update notification to all child processes. 876 listener_->NewTable(shared_memory_); 877 878 if (persist_to_disk_) 879 WriteFullTable(); 880 } 881 } 882 table_builder_ = NULL; // Will release our reference to the builder. 883 884 // Notify the unit test that the rebuild is complete (will be NULL in prod.) 885 if (!rebuild_complete_task_.is_null()) { 886 rebuild_complete_task_.Run(); 887 rebuild_complete_task_.Reset(); 888 } 889 } 890 891 void VisitedLinkMaster::WriteToFile(FILE** file, 892 off_t offset, 893 void* data, 894 int32 data_size) { 895 DCHECK(persist_to_disk_); 896 #ifndef NDEBUG 897 posted_asynchronous_operation_ = true; 898 #endif 899 PostIOTask(FROM_HERE, 900 base::Bind(&AsyncWrite, file, offset, 901 std::string(static_cast<const char*>(data), data_size))); 902 } 903 904 void VisitedLinkMaster::WriteUsedItemCountToFile() { 905 DCHECK(persist_to_disk_); 906 if (!file_) 907 return; // See comment on the file_ variable for why this might happen. 908 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); 909 } 910 911 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { 912 DCHECK(persist_to_disk_); 913 914 if (!file_) 915 return; // See comment on the file_ variable for why this might happen. 916 if (last_hash < first_hash) { 917 // Handle wraparound at 0. This first write is first_hash->EOF 918 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, 919 &hash_table_[first_hash], 920 (table_length_ - first_hash + 1) * sizeof(Fingerprint)); 921 922 // Now do 0->last_lash. 923 WriteToFile(file_, kFileHeaderSize, hash_table_, 924 (last_hash + 1) * sizeof(Fingerprint)); 925 } else { 926 // Normal case, just write the range. 927 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, 928 &hash_table_[first_hash], 929 (last_hash - first_hash + 1) * sizeof(Fingerprint)); 930 } 931 } 932 933 bool VisitedLinkMaster::ReadFromFile(FILE* file, 934 off_t offset, 935 void* data, 936 size_t data_size) { 937 DCHECK(persist_to_disk_); 938 #ifndef NDEBUG 939 // Since this function is synchronous, we require that no asynchronous 940 // operations could possibly be pending. 941 DCHECK(!posted_asynchronous_operation_); 942 #endif 943 944 if (fseek(file, offset, SEEK_SET) != 0) 945 return false; 946 947 size_t num_read = fread(data, 1, data_size, file); 948 return num_read == data_size; 949 } 950 951 // VisitedLinkTableBuilder ---------------------------------------------------- 952 953 VisitedLinkMaster::TableBuilder::TableBuilder( 954 VisitedLinkMaster* master, 955 const uint8 salt[LINK_SALT_LENGTH]) 956 : master_(master), 957 success_(true) { 958 fingerprints_.reserve(4096); 959 memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8)); 960 } 961 962 // TODO(brettw): Do we want to try to cancel the request if this happens? It 963 // could delay shutdown if there are a lot of URLs. 964 void VisitedLinkMaster::TableBuilder::DisownMaster() { 965 master_ = NULL; 966 } 967 968 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { 969 if (!url.is_empty()) { 970 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( 971 url.spec().data(), url.spec().length(), salt_)); 972 } 973 } 974 975 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { 976 success_ = success; 977 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; 978 979 // Marshal to the main thread to notify the VisitedLinkMaster that the 980 // rebuild is complete. 981 BrowserThread::PostTask( 982 BrowserThread::UI, FROM_HERE, 983 base::Bind(&TableBuilder::OnCompleteMainThread, this)); 984 } 985 986 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() { 987 if (master_) 988 master_->OnTableRebuildComplete(success_, fingerprints_); 989 } 990 991 } // namespace visitedlink 992