Home | History | Annotate | Download | only in browser
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/visitedlink/browser/visitedlink_master.h"
      6 
      7 #if defined(OS_WIN)
      8 #include <windows.h>
      9 #include <io.h>
     10 #include <shlobj.h>
     11 #endif  // defined(OS_WIN)
     12 #include <stdio.h>
     13 
     14 #include <algorithm>
     15 
     16 #include "base/bind.h"
     17 #include "base/bind_helpers.h"
     18 #include "base/containers/stack_container.h"
     19 #include "base/files/file_util.h"
     20 #include "base/files/scoped_file.h"
     21 #include "base/logging.h"
     22 #include "base/message_loop/message_loop.h"
     23 #include "base/path_service.h"
     24 #include "base/rand_util.h"
     25 #include "base/strings/string_util.h"
     26 #include "base/threading/thread_restrictions.h"
     27 #include "components/visitedlink/browser/visitedlink_delegate.h"
     28 #include "components/visitedlink/browser/visitedlink_event_listener.h"
     29 #include "content/public/browser/browser_context.h"
     30 #include "content/public/browser/browser_thread.h"
     31 #include "url/gurl.h"
     32 
     33 using content::BrowserThread;
     34 
     35 namespace visitedlink {
     36 
     37 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
     38 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
     39 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
     40 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
     41 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
     42 
     43 const int32 VisitedLinkMaster::kFileCurrentVersion = 3;
     44 
     45 // the signature at the beginning of the URL table = "VLnk" (visited links)
     46 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
     47 const size_t VisitedLinkMaster::kFileHeaderSize =
     48     kFileHeaderSaltOffset + LINK_SALT_LENGTH;
     49 
     50 // This value should also be the same as the smallest size in the lookup
     51 // table in NewTableSizeForCount (prime number).
     52 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
     53 
     54 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
     55 
     56 namespace {
     57 
     58 // Fills the given salt structure with some quasi-random values
     59 // It is not necessary to generate a cryptographically strong random string,
     60 // only that it be reasonably different for different users.
     61 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
     62   DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
     63   uint64 randval = base::RandUint64();
     64   memcpy(salt, &randval, 8);
     65 }
     66 
     67 // Opens file on a background thread to not block UI thread.
     68 void AsyncOpen(FILE** file, const base::FilePath& filename) {
     69   *file = base::OpenFile(filename, "wb+");
     70   DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value();
     71 }
     72 
     73 // Returns true if the write was complete.
     74 static bool WriteToFile(FILE* file,
     75                         off_t offset,
     76                         const void* data,
     77                         size_t data_len) {
     78   if (fseek(file, offset, SEEK_SET) != 0)
     79     return false;  // Don't write to an invalid part of the file.
     80 
     81   size_t num_written = fwrite(data, 1, data_len, file);
     82 
     83   // The write may not make it to the kernel (stdlib may buffer the write)
     84   // until the next fseek/fclose call.  If we crash, it's easy for our used
     85   // item count to be out of sync with the number of hashes we write.
     86   // Protect against this by calling fflush.
     87   int ret = fflush(file);
     88   DCHECK_EQ(0, ret);
     89   return num_written == data_len;
     90 }
     91 
     92 // This task executes on a background thread and executes a write. This
     93 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE
     94 // is used because file may still not be opened by the time of scheduling
     95 // the task for execution.
     96 void AsyncWrite(FILE** file, int32 offset, const std::string& data) {
     97   if (*file)
     98     WriteToFile(*file, offset, data.data(), data.size());
     99 }
    100 
    101 // Truncates the file to the current position asynchronously on a background
    102 // thread. Double pointer to FILE is used because file may still not be opened
    103 // by the time of scheduling the task for execution.
    104 void AsyncTruncate(FILE** file) {
    105   if (*file)
    106     base::IgnoreResult(base::TruncateFile(*file));
    107 }
    108 
    109 // Closes the file on a background thread and releases memory used for storage
    110 // of FILE* value. Double pointer to FILE is used because file may still not
    111 // be opened by the time of scheduling the task for execution.
    112 void AsyncClose(FILE** file) {
    113   if (*file)
    114     base::IgnoreResult(fclose(*file));
    115   free(file);
    116 }
    117 
    118 }  // namespace
    119 
    120 // TableBuilder ---------------------------------------------------------------
    121 
    122 // How rebuilding from history works
    123 // ---------------------------------
    124 //
    125 // We mark that we're rebuilding from history by setting the table_builder_
    126 // member in VisitedLinkMaster to the TableBuilder we create. This builder
    127 // will be called on the history thread by the history system for every URL
    128 // in the database.
    129 //
    130 // The builder will store the fingerprints for those URLs, and then marshalls
    131 // back to the main thread where the VisitedLinkMaster will be notified. The
    132 // master then replaces its table with a new table containing the computed
    133 // fingerprints.
    134 //
    135 // The builder must remain active while the history system is using it.
    136 // Sometimes, the master will be deleted before the rebuild is complete, in
    137 // which case it notifies the builder via DisownMaster(). The builder will
    138 // delete itself once rebuilding is complete, and not execute any callback.
    139 class VisitedLinkMaster::TableBuilder
    140     : public VisitedLinkDelegate::URLEnumerator {
    141  public:
    142   TableBuilder(VisitedLinkMaster* master,
    143                const uint8 salt[LINK_SALT_LENGTH]);
    144 
    145   // Called on the main thread when the master is being destroyed. This will
    146   // prevent a crash when the query completes and the master is no longer
    147   // around. We can not actually do anything but mark this fact, since the
    148   // table will be being rebuilt simultaneously on the other thread.
    149   void DisownMaster();
    150 
    151   // VisitedLinkDelegate::URLEnumerator
    152   virtual void OnURL(const GURL& url) OVERRIDE;
    153   virtual void OnComplete(bool succeed) OVERRIDE;
    154 
    155  private:
    156   virtual ~TableBuilder() {}
    157 
    158   // OnComplete mashals to this function on the main thread to do the
    159   // notification.
    160   void OnCompleteMainThread();
    161 
    162   // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
    163   VisitedLinkMaster* master_;
    164 
    165   // Indicates whether the operation has failed or not.
    166   bool success_;
    167 
    168   // Salt for this new table.
    169   uint8 salt_[LINK_SALT_LENGTH];
    170 
    171   // Stores the fingerprints we computed on the background thread.
    172   VisitedLinkCommon::Fingerprints fingerprints_;
    173 
    174   DISALLOW_COPY_AND_ASSIGN(TableBuilder);
    175 };
    176 
    177 // VisitedLinkMaster ----------------------------------------------------------
    178 
    179 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context,
    180                                      VisitedLinkDelegate* delegate,
    181                                      bool persist_to_disk)
    182     : browser_context_(browser_context),
    183       delegate_(delegate),
    184       listener_(new VisitedLinkEventListener(this, browser_context)),
    185       persist_to_disk_(persist_to_disk) {
    186   InitMembers();
    187 }
    188 
    189 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
    190                                      VisitedLinkDelegate* delegate,
    191                                      bool persist_to_disk,
    192                                      bool suppress_rebuild,
    193                                      const base::FilePath& filename,
    194                                      int32 default_table_size)
    195     : browser_context_(NULL),
    196       delegate_(delegate),
    197       persist_to_disk_(persist_to_disk) {
    198   listener_.reset(listener);
    199   DCHECK(listener_.get());
    200   InitMembers();
    201 
    202   database_name_override_ = filename;
    203   table_size_override_ = default_table_size;
    204   suppress_rebuild_ = suppress_rebuild;
    205 }
    206 
    207 VisitedLinkMaster::~VisitedLinkMaster() {
    208   if (table_builder_.get()) {
    209     // Prevent the table builder from calling us back now that we're being
    210     // destroyed. Note that we DON'T delete the object, since the history
    211     // system is still writing into it. When that is complete, the table
    212     // builder will destroy itself when it finds we are gone.
    213     table_builder_->DisownMaster();
    214   }
    215   FreeURLTable();
    216   // FreeURLTable() will schedule closing of the file and deletion of |file_|.
    217   // So nothing should be done here.
    218 }
    219 
    220 void VisitedLinkMaster::InitMembers() {
    221   file_ = NULL;
    222   shared_memory_ = NULL;
    223   shared_memory_serial_ = 0;
    224   used_items_ = 0;
    225   table_size_override_ = 0;
    226   suppress_rebuild_ = false;
    227   sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken();
    228 
    229 #ifndef NDEBUG
    230   posted_asynchronous_operation_ = false;
    231 #endif
    232 }
    233 
    234 bool VisitedLinkMaster::Init() {
    235   // We probably shouldn't be loading this from the UI thread,
    236   // but it does need to happen early on in startup.
    237   //   http://code.google.com/p/chromium/issues/detail?id=24163
    238   base::ThreadRestrictions::ScopedAllowIO allow_io;
    239 
    240   if (persist_to_disk_) {
    241     if (InitFromFile())
    242       return true;
    243   }
    244   return InitFromScratch(suppress_rebuild_);
    245 }
    246 
    247 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
    248   // Extra check that we are not incognito. This should not happen.
    249   // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
    250   // removed from BrowserContext.
    251   if (browser_context_ && browser_context_->IsOffTheRecord()) {
    252     NOTREACHED();
    253     return null_hash_;
    254   }
    255 
    256   if (!url.is_valid())
    257     return null_hash_;  // Don't add invalid URLs.
    258 
    259   Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
    260                                                   url.spec().size(),
    261                                                   salt_);
    262   if (table_builder_.get()) {
    263     // If we have a pending delete for this fingerprint, cancel it.
    264     std::set<Fingerprint>::iterator found =
    265         deleted_since_rebuild_.find(fingerprint);
    266     if (found != deleted_since_rebuild_.end())
    267         deleted_since_rebuild_.erase(found);
    268 
    269     // A rebuild is in progress, save this addition in the temporary list so
    270     // it can be added once rebuild is complete.
    271     added_since_rebuild_.insert(fingerprint);
    272   }
    273 
    274   // If the table is "full", we don't add URLs and just drop them on the floor.
    275   // This can happen if we get thousands of new URLs and something causes
    276   // the table resizing to fail. This check prevents a hang in that case. Note
    277   // that this is *not* the resize limit, this is just a sanity check.
    278   if (used_items_ / 8 > table_length_ / 10)
    279     return null_hash_;  // Table is more than 80% full.
    280 
    281   return AddFingerprint(fingerprint, true);
    282 }
    283 
    284 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here,
    285                                    const base::Closure& task) {
    286   DCHECK(persist_to_disk_);
    287   BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_,
    288                                                             from_here, task);
    289 }
    290 
    291 void VisitedLinkMaster::AddURL(const GURL& url) {
    292   Hash index = TryToAddURL(url);
    293   if (!table_builder_.get() && index != null_hash_) {
    294     // Not rebuilding, so we want to keep the file on disk up-to-date.
    295     if (persist_to_disk_) {
    296       WriteUsedItemCountToFile();
    297       WriteHashRangeToFile(index, index);
    298     }
    299     ResizeTableIfNecessary();
    300   }
    301 }
    302 
    303 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
    304   for (std::vector<GURL>::const_iterator i = url.begin();
    305        i != url.end(); ++i) {
    306     Hash index = TryToAddURL(*i);
    307     if (!table_builder_.get() && index != null_hash_)
    308       ResizeTableIfNecessary();
    309   }
    310 
    311   // Keeps the file on disk up-to-date.
    312   if (!table_builder_.get() && persist_to_disk_)
    313     WriteFullTable();
    314 }
    315 
    316 void VisitedLinkMaster::DeleteAllURLs() {
    317   // Any pending modifications are invalid.
    318   added_since_rebuild_.clear();
    319   deleted_since_rebuild_.clear();
    320 
    321   // Clear the hash table.
    322   used_items_ = 0;
    323   memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
    324 
    325   // Resize it if it is now too empty. Resize may write the new table out for
    326   // us, otherwise, schedule writing the new table to disk ourselves.
    327   if (!ResizeTableIfNecessary() && persist_to_disk_)
    328     WriteFullTable();
    329 
    330   listener_->Reset();
    331 }
    332 
    333 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() {
    334   return delegate_;
    335 }
    336 
    337 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) {
    338   if (!urls->HasNextURL())
    339     return;
    340 
    341   listener_->Reset();
    342 
    343   if (table_builder_.get()) {
    344     // A rebuild is in progress, save this deletion in the temporary list so
    345     // it can be added once rebuild is complete.
    346     while (urls->HasNextURL()) {
    347       const GURL& url(urls->NextURL());
    348       if (!url.is_valid())
    349         continue;
    350 
    351       Fingerprint fingerprint =
    352           ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_);
    353       deleted_since_rebuild_.insert(fingerprint);
    354 
    355       // If the URL was just added and now we're deleting it, it may be in the
    356       // list of things added since the last rebuild. Delete it from that list.
    357       std::set<Fingerprint>::iterator found =
    358           added_since_rebuild_.find(fingerprint);
    359       if (found != added_since_rebuild_.end())
    360         added_since_rebuild_.erase(found);
    361 
    362       // Delete the URLs from the in-memory table, but don't bother writing
    363       // to disk since it will be replaced soon.
    364       DeleteFingerprint(fingerprint, false);
    365     }
    366     return;
    367   }
    368 
    369   // Compute the deleted URLs' fingerprints and delete them
    370   std::set<Fingerprint> deleted_fingerprints;
    371   while (urls->HasNextURL()) {
    372     const GURL& url(urls->NextURL());
    373     if (!url.is_valid())
    374       continue;
    375     deleted_fingerprints.insert(
    376         ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_));
    377   }
    378   DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
    379 }
    380 
    381 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
    382 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
    383     Fingerprint fingerprint,
    384     bool send_notifications) {
    385   if (!hash_table_ || table_length_ == 0) {
    386     NOTREACHED();  // Not initialized.
    387     return null_hash_;
    388   }
    389 
    390   Hash cur_hash = HashFingerprint(fingerprint);
    391   Hash first_hash = cur_hash;
    392   while (true) {
    393     Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
    394     if (cur_fingerprint == fingerprint)
    395       return null_hash_;  // This fingerprint is already in there, do nothing.
    396 
    397     if (cur_fingerprint == null_fingerprint_) {
    398       // End of probe sequence found, insert here.
    399       hash_table_[cur_hash] = fingerprint;
    400       used_items_++;
    401       // If allowed, notify listener that a new visited link was added.
    402       if (send_notifications)
    403         listener_->Add(fingerprint);
    404       return cur_hash;
    405     }
    406 
    407     // Advance in the probe sequence.
    408     cur_hash = IncrementHash(cur_hash);
    409     if (cur_hash == first_hash) {
    410       // This means that we've wrapped around and are about to go into an
    411       // infinite loop. Something was wrong with the hashtable resizing
    412       // logic, so stop here.
    413       NOTREACHED();
    414       return null_hash_;
    415     }
    416   }
    417 }
    418 
    419 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
    420     const std::set<Fingerprint>& fingerprints) {
    421   bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
    422 
    423   // Delete the URLs from the table.
    424   for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
    425        i != fingerprints.end(); ++i)
    426     DeleteFingerprint(*i, !bulk_write);
    427 
    428   // These deleted fingerprints may make us shrink the table.
    429   if (ResizeTableIfNecessary())
    430     return;  // The resize function wrote the new table to disk for us.
    431 
    432   // Nobody wrote this out for us, write the full file to disk.
    433   if (bulk_write && persist_to_disk_)
    434     WriteFullTable();
    435 }
    436 
    437 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
    438                                           bool update_file) {
    439   if (!hash_table_ || table_length_ == 0) {
    440     NOTREACHED();  // Not initialized.
    441     return false;
    442   }
    443   if (!IsVisited(fingerprint))
    444     return false;  // Not in the database to delete.
    445 
    446   // First update the header used count.
    447   used_items_--;
    448   if (update_file && persist_to_disk_)
    449     WriteUsedItemCountToFile();
    450 
    451   Hash deleted_hash = HashFingerprint(fingerprint);
    452 
    453   // Find the range of "stuff" in the hash table that is adjacent to this
    454   // fingerprint. These are things that could be affected by the change in
    455   // the hash table. Since we use linear probing, anything after the deleted
    456   // item up until an empty item could be affected.
    457   Hash end_range = deleted_hash;
    458   while (true) {
    459     Hash next_hash = IncrementHash(end_range);
    460     if (next_hash == deleted_hash)
    461       break;  // We wrapped around and the whole table is full.
    462     if (!hash_table_[next_hash])
    463       break;  // Found the last spot.
    464     end_range = next_hash;
    465   }
    466 
    467   // We could get all fancy and move the affected fingerprints around, but
    468   // instead we just remove them all and re-add them (minus our deleted one).
    469   // This will mean there's a small window of time where the affected links
    470   // won't be marked visited.
    471   base::StackVector<Fingerprint, 32> shuffled_fingerprints;
    472   Hash stop_loop = IncrementHash(end_range);  // The end range is inclusive.
    473   for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
    474     if (hash_table_[i] != fingerprint) {
    475       // Don't save the one we're deleting!
    476       shuffled_fingerprints->push_back(hash_table_[i]);
    477 
    478       // This will balance the increment of this value in AddFingerprint below
    479       // so there is no net change.
    480       used_items_--;
    481     }
    482     hash_table_[i] = null_fingerprint_;
    483   }
    484 
    485   if (!shuffled_fingerprints->empty()) {
    486     // Need to add the new items back.
    487     for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
    488       AddFingerprint(shuffled_fingerprints[i], false);
    489   }
    490 
    491   // Write the affected range to disk [deleted_hash, end_range].
    492   if (update_file && persist_to_disk_)
    493     WriteHashRangeToFile(deleted_hash, end_range);
    494 
    495   return true;
    496 }
    497 
    498 void VisitedLinkMaster::WriteFullTable() {
    499   // This function can get called when the file is open, for example, when we
    500   // resize the table. We must handle this case and not try to reopen the file,
    501   // since there may be write operations pending on the file I/O thread.
    502   //
    503   // Note that once we start writing, we do not delete on error. This means
    504   // there can be a partial file, but the short file will be detected next time
    505   // we start, and will be replaced.
    506   //
    507   // This might possibly get corrupted if we crash in the middle of writing.
    508   // We should pick up the most common types of these failures when we notice
    509   // that the file size is different when we load it back in, and then we will
    510   // regenerate the table.
    511   DCHECK(persist_to_disk_);
    512 
    513   if (!file_) {
    514     file_ = static_cast<FILE**>(calloc(1, sizeof(*file_)));
    515     base::FilePath filename;
    516     GetDatabaseFileName(&filename);
    517     PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename));
    518   }
    519 
    520   // Write the new header.
    521   int32 header[4];
    522   header[0] = kFileSignature;
    523   header[1] = kFileCurrentVersion;
    524   header[2] = table_length_;
    525   header[3] = used_items_;
    526   WriteToFile(file_, 0, header, sizeof(header));
    527   WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
    528 
    529   // Write the hash data.
    530   WriteToFile(file_, kFileHeaderSize,
    531               hash_table_, table_length_ * sizeof(Fingerprint));
    532 
    533   // The hash table may have shrunk, so make sure this is the end.
    534   PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_));
    535 }
    536 
    537 bool VisitedLinkMaster::InitFromFile() {
    538   DCHECK(file_ == NULL);
    539   DCHECK(persist_to_disk_);
    540 
    541   base::FilePath filename;
    542   GetDatabaseFileName(&filename);
    543   base::ScopedFILE file_closer(base::OpenFile(filename, "rb+"));
    544   if (!file_closer.get())
    545     return false;
    546 
    547   int32 num_entries, used_count;
    548   if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
    549     return false;  // Header isn't valid.
    550 
    551   // Allocate and read the table.
    552   if (!CreateURLTable(num_entries, false))
    553     return false;
    554   if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
    555                     hash_table_, num_entries * sizeof(Fingerprint))) {
    556     FreeURLTable();
    557     return false;
    558   }
    559   used_items_ = used_count;
    560 
    561 #ifndef NDEBUG
    562   DebugValidate();
    563 #endif
    564 
    565   file_ = static_cast<FILE**>(malloc(sizeof(*file_)));
    566   *file_ = file_closer.release();
    567   return true;
    568 }
    569 
    570 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
    571   int32 table_size = kDefaultTableSize;
    572   if (table_size_override_)
    573     table_size = table_size_override_;
    574 
    575   // The salt must be generated before the table so that it can be copied to
    576   // the shared memory.
    577   GenerateSalt(salt_);
    578   if (!CreateURLTable(table_size, true))
    579     return false;
    580 
    581 #ifndef NDEBUG
    582   DebugValidate();
    583 #endif
    584 
    585   if (suppress_rebuild && persist_to_disk_) {
    586     // When we disallow rebuilds (normally just unit tests), just use the
    587     // current empty table.
    588     WriteFullTable();
    589     return true;
    590   }
    591 
    592   // This will build the table from history. On the first run, history will
    593   // be empty, so this will be correct. This will also write the new table
    594   // to disk. We don't want to save explicitly here, since the rebuild may
    595   // not complete, leaving us with an empty but valid visited link database.
    596   // In the future, we won't know we need to try rebuilding again.
    597   return RebuildTableFromDelegate();
    598 }
    599 
    600 bool VisitedLinkMaster::ReadFileHeader(FILE* file,
    601                                        int32* num_entries,
    602                                        int32* used_count,
    603                                        uint8 salt[LINK_SALT_LENGTH]) {
    604   DCHECK(persist_to_disk_);
    605 
    606   // Get file size.
    607   // Note that there is no need to seek back to the original location in the
    608   // file since ReadFromFile() [which is the next call accessing the file]
    609   // seeks before reading.
    610   if (fseek(file, 0, SEEK_END) == -1)
    611     return false;
    612   size_t file_size = ftell(file);
    613 
    614   if (file_size <= kFileHeaderSize)
    615     return false;
    616 
    617   uint8 header[kFileHeaderSize];
    618   if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
    619     return false;
    620 
    621   // Verify the signature.
    622   int32 signature;
    623   memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
    624   if (signature != kFileSignature)
    625     return false;
    626 
    627   // Verify the version is up-to-date. As with other read errors, a version
    628   // mistmatch will trigger a rebuild of the database from history, which will
    629   // have the effect of migrating the database.
    630   int32 version;
    631   memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
    632   if (version != kFileCurrentVersion)
    633     return false;  // Bad version.
    634 
    635   // Read the table size and make sure it matches the file size.
    636   memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
    637   if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
    638     return false;  // Bad size.
    639 
    640   // Read the used item count.
    641   memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
    642   if (*used_count > *num_entries)
    643     return false;  // Bad used item count;
    644 
    645   // Read the salt.
    646   memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
    647 
    648   // This file looks OK from the header's perspective.
    649   return true;
    650 }
    651 
    652 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) {
    653   if (!database_name_override_.empty()) {
    654     // use this filename, the directory must exist
    655     *filename = database_name_override_;
    656     return true;
    657   }
    658 
    659   if (!browser_context_ || browser_context_->GetPath().empty())
    660     return false;
    661 
    662   base::FilePath profile_dir = browser_context_->GetPath();
    663   *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
    664   return true;
    665 }
    666 
    667 // Initializes the shared memory structure. The salt should already be filled
    668 // in so that it can be written to the shared memory
    669 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
    670   // The table is the size of the table followed by the entries.
    671   uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
    672 
    673   // Create the shared memory object.
    674   shared_memory_ = new base::SharedMemory();
    675   if (!shared_memory_)
    676     return false;
    677 
    678   base::SharedMemoryCreateOptions options;
    679   options.size = alloc_size;
    680   options.share_read_only = true;
    681 
    682   if (!shared_memory_->Create(options) || !shared_memory_->Map(alloc_size)) {
    683     delete shared_memory_;
    684     shared_memory_ = NULL;
    685     return false;
    686   }
    687 
    688   if (init_to_empty) {
    689     memset(shared_memory_->memory(), 0, alloc_size);
    690     used_items_ = 0;
    691   }
    692   table_length_ = num_entries;
    693 
    694   // Save the header for other processes to read.
    695   SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
    696   header->length = table_length_;
    697   memcpy(header->salt, salt_, LINK_SALT_LENGTH);
    698 
    699   // Our table pointer is just the data immediately following the size.
    700   hash_table_ = reinterpret_cast<Fingerprint*>(
    701       static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
    702 
    703   return true;
    704 }
    705 
    706 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
    707   base::SharedMemory *old_shared_memory = shared_memory_;
    708   Fingerprint* old_hash_table = hash_table_;
    709   int32 old_table_length = table_length_;
    710   if (!CreateURLTable(num_entries, true)) {
    711     // Try to put back the old state.
    712     shared_memory_ = old_shared_memory;
    713     hash_table_ = old_hash_table;
    714     table_length_ = old_table_length;
    715     return false;
    716   }
    717 
    718 #ifndef NDEBUG
    719   DebugValidate();
    720 #endif
    721 
    722   return true;
    723 }
    724 
    725 void VisitedLinkMaster::FreeURLTable() {
    726   if (shared_memory_) {
    727     delete shared_memory_;
    728     shared_memory_ = NULL;
    729   }
    730   if (!persist_to_disk_ || !file_)
    731     return;
    732   PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_));
    733   // AsyncClose() will close the file and free the memory pointed by |file_|.
    734   file_ = NULL;
    735 }
    736 
    737 bool VisitedLinkMaster::ResizeTableIfNecessary() {
    738   DCHECK(table_length_ > 0) << "Must have a table";
    739 
    740   // Load limits for good performance/space. We are pretty conservative about
    741   // keeping the table not very full. This is because we use linear probing
    742   // which increases the likelihood of clumps of entries which will reduce
    743   // performance.
    744   const float max_table_load = 0.5f;  // Grow when we're > this full.
    745   const float min_table_load = 0.2f;  // Shrink when we're < this full.
    746 
    747   float load = ComputeTableLoad();
    748   if (load < max_table_load &&
    749       (table_length_ <= static_cast<float>(kDefaultTableSize) ||
    750        load > min_table_load))
    751     return false;
    752 
    753   // Table needs to grow or shrink.
    754   int new_size = NewTableSizeForCount(used_items_);
    755   DCHECK(new_size > used_items_);
    756   DCHECK(load <= min_table_load || new_size > table_length_);
    757   ResizeTable(new_size);
    758   return true;
    759 }
    760 
    761 void VisitedLinkMaster::ResizeTable(int32 new_size) {
    762   DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
    763   shared_memory_serial_++;
    764 
    765 #ifndef NDEBUG
    766   DebugValidate();
    767 #endif
    768 
    769   base::SharedMemory* old_shared_memory = shared_memory_;
    770   Fingerprint* old_hash_table = hash_table_;
    771   int32 old_table_length = table_length_;
    772   if (!BeginReplaceURLTable(new_size))
    773     return;
    774 
    775   // Now we have two tables, our local copy which is the old one, and the new
    776   // one loaded into this object where we need to copy the data.
    777   for (int32 i = 0; i < old_table_length; i++) {
    778     Fingerprint cur = old_hash_table[i];
    779     if (cur)
    780       AddFingerprint(cur, false);
    781   }
    782 
    783   // On error unmapping, just forget about it since we can't do anything
    784   // else to release it.
    785   delete old_shared_memory;
    786 
    787   // Send an update notification to all child processes so they read the new
    788   // table.
    789   listener_->NewTable(shared_memory_);
    790 
    791 #ifndef NDEBUG
    792   DebugValidate();
    793 #endif
    794 
    795   // The new table needs to be written to disk.
    796   if (persist_to_disk_)
    797     WriteFullTable();
    798 }
    799 
    800 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
    801   // These table sizes are selected to be the maximum prime number less than
    802   // a "convenient" multiple of 1K.
    803   static const int table_sizes[] = {
    804       16381,    // 16K  = 16384   <- don't shrink below this table size
    805                 //                   (should be == default_table_size)
    806       32767,    // 32K  = 32768
    807       65521,    // 64K  = 65536
    808       130051,   // 128K = 131072
    809       262127,   // 256K = 262144
    810       524269,   // 512K = 524288
    811       1048549,  // 1M   = 1048576
    812       2097143,  // 2M   = 2097152
    813       4194301,  // 4M   = 4194304
    814       8388571,  // 8M   = 8388608
    815       16777199,  // 16M  = 16777216
    816       33554347};  // 32M  = 33554432
    817 
    818   // Try to leave the table 33% full.
    819   int desired = item_count * 3;
    820 
    821   // Find the closest prime.
    822   for (size_t i = 0; i < arraysize(table_sizes); i ++) {
    823     if (table_sizes[i] > desired)
    824       return table_sizes[i];
    825   }
    826 
    827   // Growing very big, just approximate a "good" number, not growing as much
    828   // as normal.
    829   return item_count * 2 - 1;
    830 }
    831 
    832 // See the TableBuilder definition in the header file for how this works.
    833 bool VisitedLinkMaster::RebuildTableFromDelegate() {
    834   DCHECK(!table_builder_.get());
    835 
    836   // TODO(brettw) make sure we have reasonable salt!
    837   table_builder_ = new TableBuilder(this, salt_);
    838   delegate_->RebuildTable(table_builder_);
    839   return true;
    840 }
    841 
    842 // See the TableBuilder declaration above for how this works.
    843 void VisitedLinkMaster::OnTableRebuildComplete(
    844     bool success,
    845     const std::vector<Fingerprint>& fingerprints) {
    846   if (success) {
    847     // Replace the old table with a new blank one.
    848     shared_memory_serial_++;
    849 
    850     // We are responsible for freeing it AFTER it has been replaced if
    851     // replacement succeeds.
    852     base::SharedMemory* old_shared_memory = shared_memory_;
    853 
    854     int new_table_size = NewTableSizeForCount(
    855         static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
    856     if (BeginReplaceURLTable(new_table_size)) {
    857       // Free the old table.
    858       delete old_shared_memory;
    859 
    860       // Add the stored fingerprints to the hash table.
    861       for (size_t i = 0; i < fingerprints.size(); i++)
    862         AddFingerprint(fingerprints[i], false);
    863 
    864       // Also add anything that was added while we were asynchronously
    865       // generating the new table.
    866       for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
    867            i != added_since_rebuild_.end(); ++i)
    868         AddFingerprint(*i, false);
    869       added_since_rebuild_.clear();
    870 
    871       // Now handle deletions.
    872       DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
    873       deleted_since_rebuild_.clear();
    874 
    875       // Send an update notification to all child processes.
    876       listener_->NewTable(shared_memory_);
    877 
    878       if (persist_to_disk_)
    879         WriteFullTable();
    880     }
    881   }
    882   table_builder_ = NULL;  // Will release our reference to the builder.
    883 
    884   // Notify the unit test that the rebuild is complete (will be NULL in prod.)
    885   if (!rebuild_complete_task_.is_null()) {
    886     rebuild_complete_task_.Run();
    887     rebuild_complete_task_.Reset();
    888   }
    889 }
    890 
    891 void VisitedLinkMaster::WriteToFile(FILE** file,
    892                                     off_t offset,
    893                                     void* data,
    894                                     int32 data_size) {
    895   DCHECK(persist_to_disk_);
    896 #ifndef NDEBUG
    897   posted_asynchronous_operation_ = true;
    898 #endif
    899   PostIOTask(FROM_HERE,
    900       base::Bind(&AsyncWrite, file, offset,
    901                  std::string(static_cast<const char*>(data), data_size)));
    902 }
    903 
    904 void VisitedLinkMaster::WriteUsedItemCountToFile() {
    905   DCHECK(persist_to_disk_);
    906   if (!file_)
    907     return;  // See comment on the file_ variable for why this might happen.
    908   WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
    909 }
    910 
    911 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
    912   DCHECK(persist_to_disk_);
    913 
    914   if (!file_)
    915     return;  // See comment on the file_ variable for why this might happen.
    916   if (last_hash < first_hash) {
    917     // Handle wraparound at 0. This first write is first_hash->EOF
    918     WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
    919                 &hash_table_[first_hash],
    920                 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
    921 
    922     // Now do 0->last_lash.
    923     WriteToFile(file_, kFileHeaderSize, hash_table_,
    924                 (last_hash + 1) * sizeof(Fingerprint));
    925   } else {
    926     // Normal case, just write the range.
    927     WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
    928                 &hash_table_[first_hash],
    929                 (last_hash - first_hash + 1) * sizeof(Fingerprint));
    930   }
    931 }
    932 
    933 bool VisitedLinkMaster::ReadFromFile(FILE* file,
    934                                      off_t offset,
    935                                      void* data,
    936                                      size_t data_size) {
    937   DCHECK(persist_to_disk_);
    938 #ifndef NDEBUG
    939   // Since this function is synchronous, we require that no asynchronous
    940   // operations could possibly be pending.
    941   DCHECK(!posted_asynchronous_operation_);
    942 #endif
    943 
    944   if (fseek(file, offset, SEEK_SET) != 0)
    945     return false;
    946 
    947   size_t num_read = fread(data, 1, data_size, file);
    948   return num_read == data_size;
    949 }
    950 
    951 // VisitedLinkTableBuilder ----------------------------------------------------
    952 
    953 VisitedLinkMaster::TableBuilder::TableBuilder(
    954     VisitedLinkMaster* master,
    955     const uint8 salt[LINK_SALT_LENGTH])
    956     : master_(master),
    957       success_(true) {
    958   fingerprints_.reserve(4096);
    959   memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8));
    960 }
    961 
    962 // TODO(brettw): Do we want to try to cancel the request if this happens? It
    963 // could delay shutdown if there are a lot of URLs.
    964 void VisitedLinkMaster::TableBuilder::DisownMaster() {
    965   master_ = NULL;
    966 }
    967 
    968 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
    969   if (!url.is_empty()) {
    970     fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
    971         url.spec().data(), url.spec().length(), salt_));
    972   }
    973 }
    974 
    975 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
    976   success_ = success;
    977   DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
    978 
    979   // Marshal to the main thread to notify the VisitedLinkMaster that the
    980   // rebuild is complete.
    981   BrowserThread::PostTask(
    982       BrowserThread::UI, FROM_HERE,
    983       base::Bind(&TableBuilder::OnCompleteMainThread, this));
    984 }
    985 
    986 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
    987   if (master_)
    988     master_->OnTableRebuildComplete(success_, fingerprints_);
    989 }
    990 
    991 }  // namespace visitedlink
    992