Home | History | Annotate | Download | only in browser
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/visitedlink/browser/visitedlink_master.h"
      6 
      7 #if defined(OS_WIN)
      8 #include <windows.h>
      9 #include <io.h>
     10 #include <shlobj.h>
     11 #endif  // defined(OS_WIN)
     12 #include <stdio.h>
     13 
     14 #include <algorithm>
     15 
     16 #include "base/bind.h"
     17 #include "base/bind_helpers.h"
     18 #include "base/containers/stack_container.h"
     19 #include "base/file_util.h"
     20 #include "base/logging.h"
     21 #include "base/message_loop/message_loop.h"
     22 #include "base/path_service.h"
     23 #include "base/rand_util.h"
     24 #include "base/strings/string_util.h"
     25 #include "base/threading/thread_restrictions.h"
     26 #include "components/visitedlink/browser/visitedlink_delegate.h"
     27 #include "components/visitedlink/browser/visitedlink_event_listener.h"
     28 #include "content/public/browser/browser_context.h"
     29 #include "content/public/browser/browser_thread.h"
     30 #include "url/gurl.h"
     31 
     32 using content::BrowserThread;
     33 using file_util::ScopedFILE;
     34 
     35 namespace visitedlink {
     36 
     37 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
     38 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
     39 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
     40 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
     41 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
     42 
     43 const int32 VisitedLinkMaster::kFileCurrentVersion = 3;
     44 
     45 // the signature at the beginning of the URL table = "VLnk" (visited links)
     46 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
     47 const size_t VisitedLinkMaster::kFileHeaderSize =
     48     kFileHeaderSaltOffset + LINK_SALT_LENGTH;
     49 
     50 // This value should also be the same as the smallest size in the lookup
     51 // table in NewTableSizeForCount (prime number).
     52 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
     53 
     54 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
     55 
     56 namespace {
     57 
     58 // Fills the given salt structure with some quasi-random values
     59 // It is not necessary to generate a cryptographically strong random string,
     60 // only that it be reasonably different for different users.
     61 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
     62   DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
     63   uint64 randval = base::RandUint64();
     64   memcpy(salt, &randval, 8);
     65 }
     66 
     67 // Opens file on a background thread to not block UI thread.
     68 void AsyncOpen(FILE** file, const base::FilePath& filename) {
     69   *file = base::OpenFile(filename, "wb+");
     70   DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value();
     71 }
     72 
     73 // Returns true if the write was complete.
     74 static bool WriteToFile(FILE* file,
     75                         off_t offset,
     76                         const void* data,
     77                         size_t data_len) {
     78   if (fseek(file, offset, SEEK_SET) != 0)
     79     return false;  // Don't write to an invalid part of the file.
     80 
     81   size_t num_written = fwrite(data, 1, data_len, file);
     82 
     83   // The write may not make it to the kernel (stdlib may buffer the write)
     84   // until the next fseek/fclose call.  If we crash, it's easy for our used
     85   // item count to be out of sync with the number of hashes we write.
     86   // Protect against this by calling fflush.
     87   int ret = fflush(file);
     88   DCHECK_EQ(0, ret);
     89   return num_written == data_len;
     90 }
     91 
     92 // This task executes on a background thread and executes a write. This
     93 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE
     94 // is used because file may still not be opened by the time of scheduling
     95 // the task for execution.
     96 void AsyncWrite(FILE** file, int32 offset, const std::string& data) {
     97   if (*file)
     98     WriteToFile(*file, offset, data.data(), data.size());
     99 }
    100 
    101 // Truncates the file to the current position asynchronously on a background
    102 // thread. Double pointer to FILE is used because file may still not be opened
    103 // by the time of scheduling the task for execution.
    104 void AsyncTruncate(FILE** file) {
    105   if (*file)
    106     base::IgnoreResult(base::TruncateFile(*file));
    107 }
    108 
    109 // Closes the file on a background thread and releases memory used for storage
    110 // of FILE* value. Double pointer to FILE is used because file may still not
    111 // be opened by the time of scheduling the task for execution.
    112 void AsyncClose(FILE** file) {
    113   if (*file)
    114     base::IgnoreResult(fclose(*file));
    115   free(file);
    116 }
    117 
    118 }  // namespace
    119 
    120 // TableBuilder ---------------------------------------------------------------
    121 
    122 // How rebuilding from history works
    123 // ---------------------------------
    124 //
    125 // We mark that we're rebuilding from history by setting the table_builder_
    126 // member in VisitedLinkMaster to the TableBuilder we create. This builder
    127 // will be called on the history thread by the history system for every URL
    128 // in the database.
    129 //
    130 // The builder will store the fingerprints for those URLs, and then marshalls
    131 // back to the main thread where the VisitedLinkMaster will be notified. The
    132 // master then replaces its table with a new table containing the computed
    133 // fingerprints.
    134 //
    135 // The builder must remain active while the history system is using it.
    136 // Sometimes, the master will be deleted before the rebuild is complete, in
    137 // which case it notifies the builder via DisownMaster(). The builder will
    138 // delete itself once rebuilding is complete, and not execute any callback.
    139 class VisitedLinkMaster::TableBuilder
    140     : public VisitedLinkDelegate::URLEnumerator {
    141  public:
    142   TableBuilder(VisitedLinkMaster* master,
    143                const uint8 salt[LINK_SALT_LENGTH]);
    144 
    145   // Called on the main thread when the master is being destroyed. This will
    146   // prevent a crash when the query completes and the master is no longer
    147   // around. We can not actually do anything but mark this fact, since the
    148   // table will be being rebuilt simultaneously on the other thread.
    149   void DisownMaster();
    150 
    151   // VisitedLinkDelegate::URLEnumerator
    152   virtual void OnURL(const GURL& url) OVERRIDE;
    153   virtual void OnComplete(bool succeed) OVERRIDE;
    154 
    155  private:
    156   virtual ~TableBuilder() {}
    157 
    158   // OnComplete mashals to this function on the main thread to do the
    159   // notification.
    160   void OnCompleteMainThread();
    161 
    162   // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
    163   VisitedLinkMaster* master_;
    164 
    165   // Indicates whether the operation has failed or not.
    166   bool success_;
    167 
    168   // Salt for this new table.
    169   uint8 salt_[LINK_SALT_LENGTH];
    170 
    171   // Stores the fingerprints we computed on the background thread.
    172   VisitedLinkCommon::Fingerprints fingerprints_;
    173 
    174   DISALLOW_COPY_AND_ASSIGN(TableBuilder);
    175 };
    176 
    177 // VisitedLinkMaster ----------------------------------------------------------
    178 
    179 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context,
    180                                      VisitedLinkDelegate* delegate,
    181                                      bool persist_to_disk)
    182     : browser_context_(browser_context),
    183       delegate_(delegate),
    184       listener_(new VisitedLinkEventListener(this, browser_context)),
    185       persist_to_disk_(persist_to_disk) {
    186   InitMembers();
    187 }
    188 
    189 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
    190                                      VisitedLinkDelegate* delegate,
    191                                      bool persist_to_disk,
    192                                      bool suppress_rebuild,
    193                                      const base::FilePath& filename,
    194                                      int32 default_table_size)
    195     : browser_context_(NULL),
    196       delegate_(delegate),
    197       persist_to_disk_(persist_to_disk) {
    198   listener_.reset(listener);
    199   DCHECK(listener_.get());
    200   InitMembers();
    201 
    202   database_name_override_ = filename;
    203   table_size_override_ = default_table_size;
    204   suppress_rebuild_ = suppress_rebuild;
    205 }
    206 
    207 VisitedLinkMaster::~VisitedLinkMaster() {
    208   if (table_builder_.get()) {
    209     // Prevent the table builder from calling us back now that we're being
    210     // destroyed. Note that we DON'T delete the object, since the history
    211     // system is still writing into it. When that is complete, the table
    212     // builder will destroy itself when it finds we are gone.
    213     table_builder_->DisownMaster();
    214   }
    215   FreeURLTable();
    216   // FreeURLTable() will schedule closing of the file and deletion of |file_|.
    217   // So nothing should be done here.
    218 }
    219 
    220 void VisitedLinkMaster::InitMembers() {
    221   file_ = NULL;
    222   shared_memory_ = NULL;
    223   shared_memory_serial_ = 0;
    224   used_items_ = 0;
    225   table_size_override_ = 0;
    226   suppress_rebuild_ = false;
    227   sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken();
    228 
    229 #ifndef NDEBUG
    230   posted_asynchronous_operation_ = false;
    231 #endif
    232 }
    233 
    234 bool VisitedLinkMaster::Init() {
    235   // We probably shouldn't be loading this from the UI thread,
    236   // but it does need to happen early on in startup.
    237   //   http://code.google.com/p/chromium/issues/detail?id=24163
    238   base::ThreadRestrictions::ScopedAllowIO allow_io;
    239 
    240   if (persist_to_disk_) {
    241     if (InitFromFile())
    242       return true;
    243   }
    244   return InitFromScratch(suppress_rebuild_);
    245 }
    246 
    247 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
    248   // Extra check that we are not incognito. This should not happen.
    249   // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
    250   // removed from BrowserContext.
    251   if (browser_context_ && browser_context_->IsOffTheRecord()) {
    252     NOTREACHED();
    253     return null_hash_;
    254   }
    255 
    256   if (!url.is_valid())
    257     return null_hash_;  // Don't add invalid URLs.
    258 
    259   Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
    260                                                   url.spec().size(),
    261                                                   salt_);
    262   if (table_builder_.get()) {
    263     // If we have a pending delete for this fingerprint, cancel it.
    264     std::set<Fingerprint>::iterator found =
    265         deleted_since_rebuild_.find(fingerprint);
    266     if (found != deleted_since_rebuild_.end())
    267         deleted_since_rebuild_.erase(found);
    268 
    269     // A rebuild is in progress, save this addition in the temporary list so
    270     // it can be added once rebuild is complete.
    271     added_since_rebuild_.insert(fingerprint);
    272   }
    273 
    274   // If the table is "full", we don't add URLs and just drop them on the floor.
    275   // This can happen if we get thousands of new URLs and something causes
    276   // the table resizing to fail. This check prevents a hang in that case. Note
    277   // that this is *not* the resize limit, this is just a sanity check.
    278   if (used_items_ / 8 > table_length_ / 10)
    279     return null_hash_;  // Table is more than 80% full.
    280 
    281   return AddFingerprint(fingerprint, true);
    282 }
    283 
    284 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here,
    285                                    const base::Closure& task) {
    286   DCHECK(persist_to_disk_);
    287   BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_,
    288                                                             from_here, task);
    289 }
    290 
    291 void VisitedLinkMaster::AddURL(const GURL& url) {
    292   Hash index = TryToAddURL(url);
    293   if (!table_builder_.get() && index != null_hash_) {
    294     // Not rebuilding, so we want to keep the file on disk up-to-date.
    295     if (persist_to_disk_) {
    296       WriteUsedItemCountToFile();
    297       WriteHashRangeToFile(index, index);
    298     }
    299     ResizeTableIfNecessary();
    300   }
    301 }
    302 
    303 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
    304   for (std::vector<GURL>::const_iterator i = url.begin();
    305        i != url.end(); ++i) {
    306     Hash index = TryToAddURL(*i);
    307     if (!table_builder_.get() && index != null_hash_)
    308       ResizeTableIfNecessary();
    309   }
    310 
    311   // Keeps the file on disk up-to-date.
    312   if (!table_builder_.get() && persist_to_disk_)
    313     WriteFullTable();
    314 }
    315 
    316 void VisitedLinkMaster::DeleteAllURLs() {
    317   // Any pending modifications are invalid.
    318   added_since_rebuild_.clear();
    319   deleted_since_rebuild_.clear();
    320 
    321   // Clear the hash table.
    322   used_items_ = 0;
    323   memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
    324 
    325   // Resize it if it is now too empty. Resize may write the new table out for
    326   // us, otherwise, schedule writing the new table to disk ourselves.
    327   if (!ResizeTableIfNecessary() && persist_to_disk_)
    328     WriteFullTable();
    329 
    330   listener_->Reset();
    331 }
    332 
    333 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() {
    334   return delegate_;
    335 }
    336 
    337 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) {
    338   if (!urls->HasNextURL())
    339     return;
    340 
    341   listener_->Reset();
    342 
    343   if (table_builder_.get()) {
    344     // A rebuild is in progress, save this deletion in the temporary list so
    345     // it can be added once rebuild is complete.
    346     while (urls->HasNextURL()) {
    347       const GURL& url(urls->NextURL());
    348       if (!url.is_valid())
    349         continue;
    350 
    351       Fingerprint fingerprint =
    352           ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_);
    353       deleted_since_rebuild_.insert(fingerprint);
    354 
    355       // If the URL was just added and now we're deleting it, it may be in the
    356       // list of things added since the last rebuild. Delete it from that list.
    357       std::set<Fingerprint>::iterator found =
    358           added_since_rebuild_.find(fingerprint);
    359       if (found != added_since_rebuild_.end())
    360         added_since_rebuild_.erase(found);
    361 
    362       // Delete the URLs from the in-memory table, but don't bother writing
    363       // to disk since it will be replaced soon.
    364       DeleteFingerprint(fingerprint, false);
    365     }
    366     return;
    367   }
    368 
    369   // Compute the deleted URLs' fingerprints and delete them
    370   std::set<Fingerprint> deleted_fingerprints;
    371   while (urls->HasNextURL()) {
    372     const GURL& url(urls->NextURL());
    373     if (!url.is_valid())
    374       continue;
    375     deleted_fingerprints.insert(
    376         ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_));
    377   }
    378   DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
    379 }
    380 
    381 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
    382 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
    383     Fingerprint fingerprint,
    384     bool send_notifications) {
    385   if (!hash_table_ || table_length_ == 0) {
    386     NOTREACHED();  // Not initialized.
    387     return null_hash_;
    388   }
    389 
    390   Hash cur_hash = HashFingerprint(fingerprint);
    391   Hash first_hash = cur_hash;
    392   while (true) {
    393     Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
    394     if (cur_fingerprint == fingerprint)
    395       return null_hash_;  // This fingerprint is already in there, do nothing.
    396 
    397     if (cur_fingerprint == null_fingerprint_) {
    398       // End of probe sequence found, insert here.
    399       hash_table_[cur_hash] = fingerprint;
    400       used_items_++;
    401       // If allowed, notify listener that a new visited link was added.
    402       if (send_notifications)
    403         listener_->Add(fingerprint);
    404       return cur_hash;
    405     }
    406 
    407     // Advance in the probe sequence.
    408     cur_hash = IncrementHash(cur_hash);
    409     if (cur_hash == first_hash) {
    410       // This means that we've wrapped around and are about to go into an
    411       // infinite loop. Something was wrong with the hashtable resizing
    412       // logic, so stop here.
    413       NOTREACHED();
    414       return null_hash_;
    415     }
    416   }
    417 }
    418 
    419 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
    420     const std::set<Fingerprint>& fingerprints) {
    421   bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
    422 
    423   // Delete the URLs from the table.
    424   for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
    425        i != fingerprints.end(); ++i)
    426     DeleteFingerprint(*i, !bulk_write);
    427 
    428   // These deleted fingerprints may make us shrink the table.
    429   if (ResizeTableIfNecessary())
    430     return;  // The resize function wrote the new table to disk for us.
    431 
    432   // Nobody wrote this out for us, write the full file to disk.
    433   if (bulk_write && persist_to_disk_)
    434     WriteFullTable();
    435 }
    436 
    437 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
    438                                           bool update_file) {
    439   if (!hash_table_ || table_length_ == 0) {
    440     NOTREACHED();  // Not initialized.
    441     return false;
    442   }
    443   if (!IsVisited(fingerprint))
    444     return false;  // Not in the database to delete.
    445 
    446   // First update the header used count.
    447   used_items_--;
    448   if (update_file && persist_to_disk_)
    449     WriteUsedItemCountToFile();
    450 
    451   Hash deleted_hash = HashFingerprint(fingerprint);
    452 
    453   // Find the range of "stuff" in the hash table that is adjacent to this
    454   // fingerprint. These are things that could be affected by the change in
    455   // the hash table. Since we use linear probing, anything after the deleted
    456   // item up until an empty item could be affected.
    457   Hash end_range = deleted_hash;
    458   while (true) {
    459     Hash next_hash = IncrementHash(end_range);
    460     if (next_hash == deleted_hash)
    461       break;  // We wrapped around and the whole table is full.
    462     if (!hash_table_[next_hash])
    463       break;  // Found the last spot.
    464     end_range = next_hash;
    465   }
    466 
    467   // We could get all fancy and move the affected fingerprints around, but
    468   // instead we just remove them all and re-add them (minus our deleted one).
    469   // This will mean there's a small window of time where the affected links
    470   // won't be marked visited.
    471   base::StackVector<Fingerprint, 32> shuffled_fingerprints;
    472   Hash stop_loop = IncrementHash(end_range);  // The end range is inclusive.
    473   for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
    474     if (hash_table_[i] != fingerprint) {
    475       // Don't save the one we're deleting!
    476       shuffled_fingerprints->push_back(hash_table_[i]);
    477 
    478       // This will balance the increment of this value in AddFingerprint below
    479       // so there is no net change.
    480       used_items_--;
    481     }
    482     hash_table_[i] = null_fingerprint_;
    483   }
    484 
    485   if (!shuffled_fingerprints->empty()) {
    486     // Need to add the new items back.
    487     for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
    488       AddFingerprint(shuffled_fingerprints[i], false);
    489   }
    490 
    491   // Write the affected range to disk [deleted_hash, end_range].
    492   if (update_file && persist_to_disk_)
    493     WriteHashRangeToFile(deleted_hash, end_range);
    494 
    495   return true;
    496 }
    497 
    498 void VisitedLinkMaster::WriteFullTable() {
    499   // This function can get called when the file is open, for example, when we
    500   // resize the table. We must handle this case and not try to reopen the file,
    501   // since there may be write operations pending on the file I/O thread.
    502   //
    503   // Note that once we start writing, we do not delete on error. This means
    504   // there can be a partial file, but the short file will be detected next time
    505   // we start, and will be replaced.
    506   //
    507   // This might possibly get corrupted if we crash in the middle of writing.
    508   // We should pick up the most common types of these failures when we notice
    509   // that the file size is different when we load it back in, and then we will
    510   // regenerate the table.
    511   DCHECK(persist_to_disk_);
    512 
    513   if (!file_) {
    514     file_ = static_cast<FILE**>(calloc(1, sizeof(*file_)));
    515     base::FilePath filename;
    516     GetDatabaseFileName(&filename);
    517     PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename));
    518   }
    519 
    520   // Write the new header.
    521   int32 header[4];
    522   header[0] = kFileSignature;
    523   header[1] = kFileCurrentVersion;
    524   header[2] = table_length_;
    525   header[3] = used_items_;
    526   WriteToFile(file_, 0, header, sizeof(header));
    527   WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
    528 
    529   // Write the hash data.
    530   WriteToFile(file_, kFileHeaderSize,
    531               hash_table_, table_length_ * sizeof(Fingerprint));
    532 
    533   // The hash table may have shrunk, so make sure this is the end.
    534   PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_));
    535 }
    536 
    537 bool VisitedLinkMaster::InitFromFile() {
    538   DCHECK(file_ == NULL);
    539   DCHECK(persist_to_disk_);
    540 
    541   base::FilePath filename;
    542   GetDatabaseFileName(&filename);
    543   ScopedFILE file_closer(base::OpenFile(filename, "rb+"));
    544   if (!file_closer.get())
    545     return false;
    546 
    547   int32 num_entries, used_count;
    548   if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
    549     return false;  // Header isn't valid.
    550 
    551   // Allocate and read the table.
    552   if (!CreateURLTable(num_entries, false))
    553     return false;
    554   if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
    555                     hash_table_, num_entries * sizeof(Fingerprint))) {
    556     FreeURLTable();
    557     return false;
    558   }
    559   used_items_ = used_count;
    560 
    561 #ifndef NDEBUG
    562   DebugValidate();
    563 #endif
    564 
    565   file_ = static_cast<FILE**>(malloc(sizeof(*file_)));
    566   *file_ = file_closer.release();
    567   return true;
    568 }
    569 
    570 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
    571   int32 table_size = kDefaultTableSize;
    572   if (table_size_override_)
    573     table_size = table_size_override_;
    574 
    575   // The salt must be generated before the table so that it can be copied to
    576   // the shared memory.
    577   GenerateSalt(salt_);
    578   if (!CreateURLTable(table_size, true))
    579     return false;
    580 
    581 #ifndef NDEBUG
    582   DebugValidate();
    583 #endif
    584 
    585   if (suppress_rebuild && persist_to_disk_) {
    586     // When we disallow rebuilds (normally just unit tests), just use the
    587     // current empty table.
    588     WriteFullTable();
    589     return true;
    590   }
    591 
    592   // This will build the table from history. On the first run, history will
    593   // be empty, so this will be correct. This will also write the new table
    594   // to disk. We don't want to save explicitly here, since the rebuild may
    595   // not complete, leaving us with an empty but valid visited link database.
    596   // In the future, we won't know we need to try rebuilding again.
    597   return RebuildTableFromDelegate();
    598 }
    599 
    600 bool VisitedLinkMaster::ReadFileHeader(FILE* file,
    601                                        int32* num_entries,
    602                                        int32* used_count,
    603                                        uint8 salt[LINK_SALT_LENGTH]) {
    604   DCHECK(persist_to_disk_);
    605 
    606   // Get file size.
    607   // Note that there is no need to seek back to the original location in the
    608   // file since ReadFromFile() [which is the next call accessing the file]
    609   // seeks before reading.
    610   if (fseek(file, 0, SEEK_END) == -1)
    611     return false;
    612   size_t file_size = ftell(file);
    613 
    614   if (file_size <= kFileHeaderSize)
    615     return false;
    616 
    617   uint8 header[kFileHeaderSize];
    618   if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
    619     return false;
    620 
    621   // Verify the signature.
    622   int32 signature;
    623   memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
    624   if (signature != kFileSignature)
    625     return false;
    626 
    627   // Verify the version is up-to-date. As with other read errors, a version
    628   // mistmatch will trigger a rebuild of the database from history, which will
    629   // have the effect of migrating the database.
    630   int32 version;
    631   memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
    632   if (version != kFileCurrentVersion)
    633     return false;  // Bad version.
    634 
    635   // Read the table size and make sure it matches the file size.
    636   memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
    637   if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
    638     return false;  // Bad size.
    639 
    640   // Read the used item count.
    641   memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
    642   if (*used_count > *num_entries)
    643     return false;  // Bad used item count;
    644 
    645   // Read the salt.
    646   memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
    647 
    648   // This file looks OK from the header's perspective.
    649   return true;
    650 }
    651 
    652 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) {
    653   if (!database_name_override_.empty()) {
    654     // use this filename, the directory must exist
    655     *filename = database_name_override_;
    656     return true;
    657   }
    658 
    659   if (!browser_context_ || browser_context_->GetPath().empty())
    660     return false;
    661 
    662   base::FilePath profile_dir = browser_context_->GetPath();
    663   *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
    664   return true;
    665 }
    666 
    667 // Initializes the shared memory structure. The salt should already be filled
    668 // in so that it can be written to the shared memory
    669 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
    670   // The table is the size of the table followed by the entries.
    671   uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
    672 
    673   // Create the shared memory object.
    674   shared_memory_ = new base::SharedMemory();
    675   if (!shared_memory_)
    676     return false;
    677 
    678   if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) {
    679     delete shared_memory_;
    680     shared_memory_ = NULL;
    681     return false;
    682   }
    683 
    684   if (init_to_empty) {
    685     memset(shared_memory_->memory(), 0, alloc_size);
    686     used_items_ = 0;
    687   }
    688   table_length_ = num_entries;
    689 
    690   // Save the header for other processes to read.
    691   SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
    692   header->length = table_length_;
    693   memcpy(header->salt, salt_, LINK_SALT_LENGTH);
    694 
    695   // Our table pointer is just the data immediately following the size.
    696   hash_table_ = reinterpret_cast<Fingerprint*>(
    697       static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
    698 
    699   return true;
    700 }
    701 
    702 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
    703   base::SharedMemory *old_shared_memory = shared_memory_;
    704   Fingerprint* old_hash_table = hash_table_;
    705   int32 old_table_length = table_length_;
    706   if (!CreateURLTable(num_entries, true)) {
    707     // Try to put back the old state.
    708     shared_memory_ = old_shared_memory;
    709     hash_table_ = old_hash_table;
    710     table_length_ = old_table_length;
    711     return false;
    712   }
    713 
    714 #ifndef NDEBUG
    715   DebugValidate();
    716 #endif
    717 
    718   return true;
    719 }
    720 
    721 void VisitedLinkMaster::FreeURLTable() {
    722   if (shared_memory_) {
    723     delete shared_memory_;
    724     shared_memory_ = NULL;
    725   }
    726   if (!persist_to_disk_ || !file_)
    727     return;
    728   PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_));
    729   // AsyncClose() will close the file and free the memory pointed by |file_|.
    730   file_ = NULL;
    731 }
    732 
    733 bool VisitedLinkMaster::ResizeTableIfNecessary() {
    734   DCHECK(table_length_ > 0) << "Must have a table";
    735 
    736   // Load limits for good performance/space. We are pretty conservative about
    737   // keeping the table not very full. This is because we use linear probing
    738   // which increases the likelihood of clumps of entries which will reduce
    739   // performance.
    740   const float max_table_load = 0.5f;  // Grow when we're > this full.
    741   const float min_table_load = 0.2f;  // Shrink when we're < this full.
    742 
    743   float load = ComputeTableLoad();
    744   if (load < max_table_load &&
    745       (table_length_ <= static_cast<float>(kDefaultTableSize) ||
    746        load > min_table_load))
    747     return false;
    748 
    749   // Table needs to grow or shrink.
    750   int new_size = NewTableSizeForCount(used_items_);
    751   DCHECK(new_size > used_items_);
    752   DCHECK(load <= min_table_load || new_size > table_length_);
    753   ResizeTable(new_size);
    754   return true;
    755 }
    756 
    757 void VisitedLinkMaster::ResizeTable(int32 new_size) {
    758   DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
    759   shared_memory_serial_++;
    760 
    761 #ifndef NDEBUG
    762   DebugValidate();
    763 #endif
    764 
    765   base::SharedMemory* old_shared_memory = shared_memory_;
    766   Fingerprint* old_hash_table = hash_table_;
    767   int32 old_table_length = table_length_;
    768   if (!BeginReplaceURLTable(new_size))
    769     return;
    770 
    771   // Now we have two tables, our local copy which is the old one, and the new
    772   // one loaded into this object where we need to copy the data.
    773   for (int32 i = 0; i < old_table_length; i++) {
    774     Fingerprint cur = old_hash_table[i];
    775     if (cur)
    776       AddFingerprint(cur, false);
    777   }
    778 
    779   // On error unmapping, just forget about it since we can't do anything
    780   // else to release it.
    781   delete old_shared_memory;
    782 
    783   // Send an update notification to all child processes so they read the new
    784   // table.
    785   listener_->NewTable(shared_memory_);
    786 
    787 #ifndef NDEBUG
    788   DebugValidate();
    789 #endif
    790 
    791   // The new table needs to be written to disk.
    792   if (persist_to_disk_)
    793     WriteFullTable();
    794 }
    795 
    796 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
    797   // These table sizes are selected to be the maximum prime number less than
    798   // a "convenient" multiple of 1K.
    799   static const int table_sizes[] = {
    800       16381,    // 16K  = 16384   <- don't shrink below this table size
    801                 //                   (should be == default_table_size)
    802       32767,    // 32K  = 32768
    803       65521,    // 64K  = 65536
    804       130051,   // 128K = 131072
    805       262127,   // 256K = 262144
    806       524269,   // 512K = 524288
    807       1048549,  // 1M   = 1048576
    808       2097143,  // 2M   = 2097152
    809       4194301,  // 4M   = 4194304
    810       8388571,  // 8M   = 8388608
    811       16777199,  // 16M  = 16777216
    812       33554347};  // 32M  = 33554432
    813 
    814   // Try to leave the table 33% full.
    815   int desired = item_count * 3;
    816 
    817   // Find the closest prime.
    818   for (size_t i = 0; i < arraysize(table_sizes); i ++) {
    819     if (table_sizes[i] > desired)
    820       return table_sizes[i];
    821   }
    822 
    823   // Growing very big, just approximate a "good" number, not growing as much
    824   // as normal.
    825   return item_count * 2 - 1;
    826 }
    827 
    828 // See the TableBuilder definition in the header file for how this works.
    829 bool VisitedLinkMaster::RebuildTableFromDelegate() {
    830   DCHECK(!table_builder_.get());
    831 
    832   // TODO(brettw) make sure we have reasonable salt!
    833   table_builder_ = new TableBuilder(this, salt_);
    834   delegate_->RebuildTable(table_builder_);
    835   return true;
    836 }
    837 
    838 // See the TableBuilder declaration above for how this works.
    839 void VisitedLinkMaster::OnTableRebuildComplete(
    840     bool success,
    841     const std::vector<Fingerprint>& fingerprints) {
    842   if (success) {
    843     // Replace the old table with a new blank one.
    844     shared_memory_serial_++;
    845 
    846     // We are responsible for freeing it AFTER it has been replaced if
    847     // replacement succeeds.
    848     base::SharedMemory* old_shared_memory = shared_memory_;
    849 
    850     int new_table_size = NewTableSizeForCount(
    851         static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
    852     if (BeginReplaceURLTable(new_table_size)) {
    853       // Free the old table.
    854       delete old_shared_memory;
    855 
    856       // Add the stored fingerprints to the hash table.
    857       for (size_t i = 0; i < fingerprints.size(); i++)
    858         AddFingerprint(fingerprints[i], false);
    859 
    860       // Also add anything that was added while we were asynchronously
    861       // generating the new table.
    862       for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
    863            i != added_since_rebuild_.end(); ++i)
    864         AddFingerprint(*i, false);
    865       added_since_rebuild_.clear();
    866 
    867       // Now handle deletions.
    868       DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
    869       deleted_since_rebuild_.clear();
    870 
    871       // Send an update notification to all child processes.
    872       listener_->NewTable(shared_memory_);
    873 
    874       if (persist_to_disk_)
    875         WriteFullTable();
    876     }
    877   }
    878   table_builder_ = NULL;  // Will release our reference to the builder.
    879 
    880   // Notify the unit test that the rebuild is complete (will be NULL in prod.)
    881   if (!rebuild_complete_task_.is_null()) {
    882     rebuild_complete_task_.Run();
    883     rebuild_complete_task_.Reset();
    884   }
    885 }
    886 
    887 void VisitedLinkMaster::WriteToFile(FILE** file,
    888                                     off_t offset,
    889                                     void* data,
    890                                     int32 data_size) {
    891   DCHECK(persist_to_disk_);
    892 #ifndef NDEBUG
    893   posted_asynchronous_operation_ = true;
    894 #endif
    895   PostIOTask(FROM_HERE,
    896       base::Bind(&AsyncWrite, file, offset,
    897                  std::string(static_cast<const char*>(data), data_size)));
    898 }
    899 
    900 void VisitedLinkMaster::WriteUsedItemCountToFile() {
    901   DCHECK(persist_to_disk_);
    902   if (!file_)
    903     return;  // See comment on the file_ variable for why this might happen.
    904   WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
    905 }
    906 
    907 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
    908   DCHECK(persist_to_disk_);
    909 
    910   if (!file_)
    911     return;  // See comment on the file_ variable for why this might happen.
    912   if (last_hash < first_hash) {
    913     // Handle wraparound at 0. This first write is first_hash->EOF
    914     WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
    915                 &hash_table_[first_hash],
    916                 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
    917 
    918     // Now do 0->last_lash.
    919     WriteToFile(file_, kFileHeaderSize, hash_table_,
    920                 (last_hash + 1) * sizeof(Fingerprint));
    921   } else {
    922     // Normal case, just write the range.
    923     WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
    924                 &hash_table_[first_hash],
    925                 (last_hash - first_hash + 1) * sizeof(Fingerprint));
    926   }
    927 }
    928 
    929 bool VisitedLinkMaster::ReadFromFile(FILE* file,
    930                                      off_t offset,
    931                                      void* data,
    932                                      size_t data_size) {
    933   DCHECK(persist_to_disk_);
    934 #ifndef NDEBUG
    935   // Since this function is synchronous, we require that no asynchronous
    936   // operations could possibly be pending.
    937   DCHECK(!posted_asynchronous_operation_);
    938 #endif
    939 
    940   if (fseek(file, offset, SEEK_SET) != 0)
    941     return false;
    942 
    943   size_t num_read = fread(data, 1, data_size, file);
    944   return num_read == data_size;
    945 }
    946 
    947 // VisitedLinkTableBuilder ----------------------------------------------------
    948 
    949 VisitedLinkMaster::TableBuilder::TableBuilder(
    950     VisitedLinkMaster* master,
    951     const uint8 salt[LINK_SALT_LENGTH])
    952     : master_(master),
    953       success_(true) {
    954   fingerprints_.reserve(4096);
    955   memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8));
    956 }
    957 
    958 // TODO(brettw): Do we want to try to cancel the request if this happens? It
    959 // could delay shutdown if there are a lot of URLs.
    960 void VisitedLinkMaster::TableBuilder::DisownMaster() {
    961   master_ = NULL;
    962 }
    963 
    964 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
    965   if (!url.is_empty()) {
    966     fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
    967         url.spec().data(), url.spec().length(), salt_));
    968   }
    969 }
    970 
    971 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
    972   success_ = success;
    973   DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
    974 
    975   // Marshal to the main thread to notify the VisitedLinkMaster that the
    976   // rebuild is complete.
    977   BrowserThread::PostTask(
    978       BrowserThread::UI, FROM_HERE,
    979       base::Bind(&TableBuilder::OnCompleteMainThread, this));
    980 }
    981 
    982 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
    983   if (master_)
    984     master_->OnTableRebuildComplete(success_, fingerprints_);
    985 }
    986 
    987 }  // namespace visitedlink
    988