Home | History | Annotate | Download | only in browser
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/visitedlink/browser/visitedlink_master.h"
      6 
      7 #if defined(OS_WIN)
      8 #include <windows.h>
      9 #include <io.h>
     10 #include <shlobj.h>
     11 #endif  // defined(OS_WIN)
     12 #include <stdio.h>
     13 
     14 #include <algorithm>
     15 
     16 #include "base/bind.h"
     17 #include "base/bind_helpers.h"
     18 #include "base/containers/stack_container.h"
     19 #include "base/file_util.h"
     20 #include "base/logging.h"
     21 #include "base/message_loop/message_loop.h"
     22 #include "base/path_service.h"
     23 #include "base/rand_util.h"
     24 #include "base/strings/string_util.h"
     25 #include "base/threading/thread_restrictions.h"
     26 #include "components/visitedlink/browser/visitedlink_delegate.h"
     27 #include "components/visitedlink/browser/visitedlink_event_listener.h"
     28 #include "content/public/browser/browser_context.h"
     29 #include "content/public/browser/browser_thread.h"
     30 #include "url/gurl.h"
     31 
     32 using content::BrowserThread;
     33 using file_util::ScopedFILE;
     34 using file_util::OpenFile;
     35 using file_util::TruncateFile;
     36 
     37 namespace visitedlink {
     38 
     39 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
     40 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
     41 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
     42 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
     43 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
     44 
     45 const int32 VisitedLinkMaster::kFileCurrentVersion = 3;
     46 
     47 // the signature at the beginning of the URL table = "VLnk" (visited links)
     48 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
     49 const size_t VisitedLinkMaster::kFileHeaderSize =
     50     kFileHeaderSaltOffset + LINK_SALT_LENGTH;
     51 
     52 // This value should also be the same as the smallest size in the lookup
     53 // table in NewTableSizeForCount (prime number).
     54 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
     55 
     56 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
     57 
     58 namespace {
     59 
     60 // Fills the given salt structure with some quasi-random values
     61 // It is not necessary to generate a cryptographically strong random string,
     62 // only that it be reasonably different for different users.
     63 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
     64   DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
     65   uint64 randval = base::RandUint64();
     66   memcpy(salt, &randval, 8);
     67 }
     68 
     69 // Opens file on a background thread to not block UI thread.
     70 void AsyncOpen(FILE** file, const base::FilePath& filename) {
     71   *file = OpenFile(filename, "wb+");
     72   DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value();
     73 }
     74 
     75 // Returns true if the write was complete.
     76 static bool WriteToFile(FILE* file,
     77                         off_t offset,
     78                         const void* data,
     79                         size_t data_len) {
     80   if (fseek(file, offset, SEEK_SET) != 0)
     81     return false;  // Don't write to an invalid part of the file.
     82 
     83   size_t num_written = fwrite(data, 1, data_len, file);
     84 
     85   // The write may not make it to the kernel (stdlib may buffer the write)
     86   // until the next fseek/fclose call.  If we crash, it's easy for our used
     87   // item count to be out of sync with the number of hashes we write.
     88   // Protect against this by calling fflush.
     89   int ret = fflush(file);
     90   DCHECK_EQ(0, ret);
     91   return num_written == data_len;
     92 }
     93 
     94 // This task executes on a background thread and executes a write. This
     95 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE
     96 // is used because file may still not be opened by the time of scheduling
     97 // the task for execution.
     98 void AsyncWrite(FILE** file, int32 offset, const std::string& data) {
     99   if (*file)
    100     WriteToFile(*file, offset, data.data(), data.size());
    101 }
    102 
    103 // Truncates the file to the current position asynchronously on a background
    104 // thread. Double pointer to FILE is used because file may still not be opened
    105 // by the time of scheduling the task for execution.
    106 void AsyncTruncate(FILE** file) {
    107   if (*file)
    108     base::IgnoreResult(TruncateFile(*file));
    109 }
    110 
    111 // Closes the file on a background thread and releases memory used for storage
    112 // of FILE* value. Double pointer to FILE is used because file may still not
    113 // be opened by the time of scheduling the task for execution.
    114 void AsyncClose(FILE** file) {
    115   if (*file)
    116     base::IgnoreResult(fclose(*file));
    117   free(file);
    118 }
    119 
    120 }  // namespace
    121 
    122 // TableBuilder ---------------------------------------------------------------
    123 
    124 // How rebuilding from history works
    125 // ---------------------------------
    126 //
    127 // We mark that we're rebuilding from history by setting the table_builder_
    128 // member in VisitedLinkMaster to the TableBuilder we create. This builder
    129 // will be called on the history thread by the history system for every URL
    130 // in the database.
    131 //
    132 // The builder will store the fingerprints for those URLs, and then marshalls
    133 // back to the main thread where the VisitedLinkMaster will be notified. The
    134 // master then replaces its table with a new table containing the computed
    135 // fingerprints.
    136 //
    137 // The builder must remain active while the history system is using it.
    138 // Sometimes, the master will be deleted before the rebuild is complete, in
    139 // which case it notifies the builder via DisownMaster(). The builder will
    140 // delete itself once rebuilding is complete, and not execute any callback.
    141 class VisitedLinkMaster::TableBuilder
    142     : public VisitedLinkDelegate::URLEnumerator {
    143  public:
    144   TableBuilder(VisitedLinkMaster* master,
    145                const uint8 salt[LINK_SALT_LENGTH]);
    146 
    147   // Called on the main thread when the master is being destroyed. This will
    148   // prevent a crash when the query completes and the master is no longer
    149   // around. We can not actually do anything but mark this fact, since the
    150   // table will be being rebuilt simultaneously on the other thread.
    151   void DisownMaster();
    152 
    153   // VisitedLinkDelegate::URLEnumerator
    154   virtual void OnURL(const GURL& url) OVERRIDE;
    155   virtual void OnComplete(bool succeed) OVERRIDE;
    156 
    157  private:
    158   virtual ~TableBuilder() {}
    159 
    160   // OnComplete mashals to this function on the main thread to do the
    161   // notification.
    162   void OnCompleteMainThread();
    163 
    164   // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
    165   VisitedLinkMaster* master_;
    166 
    167   // Indicates whether the operation has failed or not.
    168   bool success_;
    169 
    170   // Salt for this new table.
    171   uint8 salt_[LINK_SALT_LENGTH];
    172 
    173   // Stores the fingerprints we computed on the background thread.
    174   VisitedLinkCommon::Fingerprints fingerprints_;
    175 
    176   DISALLOW_COPY_AND_ASSIGN(TableBuilder);
    177 };
    178 
    179 // VisitedLinkMaster ----------------------------------------------------------
    180 
    181 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context,
    182                                      VisitedLinkDelegate* delegate,
    183                                      bool persist_to_disk)
    184     : browser_context_(browser_context),
    185       delegate_(delegate),
    186       listener_(new VisitedLinkEventListener(this, browser_context)),
    187       persist_to_disk_(persist_to_disk) {
    188   InitMembers();
    189 }
    190 
    191 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
    192                                      VisitedLinkDelegate* delegate,
    193                                      bool persist_to_disk,
    194                                      bool suppress_rebuild,
    195                                      const base::FilePath& filename,
    196                                      int32 default_table_size)
    197     : browser_context_(NULL),
    198       delegate_(delegate),
    199       persist_to_disk_(persist_to_disk) {
    200   listener_.reset(listener);
    201   DCHECK(listener_.get());
    202   InitMembers();
    203 
    204   database_name_override_ = filename;
    205   table_size_override_ = default_table_size;
    206   suppress_rebuild_ = suppress_rebuild;
    207 }
    208 
    209 VisitedLinkMaster::~VisitedLinkMaster() {
    210   if (table_builder_.get()) {
    211     // Prevent the table builder from calling us back now that we're being
    212     // destroyed. Note that we DON'T delete the object, since the history
    213     // system is still writing into it. When that is complete, the table
    214     // builder will destroy itself when it finds we are gone.
    215     table_builder_->DisownMaster();
    216   }
    217   FreeURLTable();
    218   // FreeURLTable() will schedule closing of the file and deletion of |file_|.
    219   // So nothing should be done here.
    220 }
    221 
    222 void VisitedLinkMaster::InitMembers() {
    223   file_ = NULL;
    224   shared_memory_ = NULL;
    225   shared_memory_serial_ = 0;
    226   used_items_ = 0;
    227   table_size_override_ = 0;
    228   suppress_rebuild_ = false;
    229   sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken();
    230 
    231 #ifndef NDEBUG
    232   posted_asynchronous_operation_ = false;
    233 #endif
    234 }
    235 
    236 bool VisitedLinkMaster::Init() {
    237   // We probably shouldn't be loading this from the UI thread,
    238   // but it does need to happen early on in startup.
    239   //   http://code.google.com/p/chromium/issues/detail?id=24163
    240   base::ThreadRestrictions::ScopedAllowIO allow_io;
    241 
    242   if (persist_to_disk_) {
    243     if (InitFromFile())
    244       return true;
    245   }
    246   return InitFromScratch(suppress_rebuild_);
    247 }
    248 
    249 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
    250   // Extra check that we are not incognito. This should not happen.
    251   // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
    252   // removed from BrowserContext.
    253   if (browser_context_ && browser_context_->IsOffTheRecord()) {
    254     NOTREACHED();
    255     return null_hash_;
    256   }
    257 
    258   if (!url.is_valid())
    259     return null_hash_;  // Don't add invalid URLs.
    260 
    261   Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
    262                                                   url.spec().size(),
    263                                                   salt_);
    264   if (table_builder_.get()) {
    265     // If we have a pending delete for this fingerprint, cancel it.
    266     std::set<Fingerprint>::iterator found =
    267         deleted_since_rebuild_.find(fingerprint);
    268     if (found != deleted_since_rebuild_.end())
    269         deleted_since_rebuild_.erase(found);
    270 
    271     // A rebuild is in progress, save this addition in the temporary list so
    272     // it can be added once rebuild is complete.
    273     added_since_rebuild_.insert(fingerprint);
    274   }
    275 
    276   // If the table is "full", we don't add URLs and just drop them on the floor.
    277   // This can happen if we get thousands of new URLs and something causes
    278   // the table resizing to fail. This check prevents a hang in that case. Note
    279   // that this is *not* the resize limit, this is just a sanity check.
    280   if (used_items_ / 8 > table_length_ / 10)
    281     return null_hash_;  // Table is more than 80% full.
    282 
    283   return AddFingerprint(fingerprint, true);
    284 }
    285 
    286 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here,
    287                                    const base::Closure& task) {
    288   DCHECK(persist_to_disk_);
    289   BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_,
    290                                                             from_here, task);
    291 }
    292 
    293 void VisitedLinkMaster::AddURL(const GURL& url) {
    294   Hash index = TryToAddURL(url);
    295   if (!table_builder_.get() && index != null_hash_) {
    296     // Not rebuilding, so we want to keep the file on disk up-to-date.
    297     if (persist_to_disk_) {
    298       WriteUsedItemCountToFile();
    299       WriteHashRangeToFile(index, index);
    300     }
    301     ResizeTableIfNecessary();
    302   }
    303 }
    304 
    305 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
    306   for (std::vector<GURL>::const_iterator i = url.begin();
    307        i != url.end(); ++i) {
    308     Hash index = TryToAddURL(*i);
    309     if (!table_builder_.get() && index != null_hash_)
    310       ResizeTableIfNecessary();
    311   }
    312 
    313   // Keeps the file on disk up-to-date.
    314   if (!table_builder_.get() && persist_to_disk_)
    315     WriteFullTable();
    316 }
    317 
    318 void VisitedLinkMaster::DeleteAllURLs() {
    319   // Any pending modifications are invalid.
    320   added_since_rebuild_.clear();
    321   deleted_since_rebuild_.clear();
    322 
    323   // Clear the hash table.
    324   used_items_ = 0;
    325   memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
    326 
    327   // Resize it if it is now too empty. Resize may write the new table out for
    328   // us, otherwise, schedule writing the new table to disk ourselves.
    329   if (!ResizeTableIfNecessary() && persist_to_disk_)
    330     WriteFullTable();
    331 
    332   listener_->Reset();
    333 }
    334 
    335 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() {
    336   return delegate_;
    337 }
    338 
    339 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) {
    340   if (!urls->HasNextURL())
    341     return;
    342 
    343   listener_->Reset();
    344 
    345   if (table_builder_.get()) {
    346     // A rebuild is in progress, save this deletion in the temporary list so
    347     // it can be added once rebuild is complete.
    348     while (urls->HasNextURL()) {
    349       const GURL& url(urls->NextURL());
    350       if (!url.is_valid())
    351         continue;
    352 
    353       Fingerprint fingerprint =
    354           ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_);
    355       deleted_since_rebuild_.insert(fingerprint);
    356 
    357       // If the URL was just added and now we're deleting it, it may be in the
    358       // list of things added since the last rebuild. Delete it from that list.
    359       std::set<Fingerprint>::iterator found =
    360           added_since_rebuild_.find(fingerprint);
    361       if (found != added_since_rebuild_.end())
    362         added_since_rebuild_.erase(found);
    363 
    364       // Delete the URLs from the in-memory table, but don't bother writing
    365       // to disk since it will be replaced soon.
    366       DeleteFingerprint(fingerprint, false);
    367     }
    368     return;
    369   }
    370 
    371   // Compute the deleted URLs' fingerprints and delete them
    372   std::set<Fingerprint> deleted_fingerprints;
    373   while (urls->HasNextURL()) {
    374     const GURL& url(urls->NextURL());
    375     if (!url.is_valid())
    376       continue;
    377     deleted_fingerprints.insert(
    378         ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_));
    379   }
    380   DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
    381 }
    382 
    383 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
    384 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
    385     Fingerprint fingerprint,
    386     bool send_notifications) {
    387   if (!hash_table_ || table_length_ == 0) {
    388     NOTREACHED();  // Not initialized.
    389     return null_hash_;
    390   }
    391 
    392   Hash cur_hash = HashFingerprint(fingerprint);
    393   Hash first_hash = cur_hash;
    394   while (true) {
    395     Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
    396     if (cur_fingerprint == fingerprint)
    397       return null_hash_;  // This fingerprint is already in there, do nothing.
    398 
    399     if (cur_fingerprint == null_fingerprint_) {
    400       // End of probe sequence found, insert here.
    401       hash_table_[cur_hash] = fingerprint;
    402       used_items_++;
    403       // If allowed, notify listener that a new visited link was added.
    404       if (send_notifications)
    405         listener_->Add(fingerprint);
    406       return cur_hash;
    407     }
    408 
    409     // Advance in the probe sequence.
    410     cur_hash = IncrementHash(cur_hash);
    411     if (cur_hash == first_hash) {
    412       // This means that we've wrapped around and are about to go into an
    413       // infinite loop. Something was wrong with the hashtable resizing
    414       // logic, so stop here.
    415       NOTREACHED();
    416       return null_hash_;
    417     }
    418   }
    419 }
    420 
    421 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
    422     const std::set<Fingerprint>& fingerprints) {
    423   bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
    424 
    425   // Delete the URLs from the table.
    426   for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
    427        i != fingerprints.end(); ++i)
    428     DeleteFingerprint(*i, !bulk_write);
    429 
    430   // These deleted fingerprints may make us shrink the table.
    431   if (ResizeTableIfNecessary())
    432     return;  // The resize function wrote the new table to disk for us.
    433 
    434   // Nobody wrote this out for us, write the full file to disk.
    435   if (bulk_write && persist_to_disk_)
    436     WriteFullTable();
    437 }
    438 
    439 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
    440                                           bool update_file) {
    441   if (!hash_table_ || table_length_ == 0) {
    442     NOTREACHED();  // Not initialized.
    443     return false;
    444   }
    445   if (!IsVisited(fingerprint))
    446     return false;  // Not in the database to delete.
    447 
    448   // First update the header used count.
    449   used_items_--;
    450   if (update_file && persist_to_disk_)
    451     WriteUsedItemCountToFile();
    452 
    453   Hash deleted_hash = HashFingerprint(fingerprint);
    454 
    455   // Find the range of "stuff" in the hash table that is adjacent to this
    456   // fingerprint. These are things that could be affected by the change in
    457   // the hash table. Since we use linear probing, anything after the deleted
    458   // item up until an empty item could be affected.
    459   Hash end_range = deleted_hash;
    460   while (true) {
    461     Hash next_hash = IncrementHash(end_range);
    462     if (next_hash == deleted_hash)
    463       break;  // We wrapped around and the whole table is full.
    464     if (!hash_table_[next_hash])
    465       break;  // Found the last spot.
    466     end_range = next_hash;
    467   }
    468 
    469   // We could get all fancy and move the affected fingerprints around, but
    470   // instead we just remove them all and re-add them (minus our deleted one).
    471   // This will mean there's a small window of time where the affected links
    472   // won't be marked visited.
    473   base::StackVector<Fingerprint, 32> shuffled_fingerprints;
    474   Hash stop_loop = IncrementHash(end_range);  // The end range is inclusive.
    475   for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
    476     if (hash_table_[i] != fingerprint) {
    477       // Don't save the one we're deleting!
    478       shuffled_fingerprints->push_back(hash_table_[i]);
    479 
    480       // This will balance the increment of this value in AddFingerprint below
    481       // so there is no net change.
    482       used_items_--;
    483     }
    484     hash_table_[i] = null_fingerprint_;
    485   }
    486 
    487   if (!shuffled_fingerprints->empty()) {
    488     // Need to add the new items back.
    489     for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
    490       AddFingerprint(shuffled_fingerprints[i], false);
    491   }
    492 
    493   // Write the affected range to disk [deleted_hash, end_range].
    494   if (update_file && persist_to_disk_)
    495     WriteHashRangeToFile(deleted_hash, end_range);
    496 
    497   return true;
    498 }
    499 
    500 void VisitedLinkMaster::WriteFullTable() {
    501   // This function can get called when the file is open, for example, when we
    502   // resize the table. We must handle this case and not try to reopen the file,
    503   // since there may be write operations pending on the file I/O thread.
    504   //
    505   // Note that once we start writing, we do not delete on error. This means
    506   // there can be a partial file, but the short file will be detected next time
    507   // we start, and will be replaced.
    508   //
    509   // This might possibly get corrupted if we crash in the middle of writing.
    510   // We should pick up the most common types of these failures when we notice
    511   // that the file size is different when we load it back in, and then we will
    512   // regenerate the table.
    513   DCHECK(persist_to_disk_);
    514 
    515   if (!file_) {
    516     file_ = static_cast<FILE**>(calloc(1, sizeof(*file_)));
    517     base::FilePath filename;
    518     GetDatabaseFileName(&filename);
    519     PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename));
    520   }
    521 
    522   // Write the new header.
    523   int32 header[4];
    524   header[0] = kFileSignature;
    525   header[1] = kFileCurrentVersion;
    526   header[2] = table_length_;
    527   header[3] = used_items_;
    528   WriteToFile(file_, 0, header, sizeof(header));
    529   WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
    530 
    531   // Write the hash data.
    532   WriteToFile(file_, kFileHeaderSize,
    533               hash_table_, table_length_ * sizeof(Fingerprint));
    534 
    535   // The hash table may have shrunk, so make sure this is the end.
    536   PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_));
    537 }
    538 
    539 bool VisitedLinkMaster::InitFromFile() {
    540   DCHECK(file_ == NULL);
    541   DCHECK(persist_to_disk_);
    542 
    543   base::FilePath filename;
    544   GetDatabaseFileName(&filename);
    545   ScopedFILE file_closer(OpenFile(filename, "rb+"));
    546   if (!file_closer.get())
    547     return false;
    548 
    549   int32 num_entries, used_count;
    550   if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
    551     return false;  // Header isn't valid.
    552 
    553   // Allocate and read the table.
    554   if (!CreateURLTable(num_entries, false))
    555     return false;
    556   if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
    557                     hash_table_, num_entries * sizeof(Fingerprint))) {
    558     FreeURLTable();
    559     return false;
    560   }
    561   used_items_ = used_count;
    562 
    563 #ifndef NDEBUG
    564   DebugValidate();
    565 #endif
    566 
    567   file_ = static_cast<FILE**>(malloc(sizeof(*file_)));
    568   *file_ = file_closer.release();
    569   return true;
    570 }
    571 
    572 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
    573   int32 table_size = kDefaultTableSize;
    574   if (table_size_override_)
    575     table_size = table_size_override_;
    576 
    577   // The salt must be generated before the table so that it can be copied to
    578   // the shared memory.
    579   GenerateSalt(salt_);
    580   if (!CreateURLTable(table_size, true))
    581     return false;
    582 
    583 #ifndef NDEBUG
    584   DebugValidate();
    585 #endif
    586 
    587   if (suppress_rebuild && persist_to_disk_) {
    588     // When we disallow rebuilds (normally just unit tests), just use the
    589     // current empty table.
    590     WriteFullTable();
    591     return true;
    592   }
    593 
    594   // This will build the table from history. On the first run, history will
    595   // be empty, so this will be correct. This will also write the new table
    596   // to disk. We don't want to save explicitly here, since the rebuild may
    597   // not complete, leaving us with an empty but valid visited link database.
    598   // In the future, we won't know we need to try rebuilding again.
    599   return RebuildTableFromDelegate();
    600 }
    601 
    602 bool VisitedLinkMaster::ReadFileHeader(FILE* file,
    603                                        int32* num_entries,
    604                                        int32* used_count,
    605                                        uint8 salt[LINK_SALT_LENGTH]) {
    606   DCHECK(persist_to_disk_);
    607 
    608   // Get file size.
    609   // Note that there is no need to seek back to the original location in the
    610   // file since ReadFromFile() [which is the next call accessing the file]
    611   // seeks before reading.
    612   if (fseek(file, 0, SEEK_END) == -1)
    613     return false;
    614   size_t file_size = ftell(file);
    615 
    616   if (file_size <= kFileHeaderSize)
    617     return false;
    618 
    619   uint8 header[kFileHeaderSize];
    620   if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
    621     return false;
    622 
    623   // Verify the signature.
    624   int32 signature;
    625   memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
    626   if (signature != kFileSignature)
    627     return false;
    628 
    629   // Verify the version is up-to-date. As with other read errors, a version
    630   // mistmatch will trigger a rebuild of the database from history, which will
    631   // have the effect of migrating the database.
    632   int32 version;
    633   memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
    634   if (version != kFileCurrentVersion)
    635     return false;  // Bad version.
    636 
    637   // Read the table size and make sure it matches the file size.
    638   memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
    639   if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
    640     return false;  // Bad size.
    641 
    642   // Read the used item count.
    643   memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
    644   if (*used_count > *num_entries)
    645     return false;  // Bad used item count;
    646 
    647   // Read the salt.
    648   memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
    649 
    650   // This file looks OK from the header's perspective.
    651   return true;
    652 }
    653 
    654 bool VisitedLinkMaster::GetDatabaseFileName(base::FilePath* filename) {
    655   if (!database_name_override_.empty()) {
    656     // use this filename, the directory must exist
    657     *filename = database_name_override_;
    658     return true;
    659   }
    660 
    661   if (!browser_context_ || browser_context_->GetPath().empty())
    662     return false;
    663 
    664   base::FilePath profile_dir = browser_context_->GetPath();
    665   *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
    666   return true;
    667 }
    668 
    669 // Initializes the shared memory structure. The salt should already be filled
    670 // in so that it can be written to the shared memory
    671 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
    672   // The table is the size of the table followed by the entries.
    673   uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
    674 
    675   // Create the shared memory object.
    676   shared_memory_ = new base::SharedMemory();
    677   if (!shared_memory_)
    678     return false;
    679 
    680   if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) {
    681     delete shared_memory_;
    682     shared_memory_ = NULL;
    683     return false;
    684   }
    685 
    686   if (init_to_empty) {
    687     memset(shared_memory_->memory(), 0, alloc_size);
    688     used_items_ = 0;
    689   }
    690   table_length_ = num_entries;
    691 
    692   // Save the header for other processes to read.
    693   SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
    694   header->length = table_length_;
    695   memcpy(header->salt, salt_, LINK_SALT_LENGTH);
    696 
    697   // Our table pointer is just the data immediately following the size.
    698   hash_table_ = reinterpret_cast<Fingerprint*>(
    699       static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
    700 
    701   return true;
    702 }
    703 
    704 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
    705   base::SharedMemory *old_shared_memory = shared_memory_;
    706   Fingerprint* old_hash_table = hash_table_;
    707   int32 old_table_length = table_length_;
    708   if (!CreateURLTable(num_entries, true)) {
    709     // Try to put back the old state.
    710     shared_memory_ = old_shared_memory;
    711     hash_table_ = old_hash_table;
    712     table_length_ = old_table_length;
    713     return false;
    714   }
    715 
    716 #ifndef NDEBUG
    717   DebugValidate();
    718 #endif
    719 
    720   return true;
    721 }
    722 
    723 void VisitedLinkMaster::FreeURLTable() {
    724   if (shared_memory_) {
    725     delete shared_memory_;
    726     shared_memory_ = NULL;
    727   }
    728   if (!persist_to_disk_ || !file_)
    729     return;
    730   PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_));
    731   // AsyncClose() will close the file and free the memory pointed by |file_|.
    732   file_ = NULL;
    733 }
    734 
    735 bool VisitedLinkMaster::ResizeTableIfNecessary() {
    736   DCHECK(table_length_ > 0) << "Must have a table";
    737 
    738   // Load limits for good performance/space. We are pretty conservative about
    739   // keeping the table not very full. This is because we use linear probing
    740   // which increases the likelihood of clumps of entries which will reduce
    741   // performance.
    742   const float max_table_load = 0.5f;  // Grow when we're > this full.
    743   const float min_table_load = 0.2f;  // Shrink when we're < this full.
    744 
    745   float load = ComputeTableLoad();
    746   if (load < max_table_load &&
    747       (table_length_ <= static_cast<float>(kDefaultTableSize) ||
    748        load > min_table_load))
    749     return false;
    750 
    751   // Table needs to grow or shrink.
    752   int new_size = NewTableSizeForCount(used_items_);
    753   DCHECK(new_size > used_items_);
    754   DCHECK(load <= min_table_load || new_size > table_length_);
    755   ResizeTable(new_size);
    756   return true;
    757 }
    758 
    759 void VisitedLinkMaster::ResizeTable(int32 new_size) {
    760   DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
    761   shared_memory_serial_++;
    762 
    763 #ifndef NDEBUG
    764   DebugValidate();
    765 #endif
    766 
    767   base::SharedMemory* old_shared_memory = shared_memory_;
    768   Fingerprint* old_hash_table = hash_table_;
    769   int32 old_table_length = table_length_;
    770   if (!BeginReplaceURLTable(new_size))
    771     return;
    772 
    773   // Now we have two tables, our local copy which is the old one, and the new
    774   // one loaded into this object where we need to copy the data.
    775   for (int32 i = 0; i < old_table_length; i++) {
    776     Fingerprint cur = old_hash_table[i];
    777     if (cur)
    778       AddFingerprint(cur, false);
    779   }
    780 
    781   // On error unmapping, just forget about it since we can't do anything
    782   // else to release it.
    783   delete old_shared_memory;
    784 
    785   // Send an update notification to all child processes so they read the new
    786   // table.
    787   listener_->NewTable(shared_memory_);
    788 
    789 #ifndef NDEBUG
    790   DebugValidate();
    791 #endif
    792 
    793   // The new table needs to be written to disk.
    794   if (persist_to_disk_)
    795     WriteFullTable();
    796 }
    797 
    798 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
    799   // These table sizes are selected to be the maximum prime number less than
    800   // a "convenient" multiple of 1K.
    801   static const int table_sizes[] = {
    802       16381,    // 16K  = 16384   <- don't shrink below this table size
    803                 //                   (should be == default_table_size)
    804       32767,    // 32K  = 32768
    805       65521,    // 64K  = 65536
    806       130051,   // 128K = 131072
    807       262127,   // 256K = 262144
    808       524269,   // 512K = 524288
    809       1048549,  // 1M   = 1048576
    810       2097143,  // 2M   = 2097152
    811       4194301,  // 4M   = 4194304
    812       8388571,  // 8M   = 8388608
    813       16777199,  // 16M  = 16777216
    814       33554347};  // 32M  = 33554432
    815 
    816   // Try to leave the table 33% full.
    817   int desired = item_count * 3;
    818 
    819   // Find the closest prime.
    820   for (size_t i = 0; i < arraysize(table_sizes); i ++) {
    821     if (table_sizes[i] > desired)
    822       return table_sizes[i];
    823   }
    824 
    825   // Growing very big, just approximate a "good" number, not growing as much
    826   // as normal.
    827   return item_count * 2 - 1;
    828 }
    829 
    830 // See the TableBuilder definition in the header file for how this works.
    831 bool VisitedLinkMaster::RebuildTableFromDelegate() {
    832   DCHECK(!table_builder_.get());
    833 
    834   // TODO(brettw) make sure we have reasonable salt!
    835   table_builder_ = new TableBuilder(this, salt_);
    836   delegate_->RebuildTable(table_builder_);
    837   return true;
    838 }
    839 
    840 // See the TableBuilder declaration above for how this works.
    841 void VisitedLinkMaster::OnTableRebuildComplete(
    842     bool success,
    843     const std::vector<Fingerprint>& fingerprints) {
    844   if (success) {
    845     // Replace the old table with a new blank one.
    846     shared_memory_serial_++;
    847 
    848     // We are responsible for freeing it AFTER it has been replaced if
    849     // replacement succeeds.
    850     base::SharedMemory* old_shared_memory = shared_memory_;
    851 
    852     int new_table_size = NewTableSizeForCount(
    853         static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
    854     if (BeginReplaceURLTable(new_table_size)) {
    855       // Free the old table.
    856       delete old_shared_memory;
    857 
    858       // Add the stored fingerprints to the hash table.
    859       for (size_t i = 0; i < fingerprints.size(); i++)
    860         AddFingerprint(fingerprints[i], false);
    861 
    862       // Also add anything that was added while we were asynchronously
    863       // generating the new table.
    864       for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
    865            i != added_since_rebuild_.end(); ++i)
    866         AddFingerprint(*i, false);
    867       added_since_rebuild_.clear();
    868 
    869       // Now handle deletions.
    870       DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
    871       deleted_since_rebuild_.clear();
    872 
    873       // Send an update notification to all child processes.
    874       listener_->NewTable(shared_memory_);
    875 
    876       if (persist_to_disk_)
    877         WriteFullTable();
    878     }
    879   }
    880   table_builder_ = NULL;  // Will release our reference to the builder.
    881 
    882   // Notify the unit test that the rebuild is complete (will be NULL in prod.)
    883   if (!rebuild_complete_task_.is_null()) {
    884     rebuild_complete_task_.Run();
    885     rebuild_complete_task_.Reset();
    886   }
    887 }
    888 
    889 void VisitedLinkMaster::WriteToFile(FILE** file,
    890                                     off_t offset,
    891                                     void* data,
    892                                     int32 data_size) {
    893   DCHECK(persist_to_disk_);
    894 #ifndef NDEBUG
    895   posted_asynchronous_operation_ = true;
    896 #endif
    897   PostIOTask(FROM_HERE,
    898       base::Bind(&AsyncWrite, file, offset,
    899                  std::string(static_cast<const char*>(data), data_size)));
    900 }
    901 
    902 void VisitedLinkMaster::WriteUsedItemCountToFile() {
    903   DCHECK(persist_to_disk_);
    904   if (!file_)
    905     return;  // See comment on the file_ variable for why this might happen.
    906   WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
    907 }
    908 
    909 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
    910   DCHECK(persist_to_disk_);
    911 
    912   if (!file_)
    913     return;  // See comment on the file_ variable for why this might happen.
    914   if (last_hash < first_hash) {
    915     // Handle wraparound at 0. This first write is first_hash->EOF
    916     WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
    917                 &hash_table_[first_hash],
    918                 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
    919 
    920     // Now do 0->last_lash.
    921     WriteToFile(file_, kFileHeaderSize, hash_table_,
    922                 (last_hash + 1) * sizeof(Fingerprint));
    923   } else {
    924     // Normal case, just write the range.
    925     WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
    926                 &hash_table_[first_hash],
    927                 (last_hash - first_hash + 1) * sizeof(Fingerprint));
    928   }
    929 }
    930 
    931 bool VisitedLinkMaster::ReadFromFile(FILE* file,
    932                                      off_t offset,
    933                                      void* data,
    934                                      size_t data_size) {
    935   DCHECK(persist_to_disk_);
    936 #ifndef NDEBUG
    937   // Since this function is synchronous, we require that no asynchronous
    938   // operations could possibly be pending.
    939   DCHECK(!posted_asynchronous_operation_);
    940 #endif
    941 
    942   if (fseek(file, offset, SEEK_SET) != 0)
    943     return false;
    944 
    945   size_t num_read = fread(data, 1, data_size, file);
    946   return num_read == data_size;
    947 }
    948 
    949 // VisitedLinkTableBuilder ----------------------------------------------------
    950 
    951 VisitedLinkMaster::TableBuilder::TableBuilder(
    952     VisitedLinkMaster* master,
    953     const uint8 salt[LINK_SALT_LENGTH])
    954     : master_(master),
    955       success_(true) {
    956   fingerprints_.reserve(4096);
    957   memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8));
    958 }
    959 
    960 // TODO(brettw): Do we want to try to cancel the request if this happens? It
    961 // could delay shutdown if there are a lot of URLs.
    962 void VisitedLinkMaster::TableBuilder::DisownMaster() {
    963   master_ = NULL;
    964 }
    965 
    966 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
    967   if (!url.is_empty()) {
    968     fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
    969         url.spec().data(), url.spec().length(), salt_));
    970   }
    971 }
    972 
    973 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
    974   success_ = success;
    975   DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
    976 
    977   // Marshal to the main thread to notify the VisitedLinkMaster that the
    978   // rebuild is complete.
    979   BrowserThread::PostTask(
    980       BrowserThread::UI, FROM_HERE,
    981       base::Bind(&TableBuilder::OnCompleteMainThread, this));
    982 }
    983 
    984 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
    985   if (master_)
    986     master_->OnTableRebuildComplete(success_, fingerprints_);
    987 }
    988 
    989 }  // namespace visitedlink
    990