Home | History | Annotate | Download | only in browser
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
      6 #define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
      7 
      8 #if defined(OS_WIN)
      9 #include <windows.h>
     10 #endif
     11 #include <set>
     12 #include <vector>
     13 
     14 #include "base/callback.h"
     15 #include "base/callback_forward.h"
     16 #include "base/files/file_path.h"
     17 #include "base/gtest_prod_util.h"
     18 #include "base/memory/shared_memory.h"
     19 #include "base/threading/sequenced_worker_pool.h"
     20 #include "components/visitedlink/common/visitedlink_common.h"
     21 
     22 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
     23 #include "base/logging.h"
     24 #endif
     25 
     26 class GURL;
     27 
     28 namespace content {
     29 class BrowserContext;
     30 }
     31 
     32 namespace visitedlink {
     33 
     34 class VisitedLinkDelegate;
     35 
     36 // Controls the link coloring database. The master controls all writing to the
     37 // database as well as disk I/O. There should be only one master.
     38 //
     39 // This class will defer writing operations to the file thread. This means that
     40 // class destruction, the file may still be open since operations are pending on
     41 // another thread.
     42 class VisitedLinkMaster : public VisitedLinkCommon {
     43  public:
     44   // Listens to the link coloring database events. The master is given this
     45   // event as a constructor argument and dispatches events using it.
     46   class Listener {
     47    public:
     48     virtual ~Listener() {}
     49 
     50     // Called when link coloring database has been created or replaced. The
     51     // argument is the new table handle.
     52     virtual void NewTable(base::SharedMemory*) = 0;
     53 
     54     // Called when new link has been added. The argument is the fingerprint
     55     // (hash) of the link.
     56     virtual void Add(Fingerprint fingerprint) = 0;
     57 
     58     // Called when link coloring state has been reset. This may occur when
     59     // entire or parts of history were deleted.
     60     virtual void Reset() = 0;
     61   };
     62 
     63   VisitedLinkMaster(content::BrowserContext* browser_context,
     64                     VisitedLinkDelegate* delegate,
     65                     bool persist_to_disk);
     66 
     67   // In unit test mode, we allow the caller to optionally specify the database
     68   // filename so that it can be run from a unit test. The directory where this
     69   // file resides must exist in this mode. You can also specify the default
     70   // table size to test table resizing. If this parameter is 0, we will use the
     71   // defaults.
     72   //
     73   // In the unit test mode, we also allow the caller to provide a history
     74   // service pointer (the history service can't be fetched from the browser
     75   // process when we're in unit test mode). This can be NULL to try to access
     76   // the main version, which will probably fail (which can be good for testing
     77   // this failure mode).
     78   //
     79   // When |suppress_rebuild| is set, we'll not attempt to load data from
     80   // history if the file can't be loaded. This should generally be set for
     81   // testing except when you want to test the rebuild process explicitly.
     82   VisitedLinkMaster(Listener* listener,
     83                     VisitedLinkDelegate* delegate,
     84                     bool persist_to_disk,
     85                     bool suppress_rebuild,
     86                     const base::FilePath& filename,
     87                     int32 default_table_size);
     88   virtual ~VisitedLinkMaster();
     89 
     90   // Must be called immediately after object creation. Nothing else will work
     91   // until this is called. Returns true on success, false means that this
     92   // object won't work.
     93   bool Init();
     94 
     95   base::SharedMemory* shared_memory() { return shared_memory_; }
     96 
     97   // Adds a URL to the table.
     98   void AddURL(const GURL& url);
     99 
    100   // Adds a set of URLs to the table.
    101   void AddURLs(const std::vector<GURL>& url);
    102 
    103   // See DeleteURLs.
    104   class URLIterator {
    105    public:
    106     // HasNextURL must return true when this is called. Returns the next URL
    107     // then advances the iterator. Note that the returned reference is only
    108     // valid until the next call of NextURL.
    109     virtual const GURL& NextURL() = 0;
    110 
    111     // Returns true if still has URLs to be iterated.
    112     virtual bool HasNextURL() const = 0;
    113 
    114    protected:
    115     virtual ~URLIterator() {}
    116   };
    117 
    118   // Deletes the specified URLs from |rows| from the table.
    119   void DeleteURLs(URLIterator* iterator);
    120 
    121   // Clears the visited links table by deleting the file from disk. Used as
    122   // part of history clearing.
    123   void DeleteAllURLs();
    124 
    125   // Returns the Delegate of this Master.
    126   VisitedLinkDelegate* GetDelegate();
    127 
    128 #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST)
    129   // This is a debugging function that can be called to double-check internal
    130   // data structures. It will assert if the check fails.
    131   void DebugValidate();
    132 
    133   // Sets a task to execute when the next rebuild from history is complete.
    134   // This is used by unit tests to wait for the rebuild to complete before
    135   // they continue. The pointer will be owned by this object after the call.
    136   void set_rebuild_complete_task(const base::Closure& task) {
    137     DCHECK(rebuild_complete_task_.is_null());
    138     rebuild_complete_task_ = task;
    139   }
    140 
    141   // returns the number of items in the table for testing verification
    142   int32 GetUsedCount() const {
    143     return used_items_;
    144   }
    145 
    146   // Returns the listener.
    147   VisitedLinkMaster::Listener* GetListener() const {
    148     return listener_.get();
    149   }
    150 
    151   // Call to cause the entire database file to be re-written from scratch
    152   // to disk. Used by the performance tester.
    153   void RewriteFile() {
    154     WriteFullTable();
    155   }
    156 #endif
    157 
    158  private:
    159   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete);
    160   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete);
    161   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport);
    162 
    163   // Object to rebuild the table on the history thread (see the .cc file).
    164   class TableBuilder;
    165 
    166   // Byte offsets of values in the header.
    167   static const int32 kFileHeaderSignatureOffset;
    168   static const int32 kFileHeaderVersionOffset;
    169   static const int32 kFileHeaderLengthOffset;
    170   static const int32 kFileHeaderUsedOffset;
    171   static const int32 kFileHeaderSaltOffset;
    172 
    173   // The signature at the beginning of a file.
    174   static const int32 kFileSignature;
    175 
    176   // version of the file format this module currently uses
    177   static const int32 kFileCurrentVersion;
    178 
    179   // Bytes in the file header, including the salt.
    180   static const size_t kFileHeaderSize;
    181 
    182   // When creating a fresh new table, we use this many entries.
    183   static const unsigned kDefaultTableSize;
    184 
    185   // When the user is deleting a boatload of URLs, we don't really want to do
    186   // individual writes for each of them. When the count exceeds this threshold,
    187   // we will write the whole table to disk at once instead of individual items.
    188   static const size_t kBigDeleteThreshold;
    189 
    190   // Backend for the constructors initializing the members.
    191   void InitMembers();
    192 
    193   // If a rebuild is in progress, we save the URL in the temporary list.
    194   // Otherwise, we add this to the table. Returns the index of the
    195   // inserted fingerprint or null_hash_ on failure.
    196   Hash TryToAddURL(const GURL& url);
    197 
    198   // File I/O functions
    199   // ------------------
    200   // These functions are only called if |persist_to_disk_| is true.
    201 
    202   // Posts the given task to the blocking worker pool with our options.
    203   void PostIOTask(const tracked_objects::Location& from_here,
    204                   const base::Closure& task);
    205 
    206   // Writes the entire table to disk. It will leave the table file open and
    207   // the handle to it will be stored in file_.
    208   void WriteFullTable();
    209 
    210   // Try to load the table from the database file. If the file doesn't exist or
    211   // is corrupt, this will return failure.
    212   bool InitFromFile();
    213 
    214   // Reads the header of the link coloring database from disk. Assumes the
    215   // file pointer is at the beginning of the file and that there are no pending
    216   // asynchronous I/O operations.
    217   //
    218   // Returns true on success and places the size of the table in num_entries
    219   // and the number of nonzero fingerprints in used_count. This will fail if
    220   // the version of the file is not the current version of the database.
    221   bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count,
    222                       uint8 salt[LINK_SALT_LENGTH]);
    223 
    224   // Fills *filename with the name of the link database filename
    225   bool GetDatabaseFileName(base::FilePath* filename);
    226 
    227   // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy
    228   // the write to a background thread.
    229   void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size);
    230 
    231   // Helper function to schedule and asynchronous write of the used count to
    232   // disk (this is a common operation).
    233   void WriteUsedItemCountToFile();
    234 
    235   // Helper function to schedule an asynchronous write of the given range of
    236   // hash functions to disk. The range is inclusive on both ends. The range can
    237   // wrap around at 0 and this function will handle it.
    238   void WriteHashRangeToFile(Hash first_hash, Hash last_hash);
    239 
    240   // Synchronous read from the file. Assumes there are no pending asynchronous
    241   // I/O functions. Returns true if the entire buffer was successfully filled.
    242   bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size);
    243 
    244   // General table handling
    245   // ----------------------
    246 
    247   // Called to add a fingerprint to the table. If |send_notifications| is true
    248   // and the item is added successfully, Listener::Add will be invoked.
    249   // Returns the index of the inserted fingerprint or null_hash_ if there was a
    250   // duplicate and this item was skippped.
    251   Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications);
    252 
    253   // Deletes all fingerprints from the given vector from the current hash table
    254   // and syncs it to disk if there are changes. This does not update the
    255   // deleted_since_rebuild_ list, the caller must update this itself if there
    256   // is an update pending.
    257   void DeleteFingerprintsFromCurrentTable(
    258       const std::set<Fingerprint>& fingerprints);
    259 
    260   // Removes the indicated fingerprint from the table. If the update_file flag
    261   // is set, the changes will also be written to disk. Returns true if the
    262   // fingerprint was deleted, false if it was not in the table to delete.
    263   bool DeleteFingerprint(Fingerprint fingerprint, bool update_file);
    264 
    265   // Creates a new empty table, call if InitFromFile() fails. Normally, when
    266   // |suppress_rebuild| is false, the table will be rebuilt from history,
    267   // keeping us in sync. When |suppress_rebuild| is true, the new table will be
    268   // empty and we will not consult history. This is used when clearing the
    269   // database and for unit tests.
    270   bool InitFromScratch(bool suppress_rebuild);
    271 
    272   // Allocates the Fingerprint structure and length. When init_to_empty is set,
    273   // the table will be filled with 0s and used_items_ will be set to 0 as well.
    274   // If the flag is not set, these things are untouched and it is the
    275   // responsibility of the caller to fill them (like when we are reading from
    276   // a file).
    277   bool CreateURLTable(int32 num_entries, bool init_to_empty);
    278 
    279   // A wrapper for CreateURLTable, this will allocate a new table, initialized
    280   // to empty. The caller is responsible for saving the shared memory pointer
    281   // and handles before this call (they will be replaced with new ones) and
    282   // releasing them later. This is designed for callers that make a new table
    283   // and then copy values from the old table to the new one, then release the
    284   // old table.
    285   //
    286   // Returns true on success. On failure, the old table will be restored. The
    287   // caller should not attemp to release the pointer/handle in this case.
    288   bool BeginReplaceURLTable(int32 num_entries);
    289 
    290   // unallocates the Fingerprint table
    291   void FreeURLTable();
    292 
    293   // For growing the table. ResizeTableIfNecessary will check to see if the
    294   // table should be resized and calls ResizeTable if needed. Returns true if
    295   // we decided to resize the table.
    296   bool ResizeTableIfNecessary();
    297 
    298   // Resizes the table (growing or shrinking) as necessary to accomodate the
    299   // current count.
    300   void ResizeTable(int32 new_size);
    301 
    302   // Returns the desired table size for |item_count| URLs.
    303   uint32 NewTableSizeForCount(int32 item_count) const;
    304 
    305   // Computes the table load as fraction. For example, if 1/4 of the entries are
    306   // full, this value will be 0.25
    307   float ComputeTableLoad() const {
    308     return static_cast<float>(used_items_) / static_cast<float>(table_length_);
    309   }
    310 
    311   // Initializes a rebuild of the visited link database based on the browser
    312   // history. This will set table_builder_ while working, and there should not
    313   // already be a rebuild in place when called. See the definition for more
    314   // details on how this works.
    315   //
    316   // Returns true on success. Failure means we're not attempting to rebuild
    317   // the database because something failed.
    318   bool RebuildTableFromDelegate();
    319 
    320   // Callback that the table rebuilder uses when the rebuild is complete.
    321   // |success| is true if the fingerprint generation succeeded, in which case
    322   // |fingerprints| will contain the computed fingerprints. On failure, there
    323   // will be no fingerprints.
    324   void OnTableRebuildComplete(bool success,
    325                               const std::vector<Fingerprint>& fingerprints);
    326 
    327   // Increases or decreases the given hash value by one, wrapping around as
    328   // necessary. Used for probing.
    329   inline Hash IncrementHash(Hash hash) {
    330     if (hash >= table_length_ - 1)
    331       return 0;  // Wrap around.
    332     return hash + 1;
    333   }
    334   inline Hash DecrementHash(Hash hash) {
    335     if (hash <= 0)
    336       return table_length_ - 1;  // Wrap around.
    337     return hash - 1;
    338   }
    339 
    340 #ifndef NDEBUG
    341   // Indicates whether any asynchronous operation has ever been completed.
    342   // We do some synchronous reads that require that no asynchronous operations
    343   // are pending, yet we don't track whether they have been completed. This
    344   // flag is a sanity check that these reads only happen before any
    345   // asynchronous writes have been fired.
    346   bool posted_asynchronous_operation_;
    347 #endif
    348 
    349   // Reference to the browser context that this object belongs to
    350   // (it knows the path to where the data is stored)
    351   content::BrowserContext* browser_context_;
    352 
    353   // Client owns the delegate and is responsible for it being valid through
    354   // the life time this VisitedLinkMaster.
    355   VisitedLinkDelegate* delegate_;
    356 
    357   // VisitedLinkEventListener to handle incoming events.
    358   scoped_ptr<Listener> listener_;
    359 
    360   // Lazily initialized sequence token for posting file tasks.
    361   base::SequencedWorkerPool::SequenceToken sequence_token_;
    362 
    363   // When non-NULL, indicates we are in database rebuild mode and points to
    364   // the class collecting fingerprint information from the history system.
    365   // The pointer is owned by this class, but it must remain valid while the
    366   // history query is running. We must only delete it when the query is done.
    367   scoped_refptr<TableBuilder> table_builder_;
    368 
    369   // Indicates URLs added and deleted since we started rebuilding the table.
    370   std::set<Fingerprint> added_since_rebuild_;
    371   std::set<Fingerprint> deleted_since_rebuild_;
    372 
    373   // TODO(brettw) Support deletion, we need to track whether anything was
    374   // deleted during the rebuild here. Then we should delete any of these
    375   // entries from the complete table later.
    376   // std::vector<Fingerprint> removed_since_rebuild_;
    377 
    378   // The currently open file with the table in it. This may be NULL if we're
    379   // rebuilding and haven't written a new version yet or if |persist_to_disk_|
    380   // is false. Writing to the file may be safely ignored in this case. Also
    381   // |file_| may be non-NULL but point to a NULL pointer. That would mean that
    382   // opening of the file is already scheduled in a background thread and any
    383   // writing to the file can also be scheduled to the background thread as it's
    384   // guaranteed to be executed after the opening.
    385   // The class owns both the |file_| pointer and the pointer pointed
    386   // by |*file_|.
    387   FILE** file_;
    388 
    389   // If true, will try to persist the hash table to disk. Will rebuild from
    390   // VisitedLinkDelegate::RebuildTable if there are disk corruptions.
    391   bool persist_to_disk_;
    392 
    393   // Shared memory consists of a SharedHeader followed by the table.
    394   base::SharedMemory *shared_memory_;
    395 
    396   // When we generate new tables, we increment the serial number of the
    397   // shared memory object.
    398   int32 shared_memory_serial_;
    399 
    400   // Number of non-empty items in the table, used to compute fullness.
    401   int32 used_items_;
    402 
    403   // Testing values -----------------------------------------------------------
    404   //
    405   // The following fields exist for testing purposes. They are not used in
    406   // release builds. It'd be nice to eliminate them in release builds, but we
    407   // don't want to change the signature of the object between the unit test and
    408   // regular builds. Instead, we just have "default" values that these take
    409   // in release builds that give "regular" behavior.
    410 
    411   // Overridden database file name for testing
    412   base::FilePath database_name_override_;
    413 
    414   // When nonzero, overrides the table size for new databases for testing
    415   int32 table_size_override_;
    416 
    417   // When set, indicates the task that should be run after the next rebuild from
    418   // history is complete.
    419   base::Closure rebuild_complete_task_;
    420 
    421   // Set to prevent us from attempting to rebuild the database from global
    422   // history if we have an error opening the file. This is used for testing,
    423   // will be false in production.
    424   bool suppress_rebuild_;
    425 
    426   DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster);
    427 };
    428 
    429 // NOTE: These methods are defined inline here, so we can share the compilation
    430 //       of visitedlink_master.cc between the browser and the unit/perf tests.
    431 
    432 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
    433 inline void VisitedLinkMaster::DebugValidate() {
    434   int32 used_count = 0;
    435   for (int32 i = 0; i < table_length_; i++) {
    436     if (hash_table_[i])
    437       used_count++;
    438   }
    439   DCHECK_EQ(used_count, used_items_);
    440 }
    441 #endif
    442 
    443 }  // namespace visitedlink
    444 
    445 #endif  // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
    446