Home | History | Annotate | Download | only in visitedlink
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_
      6 #define CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_
      7 #pragma once
      8 
      9 #if defined(OS_WIN)
     10 #include <windows.h>
     11 #endif
     12 #include <set>
     13 #include <vector>
     14 
     15 #include "base/file_path.h"
     16 #include "base/gtest_prod_util.h"
     17 #include "base/memory/ref_counted.h"
     18 #include "base/shared_memory.h"
     19 #include "chrome/browser/history/history.h"
     20 #include "chrome/common/visitedlink_common.h"
     21 
     22 class GURL;
     23 class Profile;
     24 
     25 // Controls the link coloring database. The master controls all writing to the
     26 // database as well as disk I/O. There should be only one master.
     27 //
     28 // This class will defer writing operations to the file thread. This means that
     29 // class destruction, the file may still be open since operations are pending on
     30 // another thread.
     31 class VisitedLinkMaster : public VisitedLinkCommon {
     32  public:
     33   // Listens to the link coloring database events. The master is given this
     34   // event as a constructor argument and dispatches events using it.
     35   class Listener {
     36    public:
     37     virtual ~Listener() {}
     38 
     39     // Called when link coloring database has been created or replaced. The
     40     // argument is the new table handle.
     41     virtual void NewTable(base::SharedMemory*) = 0;
     42 
     43     // Called when new link has been added. The argument is the fingerprint
     44     // (hash) of the link.
     45     virtual void Add(Fingerprint fingerprint) = 0;
     46 
     47     // Called when link coloring state has been reset. This may occur when
     48     // entire or parts of history were deleted.
     49     virtual void Reset() = 0;
     50   };
     51 
     52   // The |listener| may not be NULL.
     53   VisitedLinkMaster(Listener* listener, Profile* profile);
     54 
     55   // In unit test mode, we allow the caller to optionally specify the database
     56   // filename so that it can be run from a unit test. The directory where this
     57   // file resides must exist in this mode. You can also specify the default
     58   // table size to test table resizing. If this parameter is 0, we will use the
     59   // defaults.
     60   //
     61   // In the unit test mode, we also allow the caller to provide a history
     62   // service pointer (the history service can't be fetched from the browser
     63   // process when we're in unit test mode). This can be NULL to try to access
     64   // the main version, which will probably fail (which can be good for testing
     65   // this failure mode).
     66   //
     67   // When |suppress_rebuild| is set, we'll not attempt to load data from
     68   // history if the file can't be loaded. This should generally be set for
     69   // testing except when you want to test the rebuild process explicitly.
     70   VisitedLinkMaster(Listener* listener,
     71                     HistoryService* history_service,
     72                     bool suppress_rebuild,
     73                     const FilePath& filename,
     74                     int32 default_table_size);
     75   virtual ~VisitedLinkMaster();
     76 
     77   // Must be called immediately after object creation. Nothing else will work
     78   // until this is called. Returns true on success, false means that this
     79   // object won't work.
     80   bool Init();
     81 
     82   base::SharedMemory* shared_memory() { return shared_memory_; }
     83 
     84   // Adds a URL to the table.
     85   void AddURL(const GURL& url);
     86 
     87   // Adds a set of URLs to the table.
     88   void AddURLs(const std::vector<GURL>& url);
     89 
     90   // Deletes the specified URLs from the table.
     91   void DeleteURLs(const std::set<GURL>& urls);
     92 
     93   // Clears the visited links table by deleting the file from disk. Used as
     94   // part of history clearing.
     95   void DeleteAllURLs();
     96 
     97 #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST)
     98   // This is a debugging function that can be called to double-check internal
     99   // data structures. It will assert if the check fails.
    100   void DebugValidate();
    101 
    102   // Sets a task to execute when the next rebuild from history is complete.
    103   // This is used by unit tests to wait for the rebuild to complete before
    104   // they continue. The pointer will be owned by this object after the call.
    105   void set_rebuild_complete_task(Task* task) {
    106     DCHECK(!rebuild_complete_task_.get());
    107     rebuild_complete_task_.reset(task);
    108   }
    109 
    110   // returns the number of items in the table for testing verification
    111   int32 GetUsedCount() const {
    112     return used_items_;
    113   }
    114 
    115   // Call to cause the entire database file to be re-written from scratch
    116   // to disk. Used by the performance tester.
    117   bool RewriteFile() {
    118     return WriteFullTable();
    119   }
    120 #endif
    121 
    122  private:
    123   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete);
    124   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete);
    125   FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport);
    126 
    127   // Object to rebuild the table on the history thread (see the .cc file).
    128   class TableBuilder;
    129 
    130   // Byte offsets of values in the header.
    131   static const int32 kFileHeaderSignatureOffset;
    132   static const int32 kFileHeaderVersionOffset;
    133   static const int32 kFileHeaderLengthOffset;
    134   static const int32 kFileHeaderUsedOffset;
    135   static const int32 kFileHeaderSaltOffset;
    136 
    137   // The signature at the beginning of a file.
    138   static const int32 kFileSignature;
    139 
    140   // version of the file format this module currently uses
    141   static const int32 kFileCurrentVersion;
    142 
    143   // Bytes in the file header, including the salt.
    144   static const size_t kFileHeaderSize;
    145 
    146   // When creating a fresh new table, we use this many entries.
    147   static const unsigned kDefaultTableSize;
    148 
    149   // When the user is deleting a boatload of URLs, we don't really want to do
    150   // individual writes for each of them. When the count exceeds this threshold,
    151   // we will write the whole table to disk at once instead of individual items.
    152   static const size_t kBigDeleteThreshold;
    153 
    154   // Backend for the constructors initializing the members.
    155   void InitMembers(Listener* listener, Profile* profile);
    156 
    157   // If a rebuild is in progress, we save the URL in the temporary list.
    158   // Otherwise, we add this to the table. Returns the index of the
    159   // inserted fingerprint or null_hash_ on failure.
    160   Hash TryToAddURL(const GURL& url);
    161 
    162   // File I/O functions
    163   // ------------------
    164 
    165   // Writes the entire table to disk, returning true on success. It will leave
    166   // the table file open and the handle to it in file_
    167   bool WriteFullTable();
    168 
    169   // Try to load the table from the database file. If the file doesn't exist or
    170   // is corrupt, this will return failure.
    171   bool InitFromFile();
    172 
    173   // Reads the header of the link coloring database from disk. Assumes the
    174   // file pointer is at the beginning of the file and that there are no pending
    175   // asynchronous I/O operations.
    176   //
    177   // Returns true on success and places the size of the table in num_entries
    178   // and the number of nonzero fingerprints in used_count. This will fail if
    179   // the version of the file is not the current version of the database.
    180   bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count,
    181                       uint8 salt[LINK_SALT_LENGTH]);
    182 
    183   // Fills *filename with the name of the link database filename
    184   bool GetDatabaseFileName(FilePath* filename);
    185 
    186   // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy
    187   // the write to a background thread.
    188   void WriteToFile(FILE* hfile, off_t offset, void* data, int32 data_size);
    189 
    190   // Helper function to schedule and asynchronous write of the used count to
    191   // disk (this is a common operation).
    192   void WriteUsedItemCountToFile();
    193 
    194   // Helper function to schedule an asynchronous write of the given range of
    195   // hash functions to disk. The range is inclusive on both ends. The range can
    196   // wrap around at 0 and this function will handle it.
    197   void WriteHashRangeToFile(Hash first_hash, Hash last_hash);
    198 
    199   // Synchronous read from the file. Assumes there are no pending asynchronous
    200   // I/O functions. Returns true if the entire buffer was successfully filled.
    201   bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size);
    202 
    203   // General table handling
    204   // ----------------------
    205 
    206   // Called to add a fingerprint to the table. If |send_notifications| is true
    207   // and the item is added successfully, Listener::Add will be invoked.
    208   // Returns the index of the inserted fingerprint or null_hash_ if there was a
    209   // duplicate and this item was skippped.
    210   Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications);
    211 
    212   // Deletes all fingerprints from the given vector from the current hash table
    213   // and syncs it to disk if there are changes. This does not update the
    214   // deleted_since_rebuild_ list, the caller must update this itself if there
    215   // is an update pending.
    216   void DeleteFingerprintsFromCurrentTable(
    217       const std::set<Fingerprint>& fingerprints);
    218 
    219   // Removes the indicated fingerprint from the table. If the update_file flag
    220   // is set, the changes will also be written to disk. Returns true if the
    221   // fingerprint was deleted, false if it was not in the table to delete.
    222   bool DeleteFingerprint(Fingerprint fingerprint, bool update_file);
    223 
    224   // Creates a new empty table, call if InitFromFile() fails. Normally, when
    225   // |suppress_rebuild| is false, the table will be rebuilt from history,
    226   // keeping us in sync. When |suppress_rebuild| is true, the new table will be
    227   // empty and we will not consult history. This is used when clearing the
    228   // database and for unit tests.
    229   bool InitFromScratch(bool suppress_rebuild);
    230 
    231   // Allocates the Fingerprint structure and length. When init_to_empty is set,
    232   // the table will be filled with 0s and used_items_ will be set to 0 as well.
    233   // If the flag is not set, these things are untouched and it is the
    234   // responsibility of the caller to fill them (like when we are reading from
    235   // a file).
    236   bool CreateURLTable(int32 num_entries, bool init_to_empty);
    237 
    238   // A wrapper for CreateURLTable, this will allocate a new table, initialized
    239   // to empty. The caller is responsible for saving the shared memory pointer
    240   // and handles before this call (they will be replaced with new ones) and
    241   // releasing them later. This is designed for callers that make a new table
    242   // and then copy values from the old table to the new one, then release the
    243   // old table.
    244   //
    245   // Returns true on success. On failure, the old table will be restored. The
    246   // caller should not attemp to release the pointer/handle in this case.
    247   bool BeginReplaceURLTable(int32 num_entries);
    248 
    249   // unallocates the Fingerprint table
    250   void FreeURLTable();
    251 
    252   // For growing the table. ResizeTableIfNecessary will check to see if the
    253   // table should be resized and calls ResizeTable if needed. Returns true if
    254   // we decided to resize the table.
    255   bool ResizeTableIfNecessary();
    256 
    257   // Resizes the table (growing or shrinking) as necessary to accomodate the
    258   // current count.
    259   void ResizeTable(int32 new_size);
    260 
    261   // Returns the desired table size for |item_count| URLs.
    262   uint32 NewTableSizeForCount(int32 item_count) const;
    263 
    264   // Computes the table load as fraction. For example, if 1/4 of the entries are
    265   // full, this value will be 0.25
    266   float ComputeTableLoad() const {
    267     return static_cast<float>(used_items_) / static_cast<float>(table_length_);
    268   }
    269 
    270   // Initializes a rebuild of the visited link database based on the browser
    271   // history. This will set table_builder_ while working, and there should not
    272   // already be a rebuild in place when called. See the definition for more
    273   // details on how this works.
    274   //
    275   // Returns true on success. Failure means we're not attempting to rebuild
    276   // the database because something failed.
    277   bool RebuildTableFromHistory();
    278 
    279   // Callback that the table rebuilder uses when the rebuild is complete.
    280   // |success| is true if the fingerprint generation succeeded, in which case
    281   // |fingerprints| will contain the computed fingerprints. On failure, there
    282   // will be no fingerprints.
    283   void OnTableRebuildComplete(bool success,
    284                               const std::vector<Fingerprint>& fingerprints);
    285 
    286   // Increases or decreases the given hash value by one, wrapping around as
    287   // necessary. Used for probing.
    288   inline Hash IncrementHash(Hash hash) {
    289     if (hash >= table_length_ - 1)
    290       return 0;  // Wrap around.
    291     return hash + 1;
    292   }
    293   inline Hash DecrementHash(Hash hash) {
    294     if (hash <= 0)
    295       return table_length_ - 1;  // Wrap around.
    296     return hash - 1;
    297   }
    298 
    299   Listener* listener_;
    300 
    301 #ifndef NDEBUG
    302   // Indicates whether any asynchronous operation has ever been completed.
    303   // We do some synchronous reads that require that no asynchronous operations
    304   // are pending, yet we don't track whether they have been completed. This
    305   // flag is a sanity check that these reads only happen before any
    306   // asynchronous writes have been fired.
    307   bool posted_asynchronous_operation_;
    308 #endif
    309 
    310   // Reference to the user profile that this object belongs to
    311   // (it knows the path to where the data is stored)
    312   Profile* profile_;
    313 
    314   // When non-NULL, indicates we are in database rebuild mode and points to
    315   // the class collecting fingerprint information from the history system.
    316   // The pointer is owned by this class, but it must remain valid while the
    317   // history query is running. We must only delete it when the query is done.
    318   scoped_refptr<TableBuilder> table_builder_;
    319 
    320   // Indicates URLs added and deleted since we started rebuilding the table.
    321   std::set<Fingerprint> added_since_rebuild_;
    322   std::set<Fingerprint> deleted_since_rebuild_;
    323 
    324   // TODO(brettw) Support deletion, we need to track whether anything was
    325   // deleted during the rebuild here. Then we should delete any of these
    326   // entries from the complete table later.
    327   // std::vector<Fingerprint> removed_since_rebuild_;
    328 
    329   // The currently open file with the table in it. This may be NULL if we're
    330   // rebuilding and haven't written a new version yet. Writing to the file may
    331   // be safely ignored in this case.
    332   FILE* file_;
    333 
    334   // Shared memory consists of a SharedHeader followed by the table.
    335   base::SharedMemory *shared_memory_;
    336 
    337   // When we generate new tables, we increment the serial number of the
    338   // shared memory object.
    339   int32 shared_memory_serial_;
    340 
    341   // Number of non-empty items in the table, used to compute fullness.
    342   int32 used_items_;
    343 
    344   // Testing values -----------------------------------------------------------
    345   //
    346   // The following fields exist for testing purposes. They are not used in
    347   // release builds. It'd be nice to eliminate them in release builds, but we
    348   // don't want to change the signature of the object between the unit test and
    349   // regular builds. Instead, we just have "default" values that these take
    350   // in release builds that give "regular" behavior.
    351 
    352   // Overridden database file name for testing
    353   FilePath database_name_override_;
    354 
    355   // When nonzero, overrides the table size for new databases for testing
    356   int32 table_size_override_;
    357 
    358   // When non-NULL, overrides the history service to use rather than as the
    359   // BrowserProcess. This is provided for unit tests.
    360   HistoryService* history_service_override_;
    361 
    362   // When non-NULL, indicates the task that should be run after the next
    363   // rebuild from history is complete.
    364   scoped_ptr<Task> rebuild_complete_task_;
    365 
    366   // Set to prevent us from attempting to rebuild the database from global
    367   // history if we have an error opening the file. This is used for testing,
    368   // will be false in production.
    369   bool suppress_rebuild_;
    370 
    371   DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster);
    372 };
    373 
    374 // NOTE: These methods are defined inline here, so we can share the compilation
    375 //       of visitedlink_master.cc between the browser and the unit/perf tests.
    376 
    377 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
    378 inline void VisitedLinkMaster::DebugValidate() {
    379   int32 used_count = 0;
    380   for (int32 i = 0; i < table_length_; i++) {
    381     if (hash_table_[i])
    382       used_count++;
    383   }
    384   DCHECK_EQ(used_count, used_items_);
    385 }
    386 #endif
    387 
    388 #endif  // CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_
    389