1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_ 6 #define CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_ 7 #pragma once 8 9 #if defined(OS_WIN) 10 #include <windows.h> 11 #endif 12 #include <set> 13 #include <vector> 14 15 #include "base/file_path.h" 16 #include "base/gtest_prod_util.h" 17 #include "base/memory/ref_counted.h" 18 #include "base/shared_memory.h" 19 #include "chrome/browser/history/history.h" 20 #include "chrome/common/visitedlink_common.h" 21 22 class GURL; 23 class Profile; 24 25 // Controls the link coloring database. The master controls all writing to the 26 // database as well as disk I/O. There should be only one master. 27 // 28 // This class will defer writing operations to the file thread. This means that 29 // class destruction, the file may still be open since operations are pending on 30 // another thread. 31 class VisitedLinkMaster : public VisitedLinkCommon { 32 public: 33 // Listens to the link coloring database events. The master is given this 34 // event as a constructor argument and dispatches events using it. 35 class Listener { 36 public: 37 virtual ~Listener() {} 38 39 // Called when link coloring database has been created or replaced. The 40 // argument is the new table handle. 41 virtual void NewTable(base::SharedMemory*) = 0; 42 43 // Called when new link has been added. The argument is the fingerprint 44 // (hash) of the link. 45 virtual void Add(Fingerprint fingerprint) = 0; 46 47 // Called when link coloring state has been reset. This may occur when 48 // entire or parts of history were deleted. 49 virtual void Reset() = 0; 50 }; 51 52 // The |listener| may not be NULL. 53 VisitedLinkMaster(Listener* listener, Profile* profile); 54 55 // In unit test mode, we allow the caller to optionally specify the database 56 // filename so that it can be run from a unit test. The directory where this 57 // file resides must exist in this mode. You can also specify the default 58 // table size to test table resizing. If this parameter is 0, we will use the 59 // defaults. 60 // 61 // In the unit test mode, we also allow the caller to provide a history 62 // service pointer (the history service can't be fetched from the browser 63 // process when we're in unit test mode). This can be NULL to try to access 64 // the main version, which will probably fail (which can be good for testing 65 // this failure mode). 66 // 67 // When |suppress_rebuild| is set, we'll not attempt to load data from 68 // history if the file can't be loaded. This should generally be set for 69 // testing except when you want to test the rebuild process explicitly. 70 VisitedLinkMaster(Listener* listener, 71 HistoryService* history_service, 72 bool suppress_rebuild, 73 const FilePath& filename, 74 int32 default_table_size); 75 virtual ~VisitedLinkMaster(); 76 77 // Must be called immediately after object creation. Nothing else will work 78 // until this is called. Returns true on success, false means that this 79 // object won't work. 80 bool Init(); 81 82 base::SharedMemory* shared_memory() { return shared_memory_; } 83 84 // Adds a URL to the table. 85 void AddURL(const GURL& url); 86 87 // Adds a set of URLs to the table. 88 void AddURLs(const std::vector<GURL>& url); 89 90 // Deletes the specified URLs from the table. 91 void DeleteURLs(const std::set<GURL>& urls); 92 93 // Clears the visited links table by deleting the file from disk. Used as 94 // part of history clearing. 95 void DeleteAllURLs(); 96 97 #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST) 98 // This is a debugging function that can be called to double-check internal 99 // data structures. It will assert if the check fails. 100 void DebugValidate(); 101 102 // Sets a task to execute when the next rebuild from history is complete. 103 // This is used by unit tests to wait for the rebuild to complete before 104 // they continue. The pointer will be owned by this object after the call. 105 void set_rebuild_complete_task(Task* task) { 106 DCHECK(!rebuild_complete_task_.get()); 107 rebuild_complete_task_.reset(task); 108 } 109 110 // returns the number of items in the table for testing verification 111 int32 GetUsedCount() const { 112 return used_items_; 113 } 114 115 // Call to cause the entire database file to be re-written from scratch 116 // to disk. Used by the performance tester. 117 bool RewriteFile() { 118 return WriteFullTable(); 119 } 120 #endif 121 122 private: 123 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete); 124 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete); 125 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport); 126 127 // Object to rebuild the table on the history thread (see the .cc file). 128 class TableBuilder; 129 130 // Byte offsets of values in the header. 131 static const int32 kFileHeaderSignatureOffset; 132 static const int32 kFileHeaderVersionOffset; 133 static const int32 kFileHeaderLengthOffset; 134 static const int32 kFileHeaderUsedOffset; 135 static const int32 kFileHeaderSaltOffset; 136 137 // The signature at the beginning of a file. 138 static const int32 kFileSignature; 139 140 // version of the file format this module currently uses 141 static const int32 kFileCurrentVersion; 142 143 // Bytes in the file header, including the salt. 144 static const size_t kFileHeaderSize; 145 146 // When creating a fresh new table, we use this many entries. 147 static const unsigned kDefaultTableSize; 148 149 // When the user is deleting a boatload of URLs, we don't really want to do 150 // individual writes for each of them. When the count exceeds this threshold, 151 // we will write the whole table to disk at once instead of individual items. 152 static const size_t kBigDeleteThreshold; 153 154 // Backend for the constructors initializing the members. 155 void InitMembers(Listener* listener, Profile* profile); 156 157 // If a rebuild is in progress, we save the URL in the temporary list. 158 // Otherwise, we add this to the table. Returns the index of the 159 // inserted fingerprint or null_hash_ on failure. 160 Hash TryToAddURL(const GURL& url); 161 162 // File I/O functions 163 // ------------------ 164 165 // Writes the entire table to disk, returning true on success. It will leave 166 // the table file open and the handle to it in file_ 167 bool WriteFullTable(); 168 169 // Try to load the table from the database file. If the file doesn't exist or 170 // is corrupt, this will return failure. 171 bool InitFromFile(); 172 173 // Reads the header of the link coloring database from disk. Assumes the 174 // file pointer is at the beginning of the file and that there are no pending 175 // asynchronous I/O operations. 176 // 177 // Returns true on success and places the size of the table in num_entries 178 // and the number of nonzero fingerprints in used_count. This will fail if 179 // the version of the file is not the current version of the database. 180 bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count, 181 uint8 salt[LINK_SALT_LENGTH]); 182 183 // Fills *filename with the name of the link database filename 184 bool GetDatabaseFileName(FilePath* filename); 185 186 // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy 187 // the write to a background thread. 188 void WriteToFile(FILE* hfile, off_t offset, void* data, int32 data_size); 189 190 // Helper function to schedule and asynchronous write of the used count to 191 // disk (this is a common operation). 192 void WriteUsedItemCountToFile(); 193 194 // Helper function to schedule an asynchronous write of the given range of 195 // hash functions to disk. The range is inclusive on both ends. The range can 196 // wrap around at 0 and this function will handle it. 197 void WriteHashRangeToFile(Hash first_hash, Hash last_hash); 198 199 // Synchronous read from the file. Assumes there are no pending asynchronous 200 // I/O functions. Returns true if the entire buffer was successfully filled. 201 bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size); 202 203 // General table handling 204 // ---------------------- 205 206 // Called to add a fingerprint to the table. If |send_notifications| is true 207 // and the item is added successfully, Listener::Add will be invoked. 208 // Returns the index of the inserted fingerprint or null_hash_ if there was a 209 // duplicate and this item was skippped. 210 Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications); 211 212 // Deletes all fingerprints from the given vector from the current hash table 213 // and syncs it to disk if there are changes. This does not update the 214 // deleted_since_rebuild_ list, the caller must update this itself if there 215 // is an update pending. 216 void DeleteFingerprintsFromCurrentTable( 217 const std::set<Fingerprint>& fingerprints); 218 219 // Removes the indicated fingerprint from the table. If the update_file flag 220 // is set, the changes will also be written to disk. Returns true if the 221 // fingerprint was deleted, false if it was not in the table to delete. 222 bool DeleteFingerprint(Fingerprint fingerprint, bool update_file); 223 224 // Creates a new empty table, call if InitFromFile() fails. Normally, when 225 // |suppress_rebuild| is false, the table will be rebuilt from history, 226 // keeping us in sync. When |suppress_rebuild| is true, the new table will be 227 // empty and we will not consult history. This is used when clearing the 228 // database and for unit tests. 229 bool InitFromScratch(bool suppress_rebuild); 230 231 // Allocates the Fingerprint structure and length. When init_to_empty is set, 232 // the table will be filled with 0s and used_items_ will be set to 0 as well. 233 // If the flag is not set, these things are untouched and it is the 234 // responsibility of the caller to fill them (like when we are reading from 235 // a file). 236 bool CreateURLTable(int32 num_entries, bool init_to_empty); 237 238 // A wrapper for CreateURLTable, this will allocate a new table, initialized 239 // to empty. The caller is responsible for saving the shared memory pointer 240 // and handles before this call (they will be replaced with new ones) and 241 // releasing them later. This is designed for callers that make a new table 242 // and then copy values from the old table to the new one, then release the 243 // old table. 244 // 245 // Returns true on success. On failure, the old table will be restored. The 246 // caller should not attemp to release the pointer/handle in this case. 247 bool BeginReplaceURLTable(int32 num_entries); 248 249 // unallocates the Fingerprint table 250 void FreeURLTable(); 251 252 // For growing the table. ResizeTableIfNecessary will check to see if the 253 // table should be resized and calls ResizeTable if needed. Returns true if 254 // we decided to resize the table. 255 bool ResizeTableIfNecessary(); 256 257 // Resizes the table (growing or shrinking) as necessary to accomodate the 258 // current count. 259 void ResizeTable(int32 new_size); 260 261 // Returns the desired table size for |item_count| URLs. 262 uint32 NewTableSizeForCount(int32 item_count) const; 263 264 // Computes the table load as fraction. For example, if 1/4 of the entries are 265 // full, this value will be 0.25 266 float ComputeTableLoad() const { 267 return static_cast<float>(used_items_) / static_cast<float>(table_length_); 268 } 269 270 // Initializes a rebuild of the visited link database based on the browser 271 // history. This will set table_builder_ while working, and there should not 272 // already be a rebuild in place when called. See the definition for more 273 // details on how this works. 274 // 275 // Returns true on success. Failure means we're not attempting to rebuild 276 // the database because something failed. 277 bool RebuildTableFromHistory(); 278 279 // Callback that the table rebuilder uses when the rebuild is complete. 280 // |success| is true if the fingerprint generation succeeded, in which case 281 // |fingerprints| will contain the computed fingerprints. On failure, there 282 // will be no fingerprints. 283 void OnTableRebuildComplete(bool success, 284 const std::vector<Fingerprint>& fingerprints); 285 286 // Increases or decreases the given hash value by one, wrapping around as 287 // necessary. Used for probing. 288 inline Hash IncrementHash(Hash hash) { 289 if (hash >= table_length_ - 1) 290 return 0; // Wrap around. 291 return hash + 1; 292 } 293 inline Hash DecrementHash(Hash hash) { 294 if (hash <= 0) 295 return table_length_ - 1; // Wrap around. 296 return hash - 1; 297 } 298 299 Listener* listener_; 300 301 #ifndef NDEBUG 302 // Indicates whether any asynchronous operation has ever been completed. 303 // We do some synchronous reads that require that no asynchronous operations 304 // are pending, yet we don't track whether they have been completed. This 305 // flag is a sanity check that these reads only happen before any 306 // asynchronous writes have been fired. 307 bool posted_asynchronous_operation_; 308 #endif 309 310 // Reference to the user profile that this object belongs to 311 // (it knows the path to where the data is stored) 312 Profile* profile_; 313 314 // When non-NULL, indicates we are in database rebuild mode and points to 315 // the class collecting fingerprint information from the history system. 316 // The pointer is owned by this class, but it must remain valid while the 317 // history query is running. We must only delete it when the query is done. 318 scoped_refptr<TableBuilder> table_builder_; 319 320 // Indicates URLs added and deleted since we started rebuilding the table. 321 std::set<Fingerprint> added_since_rebuild_; 322 std::set<Fingerprint> deleted_since_rebuild_; 323 324 // TODO(brettw) Support deletion, we need to track whether anything was 325 // deleted during the rebuild here. Then we should delete any of these 326 // entries from the complete table later. 327 // std::vector<Fingerprint> removed_since_rebuild_; 328 329 // The currently open file with the table in it. This may be NULL if we're 330 // rebuilding and haven't written a new version yet. Writing to the file may 331 // be safely ignored in this case. 332 FILE* file_; 333 334 // Shared memory consists of a SharedHeader followed by the table. 335 base::SharedMemory *shared_memory_; 336 337 // When we generate new tables, we increment the serial number of the 338 // shared memory object. 339 int32 shared_memory_serial_; 340 341 // Number of non-empty items in the table, used to compute fullness. 342 int32 used_items_; 343 344 // Testing values ----------------------------------------------------------- 345 // 346 // The following fields exist for testing purposes. They are not used in 347 // release builds. It'd be nice to eliminate them in release builds, but we 348 // don't want to change the signature of the object between the unit test and 349 // regular builds. Instead, we just have "default" values that these take 350 // in release builds that give "regular" behavior. 351 352 // Overridden database file name for testing 353 FilePath database_name_override_; 354 355 // When nonzero, overrides the table size for new databases for testing 356 int32 table_size_override_; 357 358 // When non-NULL, overrides the history service to use rather than as the 359 // BrowserProcess. This is provided for unit tests. 360 HistoryService* history_service_override_; 361 362 // When non-NULL, indicates the task that should be run after the next 363 // rebuild from history is complete. 364 scoped_ptr<Task> rebuild_complete_task_; 365 366 // Set to prevent us from attempting to rebuild the database from global 367 // history if we have an error opening the file. This is used for testing, 368 // will be false in production. 369 bool suppress_rebuild_; 370 371 DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster); 372 }; 373 374 // NOTE: These methods are defined inline here, so we can share the compilation 375 // of visitedlink_master.cc between the browser and the unit/perf tests. 376 377 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) 378 inline void VisitedLinkMaster::DebugValidate() { 379 int32 used_count = 0; 380 for (int32 i = 0; i < table_length_; i++) { 381 if (hash_table_[i]) 382 used_count++; 383 } 384 DCHECK_EQ(used_count, used_items_); 385 } 386 #endif 387 388 #endif // CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_ 389