1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ 6 #define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ 7 8 #if defined(OS_WIN) 9 #include <windows.h> 10 #endif 11 #include <set> 12 #include <vector> 13 14 #include "base/callback.h" 15 #include "base/callback_forward.h" 16 #include "base/files/file_path.h" 17 #include "base/gtest_prod_util.h" 18 #include "base/memory/shared_memory.h" 19 #include "base/threading/sequenced_worker_pool.h" 20 #include "components/visitedlink/common/visitedlink_common.h" 21 22 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) 23 #include "base/logging.h" 24 #endif 25 26 class GURL; 27 28 namespace content { 29 class BrowserContext; 30 } 31 32 namespace visitedlink { 33 34 class VisitedLinkDelegate; 35 36 // Controls the link coloring database. The master controls all writing to the 37 // database as well as disk I/O. There should be only one master. 38 // 39 // This class will defer writing operations to the file thread. This means that 40 // class destruction, the file may still be open since operations are pending on 41 // another thread. 42 class VisitedLinkMaster : public VisitedLinkCommon { 43 public: 44 // Listens to the link coloring database events. The master is given this 45 // event as a constructor argument and dispatches events using it. 46 class Listener { 47 public: 48 virtual ~Listener() {} 49 50 // Called when link coloring database has been created or replaced. The 51 // argument is the new table handle. 52 virtual void NewTable(base::SharedMemory*) = 0; 53 54 // Called when new link has been added. The argument is the fingerprint 55 // (hash) of the link. 56 virtual void Add(Fingerprint fingerprint) = 0; 57 58 // Called when link coloring state has been reset. This may occur when 59 // entire or parts of history were deleted. 60 virtual void Reset() = 0; 61 }; 62 63 VisitedLinkMaster(content::BrowserContext* browser_context, 64 VisitedLinkDelegate* delegate, 65 bool persist_to_disk); 66 67 // In unit test mode, we allow the caller to optionally specify the database 68 // filename so that it can be run from a unit test. The directory where this 69 // file resides must exist in this mode. You can also specify the default 70 // table size to test table resizing. If this parameter is 0, we will use the 71 // defaults. 72 // 73 // In the unit test mode, we also allow the caller to provide a history 74 // service pointer (the history service can't be fetched from the browser 75 // process when we're in unit test mode). This can be NULL to try to access 76 // the main version, which will probably fail (which can be good for testing 77 // this failure mode). 78 // 79 // When |suppress_rebuild| is set, we'll not attempt to load data from 80 // history if the file can't be loaded. This should generally be set for 81 // testing except when you want to test the rebuild process explicitly. 82 VisitedLinkMaster(Listener* listener, 83 VisitedLinkDelegate* delegate, 84 bool persist_to_disk, 85 bool suppress_rebuild, 86 const base::FilePath& filename, 87 int32 default_table_size); 88 virtual ~VisitedLinkMaster(); 89 90 // Must be called immediately after object creation. Nothing else will work 91 // until this is called. Returns true on success, false means that this 92 // object won't work. 93 bool Init(); 94 95 base::SharedMemory* shared_memory() { return shared_memory_; } 96 97 // Adds a URL to the table. 98 void AddURL(const GURL& url); 99 100 // Adds a set of URLs to the table. 101 void AddURLs(const std::vector<GURL>& url); 102 103 // See DeleteURLs. 104 class URLIterator { 105 public: 106 // HasNextURL must return true when this is called. Returns the next URL 107 // then advances the iterator. Note that the returned reference is only 108 // valid until the next call of NextURL. 109 virtual const GURL& NextURL() = 0; 110 111 // Returns true if still has URLs to be iterated. 112 virtual bool HasNextURL() const = 0; 113 114 protected: 115 virtual ~URLIterator() {} 116 }; 117 118 // Deletes the specified URLs from |rows| from the table. 119 void DeleteURLs(URLIterator* iterator); 120 121 // Clears the visited links table by deleting the file from disk. Used as 122 // part of history clearing. 123 void DeleteAllURLs(); 124 125 // Returns the Delegate of this Master. 126 VisitedLinkDelegate* GetDelegate(); 127 128 #if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST) 129 // This is a debugging function that can be called to double-check internal 130 // data structures. It will assert if the check fails. 131 void DebugValidate(); 132 133 // Sets a task to execute when the next rebuild from history is complete. 134 // This is used by unit tests to wait for the rebuild to complete before 135 // they continue. The pointer will be owned by this object after the call. 136 void set_rebuild_complete_task(const base::Closure& task) { 137 DCHECK(rebuild_complete_task_.is_null()); 138 rebuild_complete_task_ = task; 139 } 140 141 // returns the number of items in the table for testing verification 142 int32 GetUsedCount() const { 143 return used_items_; 144 } 145 146 // Returns the listener. 147 VisitedLinkMaster::Listener* GetListener() const { 148 return listener_.get(); 149 } 150 151 // Call to cause the entire database file to be re-written from scratch 152 // to disk. Used by the performance tester. 153 void RewriteFile() { 154 WriteFullTable(); 155 } 156 #endif 157 158 private: 159 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete); 160 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete); 161 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport); 162 163 // Object to rebuild the table on the history thread (see the .cc file). 164 class TableBuilder; 165 166 // Byte offsets of values in the header. 167 static const int32 kFileHeaderSignatureOffset; 168 static const int32 kFileHeaderVersionOffset; 169 static const int32 kFileHeaderLengthOffset; 170 static const int32 kFileHeaderUsedOffset; 171 static const int32 kFileHeaderSaltOffset; 172 173 // The signature at the beginning of a file. 174 static const int32 kFileSignature; 175 176 // version of the file format this module currently uses 177 static const int32 kFileCurrentVersion; 178 179 // Bytes in the file header, including the salt. 180 static const size_t kFileHeaderSize; 181 182 // When creating a fresh new table, we use this many entries. 183 static const unsigned kDefaultTableSize; 184 185 // When the user is deleting a boatload of URLs, we don't really want to do 186 // individual writes for each of them. When the count exceeds this threshold, 187 // we will write the whole table to disk at once instead of individual items. 188 static const size_t kBigDeleteThreshold; 189 190 // Backend for the constructors initializing the members. 191 void InitMembers(); 192 193 // If a rebuild is in progress, we save the URL in the temporary list. 194 // Otherwise, we add this to the table. Returns the index of the 195 // inserted fingerprint or null_hash_ on failure. 196 Hash TryToAddURL(const GURL& url); 197 198 // File I/O functions 199 // ------------------ 200 // These functions are only called if |persist_to_disk_| is true. 201 202 // Posts the given task to the blocking worker pool with our options. 203 void PostIOTask(const tracked_objects::Location& from_here, 204 const base::Closure& task); 205 206 // Writes the entire table to disk. It will leave the table file open and 207 // the handle to it will be stored in file_. 208 void WriteFullTable(); 209 210 // Try to load the table from the database file. If the file doesn't exist or 211 // is corrupt, this will return failure. 212 bool InitFromFile(); 213 214 // Reads the header of the link coloring database from disk. Assumes the 215 // file pointer is at the beginning of the file and that there are no pending 216 // asynchronous I/O operations. 217 // 218 // Returns true on success and places the size of the table in num_entries 219 // and the number of nonzero fingerprints in used_count. This will fail if 220 // the version of the file is not the current version of the database. 221 bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count, 222 uint8 salt[LINK_SALT_LENGTH]); 223 224 // Fills *filename with the name of the link database filename 225 bool GetDatabaseFileName(base::FilePath* filename); 226 227 // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy 228 // the write to a background thread. 229 void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size); 230 231 // Helper function to schedule and asynchronous write of the used count to 232 // disk (this is a common operation). 233 void WriteUsedItemCountToFile(); 234 235 // Helper function to schedule an asynchronous write of the given range of 236 // hash functions to disk. The range is inclusive on both ends. The range can 237 // wrap around at 0 and this function will handle it. 238 void WriteHashRangeToFile(Hash first_hash, Hash last_hash); 239 240 // Synchronous read from the file. Assumes there are no pending asynchronous 241 // I/O functions. Returns true if the entire buffer was successfully filled. 242 bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size); 243 244 // General table handling 245 // ---------------------- 246 247 // Called to add a fingerprint to the table. If |send_notifications| is true 248 // and the item is added successfully, Listener::Add will be invoked. 249 // Returns the index of the inserted fingerprint or null_hash_ if there was a 250 // duplicate and this item was skippped. 251 Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications); 252 253 // Deletes all fingerprints from the given vector from the current hash table 254 // and syncs it to disk if there are changes. This does not update the 255 // deleted_since_rebuild_ list, the caller must update this itself if there 256 // is an update pending. 257 void DeleteFingerprintsFromCurrentTable( 258 const std::set<Fingerprint>& fingerprints); 259 260 // Removes the indicated fingerprint from the table. If the update_file flag 261 // is set, the changes will also be written to disk. Returns true if the 262 // fingerprint was deleted, false if it was not in the table to delete. 263 bool DeleteFingerprint(Fingerprint fingerprint, bool update_file); 264 265 // Creates a new empty table, call if InitFromFile() fails. Normally, when 266 // |suppress_rebuild| is false, the table will be rebuilt from history, 267 // keeping us in sync. When |suppress_rebuild| is true, the new table will be 268 // empty and we will not consult history. This is used when clearing the 269 // database and for unit tests. 270 bool InitFromScratch(bool suppress_rebuild); 271 272 // Allocates the Fingerprint structure and length. When init_to_empty is set, 273 // the table will be filled with 0s and used_items_ will be set to 0 as well. 274 // If the flag is not set, these things are untouched and it is the 275 // responsibility of the caller to fill them (like when we are reading from 276 // a file). 277 bool CreateURLTable(int32 num_entries, bool init_to_empty); 278 279 // A wrapper for CreateURLTable, this will allocate a new table, initialized 280 // to empty. The caller is responsible for saving the shared memory pointer 281 // and handles before this call (they will be replaced with new ones) and 282 // releasing them later. This is designed for callers that make a new table 283 // and then copy values from the old table to the new one, then release the 284 // old table. 285 // 286 // Returns true on success. On failure, the old table will be restored. The 287 // caller should not attemp to release the pointer/handle in this case. 288 bool BeginReplaceURLTable(int32 num_entries); 289 290 // unallocates the Fingerprint table 291 void FreeURLTable(); 292 293 // For growing the table. ResizeTableIfNecessary will check to see if the 294 // table should be resized and calls ResizeTable if needed. Returns true if 295 // we decided to resize the table. 296 bool ResizeTableIfNecessary(); 297 298 // Resizes the table (growing or shrinking) as necessary to accomodate the 299 // current count. 300 void ResizeTable(int32 new_size); 301 302 // Returns the desired table size for |item_count| URLs. 303 uint32 NewTableSizeForCount(int32 item_count) const; 304 305 // Computes the table load as fraction. For example, if 1/4 of the entries are 306 // full, this value will be 0.25 307 float ComputeTableLoad() const { 308 return static_cast<float>(used_items_) / static_cast<float>(table_length_); 309 } 310 311 // Initializes a rebuild of the visited link database based on the browser 312 // history. This will set table_builder_ while working, and there should not 313 // already be a rebuild in place when called. See the definition for more 314 // details on how this works. 315 // 316 // Returns true on success. Failure means we're not attempting to rebuild 317 // the database because something failed. 318 bool RebuildTableFromDelegate(); 319 320 // Callback that the table rebuilder uses when the rebuild is complete. 321 // |success| is true if the fingerprint generation succeeded, in which case 322 // |fingerprints| will contain the computed fingerprints. On failure, there 323 // will be no fingerprints. 324 void OnTableRebuildComplete(bool success, 325 const std::vector<Fingerprint>& fingerprints); 326 327 // Increases or decreases the given hash value by one, wrapping around as 328 // necessary. Used for probing. 329 inline Hash IncrementHash(Hash hash) { 330 if (hash >= table_length_ - 1) 331 return 0; // Wrap around. 332 return hash + 1; 333 } 334 inline Hash DecrementHash(Hash hash) { 335 if (hash <= 0) 336 return table_length_ - 1; // Wrap around. 337 return hash - 1; 338 } 339 340 #ifndef NDEBUG 341 // Indicates whether any asynchronous operation has ever been completed. 342 // We do some synchronous reads that require that no asynchronous operations 343 // are pending, yet we don't track whether they have been completed. This 344 // flag is a sanity check that these reads only happen before any 345 // asynchronous writes have been fired. 346 bool posted_asynchronous_operation_; 347 #endif 348 349 // Reference to the browser context that this object belongs to 350 // (it knows the path to where the data is stored) 351 content::BrowserContext* browser_context_; 352 353 // Client owns the delegate and is responsible for it being valid through 354 // the life time this VisitedLinkMaster. 355 VisitedLinkDelegate* delegate_; 356 357 // VisitedLinkEventListener to handle incoming events. 358 scoped_ptr<Listener> listener_; 359 360 // Lazily initialized sequence token for posting file tasks. 361 base::SequencedWorkerPool::SequenceToken sequence_token_; 362 363 // When non-NULL, indicates we are in database rebuild mode and points to 364 // the class collecting fingerprint information from the history system. 365 // The pointer is owned by this class, but it must remain valid while the 366 // history query is running. We must only delete it when the query is done. 367 scoped_refptr<TableBuilder> table_builder_; 368 369 // Indicates URLs added and deleted since we started rebuilding the table. 370 std::set<Fingerprint> added_since_rebuild_; 371 std::set<Fingerprint> deleted_since_rebuild_; 372 373 // TODO(brettw) Support deletion, we need to track whether anything was 374 // deleted during the rebuild here. Then we should delete any of these 375 // entries from the complete table later. 376 // std::vector<Fingerprint> removed_since_rebuild_; 377 378 // The currently open file with the table in it. This may be NULL if we're 379 // rebuilding and haven't written a new version yet or if |persist_to_disk_| 380 // is false. Writing to the file may be safely ignored in this case. Also 381 // |file_| may be non-NULL but point to a NULL pointer. That would mean that 382 // opening of the file is already scheduled in a background thread and any 383 // writing to the file can also be scheduled to the background thread as it's 384 // guaranteed to be executed after the opening. 385 // The class owns both the |file_| pointer and the pointer pointed 386 // by |*file_|. 387 FILE** file_; 388 389 // If true, will try to persist the hash table to disk. Will rebuild from 390 // VisitedLinkDelegate::RebuildTable if there are disk corruptions. 391 bool persist_to_disk_; 392 393 // Shared memory consists of a SharedHeader followed by the table. 394 base::SharedMemory *shared_memory_; 395 396 // When we generate new tables, we increment the serial number of the 397 // shared memory object. 398 int32 shared_memory_serial_; 399 400 // Number of non-empty items in the table, used to compute fullness. 401 int32 used_items_; 402 403 // Testing values ----------------------------------------------------------- 404 // 405 // The following fields exist for testing purposes. They are not used in 406 // release builds. It'd be nice to eliminate them in release builds, but we 407 // don't want to change the signature of the object between the unit test and 408 // regular builds. Instead, we just have "default" values that these take 409 // in release builds that give "regular" behavior. 410 411 // Overridden database file name for testing 412 base::FilePath database_name_override_; 413 414 // When nonzero, overrides the table size for new databases for testing 415 int32 table_size_override_; 416 417 // When set, indicates the task that should be run after the next rebuild from 418 // history is complete. 419 base::Closure rebuild_complete_task_; 420 421 // Set to prevent us from attempting to rebuild the database from global 422 // history if we have an error opening the file. This is used for testing, 423 // will be false in production. 424 bool suppress_rebuild_; 425 426 DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster); 427 }; 428 429 // NOTE: These methods are defined inline here, so we can share the compilation 430 // of visitedlink_master.cc between the browser and the unit/perf tests. 431 432 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) 433 inline void VisitedLinkMaster::DebugValidate() { 434 int32 used_count = 0; 435 for (int32 i = 0; i < table_length_; i++) { 436 if (hash_table_[i]) 437 used_count++; 438 } 439 DCHECK_EQ(used_count, used_items_); 440 } 441 #endif 442 443 } // namespace visitedlink 444 445 #endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ 446