1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_ 6 #define COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_ 7 8 #include <vector> 9 10 #include "base/basictypes.h" 11 12 class GURL; 13 14 namespace visitedlink { 15 16 // number of bytes in the salt 17 #define LINK_SALT_LENGTH 8 18 19 // A multiprocess-safe database of the visited links for the browser. There 20 // should be exactly one process that has write access (implemented by 21 // VisitedLinkMaster), while all other processes should be read-only 22 // (implemented by VisitedLinkSlave). These other processes add links by calling 23 // the writer process to add them for it. The writer may also notify the readers 24 // to replace their table when the table is resized. 25 // 26 // IPC is not implemented in these classes. This is done through callback 27 // functions supplied by the creator of these objects to allow more flexibility, 28 // especially for testing. 29 // 30 // This class defines the common base for these others. We implement accessors 31 // for looking things up in the hash table, and for computing hash values and 32 // fingerprints. Both the master and the slave inherit from this, and add their 33 // own code to set up and change these values as their design requires. The 34 // slave pretty much just sets up the shared memory and saves the pointer. The 35 // master does a lot of work to manage the table, reading and writing it to and 36 // from disk, and resizing it when it gets too full. 37 // 38 // To ask whether a page is in history, we compute a 64-bit fingerprint of the 39 // URL. This URL is hashed and we see if it is in the URL hashtable. If it is, 40 // we consider it visited. Otherwise, it is unvisited. Note that it is possible 41 // to get collisions, which is the penalty for not storing all URL strings in 42 // memory (which could get to be more than we want to have in memory). We use 43 // a salt value for the links on one computer so that an attacker can not 44 // manually create a link that causes a collision. 45 class VisitedLinkCommon { 46 public: 47 // A number that identifies the URL. 48 typedef uint64 Fingerprint; 49 typedef std::vector<Fingerprint> Fingerprints; 50 51 // A hash value of a fingerprint 52 typedef int32 Hash; 53 54 // A fingerprint or hash value that does not exist 55 static const Fingerprint null_fingerprint_; 56 static const Hash null_hash_; 57 58 VisitedLinkCommon(); 59 virtual ~VisitedLinkCommon(); 60 61 // Returns the fingerprint for the given URL. 62 Fingerprint ComputeURLFingerprint(const char* canonical_url, 63 size_t url_len) const { 64 return ComputeURLFingerprint(canonical_url, url_len, salt_); 65 } 66 67 // Looks up the given key in the table. The fingerprint for the URL is 68 // computed if you call one with the string argument. Returns true if found. 69 // Does not modify the hastable. 70 bool IsVisited(const char* canonical_url, size_t url_len) const; 71 bool IsVisited(const GURL& url) const; 72 bool IsVisited(Fingerprint fingerprint) const; 73 74 #ifdef UNIT_TEST 75 // Returns statistics about DB usage 76 void GetUsageStatistics(int32* table_size, 77 VisitedLinkCommon::Fingerprint** fingerprints) { 78 *table_size = table_length_; 79 *fingerprints = hash_table_; 80 } 81 #endif 82 83 protected: 84 // This structure is at the beginning of the shared memory so that the slaves 85 // can get stats on the table 86 struct SharedHeader { 87 // see goes into table_length_ 88 uint32 length; 89 90 // goes into salt_ 91 uint8 salt[LINK_SALT_LENGTH]; 92 }; 93 94 // Returns the fingerprint at the given index into the URL table. This 95 // function should be called instead of accessing the table directly to 96 // contain endian issues. 97 Fingerprint FingerprintAt(int32 table_offset) const { 98 if (!hash_table_) 99 return null_fingerprint_; 100 return hash_table_[table_offset]; 101 } 102 103 // Computes the fingerprint of the given canonical URL. It is static so the 104 // same algorithm can be re-used by the table rebuilder, so you will have to 105 // pass the salt as a parameter. See the non-static version above if you 106 // want to use the current class' salt. 107 static Fingerprint ComputeURLFingerprint(const char* canonical_url, 108 size_t url_len, 109 const uint8 salt[LINK_SALT_LENGTH]); 110 111 // Computes the hash value of the given fingerprint, this is used as a lookup 112 // into the hashtable. 113 static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) { 114 if (table_length == 0) 115 return null_hash_; 116 return static_cast<Hash>(fingerprint % table_length); 117 } 118 // Uses the current hashtable. 119 Hash HashFingerprint(Fingerprint fingerprint) const { 120 return HashFingerprint(fingerprint, table_length_); 121 } 122 123 // pointer to the first item 124 VisitedLinkCommon::Fingerprint* hash_table_; 125 126 // the number of items in the hash table 127 int32 table_length_; 128 129 // salt used for each URL when computing the fingerprint 130 uint8 salt_[LINK_SALT_LENGTH]; 131 132 private: 133 DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon); 134 }; 135 136 } // namespace visitedlink 137 138 #endif // COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_ 139