Home | History | Annotate | Download | only in common
      1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
      6 #define COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
      7 
      8 #include <vector>
      9 
     10 #include "base/basictypes.h"
     11 
     12 class GURL;
     13 
     14 namespace visitedlink {
     15 
     16 // number of bytes in the salt
     17 #define LINK_SALT_LENGTH 8
     18 
     19 // A multiprocess-safe database of the visited links for the browser. There
     20 // should be exactly one process that has write access (implemented by
     21 // VisitedLinkMaster), while all other processes should be read-only
     22 // (implemented by VisitedLinkSlave). These other processes add links by calling
     23 // the writer process to add them for it. The writer may also notify the readers
     24 // to replace their table when the table is resized.
     25 //
     26 // IPC is not implemented in these classes. This is done through callback
     27 // functions supplied by the creator of these objects to allow more flexibility,
     28 // especially for testing.
     29 //
     30 // This class defines the common base for these others. We implement accessors
     31 // for looking things up in the hash table, and for computing hash values and
     32 // fingerprints. Both the master and the slave inherit from this, and add their
     33 // own code to set up and change these values as their design requires. The
     34 // slave pretty much just sets up the shared memory and saves the pointer. The
     35 // master does a lot of work to manage the table, reading and writing it to and
     36 // from disk, and resizing it when it gets too full.
     37 //
     38 // To ask whether a page is in history, we compute a 64-bit fingerprint of the
     39 // URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
     40 // we consider it visited. Otherwise, it is unvisited. Note that it is possible
     41 // to get collisions, which is the penalty for not storing all URL strings in
     42 // memory (which could get to be more than we want to have in memory). We use
     43 // a salt value for the links on one computer so that an attacker can not
     44 // manually create a link that causes a collision.
     45 class VisitedLinkCommon {
     46  public:
     47   // A number that identifies the URL.
     48   typedef uint64 Fingerprint;
     49   typedef std::vector<Fingerprint> Fingerprints;
     50 
     51   // A hash value of a fingerprint
     52   typedef int32 Hash;
     53 
     54   // A fingerprint or hash value that does not exist
     55   static const Fingerprint null_fingerprint_;
     56   static const Hash null_hash_;
     57 
     58   VisitedLinkCommon();
     59   virtual ~VisitedLinkCommon();
     60 
     61   // Returns the fingerprint for the given URL.
     62   Fingerprint ComputeURLFingerprint(const char* canonical_url,
     63                                     size_t url_len) const {
     64     return ComputeURLFingerprint(canonical_url, url_len, salt_);
     65   }
     66 
     67   // Looks up the given key in the table. The fingerprint for the URL is
     68   // computed if you call one with the string argument. Returns true if found.
     69   // Does not modify the hastable.
     70   bool IsVisited(const char* canonical_url, size_t url_len) const;
     71   bool IsVisited(const GURL& url) const;
     72   bool IsVisited(Fingerprint fingerprint) const;
     73 
     74 #ifdef UNIT_TEST
     75   // Returns statistics about DB usage
     76   void GetUsageStatistics(int32* table_size,
     77                           VisitedLinkCommon::Fingerprint** fingerprints) {
     78     *table_size = table_length_;
     79     *fingerprints = hash_table_;
     80   }
     81 #endif
     82 
     83  protected:
     84   // This structure is at the beginning of the shared memory so that the slaves
     85   // can get stats on the table
     86   struct SharedHeader {
     87     // see goes into table_length_
     88     uint32 length;
     89 
     90     // goes into salt_
     91     uint8 salt[LINK_SALT_LENGTH];
     92   };
     93 
     94   // Returns the fingerprint at the given index into the URL table. This
     95   // function should be called instead of accessing the table directly to
     96   // contain endian issues.
     97   Fingerprint FingerprintAt(int32 table_offset) const {
     98     if (!hash_table_)
     99       return null_fingerprint_;
    100     return hash_table_[table_offset];
    101   }
    102 
    103   // Computes the fingerprint of the given canonical URL. It is static so the
    104   // same algorithm can be re-used by the table rebuilder, so you will have to
    105   // pass the salt as a parameter. See the non-static version above if you
    106   // want to use the current class' salt.
    107   static Fingerprint ComputeURLFingerprint(const char* canonical_url,
    108                                            size_t url_len,
    109                                            const uint8 salt[LINK_SALT_LENGTH]);
    110 
    111   // Computes the hash value of the given fingerprint, this is used as a lookup
    112   // into the hashtable.
    113   static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
    114     if (table_length == 0)
    115       return null_hash_;
    116     return static_cast<Hash>(fingerprint % table_length);
    117   }
    118   // Uses the current hashtable.
    119   Hash HashFingerprint(Fingerprint fingerprint) const {
    120     return HashFingerprint(fingerprint, table_length_);
    121   }
    122 
    123   // pointer to the first item
    124   VisitedLinkCommon::Fingerprint* hash_table_;
    125 
    126   // the number of items in the hash table
    127   int32 table_length_;
    128 
    129   // salt used for each URL when computing the fingerprint
    130   uint8 salt_[LINK_SALT_LENGTH];
    131 
    132  private:
    133   DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon);
    134 };
    135 
    136 }  // namespace visitedlink
    137 
    138 #endif  // COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
    139