Home | History | Annotate | Download | only in common
      1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_COMMON_VISITEDLINK_COMMON_H__
      6 #define CHROME_COMMON_VISITEDLINK_COMMON_H__
      7 #pragma once
      8 
      9 #include <vector>
     10 
     11 #include "base/basictypes.h"
     12 
     13 class GURL;
     14 
     15 // number of bytes in the salt
     16 #define LINK_SALT_LENGTH 8
     17 
     18 // A multiprocess-safe database of the visited links for the browser. There
     19 // should be exactly one process that has write access (implemented by
     20 // VisitedLinkMaster), while all other processes should be read-only
     21 // (implemented by VisitedLinkSlave). These other processes add links by calling
     22 // the writer process to add them for it. The writer may also notify the readers
     23 // to replace their table when the table is resized.
     24 //
     25 // IPC is not implemented in these classes. This is done through callback
     26 // functions supplied by the creator of these objects to allow more flexibility,
     27 // especially for testing.
     28 //
     29 // This class defines the common base for these others. We implement accessors
     30 // for looking things up in the hash table, and for computing hash values and
     31 // fingerprints. Both the master and the slave inherit from this, and add their
     32 // own code to set up and change these values as their design requires. The
     33 // slave pretty much just sets up the shared memory and saves the pointer. The
     34 // master does a lot of work to manage the table, reading and writing it to and
     35 // from disk, and resizing it when it gets too full.
     36 //
     37 // To ask whether a page is in history, we compute a 64-bit fingerprint of the
     38 // URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
     39 // we consider it visited. Otherwise, it is unvisited. Note that it is possible
     40 // to get collisions, which is the penalty for not storing all URL strings in
     41 // memory (which could get to be more than we want to have in memory). We use
     42 // a salt value for the links on one computer so that an attacker can not
     43 // manually create a link that causes a collision.
     44 class VisitedLinkCommon {
     45  public:
     46   // A number that identifies the URL.
     47   typedef uint64 Fingerprint;
     48   typedef std::vector<Fingerprint> Fingerprints;
     49 
     50   // A hash value of a fingerprint
     51   typedef int32 Hash;
     52 
     53   // A fingerprint or hash value that does not exist
     54   static const Fingerprint null_fingerprint_;
     55   static const Hash null_hash_;
     56 
     57   VisitedLinkCommon();
     58   virtual ~VisitedLinkCommon();
     59 
     60   // Returns the fingerprint for the given URL.
     61   Fingerprint ComputeURLFingerprint(const char* canonical_url,
     62                                     size_t url_len) const {
     63     return ComputeURLFingerprint(canonical_url, url_len, salt_);
     64   }
     65 
     66   // Looks up the given key in the table. The fingerprint for the URL is
     67   // computed if you call one with the string argument. Returns true if found.
     68   // Does not modify the hastable.
     69   bool IsVisited(const char* canonical_url, size_t url_len) const;
     70   bool IsVisited(const GURL& url) const;
     71   bool IsVisited(Fingerprint fingerprint) const;
     72 
     73 #ifdef UNIT_TEST
     74   // Returns statistics about DB usage
     75   void GetUsageStatistics(int32* table_size,
     76                           VisitedLinkCommon::Fingerprint** fingerprints) {
     77     *table_size = table_length_;
     78     *fingerprints = hash_table_;
     79   }
     80 #endif
     81 
     82  protected:
     83   // This structure is at the beginning of the shared memory so that the slaves
     84   // can get stats on the table
     85   struct SharedHeader {
     86     // see goes into table_length_
     87     uint32 length;
     88 
     89     // goes into salt_
     90     uint8 salt[LINK_SALT_LENGTH];
     91   };
     92 
     93   // Returns the fingerprint at the given index into the URL table. This
     94   // function should be called instead of accessing the table directly to
     95   // contain endian issues.
     96   Fingerprint FingerprintAt(int32 table_offset) const {
     97     if (!hash_table_)
     98       return null_fingerprint_;
     99     return hash_table_[table_offset];
    100   }
    101 
    102   // Computes the fingerprint of the given canonical URL. It is static so the
    103   // same algorithm can be re-used by the table rebuilder, so you will have to
    104   // pass the salt as a parameter. See the non-static version above if you
    105   // want to use the current class' salt.
    106   static Fingerprint ComputeURLFingerprint(const char* canonical_url,
    107                                            size_t url_len,
    108                                            const uint8 salt[LINK_SALT_LENGTH]);
    109 
    110   // Computes the hash value of the given fingerprint, this is used as a lookup
    111   // into the hashtable.
    112   static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
    113     if (table_length == 0)
    114       return null_hash_;
    115     return static_cast<Hash>(fingerprint % table_length);
    116   }
    117   // Uses the current hashtable.
    118   Hash HashFingerprint(Fingerprint fingerprint) const {
    119     return HashFingerprint(fingerprint, table_length_);
    120   }
    121 
    122   // pointer to the first item
    123   VisitedLinkCommon::Fingerprint* hash_table_;
    124 
    125   // the number of items in the hash table
    126   int32 table_length_;
    127 
    128   // salt used for each URL when computing the fingerprint
    129   uint8 salt_[LINK_SALT_LENGTH];
    130 
    131  private:
    132   DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon);
    133 };
    134 
    135 #endif  // CHROME_COMMON_VISITEDLINK_COMMON_H_
    136