Home | History | Annotate | Download | only in common
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/visitedlink/common/visitedlink_common.h"
      6 
      7 #include <string.h>  // for memset()
      8 
      9 #include "base/logging.h"
     10 #include "base/md5.h"
     11 #include "url/gurl.h"
     12 
     13 namespace visitedlink {
     14 
     15 const VisitedLinkCommon::Fingerprint VisitedLinkCommon::null_fingerprint_ = 0;
     16 const VisitedLinkCommon::Hash VisitedLinkCommon::null_hash_ = -1;
     17 
     18 VisitedLinkCommon::VisitedLinkCommon()
     19     : hash_table_(NULL),
     20       table_length_(0) {
     21   memset(salt_, 0, sizeof(salt_));
     22 }
     23 
     24 VisitedLinkCommon::~VisitedLinkCommon() {
     25 }
     26 
     27 // FIXME: this uses linear probing, it should be replaced with quadratic
     28 // probing or something better. See VisitedLinkMaster::AddFingerprint
     29 bool VisitedLinkCommon::IsVisited(const char* canonical_url,
     30                                   size_t url_len) const {
     31   if (url_len == 0)
     32     return false;
     33   if (!hash_table_ || table_length_ == 0)
     34     return false;
     35   return IsVisited(ComputeURLFingerprint(canonical_url, url_len));
     36 }
     37 
     38 bool VisitedLinkCommon::IsVisited(const GURL& url) const {
     39   return IsVisited(url.spec().data(), url.spec().size());
     40 }
     41 
     42 bool VisitedLinkCommon::IsVisited(Fingerprint fingerprint) const {
     43   // Go through the table until we find the item or an empty spot (meaning it
     44   // wasn't found). This loop will terminate as long as the table isn't full,
     45   // which should be enforced by AddFingerprint.
     46   Hash first_hash = HashFingerprint(fingerprint);
     47   Hash cur_hash = first_hash;
     48   while (true) {
     49     Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
     50     if (cur_fingerprint == null_fingerprint_)
     51       return false;  // End of probe sequence found.
     52     if (cur_fingerprint == fingerprint)
     53       return true;  // Found a match.
     54 
     55     // This spot was taken, but not by the item we're looking for, search in
     56     // the next position.
     57     cur_hash++;
     58     if (cur_hash == table_length_)
     59       cur_hash = 0;
     60     if (cur_hash == first_hash) {
     61       // Wrapped around and didn't find an empty space, this means we're in an
     62       // infinite loop because AddFingerprint didn't do its job resizing.
     63       NOTREACHED();
     64       return false;
     65     }
     66   }
     67 }
     68 
     69 // Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint,
     70 // this is as random as any other subset of the MD5SUM.
     71 //
     72 // FIXME: this uses the MD5SUM of the 16-bit character version. For systems
     73 // where wchar_t is not 16 bits (Linux uses 32 bits, I think), this will not be
     74 // compatable. We should define explicitly what should happen here across
     75 // platforms, and convert if necessary (probably to UTF-16).
     76 
     77 // static
     78 VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputeURLFingerprint(
     79     const char* canonical_url,
     80     size_t url_len,
     81     const uint8 salt[LINK_SALT_LENGTH]) {
     82   DCHECK(url_len > 0) << "Canonical URLs should not be empty";
     83 
     84   base::MD5Context ctx;
     85   base::MD5Init(&ctx);
     86   base::MD5Update(&ctx, base::StringPiece(reinterpret_cast<const char*>(salt),
     87                                           LINK_SALT_LENGTH));
     88   base::MD5Update(&ctx, base::StringPiece(canonical_url, url_len));
     89 
     90   base::MD5Digest digest;
     91   base::MD5Final(&digest, &ctx);
     92 
     93   // This is the same as "return *(Fingerprint*)&digest.a;" but if we do that
     94   // direct cast the alignment could be wrong, and we can't access a 64-bit int
     95   // on arbitrary alignment on some processors. This reinterpret_casts it
     96   // down to a char array of the same size as fingerprint, and then does the
     97   // bit cast, which amounts to a memcpy. This does not handle endian issues.
     98   return bit_cast<Fingerprint, uint8[8]>(
     99       *reinterpret_cast<uint8(*)[8]>(&digest.a));
    100 }
    101 
    102 }  // namespace visitedlink
    103