1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "components/visitedlink/common/visitedlink_common.h" 6 7 #include <string.h> // for memset() 8 9 #include "base/logging.h" 10 #include "base/md5.h" 11 #include "url/gurl.h" 12 13 namespace visitedlink { 14 15 const VisitedLinkCommon::Fingerprint VisitedLinkCommon::null_fingerprint_ = 0; 16 const VisitedLinkCommon::Hash VisitedLinkCommon::null_hash_ = -1; 17 18 VisitedLinkCommon::VisitedLinkCommon() 19 : hash_table_(NULL), 20 table_length_(0) { 21 memset(salt_, 0, sizeof(salt_)); 22 } 23 24 VisitedLinkCommon::~VisitedLinkCommon() { 25 } 26 27 // FIXME: this uses linear probing, it should be replaced with quadratic 28 // probing or something better. See VisitedLinkMaster::AddFingerprint 29 bool VisitedLinkCommon::IsVisited(const char* canonical_url, 30 size_t url_len) const { 31 if (url_len == 0) 32 return false; 33 if (!hash_table_ || table_length_ == 0) 34 return false; 35 return IsVisited(ComputeURLFingerprint(canonical_url, url_len)); 36 } 37 38 bool VisitedLinkCommon::IsVisited(const GURL& url) const { 39 return IsVisited(url.spec().data(), url.spec().size()); 40 } 41 42 bool VisitedLinkCommon::IsVisited(Fingerprint fingerprint) const { 43 // Go through the table until we find the item or an empty spot (meaning it 44 // wasn't found). This loop will terminate as long as the table isn't full, 45 // which should be enforced by AddFingerprint. 46 Hash first_hash = HashFingerprint(fingerprint); 47 Hash cur_hash = first_hash; 48 while (true) { 49 Fingerprint cur_fingerprint = FingerprintAt(cur_hash); 50 if (cur_fingerprint == null_fingerprint_) 51 return false; // End of probe sequence found. 52 if (cur_fingerprint == fingerprint) 53 return true; // Found a match. 54 55 // This spot was taken, but not by the item we're looking for, search in 56 // the next position. 57 cur_hash++; 58 if (cur_hash == table_length_) 59 cur_hash = 0; 60 if (cur_hash == first_hash) { 61 // Wrapped around and didn't find an empty space, this means we're in an 62 // infinite loop because AddFingerprint didn't do its job resizing. 63 NOTREACHED(); 64 return false; 65 } 66 } 67 } 68 69 // Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint, 70 // this is as random as any other subset of the MD5SUM. 71 // 72 // FIXME: this uses the MD5SUM of the 16-bit character version. For systems 73 // where wchar_t is not 16 bits (Linux uses 32 bits, I think), this will not be 74 // compatable. We should define explicitly what should happen here across 75 // platforms, and convert if necessary (probably to UTF-16). 76 77 // static 78 VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputeURLFingerprint( 79 const char* canonical_url, 80 size_t url_len, 81 const uint8 salt[LINK_SALT_LENGTH]) { 82 DCHECK(url_len > 0) << "Canonical URLs should not be empty"; 83 84 base::MD5Context ctx; 85 base::MD5Init(&ctx); 86 base::MD5Update(&ctx, base::StringPiece(reinterpret_cast<const char*>(salt), 87 LINK_SALT_LENGTH)); 88 base::MD5Update(&ctx, base::StringPiece(canonical_url, url_len)); 89 90 base::MD5Digest digest; 91 base::MD5Final(&digest, &ctx); 92 93 // This is the same as "return *(Fingerprint*)&digest.a;" but if we do that 94 // direct cast the alignment could be wrong, and we can't access a 64-bit int 95 // on arbitrary alignment on some processors. This reinterpret_casts it 96 // down to a char array of the same size as fingerprint, and then does the 97 // bit cast, which amounts to a memcpy. This does not handle endian issues. 98 return bit_cast<Fingerprint, uint8[8]>( 99 *reinterpret_cast<uint8(*)[8]>(&digest.a)); 100 } 101 102 } // namespace visitedlink 103