Home | History | Annotate | Download | only in ADT
      1 //===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines CachedHashString and CachedHashStringRef.  These are owning
     11 // and not-owning string types that store their hash in addition to their string
     12 // data.
     13 //
     14 // Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
     15 // (because, unlike std::string, CachedHashString lets us have empty and
     16 // tombstone values).
     17 //
     18 //===----------------------------------------------------------------------===//
     19 
     20 #ifndef LLVM_ADT_CACHED_HASH_STRING_H
     21 #define LLVM_ADT_CACHED_HASH_STRING_H
     22 
     23 #include "llvm/ADT/DenseMap.h"
     24 #include "llvm/ADT/StringRef.h"
     25 #include "llvm/Support/raw_ostream.h"
     26 
     27 namespace llvm {
     28 
     29 /// A container which contains a StringRef plus a precomputed hash.
     30 class CachedHashStringRef {
     31   const char *P;
     32   uint32_t Size;
     33   uint32_t Hash;
     34 
     35 public:
     36   // Explicit because hashing a string isn't free.
     37   explicit CachedHashStringRef(StringRef S)
     38       : CachedHashStringRef(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
     39 
     40   CachedHashStringRef(StringRef S, uint32_t Hash)
     41       : P(S.data()), Size(S.size()), Hash(Hash) {
     42     assert(S.size() <= std::numeric_limits<uint32_t>::max());
     43   }
     44 
     45   StringRef val() const { return StringRef(P, Size); }
     46   uint32_t size() const { return Size; }
     47   uint32_t hash() const { return Hash; }
     48 };
     49 
     50 template <> struct DenseMapInfo<CachedHashStringRef> {
     51   static CachedHashStringRef getEmptyKey() {
     52     return CachedHashStringRef(DenseMapInfo<StringRef>::getEmptyKey(), 0);
     53   }
     54   static CachedHashStringRef getTombstoneKey() {
     55     return CachedHashStringRef(DenseMapInfo<StringRef>::getTombstoneKey(), 1);
     56   }
     57   static unsigned getHashValue(const CachedHashStringRef &S) {
     58     assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
     59     assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
     60     return S.hash();
     61   }
     62   static bool isEqual(const CachedHashStringRef &LHS,
     63                       const CachedHashStringRef &RHS) {
     64     return LHS.hash() == RHS.hash() &&
     65            DenseMapInfo<StringRef>::isEqual(LHS.val(), RHS.val());
     66   }
     67 };
     68 
     69 /// A container which contains a string, which it owns, plus a precomputed hash.
     70 ///
     71 /// We do not null-terminate the string.
     72 class CachedHashString {
     73   friend struct DenseMapInfo<CachedHashString>;
     74 
     75   char *P;
     76   uint32_t Size;
     77   uint32_t Hash;
     78 
     79   static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); }
     80   static char *getTombstoneKeyPtr() {
     81     return DenseMapInfo<char *>::getTombstoneKey();
     82   }
     83 
     84   bool isEmptyOrTombstone() const {
     85     return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr();
     86   }
     87 
     88   struct ConstructEmptyOrTombstoneTy {};
     89 
     90   CachedHashString(ConstructEmptyOrTombstoneTy, char *EmptyOrTombstonePtr)
     91       : P(EmptyOrTombstonePtr), Size(0), Hash(0) {
     92     assert(isEmptyOrTombstone());
     93   }
     94 
     95   // TODO: Use small-string optimization to avoid allocating.
     96 
     97 public:
     98   explicit CachedHashString(const char *S) : CachedHashString(StringRef(S)) {}
     99 
    100   // Explicit because copying and hashing a string isn't free.
    101   explicit CachedHashString(StringRef S)
    102       : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
    103 
    104   CachedHashString(StringRef S, uint32_t Hash)
    105       : P(new char[S.size()]), Size(S.size()), Hash(Hash) {
    106     memcpy(P, S.data(), S.size());
    107   }
    108 
    109   // Ideally this class would not be copyable.  But SetVector requires copyable
    110   // keys, and we want this to be usable there.
    111   CachedHashString(const CachedHashString &Other)
    112       : Size(Other.Size), Hash(Other.Hash) {
    113     if (Other.isEmptyOrTombstone()) {
    114       P = Other.P;
    115     } else {
    116       P = new char[Size];
    117       memcpy(P, Other.P, Size);
    118     }
    119   }
    120 
    121   CachedHashString &operator=(CachedHashString Other) {
    122     swap(*this, Other);
    123     return *this;
    124   }
    125 
    126   CachedHashString(CachedHashString &&Other) noexcept
    127       : P(Other.P), Size(Other.Size), Hash(Other.Hash) {
    128     Other.P = getEmptyKeyPtr();
    129   }
    130 
    131   ~CachedHashString() {
    132     if (!isEmptyOrTombstone())
    133       delete[] P;
    134   }
    135 
    136   StringRef val() const { return StringRef(P, Size); }
    137   uint32_t size() const { return Size; }
    138   uint32_t hash() const { return Hash; }
    139 
    140   operator StringRef() const { return val(); }
    141   operator CachedHashStringRef() const {
    142     return CachedHashStringRef(val(), Hash);
    143   }
    144 
    145   friend void swap(CachedHashString &LHS, CachedHashString &RHS) {
    146     using std::swap;
    147     swap(LHS.P, RHS.P);
    148     swap(LHS.Size, RHS.Size);
    149     swap(LHS.Hash, RHS.Hash);
    150   }
    151 };
    152 
    153 template <> struct DenseMapInfo<CachedHashString> {
    154   static CachedHashString getEmptyKey() {
    155     return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
    156                             CachedHashString::getEmptyKeyPtr());
    157   }
    158   static CachedHashString getTombstoneKey() {
    159     return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
    160                             CachedHashString::getTombstoneKeyPtr());
    161   }
    162   static unsigned getHashValue(const CachedHashString &S) {
    163     assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
    164     assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
    165     return S.hash();
    166   }
    167   static bool isEqual(const CachedHashString &LHS,
    168                       const CachedHashString &RHS) {
    169     if (LHS.hash() != RHS.hash())
    170       return false;
    171     if (LHS.P == CachedHashString::getEmptyKeyPtr())
    172       return RHS.P == CachedHashString::getEmptyKeyPtr();
    173     if (LHS.P == CachedHashString::getTombstoneKeyPtr())
    174       return RHS.P == CachedHashString::getTombstoneKeyPtr();
    175 
    176     // This is safe because if RHS.P is the empty or tombstone key, it will have
    177     // length 0, so we'll never dereference its pointer.
    178     return LHS.val() == RHS.val();
    179   }
    180 };
    181 
    182 } // namespace llvm
    183 
    184 #endif
    185