Home | History | Annotate | Download | only in ADT
      1 //===- StringHash.h -------------------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 #ifndef MCLD_ADT_STRINGHASH_H_
     10 #define MCLD_ADT_STRINGHASH_H_
     11 
     12 #include <llvm/ADT/StringRef.h>
     13 #include <llvm/Support/DataTypes.h>
     14 
     15 #include <cassert>
     16 #include <cctype>
     17 #include <functional>
     18 
     19 namespace mcld {
     20 namespace hash {
     21 
     22 enum Type { RS, JS, PJW, ELF, BKDR, SDBM, DJB, DEK, BP, FNV, AP, ES };
     23 
     24 /** \class template<uint32_t TYPE> StringHash
     25  *  \brief the template StringHash class, for specification
     26  */
     27 template <uint32_t TYPE>
     28 struct StringHash
     29     : public std::unary_function<const llvm::StringRef, uint32_t> {
     30   uint32_t operator()(const llvm::StringRef pKey) const {
     31     assert(false && "Undefined StringHash function.\n");
     32     return 0;
     33   }
     34 };
     35 
     36 /** \class StringHash<RSHash>
     37  *  \brief RS StringHash funciton
     38  */
     39 template <>
     40 struct StringHash<RS>
     41     : public std::unary_function<const llvm::StringRef, uint32_t> {
     42   uint32_t operator()(const llvm::StringRef pKey) const {
     43     const unsigned int b = 378551;
     44     uint32_t a = 63689;
     45     uint32_t hash_val = 0;
     46 
     47     for (unsigned int i = 0; i < pKey.size(); ++i) {
     48       hash_val = hash_val * a + pKey[i];
     49       a = a * b;
     50     }
     51     return hash_val;
     52   }
     53 };
     54 
     55 /** \class StringHash<JSHash>
     56  *  \brief JS hash funciton
     57  */
     58 template <>
     59 struct StringHash<JS>
     60     : public std::unary_function<const llvm::StringRef, uint32_t> {
     61   uint32_t operator()(const llvm::StringRef pKey) const {
     62     uint32_t hash_val = 1315423911;
     63 
     64     for (unsigned int i = 0; i < pKey.size(); ++i) {
     65       hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
     66     }
     67     return hash_val;
     68   }
     69 };
     70 
     71 /** \class StringHash<PJW>
     72  *  \brief P.J. Weinberger hash function
     73  */
     74 template <>
     75 struct StringHash<PJW>
     76     : public std::unary_function<const llvm::StringRef, uint32_t> {
     77   uint32_t operator()(const llvm::StringRef pKey) const {
     78     const unsigned int BitsInUnsignedInt =
     79         (unsigned int)(sizeof(unsigned int) * 8);
     80     const unsigned int ThreeQuarters =
     81         (unsigned int)((BitsInUnsignedInt * 3) / 4);
     82     const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8);
     83     const unsigned int HighBits = (unsigned int)(0xFFFFFFFF)
     84                                   << (BitsInUnsignedInt - OneEighth);
     85     uint32_t hash_val = 0;
     86     uint32_t test = 0;
     87 
     88     for (unsigned int i = 0; i < pKey.size(); ++i) {
     89       hash_val = (hash_val << OneEighth) + pKey[i];
     90 
     91       if ((test = hash_val & HighBits) != 0) {
     92         hash_val = ((hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
     93       }
     94     }
     95     return hash_val;
     96   }
     97 };
     98 
     99 /** \class StringHash<ELF>
    100  *  \brief ELF hash function.
    101  */
    102 template <>
    103 struct StringHash<ELF>
    104     : public std::unary_function<const llvm::StringRef, uint32_t> {
    105   uint32_t operator()(const llvm::StringRef pKey) const {
    106     uint32_t hash_val = 0;
    107     uint32_t x = 0;
    108 
    109     for (unsigned int i = 0; i < pKey.size(); ++i) {
    110       hash_val = (hash_val << 4) + pKey[i];
    111       if ((x = hash_val & 0xF0000000L) != 0)
    112         hash_val ^= (x >> 24);
    113       hash_val &= ~x;
    114     }
    115     return hash_val;
    116   }
    117 };
    118 
    119 /** \class StringHash<BKDR>
    120  *  \brief BKDR hash function
    121  */
    122 template <>
    123 struct StringHash<BKDR>
    124     : public std::unary_function<const llvm::StringRef, uint32_t> {
    125   uint32_t operator()(const llvm::StringRef pKey) const {
    126     const uint32_t seed = 131;
    127     uint32_t hash_val = 0;
    128 
    129     for (uint32_t i = 0; i < pKey.size(); ++i)
    130       hash_val = (hash_val * seed) + pKey[i];
    131     return hash_val;
    132   }
    133 };
    134 
    135 /** \class StringHash<SDBM>
    136  *  \brief SDBM hash function
    137  *  0.049s in 100000 test
    138  */
    139 template <>
    140 struct StringHash<SDBM>
    141     : public std::unary_function<const llvm::StringRef, uint32_t> {
    142   uint32_t operator()(const llvm::StringRef pKey) const {
    143     uint32_t hash_val = 0;
    144 
    145     for (uint32_t i = 0; i < pKey.size(); ++i)
    146       hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
    147     return hash_val;
    148   }
    149 };
    150 
    151 /** \class StringHash<DJB>
    152  *  \brief DJB hash function
    153  *  0.057s in 100000 test
    154  */
    155 template <>
    156 struct StringHash<DJB>
    157     : public std::unary_function<const llvm::StringRef, uint32_t> {
    158   uint32_t operator()(const llvm::StringRef pKey) const {
    159     uint32_t hash_val = 5381;
    160 
    161     for (uint32_t i = 0; i < pKey.size(); ++i)
    162       hash_val = ((hash_val << 5) + hash_val) + pKey[i];
    163 
    164     return hash_val;
    165   }
    166 };
    167 
    168 /** \class StringHash<DEK>
    169  *  \brief DEK hash function
    170  *  0.60s
    171  */
    172 template <>
    173 struct StringHash<DEK>
    174     : public std::unary_function<const llvm::StringRef, uint32_t> {
    175   uint32_t operator()(const llvm::StringRef pKey) const {
    176     uint32_t hash_val = pKey.size();
    177 
    178     for (uint32_t i = 0; i < pKey.size(); ++i)
    179       hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
    180 
    181     return hash_val;
    182   }
    183 };
    184 
    185 /** \class StringHash<BP>
    186  *  \brief BP hash function
    187  *  0.057s
    188  */
    189 template <>
    190 struct StringHash<BP>
    191     : public std::unary_function<const llvm::StringRef, uint32_t> {
    192   uint32_t operator()(const llvm::StringRef pKey) const {
    193     uint32_t hash_val = 0;
    194     for (uint32_t i = 0; i < pKey.size(); ++i)
    195       hash_val = hash_val << 7 ^ pKey[i];
    196 
    197     return hash_val;
    198   }
    199 };
    200 
    201 /** \class StringHash<FNV>
    202  *  \brief FNV hash function
    203  *  0.058s
    204  */
    205 template <>
    206 struct StringHash<FNV>
    207     : public std::unary_function<const llvm::StringRef, uint32_t> {
    208   uint32_t operator()(const llvm::StringRef pKey) const {
    209     const uint32_t fnv_prime = 0x811C9DC5;
    210     uint32_t hash_val = 0;
    211     for (uint32_t i = 0; i < pKey.size(); ++i) {
    212       hash_val *= fnv_prime;
    213       hash_val ^= pKey[i];
    214     }
    215 
    216     return hash_val;
    217   }
    218 };
    219 
    220 /** \class StringHash<AP>
    221  *  \brief AP hash function
    222  *  0.060s
    223  */
    224 template <>
    225 struct StringHash<AP>
    226     : public std::unary_function<const llvm::StringRef, uint32_t> {
    227   uint32_t operator()(const llvm::StringRef pKey) const {
    228     unsigned int hash_val = 0xAAAAAAAA;
    229 
    230     for (uint32_t i = 0; i < pKey.size(); ++i) {
    231       hash_val ^= ((i & 1) == 0)
    232                       ? ((hash_val << 7) ^ pKey[i] * (hash_val >> 3))
    233                       : (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
    234     }
    235 
    236     return hash_val;
    237   }
    238 };
    239 
    240 /** \class StringHash<ES>
    241  *  \brief This is a revision of Edward Sayers' string characteristic function.
    242  *
    243  *  31-28  27  26  25   -   0
    244  *  +----+---+---+------------+
    245  *  | .  | N | - | a/A  ~ z/Z |
    246  *  +----+---+---+------------+
    247  *
    248  *  . (bit 31~28) - The number of '.' characters
    249  *  N (bit 27)    - Are there any numbers in the string
    250  *  - (bit 26)    - Does the string have '-' character
    251  *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
    252  *                  Bit 24 is set only if ...                   'b' or 'B', ...
    253  */
    254 template <>
    255 struct StringHash<ES>
    256     : public std::unary_function<const llvm::StringRef, uint32_t> {
    257   uint32_t operator()(const llvm::StringRef pString) const {
    258     uint32_t result = 0x0;
    259     unsigned int dot = 0;
    260     std::string::size_type idx;
    261     for (idx = 0; idx < pString.size(); ++idx) {
    262       int cur_char = pString[idx];
    263 
    264       if ('.' == cur_char) {
    265         ++dot;
    266         continue;
    267       }
    268 
    269       if (isdigit(cur_char)) {
    270         result |= (1 << 27);
    271         continue;
    272       }
    273 
    274       if ('_' == cur_char) {
    275         result |= (1 << 26);
    276         continue;
    277       }
    278 
    279       if (isupper(cur_char)) {
    280         result |= (1 << (cur_char - 'A'));
    281         continue;
    282       }
    283 
    284       if (islower(cur_char)) {
    285         result |= (1 << (cur_char - 'a'));
    286         continue;
    287       }
    288     }
    289     result |= (dot << 28);
    290     return result;
    291   }
    292 
    293   /** \func may_include
    294    *  \brief is it possible that pRule is a sub-string of pInString
    295    */
    296   static bool may_include(uint32_t pRule, uint32_t pInString) {
    297     uint32_t in_c = pInString << 4;
    298     uint32_t r_c = pRule << 4;
    299 
    300     uint32_t res = (in_c ^ r_c) & r_c;
    301     if (0 != res)
    302       return false;
    303 
    304     uint32_t in_dot = pInString >> 28;
    305     uint32_t r_dot = pRule >> 28;
    306     if (r_dot > in_dot)
    307       return false;
    308 
    309     return true;
    310   }
    311 };
    312 
    313 /** \class template<uint32_t TYPE> StringCompare
    314  *  \brief the template StringCompare class, for specification
    315  */
    316 template <typename STRING_TYPE>
    317 struct StringCompare : public std::binary_function<const STRING_TYPE&,
    318                                                    const STRING_TYPE&,
    319                                                    bool> {
    320   bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const {
    321     return X == Y;
    322   }
    323 };
    324 
    325 template <>
    326 struct StringCompare<const char*>
    327     : public std::binary_function<const char*, const char*, bool> {
    328   bool operator()(const char* X, const char* Y) const {
    329     return (std::strcmp(X, Y) == 0);
    330   }
    331 };
    332 
    333 template <>
    334 struct StringCompare<char*>
    335     : public std::binary_function<const char*, const char*, bool> {
    336   bool operator()(const char* X, const char* Y) const {
    337     return (std::strcmp(X, Y) == 0);
    338   }
    339 };
    340 
    341 }  // namespace hash
    342 }  // namespace mcld
    343 
    344 #endif  // MCLD_ADT_STRINGHASH_H_
    345