Home | History | Annotate | Download | only in ADT
      1 //===- StringHash.h -------------------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 #ifndef MCLD_STRING_HASH_FUNCTION_H
     10 #define MCLD_STRING_HASH_FUNCTION_H
     11 #ifdef ENABLE_UNITTEST
     12 #include <gtest.h>
     13 #endif
     14 #include <llvm/ADT/StringRef.h>
     15 #include <llvm/Support/DataTypes.h>
     16 #include <llvm/Support/ErrorHandling.h>
     17 #include <cctype>
     18 #include <functional>
     19 
     20 namespace mcld {
     21 namespace hash {
     22 
     23 enum Type {
     24   RS,
     25   JS,
     26   PJW,
     27   ELF,
     28   BKDR,
     29   SDBM,
     30   DJB,
     31   DEK,
     32   BP,
     33   FNV,
     34   AP,
     35   ES
     36 };
     37 
     38 /** \class template<uint32_t TYPE> StringHash
     39  *  \brief the template StringHash class, for specification
     40  */
     41 template<uint32_t TYPE>
     42 struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
     43 {
     44   uint32_t operator()(const llvm::StringRef& pKey) const
     45   {
     46     llvm::report_fatal_error("Undefined StringHash function.\n");
     47   }
     48 };
     49 
     50 /** \class StringHash<RSHash>
     51  *  \brief RS StringHash funciton
     52  */
     53 template<>
     54 struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
     55 {
     56   uint32_t operator()(const llvm::StringRef& pKey) const
     57   {
     58     const unsigned int b = 378551;
     59     uint32_t a = 63689;
     60     uint32_t hash_val = 0;
     61 
     62     for(unsigned int i = 0; i < pKey.size(); ++i) {
     63       hash_val = hash_val * a + pKey[i];
     64       a = a * b;
     65     }
     66     return hash_val;
     67   }
     68 };
     69 
     70 /** \class StringHash<JSHash>
     71  *  \brief JS hash funciton
     72  */
     73 template<>
     74 struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
     75 {
     76   uint32_t operator()(const llvm::StringRef& pKey) const
     77   {
     78     uint32_t hash_val = 1315423911;
     79 
     80     for(unsigned int i = 0; i < pKey.size(); ++i) {
     81        hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
     82     }
     83     return hash_val;
     84   }
     85 };
     86 
     87 /** \class StringHash<PJW>
     88  *  \brief P.J. Weinberger hash function
     89  */
     90 template<>
     91 struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
     92 {
     93   uint32_t operator()(const llvm::StringRef& pKey) const
     94   {
     95     const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
     96     const unsigned int ThreeQuarters     = (unsigned int)((BitsInUnsignedInt  * 3) / 4);
     97     const unsigned int OneEighth         = (unsigned int)(BitsInUnsignedInt / 8);
     98     const unsigned int HighBits          = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
     99     uint32_t hash_val = 0;
    100     uint32_t test = 0;
    101 
    102     for(unsigned int i = 0; i < pKey.size(); ++i) {
    103       hash_val = (hash_val << OneEighth) + pKey[i];
    104 
    105       if((test = hash_val & HighBits) != 0) {
    106         hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
    107       }
    108     }
    109     return hash_val;
    110   }
    111 };
    112 
    113 /** \class StringHash<ELF>
    114  *  \brief ELF hash function.
    115  */
    116 template<>
    117 struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
    118 {
    119   uint32_t operator()(const llvm::StringRef& pKey) const
    120   {
    121     uint32_t hash_val = 0;
    122     uint32_t x = 0;
    123 
    124     for (unsigned int i = 0; i < pKey.size(); ++i) {
    125       hash_val = (hash_val << 4) + pKey[i];
    126       if((x = hash_val & 0xF0000000L) != 0)
    127         hash_val ^= (x >> 24);
    128       hash_val &= ~x;
    129     }
    130     return hash_val;
    131   }
    132 };
    133 
    134 /** \class StringHash<BKDR>
    135  *  \brief BKDR hash function
    136  */
    137 template<>
    138 struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
    139 {
    140   uint32_t operator()(const llvm::StringRef& pKey) const
    141   {
    142     const uint32_t seed = 131;
    143     uint32_t hash_val = 0;
    144 
    145     for(uint32_t i = 0; i < pKey.size(); ++i)
    146       hash_val = (hash_val * seed) + pKey[i];
    147     return hash_val;
    148   }
    149 };
    150 
    151 
    152 /** \class StringHash<SDBM>
    153  *  \brief SDBM hash function
    154  *  0.049s in 100000 test
    155  */
    156 template<>
    157 struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
    158 {
    159   uint32_t operator()(const llvm::StringRef& pKey) const
    160   {
    161     uint32_t hash_val = 0;
    162 
    163     for(uint32_t i = 0; i < pKey.size(); ++i)
    164       hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
    165     return hash_val;
    166   }
    167 };
    168 
    169 /** \class StringHash<DJB>
    170  *  \brief DJB hash function
    171  *  0.057s in 100000 test
    172  */
    173 template<>
    174 struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
    175 {
    176   uint32_t operator()(const llvm::StringRef& pKey) const
    177   {
    178     uint32_t hash_val = 5381;
    179 
    180     for(uint32_t i = 0; i < pKey.size(); ++i)
    181       hash_val = ((hash_val << 5) + hash_val) + pKey[i];
    182 
    183     return hash_val;
    184   }
    185 };
    186 
    187 /** \class StringHash<DEK>
    188  *  \brief DEK hash function
    189  *  0.60s
    190  */
    191 template<>
    192 struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
    193 {
    194   uint32_t operator()(const llvm::StringRef& pKey) const
    195   {
    196     uint32_t hash_val = pKey.size();
    197 
    198     for(uint32_t i = 0; i < pKey.size(); ++i)
    199       hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
    200 
    201     return hash_val;
    202   }
    203 };
    204 
    205 /** \class StringHash<BP>
    206  *  \brief BP hash function
    207  *  0.057s
    208  */
    209 template<>
    210 struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
    211 {
    212   uint32_t operator()(const llvm::StringRef& pKey) const
    213   {
    214     uint32_t hash_val = 0;
    215     for(uint32_t i = 0; i < pKey.size(); ++i)
    216       hash_val = hash_val << 7 ^ pKey[i];
    217 
    218     return hash_val;
    219   }
    220 };
    221 
    222 /** \class StringHash<FNV>
    223  *  \brief FNV hash function
    224  *  0.058s
    225  */
    226 template<>
    227 struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
    228 {
    229   uint32_t operator()(const llvm::StringRef& pKey) const
    230   {
    231     const uint32_t fnv_prime = 0x811C9DC5;
    232     uint32_t hash_val = 0;
    233     for(uint32_t i = 0; i < pKey.size(); ++i) {
    234       hash_val *= fnv_prime;
    235       hash_val ^= pKey[i];
    236     }
    237 
    238     return hash_val;
    239   }
    240 };
    241 
    242 /** \class StringHash<AP>
    243  *  \brief AP hash function
    244  *  0.060s
    245  */
    246 template<>
    247 struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
    248 {
    249   uint32_t operator()(const llvm::StringRef& pKey) const
    250   {
    251     unsigned int hash_val = 0xAAAAAAAA;
    252 
    253     for(uint32_t i = 0; i < pKey.size(); ++i) {
    254       hash_val ^= ((i & 1) == 0)?
    255                           ((hash_val <<  7) ^ pKey[i] * (hash_val >> 3)):
    256                           (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
    257     }
    258 
    259     return hash_val;
    260   }
    261 };
    262 
    263 /** \class StringHash<ES>
    264  *  \brief This is a revision of Edward Sayers' string characteristic function.
    265  *
    266  *  31-28  27  26  25   -   0
    267  *  +----+---+---+------------+
    268  *  | .  | N | - | a/A  ~ z/Z |
    269  *  +----+---+---+------------+
    270  *
    271  *  . (bit 31~28) - The number of '.' characters
    272  *  N (bit 27)    - Are there any numbers in the string
    273  *  - (bit 26)    - Does the string have '-' character
    274  *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
    275  *                  Bit 24 is set only if ...                   'b' or 'B', ...
    276  */
    277 template<>
    278 struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
    279 {
    280   uint32_t operator()(const llvm::StringRef& pString) const
    281   {
    282     uint32_t result = 0x0;
    283     unsigned int dot = 0;
    284     std::string::size_type idx;
    285     for (idx = 0; idx < pString.size(); ++idx) {
    286       int cur_char = pString[idx];
    287 
    288       if ('.' == cur_char) {
    289         ++dot;
    290         continue;
    291       }
    292 
    293       if (isdigit(cur_char)) {
    294         result |= (1 << 27);
    295         continue;
    296       }
    297 
    298       if ('_' == cur_char) {
    299         result |= (1 << 26);
    300         continue;
    301       }
    302 
    303       if (isupper(cur_char)) {
    304         result |= (1 << (cur_char - 'A'));
    305         continue;
    306       }
    307 
    308       if (islower(cur_char)) {
    309         result |= (1 << (cur_char - 'a'));
    310         continue;
    311       }
    312     }
    313     result |= (dot << 28);
    314     return result;
    315   }
    316 
    317 
    318   /** \func may_include
    319    *  \brief is it possible that pRule is a sub-string of pInString
    320    */
    321   static bool may_include(uint32_t pRule, uint32_t pInString)
    322   {
    323     uint32_t in_c = pInString << 4;
    324     uint32_t r_c  = pRule << 4;
    325 
    326     uint32_t res = (in_c ^ r_c) & r_c;
    327     if (0 != res)
    328       return false;
    329 
    330     uint32_t in_dot = pInString >> 28;
    331     uint32_t r_dot  = pRule >> 28;
    332     if (r_dot > in_dot)
    333       return false;
    334 
    335     return true;
    336   }
    337 };
    338 
    339 /** \class template<uint32_t TYPE> StringCompare
    340  *  \brief the template StringCompare class, for specification
    341  */
    342 template<typename STRING_TYPE>
    343 struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
    344 {
    345   bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
    346   { return X == Y; }
    347 };
    348 
    349 template<>
    350 struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
    351 {
    352   bool operator()(const char* X, const char* Y) const
    353   { return (0 == std::strcmp(X, Y)); }
    354 };
    355 
    356 template<>
    357 struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
    358 {
    359   bool operator()(const char* X, const char* Y) const
    360   { return (0 == std::strcmp(X, Y)); }
    361 };
    362 
    363 } // namespace of hash
    364 } // namespace of mcld
    365 
    366 #endif
    367 
    368