Home | History | Annotate | Download | only in ADT
      1 //===- StringHash.h -------------------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 #ifndef MCLD_ADT_STRINGHASH_H
     10 #define MCLD_ADT_STRINGHASH_H
     11 #include <llvm/ADT/StringRef.h>
     12 #include <llvm/Support/DataTypes.h>
     13 #include <cctype>
     14 #include <cassert>
     15 #include <functional>
     16 
     17 namespace mcld {
     18 namespace hash {
     19 
     20 enum Type {
     21   RS,
     22   JS,
     23   PJW,
     24   ELF,
     25   BKDR,
     26   SDBM,
     27   DJB,
     28   DEK,
     29   BP,
     30   FNV,
     31   AP,
     32   ES
     33 };
     34 
     35 /** \class template<uint32_t TYPE> StringHash
     36  *  \brief the template StringHash class, for specification
     37  */
     38 template<uint32_t TYPE>
     39 struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
     40 {
     41   uint32_t operator()(const llvm::StringRef& pKey) const
     42   {
     43     assert(false && "Undefined StringHash function.\n");
     44     return 0;
     45   }
     46 };
     47 
     48 /** \class StringHash<RSHash>
     49  *  \brief RS StringHash funciton
     50  */
     51 template<>
     52 struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
     53 {
     54   uint32_t operator()(const llvm::StringRef& pKey) const
     55   {
     56     const unsigned int b = 378551;
     57     uint32_t a = 63689;
     58     uint32_t hash_val = 0;
     59 
     60     for(unsigned int i = 0; i < pKey.size(); ++i) {
     61       hash_val = hash_val * a + pKey[i];
     62       a = a * b;
     63     }
     64     return hash_val;
     65   }
     66 };
     67 
     68 /** \class StringHash<JSHash>
     69  *  \brief JS hash funciton
     70  */
     71 template<>
     72 struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
     73 {
     74   uint32_t operator()(const llvm::StringRef& pKey) const
     75   {
     76     uint32_t hash_val = 1315423911;
     77 
     78     for(unsigned int i = 0; i < pKey.size(); ++i) {
     79        hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
     80     }
     81     return hash_val;
     82   }
     83 };
     84 
     85 /** \class StringHash<PJW>
     86  *  \brief P.J. Weinberger hash function
     87  */
     88 template<>
     89 struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
     90 {
     91   uint32_t operator()(const llvm::StringRef& pKey) const
     92   {
     93     const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
     94     const unsigned int ThreeQuarters     = (unsigned int)((BitsInUnsignedInt  * 3) / 4);
     95     const unsigned int OneEighth         = (unsigned int)(BitsInUnsignedInt / 8);
     96     const unsigned int HighBits          = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
     97     uint32_t hash_val = 0;
     98     uint32_t test = 0;
     99 
    100     for(unsigned int i = 0; i < pKey.size(); ++i) {
    101       hash_val = (hash_val << OneEighth) + pKey[i];
    102 
    103       if((test = hash_val & HighBits) != 0) {
    104         hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
    105       }
    106     }
    107     return hash_val;
    108   }
    109 };
    110 
    111 /** \class StringHash<ELF>
    112  *  \brief ELF hash function.
    113  */
    114 template<>
    115 struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
    116 {
    117   uint32_t operator()(const llvm::StringRef& pKey) const
    118   {
    119     uint32_t hash_val = 0;
    120     uint32_t x = 0;
    121 
    122     for (unsigned int i = 0; i < pKey.size(); ++i) {
    123       hash_val = (hash_val << 4) + pKey[i];
    124       if((x = hash_val & 0xF0000000L) != 0)
    125         hash_val ^= (x >> 24);
    126       hash_val &= ~x;
    127     }
    128     return hash_val;
    129   }
    130 };
    131 
    132 /** \class StringHash<BKDR>
    133  *  \brief BKDR hash function
    134  */
    135 template<>
    136 struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
    137 {
    138   uint32_t operator()(const llvm::StringRef& pKey) const
    139   {
    140     const uint32_t seed = 131;
    141     uint32_t hash_val = 0;
    142 
    143     for(uint32_t i = 0; i < pKey.size(); ++i)
    144       hash_val = (hash_val * seed) + pKey[i];
    145     return hash_val;
    146   }
    147 };
    148 
    149 
    150 /** \class StringHash<SDBM>
    151  *  \brief SDBM hash function
    152  *  0.049s in 100000 test
    153  */
    154 template<>
    155 struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
    156 {
    157   uint32_t operator()(const llvm::StringRef& pKey) const
    158   {
    159     uint32_t hash_val = 0;
    160 
    161     for(uint32_t i = 0; i < pKey.size(); ++i)
    162       hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
    163     return hash_val;
    164   }
    165 };
    166 
    167 /** \class StringHash<DJB>
    168  *  \brief DJB hash function
    169  *  0.057s in 100000 test
    170  */
    171 template<>
    172 struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
    173 {
    174   uint32_t operator()(const llvm::StringRef& pKey) const
    175   {
    176     uint32_t hash_val = 5381;
    177 
    178     for(uint32_t i = 0; i < pKey.size(); ++i)
    179       hash_val = ((hash_val << 5) + hash_val) + pKey[i];
    180 
    181     return hash_val;
    182   }
    183 };
    184 
    185 /** \class StringHash<DEK>
    186  *  \brief DEK hash function
    187  *  0.60s
    188  */
    189 template<>
    190 struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
    191 {
    192   uint32_t operator()(const llvm::StringRef& pKey) const
    193   {
    194     uint32_t hash_val = pKey.size();
    195 
    196     for(uint32_t i = 0; i < pKey.size(); ++i)
    197       hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
    198 
    199     return hash_val;
    200   }
    201 };
    202 
    203 /** \class StringHash<BP>
    204  *  \brief BP hash function
    205  *  0.057s
    206  */
    207 template<>
    208 struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
    209 {
    210   uint32_t operator()(const llvm::StringRef& pKey) const
    211   {
    212     uint32_t hash_val = 0;
    213     for(uint32_t i = 0; i < pKey.size(); ++i)
    214       hash_val = hash_val << 7 ^ pKey[i];
    215 
    216     return hash_val;
    217   }
    218 };
    219 
    220 /** \class StringHash<FNV>
    221  *  \brief FNV hash function
    222  *  0.058s
    223  */
    224 template<>
    225 struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
    226 {
    227   uint32_t operator()(const llvm::StringRef& pKey) const
    228   {
    229     const uint32_t fnv_prime = 0x811C9DC5;
    230     uint32_t hash_val = 0;
    231     for(uint32_t i = 0; i < pKey.size(); ++i) {
    232       hash_val *= fnv_prime;
    233       hash_val ^= pKey[i];
    234     }
    235 
    236     return hash_val;
    237   }
    238 };
    239 
    240 /** \class StringHash<AP>
    241  *  \brief AP hash function
    242  *  0.060s
    243  */
    244 template<>
    245 struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
    246 {
    247   uint32_t operator()(const llvm::StringRef& pKey) const
    248   {
    249     unsigned int hash_val = 0xAAAAAAAA;
    250 
    251     for(uint32_t i = 0; i < pKey.size(); ++i) {
    252       hash_val ^= ((i & 1) == 0)?
    253                           ((hash_val <<  7) ^ pKey[i] * (hash_val >> 3)):
    254                           (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
    255     }
    256 
    257     return hash_val;
    258   }
    259 };
    260 
    261 /** \class StringHash<ES>
    262  *  \brief This is a revision of Edward Sayers' string characteristic function.
    263  *
    264  *  31-28  27  26  25   -   0
    265  *  +----+---+---+------------+
    266  *  | .  | N | - | a/A  ~ z/Z |
    267  *  +----+---+---+------------+
    268  *
    269  *  . (bit 31~28) - The number of '.' characters
    270  *  N (bit 27)    - Are there any numbers in the string
    271  *  - (bit 26)    - Does the string have '-' character
    272  *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
    273  *                  Bit 24 is set only if ...                   'b' or 'B', ...
    274  */
    275 template<>
    276 struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
    277 {
    278   uint32_t operator()(const llvm::StringRef& pString) const
    279   {
    280     uint32_t result = 0x0;
    281     unsigned int dot = 0;
    282     std::string::size_type idx;
    283     for (idx = 0; idx < pString.size(); ++idx) {
    284       int cur_char = pString[idx];
    285 
    286       if ('.' == cur_char) {
    287         ++dot;
    288         continue;
    289       }
    290 
    291       if (isdigit(cur_char)) {
    292         result |= (1 << 27);
    293         continue;
    294       }
    295 
    296       if ('_' == cur_char) {
    297         result |= (1 << 26);
    298         continue;
    299       }
    300 
    301       if (isupper(cur_char)) {
    302         result |= (1 << (cur_char - 'A'));
    303         continue;
    304       }
    305 
    306       if (islower(cur_char)) {
    307         result |= (1 << (cur_char - 'a'));
    308         continue;
    309       }
    310     }
    311     result |= (dot << 28);
    312     return result;
    313   }
    314 
    315 
    316   /** \func may_include
    317    *  \brief is it possible that pRule is a sub-string of pInString
    318    */
    319   static bool may_include(uint32_t pRule, uint32_t pInString)
    320   {
    321     uint32_t in_c = pInString << 4;
    322     uint32_t r_c  = pRule << 4;
    323 
    324     uint32_t res = (in_c ^ r_c) & r_c;
    325     if (0 != res)
    326       return false;
    327 
    328     uint32_t in_dot = pInString >> 28;
    329     uint32_t r_dot  = pRule >> 28;
    330     if (r_dot > in_dot)
    331       return false;
    332 
    333     return true;
    334   }
    335 };
    336 
    337 /** \class template<uint32_t TYPE> StringCompare
    338  *  \brief the template StringCompare class, for specification
    339  */
    340 template<typename STRING_TYPE>
    341 struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
    342 {
    343   bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
    344   { return X == Y; }
    345 };
    346 
    347 template<>
    348 struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
    349 {
    350   bool operator()(const char* X, const char* Y) const
    351   { return (0 == std::strcmp(X, Y)); }
    352 };
    353 
    354 template<>
    355 struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
    356 {
    357   bool operator()(const char* X, const char* Y) const
    358   { return (0 == std::strcmp(X, Y)); }
    359 };
    360 
    361 } // namespace of hash
    362 } // namespace of mcld
    363 
    364 #endif
    365 
    366