Home | History | Annotate | Download | only in ADT
      1 //===- StringHash.h ---------------------------------------------------------===//
      2 //
      3 //                     The MCLinker Project
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef MCLD_STRING_HASH_FUNCTION_H
     11 #define MCLD_STRING_HASH_FUNCTION_H
     12 #ifdef ENABLE_UNITTEST
     13 #include <gtest.h>
     14 #endif
     15 #include <llvm/ADT/StringRef.h>
     16 #include <llvm/Support/DataTypes.h>
     17 #include <llvm/Support/ErrorHandling.h>
     18 #include <cctype>
     19 #include <functional>
     20 
     21 namespace mcld
     22 {
     23 
     24 enum StringHashType
     25 {
     26   RS,
     27   JS,
     28   PJW,
     29   ELF,
     30   BKDR,
     31   SDBM,
     32   DJB,
     33   DEK,
     34   BP,
     35   FNV,
     36   AP,
     37   ES
     38 };
     39 
     40 /** \class template<uint32_t TYPE> StringHash
     41  *  \brief the template StringHash class, for specification
     42  */
     43 template<uint32_t TYPE>
     44 struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
     45 {
     46   uint32_t operator()(const llvm::StringRef& pKey) const
     47   {
     48     llvm::report_fatal_error("Undefined StringHash function.\n");
     49   }
     50 };
     51 
     52 /** \class StringHash<RSHash>
     53  *  \brief RS StringHash funciton
     54  */
     55 template<>
     56 struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
     57 {
     58   uint32_t operator()(const llvm::StringRef& pKey) const
     59   {
     60     const unsigned int b = 378551;
     61     uint32_t a = 63689;
     62     uint32_t hash_val = 0;
     63 
     64     for(unsigned int i = 0; i < pKey.size(); ++i) {
     65       hash_val = hash_val * a + pKey[i];
     66       a = a * b;
     67     }
     68     return hash_val;
     69   }
     70 };
     71 
     72 /** \class StringHash<JSHash>
     73  *  \brief JS hash funciton
     74  */
     75 template<>
     76 struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
     77 {
     78   uint32_t operator()(const llvm::StringRef& pKey) const
     79   {
     80     uint32_t hash_val = 1315423911;
     81 
     82     for(unsigned int i = 0; i < pKey.size(); ++i) {
     83        hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
     84     }
     85     return hash_val;
     86   }
     87 };
     88 
     89 /** \class StringHash<PJW>
     90  *  \brief P.J. Weinberger hash function
     91  */
     92 template<>
     93 struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
     94 {
     95   uint32_t operator()(const llvm::StringRef& pKey) const
     96   {
     97     const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
     98     const unsigned int ThreeQuarters     = (unsigned int)((BitsInUnsignedInt  * 3) / 4);
     99     const unsigned int OneEighth         = (unsigned int)(BitsInUnsignedInt / 8);
    100     const unsigned int HighBits          = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
    101     uint32_t hash_val = 0;
    102     uint32_t test = 0;
    103 
    104     for(unsigned int i = 0; i < pKey.size(); ++i) {
    105       hash_val = (hash_val << OneEighth) + pKey[i];
    106 
    107       if((test = hash_val & HighBits) != 0) {
    108         hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
    109       }
    110     }
    111     return hash_val;
    112   }
    113 };
    114 
    115 /** \class StringHash<ELF>
    116  *  \brief ELF hash function.
    117  */
    118 template<>
    119 struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
    120 {
    121   uint32_t operator()(const llvm::StringRef& pKey) const
    122   {
    123     uint32_t hash_val = 0;
    124     uint32_t x = 0;
    125 
    126     for (unsigned int i = 0; i < pKey.size(); ++i) {
    127       hash_val = (hash_val << 4) + pKey[i];
    128       if((x = hash_val & 0xF0000000L) != 0)
    129         hash_val ^= (x >> 24);
    130       hash_val &= ~x;
    131     }
    132     return hash_val;
    133   }
    134 };
    135 
    136 /** \class StringHash<BKDR>
    137  *  \brief BKDR hash function
    138  */
    139 template<>
    140 struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
    141 {
    142   uint32_t operator()(const llvm::StringRef& pKey) const
    143   {
    144     const uint32_t seed = 131;
    145     uint32_t hash_val = 0;
    146 
    147     for(uint32_t i = 0; i < pKey.size(); ++i)
    148       hash_val = (hash_val * seed) + pKey[i];
    149     return hash_val;
    150   }
    151 };
    152 
    153 
    154 /** \class StringHash<SDBM>
    155  *  \brief SDBM hash function
    156  *  0.049s in 100000 test
    157  */
    158 template<>
    159 struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
    160 {
    161   uint32_t operator()(const llvm::StringRef& pKey) const
    162   {
    163     uint32_t hash_val = 0;
    164 
    165     for(uint32_t i = 0; i < pKey.size(); ++i)
    166       hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
    167     return hash_val;
    168   }
    169 };
    170 
    171 /** \class StringHash<DJB>
    172  *  \brief DJB hash function
    173  *  0.057s in 100000 test
    174  */
    175 template<>
    176 struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
    177 {
    178   uint32_t operator()(const llvm::StringRef& pKey) const
    179   {
    180     uint32_t hash_val = 5381;
    181 
    182     for(uint32_t i = 0; i < pKey.size(); ++i)
    183       hash_val = ((hash_val << 5) + hash_val) + pKey[i];
    184 
    185     return hash_val;
    186   }
    187 };
    188 
    189 /** \class StringHash<DEK>
    190  *  \brief DEK hash function
    191  *  0.60s
    192  */
    193 template<>
    194 struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
    195 {
    196   uint32_t operator()(const llvm::StringRef& pKey) const
    197   {
    198     uint32_t hash_val = pKey.size();
    199 
    200     for(uint32_t i = 0; i < pKey.size(); ++i)
    201       hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
    202 
    203     return hash_val;
    204   }
    205 };
    206 
    207 /** \class StringHash<BP>
    208  *  \brief BP hash function
    209  *  0.057s
    210  */
    211 template<>
    212 struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
    213 {
    214   uint32_t operator()(const llvm::StringRef& pKey) const
    215   {
    216     uint32_t hash_val = 0;
    217     for(uint32_t i = 0; i < pKey.size(); ++i)
    218       hash_val = hash_val << 7 ^ pKey[i];
    219 
    220     return hash_val;
    221   }
    222 };
    223 
    224 /** \class StringHash<FNV>
    225  *  \brief FNV hash function
    226  *  0.058s
    227  */
    228 template<>
    229 struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
    230 {
    231   uint32_t operator()(const llvm::StringRef& pKey) const
    232   {
    233     const uint32_t fnv_prime = 0x811C9DC5;
    234     uint32_t hash_val = 0;
    235     for(uint32_t i = 0; i < pKey.size(); ++i) {
    236       hash_val *= fnv_prime;
    237       hash_val ^= pKey[i];
    238     }
    239 
    240     return hash_val;
    241   }
    242 };
    243 
    244 /** \class StringHash<AP>
    245  *  \brief AP hash function
    246  *  0.060s
    247  */
    248 template<>
    249 struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
    250 {
    251   uint32_t operator()(const llvm::StringRef& pKey) const
    252   {
    253     unsigned int hash_val = 0xAAAAAAAA;
    254 
    255     for(uint32_t i = 0; i < pKey.size(); ++i) {
    256       hash_val ^= ((i & 1) == 0)?
    257                           ((hash_val <<  7) ^ pKey[i] * (hash_val >> 3)):
    258                           (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
    259     }
    260 
    261     return hash_val;
    262   }
    263 };
    264 
    265 /** \class StringHash<ES>
    266  *  \brief This is a revision of Edward Sayers' string characteristic function.
    267  *
    268  *  31-28  27  26  25   -   0
    269  *  +----+---+---+------------+
    270  *  | .  | N | - | a/A  ~ z/Z |
    271  *  +----+---+---+------------+
    272  *
    273  *  . (bit 31~28) - The number of '.' characters
    274  *  N (bit 27)    - Are there any numbers in the string
    275  *  - (bit 26)    - Does the string have '-' character
    276  *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
    277  *                  Bit 24 is set only if ...                   'b' or 'B', ...
    278  */
    279 template<>
    280 struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
    281 {
    282   uint32_t operator()(const llvm::StringRef& pString) const
    283   {
    284     uint32_t result = 0x0;
    285     unsigned int dot = 0;
    286     std::string::size_type idx;
    287     for (idx = 0; idx < pString.size(); ++idx) {
    288       int cur_char = pString[idx];
    289 
    290       if ('.' == cur_char) {
    291         ++dot;
    292         continue;
    293       }
    294 
    295       if (isdigit(cur_char)) {
    296         result |= (1 << 27);
    297         continue;
    298       }
    299 
    300       if ('_' == cur_char) {
    301         result |= (1 << 26);
    302         continue;
    303       }
    304 
    305       if (isupper(cur_char)) {
    306         result |= (1 << (cur_char - 'A'));
    307         continue;
    308       }
    309 
    310       if (islower(cur_char)) {
    311         result |= (1 << (cur_char - 'a'));
    312         continue;
    313       }
    314     }
    315     result |= (dot << 28);
    316     return result;
    317   }
    318 
    319 
    320   /** \func may_include
    321    *  \brief is it possible that pRule is a sub-string of pInString
    322    */
    323   static bool may_include(uint32_t pRule, uint32_t pInString)
    324   {
    325     uint32_t in_c = pInString << 4;
    326     uint32_t r_c  = pRule << 4;
    327 
    328     uint32_t res = (in_c ^ r_c) & r_c;
    329     if (0 != res)
    330       return false;
    331 
    332     uint32_t in_dot = pInString >> 28;
    333     uint32_t r_dot  = pRule >> 28;
    334     if (r_dot > in_dot)
    335       return false;
    336 
    337     return true;
    338   }
    339 };
    340 
    341 /** \class template<uint32_t TYPE> StringCompare
    342  *  \brief the template StringCompare class, for specification
    343  */
    344 template<typename STRING_TYPE>
    345 struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
    346 {
    347   bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
    348   { return X == Y; }
    349 };
    350 
    351 template<>
    352 struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
    353 {
    354   bool operator()(const char* X, const char* Y) const
    355   { return (0 == std::strcmp(X, Y)); }
    356 };
    357 
    358 template<>
    359 struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
    360 {
    361   bool operator()(const char* X, const char* Y) const
    362   { return (0 == std::strcmp(X, Y)); }
    363 };
    364 
    365 } // namespace of mcld
    366 
    367 #endif
    368 
    369