1 //===- StringHash.h ---------------------------------------------------------===// 2 // 3 // The MCLinker Project 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef MCLD_STRING_HASH_FUNCTION_H 11 #define MCLD_STRING_HASH_FUNCTION_H 12 #ifdef ENABLE_UNITTEST 13 #include <gtest.h> 14 #endif 15 #include <llvm/ADT/StringRef.h> 16 #include <llvm/Support/DataTypes.h> 17 #include <llvm/Support/ErrorHandling.h> 18 #include <cctype> 19 #include <functional> 20 21 namespace mcld 22 { 23 24 enum StringHashType 25 { 26 RS, 27 JS, 28 PJW, 29 ELF, 30 BKDR, 31 SDBM, 32 DJB, 33 DEK, 34 BP, 35 FNV, 36 AP, 37 ES 38 }; 39 40 /** \class template<uint32_t TYPE> StringHash 41 * \brief the template StringHash class, for specification 42 */ 43 template<uint32_t TYPE> 44 struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t> 45 { 46 uint32_t operator()(const llvm::StringRef& pKey) const 47 { 48 llvm::report_fatal_error("Undefined StringHash function.\n"); 49 } 50 }; 51 52 /** \class StringHash<RSHash> 53 * \brief RS StringHash funciton 54 */ 55 template<> 56 struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t> 57 { 58 uint32_t operator()(const llvm::StringRef& pKey) const 59 { 60 const unsigned int b = 378551; 61 uint32_t a = 63689; 62 uint32_t hash_val = 0; 63 64 for(unsigned int i = 0; i < pKey.size(); ++i) { 65 hash_val = hash_val * a + pKey[i]; 66 a = a * b; 67 } 68 return hash_val; 69 } 70 }; 71 72 /** \class StringHash<JSHash> 73 * \brief JS hash funciton 74 */ 75 template<> 76 struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t> 77 { 78 uint32_t operator()(const llvm::StringRef& pKey) const 79 { 80 uint32_t hash_val = 1315423911; 81 82 for(unsigned int i = 0; i < pKey.size(); ++i) { 83 hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2)); 84 } 85 return hash_val; 86 } 87 }; 88 89 /** \class StringHash<PJW> 90 * \brief P.J. Weinberger hash function 91 */ 92 template<> 93 struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t> 94 { 95 uint32_t operator()(const llvm::StringRef& pKey) const 96 { 97 const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8); 98 const unsigned int ThreeQuarters = (unsigned int)((BitsInUnsignedInt * 3) / 4); 99 const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8); 100 const unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth); 101 uint32_t hash_val = 0; 102 uint32_t test = 0; 103 104 for(unsigned int i = 0; i < pKey.size(); ++i) { 105 hash_val = (hash_val << OneEighth) + pKey[i]; 106 107 if((test = hash_val & HighBits) != 0) { 108 hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits)); 109 } 110 } 111 return hash_val; 112 } 113 }; 114 115 /** \class StringHash<ELF> 116 * \brief ELF hash function. 117 */ 118 template<> 119 struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t> 120 { 121 uint32_t operator()(const llvm::StringRef& pKey) const 122 { 123 uint32_t hash_val = 0; 124 uint32_t x = 0; 125 126 for (unsigned int i = 0; i < pKey.size(); ++i) { 127 hash_val = (hash_val << 4) + pKey[i]; 128 if((x = hash_val & 0xF0000000L) != 0) 129 hash_val ^= (x >> 24); 130 hash_val &= ~x; 131 } 132 return hash_val; 133 } 134 }; 135 136 /** \class StringHash<BKDR> 137 * \brief BKDR hash function 138 */ 139 template<> 140 struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t> 141 { 142 uint32_t operator()(const llvm::StringRef& pKey) const 143 { 144 const uint32_t seed = 131; 145 uint32_t hash_val = 0; 146 147 for(uint32_t i = 0; i < pKey.size(); ++i) 148 hash_val = (hash_val * seed) + pKey[i]; 149 return hash_val; 150 } 151 }; 152 153 154 /** \class StringHash<SDBM> 155 * \brief SDBM hash function 156 * 0.049s in 100000 test 157 */ 158 template<> 159 struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t> 160 { 161 uint32_t operator()(const llvm::StringRef& pKey) const 162 { 163 uint32_t hash_val = 0; 164 165 for(uint32_t i = 0; i < pKey.size(); ++i) 166 hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val; 167 return hash_val; 168 } 169 }; 170 171 /** \class StringHash<DJB> 172 * \brief DJB hash function 173 * 0.057s in 100000 test 174 */ 175 template<> 176 struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t> 177 { 178 uint32_t operator()(const llvm::StringRef& pKey) const 179 { 180 uint32_t hash_val = 5381; 181 182 for(uint32_t i = 0; i < pKey.size(); ++i) 183 hash_val = ((hash_val << 5) + hash_val) + pKey[i]; 184 185 return hash_val; 186 } 187 }; 188 189 /** \class StringHash<DEK> 190 * \brief DEK hash function 191 * 0.60s 192 */ 193 template<> 194 struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t> 195 { 196 uint32_t operator()(const llvm::StringRef& pKey) const 197 { 198 uint32_t hash_val = pKey.size(); 199 200 for(uint32_t i = 0; i < pKey.size(); ++i) 201 hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i]; 202 203 return hash_val; 204 } 205 }; 206 207 /** \class StringHash<BP> 208 * \brief BP hash function 209 * 0.057s 210 */ 211 template<> 212 struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t> 213 { 214 uint32_t operator()(const llvm::StringRef& pKey) const 215 { 216 uint32_t hash_val = 0; 217 for(uint32_t i = 0; i < pKey.size(); ++i) 218 hash_val = hash_val << 7 ^ pKey[i]; 219 220 return hash_val; 221 } 222 }; 223 224 /** \class StringHash<FNV> 225 * \brief FNV hash function 226 * 0.058s 227 */ 228 template<> 229 struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t> 230 { 231 uint32_t operator()(const llvm::StringRef& pKey) const 232 { 233 const uint32_t fnv_prime = 0x811C9DC5; 234 uint32_t hash_val = 0; 235 for(uint32_t i = 0; i < pKey.size(); ++i) { 236 hash_val *= fnv_prime; 237 hash_val ^= pKey[i]; 238 } 239 240 return hash_val; 241 } 242 }; 243 244 /** \class StringHash<AP> 245 * \brief AP hash function 246 * 0.060s 247 */ 248 template<> 249 struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t> 250 { 251 uint32_t operator()(const llvm::StringRef& pKey) const 252 { 253 unsigned int hash_val = 0xAAAAAAAA; 254 255 for(uint32_t i = 0; i < pKey.size(); ++i) { 256 hash_val ^= ((i & 1) == 0)? 257 ((hash_val << 7) ^ pKey[i] * (hash_val >> 3)): 258 (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5)))); 259 } 260 261 return hash_val; 262 } 263 }; 264 265 /** \class StringHash<ES> 266 * \brief This is a revision of Edward Sayers' string characteristic function. 267 * 268 * 31-28 27 26 25 - 0 269 * +----+---+---+------------+ 270 * | . | N | - | a/A ~ z/Z | 271 * +----+---+---+------------+ 272 * 273 * . (bit 31~28) - The number of '.' characters 274 * N (bit 27) - Are there any numbers in the string 275 * - (bit 26) - Does the string have '-' character 276 * bit 25~0 - Bit 25 is set only if the string contains a 'a' or 'A', and 277 * Bit 24 is set only if ... 'b' or 'B', ... 278 */ 279 template<> 280 struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t> 281 { 282 uint32_t operator()(const llvm::StringRef& pString) const 283 { 284 uint32_t result = 0x0; 285 unsigned int dot = 0; 286 std::string::size_type idx; 287 for (idx = 0; idx < pString.size(); ++idx) { 288 int cur_char = pString[idx]; 289 290 if ('.' == cur_char) { 291 ++dot; 292 continue; 293 } 294 295 if (isdigit(cur_char)) { 296 result |= (1 << 27); 297 continue; 298 } 299 300 if ('_' == cur_char) { 301 result |= (1 << 26); 302 continue; 303 } 304 305 if (isupper(cur_char)) { 306 result |= (1 << (cur_char - 'A')); 307 continue; 308 } 309 310 if (islower(cur_char)) { 311 result |= (1 << (cur_char - 'a')); 312 continue; 313 } 314 } 315 result |= (dot << 28); 316 return result; 317 } 318 319 320 /** \func may_include 321 * \brief is it possible that pRule is a sub-string of pInString 322 */ 323 static bool may_include(uint32_t pRule, uint32_t pInString) 324 { 325 uint32_t in_c = pInString << 4; 326 uint32_t r_c = pRule << 4; 327 328 uint32_t res = (in_c ^ r_c) & r_c; 329 if (0 != res) 330 return false; 331 332 uint32_t in_dot = pInString >> 28; 333 uint32_t r_dot = pRule >> 28; 334 if (r_dot > in_dot) 335 return false; 336 337 return true; 338 } 339 }; 340 341 /** \class template<uint32_t TYPE> StringCompare 342 * \brief the template StringCompare class, for specification 343 */ 344 template<typename STRING_TYPE> 345 struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool> 346 { 347 bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const 348 { return X == Y; } 349 }; 350 351 template<> 352 struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool> 353 { 354 bool operator()(const char* X, const char* Y) const 355 { return (0 == std::strcmp(X, Y)); } 356 }; 357 358 template<> 359 struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool> 360 { 361 bool operator()(const char* X, const char* Y) const 362 { return (0 == std::strcmp(X, Y)); } 363 }; 364 365 } // namespace of mcld 366 367 #endif 368 369