1 //----------------------------------------------------------------------------- 2 // MurmurHash2 was written by Austin Appleby, and is placed in the public 3 // domain. The author hereby disclaims copyright to this source code. 4 5 // Note - This code makes a few assumptions about how your machine behaves - 6 7 // 1. We can read a 4-byte value from any address without crashing 8 // 2. sizeof(int) == 4 9 10 // And it has a few limitations - 11 12 // 1. It will not work incrementally. 13 // 2. It will not produce the same results on little-endian and big-endian 14 // machines. 15 16 #include "MurmurHash2.h" 17 18 //----------------------------------------------------------------------------- 19 // Platform-specific functions and macros 20 21 // Microsoft Visual Studio 22 23 #if defined(_MSC_VER) 24 25 #define BIG_CONSTANT(x) (x) 26 27 // Other compilers 28 29 #else // defined(_MSC_VER) 30 31 #define BIG_CONSTANT(x) (x##LLU) 32 33 #endif // !defined(_MSC_VER) 34 35 //----------------------------------------------------------------------------- 36 37 uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ) 38 { 39 // 'm' and 'r' are mixing constants generated offline. 40 // They're not really 'magic', they just happen to work well. 41 42 const uint32_t m = 0x5bd1e995; 43 const int r = 24; 44 45 // Initialize the hash to a 'random' value 46 47 uint32_t h = seed ^ len; 48 49 // Mix 4 bytes at a time into the hash 50 51 const unsigned char * data = (const unsigned char *)key; 52 53 while(len >= 4) 54 { 55 uint32_t k = *(uint32_t*)data; 56 57 k *= m; 58 k ^= k >> r; 59 k *= m; 60 61 h *= m; 62 h ^= k; 63 64 data += 4; 65 len -= 4; 66 } 67 68 // Handle the last few bytes of the input array 69 70 switch(len) 71 { 72 case 3: h ^= data[2] << 16; 73 case 2: h ^= data[1] << 8; 74 case 1: h ^= data[0]; 75 h *= m; 76 }; 77 78 // Do a few final mixes of the hash to ensure the last few 79 // bytes are well-incorporated. 80 81 h ^= h >> 13; 82 h *= m; 83 h ^= h >> 15; 84 85 return h; 86 } 87 88 //----------------------------------------------------------------------------- 89 // MurmurHash2, 64-bit versions, by Austin Appleby 90 91 // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 92 // and endian-ness issues if used across multiple platforms. 93 94 // 64-bit hash for 64-bit platforms 95 96 uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed ) 97 { 98 const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); 99 const int r = 47; 100 101 uint64_t h = seed ^ (len * m); 102 103 const uint64_t * data = (const uint64_t *)key; 104 const uint64_t * end = data + (len/8); 105 106 while(data != end) 107 { 108 uint64_t k = *data++; 109 110 k *= m; 111 k ^= k >> r; 112 k *= m; 113 114 h ^= k; 115 h *= m; 116 } 117 118 const unsigned char * data2 = (const unsigned char*)data; 119 120 switch(len & 7) 121 { 122 case 7: h ^= uint64_t(data2[6]) << 48; 123 case 6: h ^= uint64_t(data2[5]) << 40; 124 case 5: h ^= uint64_t(data2[4]) << 32; 125 case 4: h ^= uint64_t(data2[3]) << 24; 126 case 3: h ^= uint64_t(data2[2]) << 16; 127 case 2: h ^= uint64_t(data2[1]) << 8; 128 case 1: h ^= uint64_t(data2[0]); 129 h *= m; 130 }; 131 132 h ^= h >> r; 133 h *= m; 134 h ^= h >> r; 135 136 return h; 137 } 138 139 140 // 64-bit hash for 32-bit platforms 141 142 uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed ) 143 { 144 const uint32_t m = 0x5bd1e995; 145 const int r = 24; 146 147 uint32_t h1 = uint32_t(seed) ^ len; 148 uint32_t h2 = uint32_t(seed >> 32); 149 150 const uint32_t * data = (const uint32_t *)key; 151 152 while(len >= 8) 153 { 154 uint32_t k1 = *data++; 155 k1 *= m; k1 ^= k1 >> r; k1 *= m; 156 h1 *= m; h1 ^= k1; 157 len -= 4; 158 159 uint32_t k2 = *data++; 160 k2 *= m; k2 ^= k2 >> r; k2 *= m; 161 h2 *= m; h2 ^= k2; 162 len -= 4; 163 } 164 165 if(len >= 4) 166 { 167 uint32_t k1 = *data++; 168 k1 *= m; k1 ^= k1 >> r; k1 *= m; 169 h1 *= m; h1 ^= k1; 170 len -= 4; 171 } 172 173 switch(len) 174 { 175 case 3: h2 ^= ((unsigned char*)data)[2] << 16; 176 case 2: h2 ^= ((unsigned char*)data)[1] << 8; 177 case 1: h2 ^= ((unsigned char*)data)[0]; 178 h2 *= m; 179 }; 180 181 h1 ^= h2 >> 18; h1 *= m; 182 h2 ^= h1 >> 22; h2 *= m; 183 h1 ^= h2 >> 17; h1 *= m; 184 h2 ^= h1 >> 19; h2 *= m; 185 186 uint64_t h = h1; 187 188 h = (h << 32) | h2; 189 190 return h; 191 } 192 193 //----------------------------------------------------------------------------- 194 // MurmurHash2A, by Austin Appleby 195 196 // This is a variant of MurmurHash2 modified to use the Merkle-Damgard 197 // construction. Bulk speed should be identical to Murmur2, small-key speed 198 // will be 10%-20% slower due to the added overhead at the end of the hash. 199 200 // This variant fixes a minor issue where null keys were more likely to 201 // collide with each other than expected, and also makes the function 202 // more amenable to incremental implementations. 203 204 #define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 205 206 uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed ) 207 { 208 const uint32_t m = 0x5bd1e995; 209 const int r = 24; 210 uint32_t l = len; 211 212 const unsigned char * data = (const unsigned char *)key; 213 214 uint32_t h = seed; 215 216 while(len >= 4) 217 { 218 uint32_t k = *(uint32_t*)data; 219 220 mmix(h,k); 221 222 data += 4; 223 len -= 4; 224 } 225 226 uint32_t t = 0; 227 228 switch(len) 229 { 230 case 3: t ^= data[2] << 16; 231 case 2: t ^= data[1] << 8; 232 case 1: t ^= data[0]; 233 }; 234 235 mmix(h,t); 236 mmix(h,l); 237 238 h ^= h >> 13; 239 h *= m; 240 h ^= h >> 15; 241 242 return h; 243 } 244 245 //----------------------------------------------------------------------------- 246 // CMurmurHash2A, by Austin Appleby 247 248 // This is a sample implementation of MurmurHash2A designed to work 249 // incrementally. 250 251 // Usage - 252 253 // CMurmurHash2A hasher 254 // hasher.Begin(seed); 255 // hasher.Add(data1,size1); 256 // hasher.Add(data2,size2); 257 // ... 258 // hasher.Add(dataN,sizeN); 259 // uint32_t hash = hasher.End() 260 261 class CMurmurHash2A 262 { 263 public: 264 265 void Begin ( uint32_t seed = 0 ) 266 { 267 m_hash = seed; 268 m_tail = 0; 269 m_count = 0; 270 m_size = 0; 271 } 272 273 void Add ( const unsigned char * data, int len ) 274 { 275 m_size += len; 276 277 MixTail(data,len); 278 279 while(len >= 4) 280 { 281 uint32_t k = *(uint32_t*)data; 282 283 mmix(m_hash,k); 284 285 data += 4; 286 len -= 4; 287 } 288 289 MixTail(data,len); 290 } 291 292 uint32_t End ( void ) 293 { 294 mmix(m_hash,m_tail); 295 mmix(m_hash,m_size); 296 297 m_hash ^= m_hash >> 13; 298 m_hash *= m; 299 m_hash ^= m_hash >> 15; 300 301 return m_hash; 302 } 303 304 private: 305 306 static const uint32_t m = 0x5bd1e995; 307 static const int r = 24; 308 309 void MixTail ( const unsigned char * & data, int & len ) 310 { 311 while( len && ((len<4) || m_count) ) 312 { 313 m_tail |= (*data++) << (m_count * 8); 314 315 m_count++; 316 len--; 317 318 if(m_count == 4) 319 { 320 mmix(m_hash,m_tail); 321 m_tail = 0; 322 m_count = 0; 323 } 324 } 325 } 326 327 uint32_t m_hash; 328 uint32_t m_tail; 329 uint32_t m_count; 330 uint32_t m_size; 331 }; 332 333 //----------------------------------------------------------------------------- 334 // MurmurHashNeutral2, by Austin Appleby 335 336 // Same as MurmurHash2, but endian- and alignment-neutral. 337 // Half the speed though, alas. 338 339 uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed ) 340 { 341 const uint32_t m = 0x5bd1e995; 342 const int r = 24; 343 344 uint32_t h = seed ^ len; 345 346 const unsigned char * data = (const unsigned char *)key; 347 348 while(len >= 4) 349 { 350 uint32_t k; 351 352 k = data[0]; 353 k |= data[1] << 8; 354 k |= data[2] << 16; 355 k |= data[3] << 24; 356 357 k *= m; 358 k ^= k >> r; 359 k *= m; 360 361 h *= m; 362 h ^= k; 363 364 data += 4; 365 len -= 4; 366 } 367 368 switch(len) 369 { 370 case 3: h ^= data[2] << 16; 371 case 2: h ^= data[1] << 8; 372 case 1: h ^= data[0]; 373 h *= m; 374 }; 375 376 h ^= h >> 13; 377 h *= m; 378 h ^= h >> 15; 379 380 return h; 381 } 382 383 //----------------------------------------------------------------------------- 384 // MurmurHashAligned2, by Austin Appleby 385 386 // Same algorithm as MurmurHash2, but only does aligned reads - should be safer 387 // on certain platforms. 388 389 // Performance will be lower than MurmurHash2 390 391 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 392 393 394 uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed ) 395 { 396 const uint32_t m = 0x5bd1e995; 397 const int r = 24; 398 399 const unsigned char * data = (const unsigned char *)key; 400 401 uint32_t h = seed ^ len; 402 403 int align = (uint64_t)data & 3; 404 405 if(align && (len >= 4)) 406 { 407 // Pre-load the temp registers 408 409 uint32_t t = 0, d = 0; 410 411 switch(align) 412 { 413 case 1: t |= data[2] << 16; 414 case 2: t |= data[1] << 8; 415 case 3: t |= data[0]; 416 } 417 418 t <<= (8 * align); 419 420 data += 4-align; 421 len -= 4-align; 422 423 int sl = 8 * (4-align); 424 int sr = 8 * align; 425 426 // Mix 427 428 while(len >= 4) 429 { 430 d = *(uint32_t *)data; 431 t = (t >> sr) | (d << sl); 432 433 uint32_t k = t; 434 435 MIX(h,k,m); 436 437 t = d; 438 439 data += 4; 440 len -= 4; 441 } 442 443 // Handle leftover data in temp registers 444 445 d = 0; 446 447 if(len >= align) 448 { 449 switch(align) 450 { 451 case 3: d |= data[2] << 16; 452 case 2: d |= data[1] << 8; 453 case 1: d |= data[0]; 454 } 455 456 uint32_t k = (t >> sr) | (d << sl); 457 MIX(h,k,m); 458 459 data += align; 460 len -= align; 461 462 //---------- 463 // Handle tail bytes 464 465 switch(len) 466 { 467 case 3: h ^= data[2] << 16; 468 case 2: h ^= data[1] << 8; 469 case 1: h ^= data[0]; 470 h *= m; 471 }; 472 } 473 else 474 { 475 switch(len) 476 { 477 case 3: d |= data[2] << 16; 478 case 2: d |= data[1] << 8; 479 case 1: d |= data[0]; 480 case 0: h ^= (t >> sr) | (d << sl); 481 h *= m; 482 } 483 } 484 485 h ^= h >> 13; 486 h *= m; 487 h ^= h >> 15; 488 489 return h; 490 } 491 else 492 { 493 while(len >= 4) 494 { 495 uint32_t k = *(uint32_t *)data; 496 497 MIX(h,k,m); 498 499 data += 4; 500 len -= 4; 501 } 502 503 //---------- 504 // Handle tail bytes 505 506 switch(len) 507 { 508 case 3: h ^= data[2] << 16; 509 case 2: h ^= data[1] << 8; 510 case 1: h ^= data[0]; 511 h *= m; 512 }; 513 514 h ^= h >> 13; 515 h *= m; 516 h ^= h >> 15; 517 518 return h; 519 } 520 } 521 522 //----------------------------------------------------------------------------- 523 524