Home | History | Annotate | Download | only in src
      1 //-----------------------------------------------------------------------------
      2 // MurmurHash2 was written by Austin Appleby, and is placed in the public
      3 // domain. The author hereby disclaims copyright to this source code.
      4 
      5 // Note - This code makes a few assumptions about how your machine behaves -
      6 
      7 // 1. We can read a 4-byte value from any address without crashing
      8 // 2. sizeof(int) == 4
      9 
     10 // And it has a few limitations -
     11 
     12 // 1. It will not work incrementally.
     13 // 2. It will not produce the same results on little-endian and big-endian
     14 //    machines.
     15 
     16 #include "MurmurHash2.h"
     17 
     18 //-----------------------------------------------------------------------------
     19 // Platform-specific functions and macros
     20 
     21 // Microsoft Visual Studio
     22 
     23 #if defined(_MSC_VER)
     24 
     25 #define BIG_CONSTANT(x) (x)
     26 
     27 // Other compilers
     28 
     29 #else	// defined(_MSC_VER)
     30 
     31 #define BIG_CONSTANT(x) (x##LLU)
     32 
     33 #endif // !defined(_MSC_VER)
     34 
     35 //-----------------------------------------------------------------------------
     36 
     37 uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
     38 {
     39   // 'm' and 'r' are mixing constants generated offline.
     40   // They're not really 'magic', they just happen to work well.
     41 
     42   const uint32_t m = 0x5bd1e995;
     43   const int r = 24;
     44 
     45   // Initialize the hash to a 'random' value
     46 
     47   uint32_t h = seed ^ len;
     48 
     49   // Mix 4 bytes at a time into the hash
     50 
     51   const unsigned char * data = (const unsigned char *)key;
     52 
     53   while(len >= 4)
     54   {
     55     uint32_t k = *(uint32_t*)data;
     56 
     57     k *= m;
     58     k ^= k >> r;
     59     k *= m;
     60 
     61     h *= m;
     62     h ^= k;
     63 
     64     data += 4;
     65     len -= 4;
     66   }
     67 
     68   // Handle the last few bytes of the input array
     69 
     70   switch(len)
     71   {
     72   case 3: h ^= data[2] << 16;
     73   case 2: h ^= data[1] << 8;
     74   case 1: h ^= data[0];
     75       h *= m;
     76   };
     77 
     78   // Do a few final mixes of the hash to ensure the last few
     79   // bytes are well-incorporated.
     80 
     81   h ^= h >> 13;
     82   h *= m;
     83   h ^= h >> 15;
     84 
     85   return h;
     86 }
     87 
     88 //-----------------------------------------------------------------------------
     89 // MurmurHash2, 64-bit versions, by Austin Appleby
     90 
     91 // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
     92 // and endian-ness issues if used across multiple platforms.
     93 
     94 // 64-bit hash for 64-bit platforms
     95 
     96 uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
     97 {
     98   const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
     99   const int r = 47;
    100 
    101   uint64_t h = seed ^ (len * m);
    102 
    103   const uint64_t * data = (const uint64_t *)key;
    104   const uint64_t * end = data + (len/8);
    105 
    106   while(data != end)
    107   {
    108     uint64_t k = *data++;
    109 
    110     k *= m;
    111     k ^= k >> r;
    112     k *= m;
    113 
    114     h ^= k;
    115     h *= m;
    116   }
    117 
    118   const unsigned char * data2 = (const unsigned char*)data;
    119 
    120   switch(len & 7)
    121   {
    122   case 7: h ^= uint64_t(data2[6]) << 48;
    123   case 6: h ^= uint64_t(data2[5]) << 40;
    124   case 5: h ^= uint64_t(data2[4]) << 32;
    125   case 4: h ^= uint64_t(data2[3]) << 24;
    126   case 3: h ^= uint64_t(data2[2]) << 16;
    127   case 2: h ^= uint64_t(data2[1]) << 8;
    128   case 1: h ^= uint64_t(data2[0]);
    129           h *= m;
    130   };
    131 
    132   h ^= h >> r;
    133   h *= m;
    134   h ^= h >> r;
    135 
    136   return h;
    137 }
    138 
    139 
    140 // 64-bit hash for 32-bit platforms
    141 
    142 uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
    143 {
    144   const uint32_t m = 0x5bd1e995;
    145   const int r = 24;
    146 
    147   uint32_t h1 = uint32_t(seed) ^ len;
    148   uint32_t h2 = uint32_t(seed >> 32);
    149 
    150   const uint32_t * data = (const uint32_t *)key;
    151 
    152   while(len >= 8)
    153   {
    154     uint32_t k1 = *data++;
    155     k1 *= m; k1 ^= k1 >> r; k1 *= m;
    156     h1 *= m; h1 ^= k1;
    157     len -= 4;
    158 
    159     uint32_t k2 = *data++;
    160     k2 *= m; k2 ^= k2 >> r; k2 *= m;
    161     h2 *= m; h2 ^= k2;
    162     len -= 4;
    163   }
    164 
    165   if(len >= 4)
    166   {
    167     uint32_t k1 = *data++;
    168     k1 *= m; k1 ^= k1 >> r; k1 *= m;
    169     h1 *= m; h1 ^= k1;
    170     len -= 4;
    171   }
    172 
    173   switch(len)
    174   {
    175   case 3: h2 ^= ((unsigned char*)data)[2] << 16;
    176   case 2: h2 ^= ((unsigned char*)data)[1] << 8;
    177   case 1: h2 ^= ((unsigned char*)data)[0];
    178       h2 *= m;
    179   };
    180 
    181   h1 ^= h2 >> 18; h1 *= m;
    182   h2 ^= h1 >> 22; h2 *= m;
    183   h1 ^= h2 >> 17; h1 *= m;
    184   h2 ^= h1 >> 19; h2 *= m;
    185 
    186   uint64_t h = h1;
    187 
    188   h = (h << 32) | h2;
    189 
    190   return h;
    191 }
    192 
    193 //-----------------------------------------------------------------------------
    194 // MurmurHash2A, by Austin Appleby
    195 
    196 // This is a variant of MurmurHash2 modified to use the Merkle-Damgard
    197 // construction. Bulk speed should be identical to Murmur2, small-key speed
    198 // will be 10%-20% slower due to the added overhead at the end of the hash.
    199 
    200 // This variant fixes a minor issue where null keys were more likely to
    201 // collide with each other than expected, and also makes the function
    202 // more amenable to incremental implementations.
    203 
    204 #define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
    205 
    206 uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
    207 {
    208   const uint32_t m = 0x5bd1e995;
    209   const int r = 24;
    210   uint32_t l = len;
    211 
    212   const unsigned char * data = (const unsigned char *)key;
    213 
    214   uint32_t h = seed;
    215 
    216   while(len >= 4)
    217   {
    218     uint32_t k = *(uint32_t*)data;
    219 
    220     mmix(h,k);
    221 
    222     data += 4;
    223     len -= 4;
    224   }
    225 
    226   uint32_t t = 0;
    227 
    228   switch(len)
    229   {
    230   case 3: t ^= data[2] << 16;
    231   case 2: t ^= data[1] << 8;
    232   case 1: t ^= data[0];
    233   };
    234 
    235   mmix(h,t);
    236   mmix(h,l);
    237 
    238   h ^= h >> 13;
    239   h *= m;
    240   h ^= h >> 15;
    241 
    242   return h;
    243 }
    244 
    245 //-----------------------------------------------------------------------------
    246 // CMurmurHash2A, by Austin Appleby
    247 
    248 // This is a sample implementation of MurmurHash2A designed to work
    249 // incrementally.
    250 
    251 // Usage -
    252 
    253 // CMurmurHash2A hasher
    254 // hasher.Begin(seed);
    255 // hasher.Add(data1,size1);
    256 // hasher.Add(data2,size2);
    257 // ...
    258 // hasher.Add(dataN,sizeN);
    259 // uint32_t hash = hasher.End()
    260 
    261 class CMurmurHash2A
    262 {
    263 public:
    264 
    265   void Begin ( uint32_t seed = 0 )
    266   {
    267     m_hash  = seed;
    268     m_tail  = 0;
    269     m_count = 0;
    270     m_size  = 0;
    271   }
    272 
    273   void Add ( const unsigned char * data, int len )
    274   {
    275     m_size += len;
    276 
    277     MixTail(data,len);
    278 
    279     while(len >= 4)
    280     {
    281       uint32_t k = *(uint32_t*)data;
    282 
    283       mmix(m_hash,k);
    284 
    285       data += 4;
    286       len -= 4;
    287     }
    288 
    289     MixTail(data,len);
    290   }
    291 
    292   uint32_t End ( void )
    293   {
    294     mmix(m_hash,m_tail);
    295     mmix(m_hash,m_size);
    296 
    297     m_hash ^= m_hash >> 13;
    298     m_hash *= m;
    299     m_hash ^= m_hash >> 15;
    300 
    301     return m_hash;
    302   }
    303 
    304 private:
    305 
    306   static const uint32_t m = 0x5bd1e995;
    307   static const int r = 24;
    308 
    309   void MixTail ( const unsigned char * & data, int & len )
    310   {
    311     while( len && ((len<4) || m_count) )
    312     {
    313       m_tail |= (*data++) << (m_count * 8);
    314 
    315       m_count++;
    316       len--;
    317 
    318       if(m_count == 4)
    319       {
    320         mmix(m_hash,m_tail);
    321         m_tail = 0;
    322         m_count = 0;
    323       }
    324     }
    325   }
    326 
    327   uint32_t m_hash;
    328   uint32_t m_tail;
    329   uint32_t m_count;
    330   uint32_t m_size;
    331 };
    332 
    333 //-----------------------------------------------------------------------------
    334 // MurmurHashNeutral2, by Austin Appleby
    335 
    336 // Same as MurmurHash2, but endian- and alignment-neutral.
    337 // Half the speed though, alas.
    338 
    339 uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
    340 {
    341   const uint32_t m = 0x5bd1e995;
    342   const int r = 24;
    343 
    344   uint32_t h = seed ^ len;
    345 
    346   const unsigned char * data = (const unsigned char *)key;
    347 
    348   while(len >= 4)
    349   {
    350     uint32_t k;
    351 
    352     k  = data[0];
    353     k |= data[1] << 8;
    354     k |= data[2] << 16;
    355     k |= data[3] << 24;
    356 
    357     k *= m;
    358     k ^= k >> r;
    359     k *= m;
    360 
    361     h *= m;
    362     h ^= k;
    363 
    364     data += 4;
    365     len -= 4;
    366   }
    367 
    368   switch(len)
    369   {
    370   case 3: h ^= data[2] << 16;
    371   case 2: h ^= data[1] << 8;
    372   case 1: h ^= data[0];
    373           h *= m;
    374   };
    375 
    376   h ^= h >> 13;
    377   h *= m;
    378   h ^= h >> 15;
    379 
    380   return h;
    381 }
    382 
    383 //-----------------------------------------------------------------------------
    384 // MurmurHashAligned2, by Austin Appleby
    385 
    386 // Same algorithm as MurmurHash2, but only does aligned reads - should be safer
    387 // on certain platforms.
    388 
    389 // Performance will be lower than MurmurHash2
    390 
    391 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
    392 
    393 
    394 uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
    395 {
    396   const uint32_t m = 0x5bd1e995;
    397   const int r = 24;
    398 
    399   const unsigned char * data = (const unsigned char *)key;
    400 
    401   uint32_t h = seed ^ len;
    402 
    403   int align = (uint64_t)data & 3;
    404 
    405   if(align && (len >= 4))
    406   {
    407     // Pre-load the temp registers
    408 
    409     uint32_t t = 0, d = 0;
    410 
    411     switch(align)
    412     {
    413       case 1: t |= data[2] << 16;
    414       case 2: t |= data[1] << 8;
    415       case 3: t |= data[0];
    416     }
    417 
    418     t <<= (8 * align);
    419 
    420     data += 4-align;
    421     len -= 4-align;
    422 
    423     int sl = 8 * (4-align);
    424     int sr = 8 * align;
    425 
    426     // Mix
    427 
    428     while(len >= 4)
    429     {
    430       d = *(uint32_t *)data;
    431       t = (t >> sr) | (d << sl);
    432 
    433       uint32_t k = t;
    434 
    435       MIX(h,k,m);
    436 
    437       t = d;
    438 
    439       data += 4;
    440       len -= 4;
    441     }
    442 
    443     // Handle leftover data in temp registers
    444 
    445     d = 0;
    446 
    447     if(len >= align)
    448     {
    449       switch(align)
    450       {
    451       case 3: d |= data[2] << 16;
    452       case 2: d |= data[1] << 8;
    453       case 1: d |= data[0];
    454       }
    455 
    456       uint32_t k = (t >> sr) | (d << sl);
    457       MIX(h,k,m);
    458 
    459       data += align;
    460       len -= align;
    461 
    462       //----------
    463       // Handle tail bytes
    464 
    465       switch(len)
    466       {
    467       case 3: h ^= data[2] << 16;
    468       case 2: h ^= data[1] << 8;
    469       case 1: h ^= data[0];
    470           h *= m;
    471       };
    472     }
    473     else
    474     {
    475       switch(len)
    476       {
    477       case 3: d |= data[2] << 16;
    478       case 2: d |= data[1] << 8;
    479       case 1: d |= data[0];
    480       case 0: h ^= (t >> sr) | (d << sl);
    481           h *= m;
    482       }
    483     }
    484 
    485     h ^= h >> 13;
    486     h *= m;
    487     h ^= h >> 15;
    488 
    489     return h;
    490   }
    491   else
    492   {
    493     while(len >= 4)
    494     {
    495       uint32_t k = *(uint32_t *)data;
    496 
    497       MIX(h,k,m);
    498 
    499       data += 4;
    500       len -= 4;
    501     }
    502 
    503     //----------
    504     // Handle tail bytes
    505 
    506     switch(len)
    507     {
    508     case 3: h ^= data[2] << 16;
    509     case 2: h ^= data[1] << 8;
    510     case 1: h ^= data[0];
    511         h *= m;
    512     };
    513 
    514     h ^= h >> 13;
    515     h *= m;
    516     h ^= h >> 15;
    517 
    518     return h;
    519   }
    520 }
    521 
    522 //-----------------------------------------------------------------------------
    523 
    524