Home | History | Annotate | Download | only in src
      1 // Spooky Hash
      2 // A 128-bit noncryptographic hash, for checksums and table lookup
      3 // By Bob Jenkins.  Public domain.
      4 //   Oct 31 2010: published framework, disclaimer ShortHash isn't right
      5 //   Nov 7 2010: disabled ShortHash
      6 //   Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again
      7 
      8 #include <memory.h>
      9 #include "Spooky.h"
     10 
     11 #define ALLOW_UNALIGNED_READS 1
     12 
     13 //
     14 // short hash ... it could be used on any message,
     15 // but it's used by Spooky just for short messages.
     16 //
     17 void SpookyHash::Short(
     18     const void *message,
     19     size_t length,
     20     uint64 *hash1,
     21     uint64 *hash2)
     22 {
     23     uint64 buf[sc_numVars];
     24     union
     25     {
     26         const uint8 *p8;
     27         uint32 *p32;
     28         uint64 *p64;
     29         size_t i;
     30     } u;
     31 
     32     u.p8 = (const uint8 *)message;
     33 
     34     if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
     35     {
     36         memcpy(buf, message, length);
     37         u.p64 = buf;
     38     }
     39 
     40     size_t remainder = length%32;
     41     uint64 a=*hash1;
     42     uint64 b=*hash2;
     43     uint64 c=sc_const;
     44     uint64 d=sc_const;
     45 
     46     if (length > 15)
     47     {
     48         const uint64 *end = u.p64 + (length/32)*4;
     49 
     50         // handle all complete sets of 32 bytes
     51         for (; u.p64 < end; u.p64 += 4)
     52         {
     53             c += u.p64[0];
     54             d += u.p64[1];
     55             ShortMix(a,b,c,d);
     56             a += u.p64[2];
     57             b += u.p64[3];
     58         }
     59 
     60         //Handle the case of 16+ remaining bytes.
     61         if (remainder >= 16)
     62         {
     63             c += u.p64[0];
     64             d += u.p64[1];
     65             ShortMix(a,b,c,d);
     66             u.p64 += 2;
     67             remainder -= 16;
     68         }
     69     }
     70 
     71     // Handle the last 0..15 bytes, and its length
     72     d = ((uint64)length) << 56;
     73     switch (remainder)
     74     {
     75     case 15:
     76     d += ((uint64)u.p8[14]) << 48;
     77     case 14:
     78         d += ((uint64)u.p8[13]) << 40;
     79     case 13:
     80         d += ((uint64)u.p8[12]) << 32;
     81     case 12:
     82         d += u.p32[2];
     83         c += u.p64[0];
     84         break;
     85     case 11:
     86         d += ((uint64)u.p8[10]) << 16;
     87     case 10:
     88         d += ((uint64)u.p8[9]) << 8;
     89     case 9:
     90         d += (uint64)u.p8[8];
     91     case 8:
     92         c += u.p64[0];
     93         break;
     94     case 7:
     95         c += ((uint64)u.p8[6]) << 48;
     96     case 6:
     97         c += ((uint64)u.p8[5]) << 40;
     98     case 5:
     99         c += ((uint64)u.p8[4]) << 32;
    100     case 4:
    101         c += u.p32[0];
    102         break;
    103     case 3:
    104         c += ((uint64)u.p8[2]) << 16;
    105     case 2:
    106         c += ((uint64)u.p8[1]) << 8;
    107     case 1:
    108         c += (uint64)u.p8[0];
    109         break;
    110     case 0:
    111         c += sc_const;
    112         d += sc_const;
    113     }
    114     ShortEnd(a,b,c,d);
    115     *hash1 = a;
    116     *hash2 = b;
    117 }
    118 
    119 
    120 
    121 
    122 // do the whole hash in one call
    123 void SpookyHash::Hash128(
    124     const void *message,
    125     size_t length,
    126     uint64 *hash1,
    127     uint64 *hash2)
    128 {
    129     if (length < sc_bufSize)
    130     {
    131         Short(message, length, hash1, hash2);
    132         return;
    133     }
    134 
    135     uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
    136     uint64 buf[sc_numVars];
    137     uint64 *end;
    138     union
    139     {
    140         const uint8 *p8;
    141         uint64 *p64;
    142         size_t i;
    143     } u;
    144     size_t remainder;
    145 
    146     h0=h3=h6=h9  = *hash1;
    147     h1=h4=h7=h10 = *hash2;
    148     h2=h5=h8=h11 = sc_const;
    149 
    150     u.p8 = (const uint8 *)message;
    151     end = u.p64 + (length/sc_blockSize)*sc_numVars;
    152 
    153     // handle all whole sc_blockSize blocks of bytes
    154     if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
    155     {
    156         while (u.p64 < end)
    157         {
    158             Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    159 	    u.p64 += sc_numVars;
    160         }
    161     }
    162     else
    163     {
    164         while (u.p64 < end)
    165         {
    166             memcpy(buf, u.p64, sc_blockSize);
    167             Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    168 	    u.p64 += sc_numVars;
    169         }
    170     }
    171 
    172     // handle the last partial block of sc_blockSize bytes
    173     remainder = (length - ((const uint8 *)end-(const uint8 *)message));
    174     memcpy(buf, end, remainder);
    175     memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder);
    176     ((uint8 *)buf)[sc_blockSize-1] = remainder;
    177     Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    178 
    179     // do some final mixing
    180     End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    181     *hash1 = h0;
    182     *hash2 = h1;
    183 }
    184 
    185 
    186 
    187 // init spooky state
    188 void SpookyHash::Init(uint64 seed1, uint64 seed2)
    189 {
    190     m_length = 0;
    191     m_remainder = 0;
    192     m_state[0] = seed1;
    193     m_state[1] = seed2;
    194 }
    195 
    196 
    197 // add a message fragment to the state
    198 void SpookyHash::Update(const void *message, size_t length)
    199 {
    200     uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
    201     size_t newLength = length + m_remainder;
    202     uint8  remainder;
    203     union
    204     {
    205         const uint8 *p8;
    206         uint64 *p64;
    207         size_t i;
    208     } u;
    209     const uint64 *end;
    210 
    211     // Is this message fragment too short?  If it is, stuff it away.
    212     if (newLength < sc_bufSize)
    213     {
    214         memcpy(&((uint8 *)m_data)[m_remainder], message, length);
    215         m_length = length + m_length;
    216         m_remainder = (uint8)newLength;
    217         return;
    218     }
    219 
    220     // init the variables
    221     if (m_length < sc_bufSize)
    222     {
    223         h0=h3=h6=h9  = m_state[0];
    224         h1=h4=h7=h10 = m_state[1];
    225         h2=h5=h8=h11 = sc_const;
    226     }
    227     else
    228     {
    229         h0 = m_state[0];
    230         h1 = m_state[1];
    231         h2 = m_state[2];
    232         h3 = m_state[3];
    233         h4 = m_state[4];
    234         h5 = m_state[5];
    235         h6 = m_state[6];
    236         h7 = m_state[7];
    237         h8 = m_state[8];
    238         h9 = m_state[9];
    239         h10 = m_state[10];
    240         h11 = m_state[11];
    241     }
    242     m_length = length + m_length;
    243 
    244     // if we've got anything stuffed away, use it now
    245     if (m_remainder)
    246     {
    247         uint8 prefix = sc_bufSize-m_remainder;
    248         memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix);
    249         u.p64 = m_data;
    250         Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    251         Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    252         u.p8 = ((const uint8 *)message) + prefix;
    253         length -= prefix;
    254     }
    255     else
    256     {
    257         u.p8 = (const uint8 *)message;
    258     }
    259 
    260     // handle all whole blocks of sc_blockSize bytes
    261     end = u.p64 + (length/sc_blockSize)*sc_numVars;
    262     remainder = (uint8)(length-((const uint8 *)end-u.p8));
    263     if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0)
    264     {
    265         while (u.p64 < end)
    266         {
    267             Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    268 	    u.p64 += sc_numVars;
    269         }
    270     }
    271     else
    272     {
    273         while (u.p64 < end)
    274         {
    275             memcpy(m_data, u.p8, sc_blockSize);
    276             Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    277 	    u.p64 += sc_numVars;
    278         }
    279     }
    280 
    281     // stuff away the last few bytes
    282     m_remainder = remainder;
    283     memcpy(m_data, end, remainder);
    284 
    285     // stuff away the variables
    286     m_state[0] = h0;
    287     m_state[1] = h1;
    288     m_state[2] = h2;
    289     m_state[3] = h3;
    290     m_state[4] = h4;
    291     m_state[5] = h5;
    292     m_state[6] = h6;
    293     m_state[7] = h7;
    294     m_state[8] = h8;
    295     m_state[9] = h9;
    296     m_state[10] = h10;
    297     m_state[11] = h11;
    298 }
    299 
    300 
    301 // report the hash for the concatenation of all message fragments so far
    302 void SpookyHash::Final(uint64 *hash1, uint64 *hash2)
    303 {
    304     // init the variables
    305     if (m_length < sc_bufSize)
    306     {
    307         Short( m_data, m_length, hash1, hash2);
    308         return;
    309     }
    310 
    311     const uint64 *data = (const uint64 *)m_data;
    312     uint8 remainder = m_remainder;
    313 
    314     uint64 h0 = m_state[0];
    315     uint64 h1 = m_state[1];
    316     uint64 h2 = m_state[2];
    317     uint64 h3 = m_state[3];
    318     uint64 h4 = m_state[4];
    319     uint64 h5 = m_state[5];
    320     uint64 h6 = m_state[6];
    321     uint64 h7 = m_state[7];
    322     uint64 h8 = m_state[8];
    323     uint64 h9 = m_state[9];
    324     uint64 h10 = m_state[10];
    325     uint64 h11 = m_state[11];
    326 
    327     if (remainder >= sc_blockSize)
    328     {
    329         // m_data can contain two blocks; handle any whole first block
    330         Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    331 	data += sc_numVars;
    332 	remainder -= sc_blockSize;
    333     }
    334 
    335     // mix in the last partial block, and the length mod sc_blockSize
    336     memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder));
    337 
    338     ((uint8 *)data)[sc_blockSize-1] = remainder;
    339     Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    340 
    341     // do some final mixing
    342     End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    343 
    344     *hash1 = h0;
    345     *hash2 = h1;
    346 }
    347 
    348