Home | History | Annotate | Download | only in src
      1 // Copyright (c) 2011 Google, Inc.
      2 //
      3 // Permission is hereby granted, free of charge, to any person obtaining a copy
      4 // of this software and associated documentation files (the "Software"), to deal
      5 // in the Software without restriction, including without limitation the rights
      6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7 // copies of the Software, and to permit persons to whom the Software is
      8 // furnished to do so, subject to the following conditions:
      9 //
     10 // The above copyright notice and this permission notice shall be included in
     11 // all copies or substantial portions of the Software.
     12 //
     13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19 // THE SOFTWARE.
     20 //
     21 // CityHash, by Geoff Pike and Jyrki Alakuijala
     22 //
     23 // This file provides CityHash64() and related functions.
     24 //
     25 // It's probably possible to create even faster hash functions by
     26 // writing a program that systematically explores some of the space of
     27 // possible hash functions, by using SIMD instructions, or by
     28 // compromising on hash quality.
     29 
     30 #include "City.h"
     31 
     32 #include <algorithm>
     33 #include <string.h>  // for memcpy and memset
     34 
     35 using namespace std;
     36 
     37 static uint64 UNALIGNED_LOAD64(const char *p) {
     38   uint64 result;
     39   memcpy(&result, p, sizeof(result));
     40   return result;
     41 }
     42 
     43 static uint32 UNALIGNED_LOAD32(const char *p) {
     44   uint32 result;
     45   memcpy(&result, p, sizeof(result));
     46   return result;
     47 }
     48 
     49 #ifndef __BIG_ENDIAN__
     50 
     51 #define uint32_in_expected_order(x) (x)
     52 #define uint64_in_expected_order(x) (x)
     53 
     54 #else
     55 
     56 #ifdef _MSC_VER
     57 #include <stdlib.h>
     58 #define bswap_32(x) _byteswap_ulong(x)
     59 #define bswap_64(x) _byteswap_uint64(x)
     60 
     61 #elif defined(__APPLE__)
     62 // Mac OS X / Darwin features
     63 #include <libkern/OSByteOrder.h>
     64 #define bswap_32(x) OSSwapInt32(x)
     65 #define bswap_64(x) OSSwapInt64(x)
     66 
     67 #else
     68 #include <byteswap.h>
     69 #endif
     70 
     71 #define uint32_in_expected_order(x) (bswap_32(x))
     72 #define uint64_in_expected_order(x) (bswap_64(x))
     73 
     74 #endif  // __BIG_ENDIAN__
     75 
     76 #if !defined(LIKELY)
     77 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
     78 #define LIKELY(x) (__builtin_expect(!!(x), 1))
     79 #else
     80 #define LIKELY(x) (x)
     81 #endif
     82 #endif
     83 
     84 static uint64 Fetch64(const char *p) {
     85   return uint64_in_expected_order(UNALIGNED_LOAD64(p));
     86 }
     87 
     88 static uint32 Fetch32(const char *p) {
     89   return uint32_in_expected_order(UNALIGNED_LOAD32(p));
     90 }
     91 
     92 // Some primes between 2^63 and 2^64 for various uses.
     93 static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
     94 static const uint64 k1 = 0xb492b66fbe98f273ULL;
     95 static const uint64 k2 = 0x9ae16a3b2f90404fULL;
     96 static const uint64 k3 = 0xc949d7c7509e6557ULL;
     97 
     98 // Bitwise right rotate.  Normally this will compile to a single
     99 // instruction, especially if the shift is a manifest constant.
    100 static uint64 Rotate(uint64 val, int shift) {
    101   // Avoid shifting by 64: doing so yields an undefined result.
    102   return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
    103 }
    104 
    105 // Equivalent to Rotate(), but requires the second arg to be non-zero.
    106 // On x86-64, and probably others, it's possible for this to compile
    107 // to a single instruction if both args are already in registers.
    108 static uint64 RotateByAtLeast1(uint64 val, int shift) {
    109   return (val >> shift) | (val << (64 - shift));
    110 }
    111 
    112 static uint64 ShiftMix(uint64 val) {
    113   return val ^ (val >> 47);
    114 }
    115 
    116 static uint64 HashLen16(uint64 u, uint64 v) {
    117   return Hash128to64(uint128(u, v));
    118 }
    119 
    120 static uint64 HashLen0to16(const char *s, size_t len) {
    121   if (len > 8) {
    122     uint64 a = Fetch64(s);
    123     uint64 b = Fetch64(s + len - 8);
    124     return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
    125   }
    126   if (len >= 4) {
    127     uint64 a = Fetch32(s);
    128     return HashLen16(len + (a << 3), Fetch32(s + len - 4));
    129   }
    130   if (len > 0) {
    131     uint8 a = s[0];
    132     uint8 b = s[len >> 1];
    133     uint8 c = s[len - 1];
    134     uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
    135     uint32 z = len + (static_cast<uint32>(c) << 2);
    136     return ShiftMix(y * k2 ^ z * k3) * k2;
    137   }
    138   return k2;
    139 }
    140 
    141 // This probably works well for 16-byte strings as well, but it may be overkill
    142 // in that case.
    143 static uint64 HashLen17to32(const char *s, size_t len) {
    144   uint64 a = Fetch64(s) * k1;
    145   uint64 b = Fetch64(s + 8);
    146   uint64 c = Fetch64(s + len - 8) * k2;
    147   uint64 d = Fetch64(s + len - 16) * k0;
    148   return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
    149                    a + Rotate(b ^ k3, 20) - c + len);
    150 }
    151 
    152 // Return a 16-byte hash for 48 bytes.  Quick and dirty.
    153 // Callers do best to use "random-looking" values for a and b.
    154 static pair<uint64, uint64> WeakHashLen32WithSeeds(
    155     uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
    156   a += w;
    157   b = Rotate(b + a + z, 21);
    158   uint64 c = a;
    159   a += x;
    160   a += y;
    161   b += Rotate(a, 44);
    162   return make_pair(a + z, b + c);
    163 }
    164 
    165 // Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
    166 static pair<uint64, uint64> WeakHashLen32WithSeeds(
    167     const char* s, uint64 a, uint64 b) {
    168   return WeakHashLen32WithSeeds(Fetch64(s),
    169                                 Fetch64(s + 8),
    170                                 Fetch64(s + 16),
    171                                 Fetch64(s + 24),
    172                                 a,
    173                                 b);
    174 }
    175 
    176 // Return an 8-byte hash for 33 to 64 bytes.
    177 static uint64 HashLen33to64(const char *s, size_t len) {
    178   uint64 z = Fetch64(s + 24);
    179   uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
    180   uint64 b = Rotate(a + z, 52);
    181   uint64 c = Rotate(a, 37);
    182   a += Fetch64(s + 8);
    183   c += Rotate(a, 7);
    184   a += Fetch64(s + 16);
    185   uint64 vf = a + z;
    186   uint64 vs = b + Rotate(a, 31) + c;
    187   a = Fetch64(s + 16) + Fetch64(s + len - 32);
    188   z = Fetch64(s + len - 8);
    189   b = Rotate(a + z, 52);
    190   c = Rotate(a, 37);
    191   a += Fetch64(s + len - 24);
    192   c += Rotate(a, 7);
    193   a += Fetch64(s + len - 16);
    194   uint64 wf = a + z;
    195   uint64 ws = b + Rotate(a, 31) + c;
    196   uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
    197   return ShiftMix(r * k0 + vs) * k2;
    198 }
    199 
    200 uint64 CityHash64(const char *s, size_t len) {
    201   if (len <= 32) {
    202     if (len <= 16) {
    203       return HashLen0to16(s, len);
    204     } else {
    205       return HashLen17to32(s, len);
    206     }
    207   } else if (len <= 64) {
    208     return HashLen33to64(s, len);
    209   }
    210 
    211   // For strings over 64 bytes we hash the end first, and then as we
    212   // loop we keep 56 bytes of state: v, w, x, y, and z.
    213   uint64 x = Fetch64(s + len - 40);
    214   uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
    215   uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
    216   pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
    217   pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
    218   x = x * k1 + Fetch64(s);
    219 
    220   // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
    221   len = (len - 1) & ~static_cast<size_t>(63);
    222   do {
    223     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    224     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    225     x ^= w.second;
    226     y += v.first + Fetch64(s + 40);
    227     z = Rotate(z + w.first, 33) * k1;
    228     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    229     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    230     std::swap(z, x);
    231     s += 64;
    232     len -= 64;
    233   } while (len != 0);
    234   return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
    235                    HashLen16(v.second, w.second) + x);
    236 }
    237 
    238 uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
    239   return CityHash64WithSeeds(s, len, k2, seed);
    240 }
    241 
    242 uint64 CityHash64WithSeeds(const char *s, size_t len,
    243                            uint64 seed0, uint64 seed1) {
    244   return HashLen16(CityHash64(s, len) - seed0, seed1);
    245 }
    246 
    247 // A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
    248 // of any length representable in signed long.  Based on City and Murmur.
    249 static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
    250   uint64 a = Uint128Low64(seed);
    251   uint64 b = Uint128High64(seed);
    252   uint64 c = 0;
    253   uint64 d = 0;
    254   signed long l = len - 16;
    255   if (l <= 0) {  // len <= 16
    256     a = ShiftMix(a * k1) * k1;
    257     c = b * k1 + HashLen0to16(s, len);
    258     d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
    259   } else {  // len > 16
    260     c = HashLen16(Fetch64(s + len - 8) + k1, a);
    261     d = HashLen16(b + len, c + Fetch64(s + len - 16));
    262     a += d;
    263     do {
    264       a ^= ShiftMix(Fetch64(s) * k1) * k1;
    265       a *= k1;
    266       b ^= a;
    267       c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
    268       c *= k1;
    269       d ^= c;
    270       s += 16;
    271       l -= 16;
    272     } while (l > 0);
    273   }
    274   a = HashLen16(a, c);
    275   b = HashLen16(d, b);
    276   return uint128(a ^ b, HashLen16(b, a));
    277 }
    278 
    279 uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
    280   if (len < 128) {
    281     return CityMurmur(s, len, seed);
    282   }
    283 
    284   // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
    285   // v, w, x, y, and z.
    286   pair<uint64, uint64> v, w;
    287   uint64 x = Uint128Low64(seed);
    288   uint64 y = Uint128High64(seed);
    289   uint64 z = len * k1;
    290   v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
    291   v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
    292   w.first = Rotate(y + z, 35) * k1 + x;
    293   w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
    294 
    295   // This is the same inner loop as CityHash64(), manually unrolled.
    296   do {
    297     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    298     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    299     x ^= w.second;
    300     y += v.first + Fetch64(s + 40);
    301     z = Rotate(z + w.first, 33) * k1;
    302     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    303     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    304     std::swap(z, x);
    305     s += 64;
    306     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
    307     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
    308     x ^= w.second;
    309     y += v.first + Fetch64(s + 40);
    310     z = Rotate(z + w.first, 33) * k1;
    311     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
    312     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
    313     std::swap(z, x);
    314     s += 64;
    315     len -= 128;
    316   } while (LIKELY(len >= 128));
    317   x += Rotate(v.first + z, 49) * k0;
    318   z += Rotate(w.first, 37) * k0;
    319   // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
    320   for (size_t tail_done = 0; tail_done < len; ) {
    321     tail_done += 32;
    322     y = Rotate(x + y, 42) * k0 + v.second;
    323     w.first += Fetch64(s + len - tail_done + 16);
    324     x = x * k0 + w.first;
    325     z += w.second + Fetch64(s + len - tail_done);
    326     w.second += v.first;
    327     v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
    328   }
    329   // At this point our 56 bytes of state should contain more than
    330   // enough information for a strong 128-bit hash.  We use two
    331   // different 56-byte-to-8-byte hashes to get a 16-byte final result.
    332   x = HashLen16(x, v.first);
    333   y = HashLen16(y + z, w.first);
    334   return uint128(HashLen16(x + v.second, w.second) + y,
    335                  HashLen16(x + w.second, y + v.second));
    336 }
    337 
    338 uint128 CityHash128(const char *s, size_t len) {
    339   if (len >= 16) {
    340     return CityHash128WithSeed(s + 16,
    341                                len - 16,
    342                                uint128(Fetch64(s) ^ k3,
    343                                        Fetch64(s + 8)));
    344   } else if (len >= 8) {
    345     return CityHash128WithSeed(NULL,
    346                                0,
    347                                uint128(Fetch64(s) ^ (len * k0),
    348                                        Fetch64(s + len - 8) ^ k1));
    349   } else {
    350     return CityHash128WithSeed(s, len, uint128(k0, k1));
    351   }
    352 }
    353 
    354 #if defined(__SSE4_2__) && defined(__x86_64__)
    355 #include <nmmintrin.h>
    356 
    357 // Requires len >= 240.
    358 static void CityHashCrc256Long(const char *s, size_t len,
    359                                uint32 seed, uint64 *result) {
    360   uint64 a = Fetch64(s + 56) + k0;
    361   uint64 b = Fetch64(s + 96) + k0;
    362   uint64 c = result[0] = HashLen16(b, len);
    363   uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
    364   uint64 e = Fetch64(s + 184) + seed;
    365   uint64 f = seed;
    366   uint64 g = 0;
    367   uint64 h = 0;
    368   uint64 i = 0;
    369   uint64 j = 0;
    370   uint64 t = c + d;
    371 
    372   // 240 bytes of input per iter.
    373   size_t iters = len / 240;
    374   len -= iters * 240;
    375   do {
    376 #define CHUNK(multiplier, z)                                    \
    377     {                                                           \
    378       uint64 old_a = a;                                         \
    379       a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s);          \
    380       b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8);      \
    381       c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16);     \
    382       d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24);     \
    383       e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32);     \
    384       t = old_a;                                                \
    385     }                                                           \
    386     f = _mm_crc32_u64(f, a);                                    \
    387     g = _mm_crc32_u64(g, b);                                    \
    388     h = _mm_crc32_u64(h, c);                                    \
    389     i = _mm_crc32_u64(i, d);                                    \
    390     j = _mm_crc32_u64(j, e);                                    \
    391     s += 40
    392 
    393     CHUNK(1, 1); CHUNK(k0, 0);
    394     CHUNK(1, 1); CHUNK(k0, 0);
    395     CHUNK(1, 1); CHUNK(k0, 0);
    396   } while (--iters > 0);
    397 
    398   while (len >= 40) {
    399     CHUNK(k0, 0);
    400     len -= 40;
    401   }
    402   if (len > 0) {
    403     s = s + len - 40;
    404     CHUNK(k0, 0);
    405   }
    406   j += i << 32;
    407   a = HashLen16(a, j);
    408   h += g << 32;
    409   b += h;
    410   c = HashLen16(c, f) + i;
    411   d = HashLen16(d, e + result[0]);
    412   j += e;
    413   i += HashLen16(h, t);
    414   e = HashLen16(a, d) + j;
    415   f = HashLen16(b, c) + a;
    416   g = HashLen16(j, i) + c;
    417   result[0] = e + f + g + h;
    418   a = ShiftMix((a + g) * k0) * k0 + b;
    419   result[1] += a + result[0];
    420   a = ShiftMix(a * k0) * k0 + c;
    421   result[2] = a + result[1];
    422   a = ShiftMix((a + e) * k0) * k0;
    423   result[3] = a + result[2];
    424 }
    425 
    426 // Requires len < 240.
    427 static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
    428   char buf[240];
    429   memcpy(buf, s, len);
    430   memset(buf + len, 0, 240 - len);
    431   CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);
    432 }
    433 
    434 void CityHashCrc256(const char *s, size_t len, uint64 *result) {
    435   if (LIKELY(len >= 240)) {
    436     CityHashCrc256Long(s, len, 0, result);
    437   } else {
    438     CityHashCrc256Short(s, len, result);
    439   }
    440 }
    441 
    442 uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
    443   if (len <= 900) {
    444     return CityHash128WithSeed(s, len, seed);
    445   } else {
    446     uint64 result[4];
    447     CityHashCrc256(s, len, result);
    448     uint64 u = Uint128High64(seed) + result[0];
    449     uint64 v = Uint128Low64(seed) + result[1];
    450     return uint128(HashLen16(u, v + result[2]),
    451                    HashLen16(Rotate(v, 32), u * k0 + result[3]));
    452   }
    453 }
    454 
    455 uint128 CityHashCrc128(const char *s, size_t len) {
    456   if (len <= 900) {
    457     return CityHash128(s, len);
    458   } else {
    459     uint64 result[4];
    460     CityHashCrc256(s, len, result);
    461     return uint128(result[2], result[3]);
    462   }
    463 }
    464 
    465 #endif
    466