Home | History | Annotate | Download | only in hash
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_
     18 #define LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_
     19 
     20 #include <assert.h>
     21 #include <stdint.h>
     22 #include <stdlib.h>
     23 #include <string.h>   // for memcpy and memset
     24 #include <utility>
     25 
     26 #ifndef NAMESPACE_FOR_HASH_FUNCTIONS
     27 #define NAMESPACE_FOR_HASH_FUNCTIONS tc2farmhash
     28 #endif
     29 
     30 namespace NAMESPACE_FOR_HASH_FUNCTIONS {
     31 
     32 #if defined(FARMHASH_UINT128_T_DEFINED)
     33 inline uint64_t Uint128Low64(const uint128_t x) {
     34   return static_cast<uint64_t>(x);
     35 }
     36 inline uint64_t Uint128High64(const uint128_t x) {
     37   return static_cast<uint64_t>(x >> 64);
     38 }
     39 inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
     40   return lo + (((uint128_t)hi) << 64);
     41 }
     42 #else
     43 typedef std::pair<uint64_t, uint64_t> uint128_t;
     44 inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
     45 inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
     46 inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
     47 #endif
     48 
     49 
     50 // BASIC STRING HASHING
     51 
     52 // Hash function for a byte array.
     53 // May change from time to time, may differ on different platforms, may differ
     54 // depending on NDEBUG.
     55 size_t Hash(const char* s, size_t len);
     56 
     57 // Hash function for a byte array.  Most useful in 32-bit binaries.
     58 // May change from time to time, may differ on different platforms, may differ
     59 // depending on NDEBUG.
     60 uint32_t Hash32(const char* s, size_t len);
     61 
     62 // Hash function for a byte array.  For convenience, a 32-bit seed is also
     63 // hashed into the result.
     64 // May change from time to time, may differ on different platforms, may differ
     65 // depending on NDEBUG.
     66 uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);
     67 
     68 // Hash 128 input bits down to 64 bits of output.
     69 // Hash function for a byte array.
     70 // May change from time to time, may differ on different platforms, may differ
     71 // depending on NDEBUG.
     72 uint64_t Hash64(const char* s, size_t len);
     73 
     74 // Hash function for a byte array.  For convenience, a 64-bit seed is also
     75 // hashed into the result.
     76 // May change from time to time, may differ on different platforms, may differ
     77 // depending on NDEBUG.
     78 uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);
     79 
     80 // Hash function for a byte array.  For convenience, two seeds are also
     81 // hashed into the result.
     82 // May change from time to time, may differ on different platforms, may differ
     83 // depending on NDEBUG.
     84 uint64_t Hash64WithSeeds(const char* s, size_t len,
     85                        uint64_t seed0, uint64_t seed1);
     86 
     87 // Hash function for a byte array.
     88 // May change from time to time, may differ on different platforms, may differ
     89 // depending on NDEBUG.
     90 uint128_t Hash128(const char* s, size_t len);
     91 
     92 // Hash function for a byte array.  For convenience, a 128-bit seed is also
     93 // hashed into the result.
     94 // May change from time to time, may differ on different platforms, may differ
     95 // depending on NDEBUG.
     96 uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
     97 
     98 // BASIC NON-STRING HASHING
     99 
    100 // This is intended to be a reasonably good hash function.
    101 // May change from time to time, may differ on different platforms, may differ
    102 // depending on NDEBUG.
    103 inline uint64_t Hash128to64(uint128_t x) {
    104   // Murmur-inspired hashing.
    105   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
    106   uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
    107   a ^= (a >> 47);
    108   uint64_t b = (Uint128High64(x) ^ a) * kMul;
    109   b ^= (b >> 47);
    110   b *= kMul;
    111   return b;
    112 }
    113 
    114 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
    115 
    116 // Fingerprint function for a byte array.  Most useful in 32-bit binaries.
    117 uint32_t Fingerprint32(const char* s, size_t len);
    118 
    119 // Fingerprint function for a byte array.
    120 uint64_t Fingerprint64(const char* s, size_t len);
    121 
    122 // Fingerprint function for a byte array.
    123 uint128_t Fingerprint128(const char* s, size_t len);
    124 
    125 // This is intended to be a good fingerprinting primitive.
    126 // See below for more overloads.
    127 inline uint64_t Fingerprint(uint128_t x) {
    128   // Murmur-inspired hashing.
    129   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
    130   uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
    131   a ^= (a >> 47);
    132   uint64_t b = (Uint128High64(x) ^ a) * kMul;
    133   b ^= (b >> 44);
    134   b *= kMul;
    135   b ^= (b >> 41);
    136   b *= kMul;
    137   return b;
    138 }
    139 
    140 // This is intended to be a good fingerprinting primitive.
    141 inline uint64_t Fingerprint(uint64_t x) {
    142   // Murmur-inspired hashing.
    143   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
    144   uint64_t b = x * kMul;
    145   b ^= (b >> 44);
    146   b *= kMul;
    147   b ^= (b >> 41);
    148   b *= kMul;
    149   return b;
    150 }
    151 
    152 #ifndef FARMHASH_NO_CXX_STRING
    153 
    154 // Convenience functions to hash or fingerprint C++ strings.
    155 // These require that Str::data() return a pointer to the first char
    156 // (as a const char*) and that Str::length() return the string's length;
    157 // they work with std::string, for example.
    158 
    159 // Hash function for a byte array.  Most useful in 32-bit binaries.
    160 // May change from time to time, may differ on different platforms, may differ
    161 // depending on NDEBUG.
    162 template <typename Str>
    163 inline size_t Hash(const Str& s) {
    164   assert(sizeof(s[0]) == 1);
    165   return Hash(s.data(), s.length());
    166 }
    167 
    168 // Hash function for a byte array.  Most useful in 32-bit binaries.
    169 // May change from time to time, may differ on different platforms, may differ
    170 // depending on NDEBUG.
    171 template <typename Str>
    172 inline uint32_t Hash32(const Str& s) {
    173   assert(sizeof(s[0]) == 1);
    174   return Hash32(s.data(), s.length());
    175 }
    176 
    177 // Hash function for a byte array.  For convenience, a 32-bit seed is also
    178 // hashed into the result.
    179 // May change from time to time, may differ on different platforms, may differ
    180 // depending on NDEBUG.
    181 template <typename Str>
    182 inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
    183   assert(sizeof(s[0]) == 1);
    184   return Hash32WithSeed(s.data(), s.length(), seed);
    185 }
    186 
    187 // Hash 128 input bits down to 64 bits of output.
    188 // Hash function for a byte array.
    189 // May change from time to time, may differ on different platforms, may differ
    190 // depending on NDEBUG.
    191 template <typename Str>
    192 inline uint64_t Hash64(const Str& s) {
    193   assert(sizeof(s[0]) == 1);
    194   return Hash64(s.data(), s.length());
    195 }
    196 
    197 // Hash function for a byte array.  For convenience, a 64-bit seed is also
    198 // hashed into the result.
    199 // May change from time to time, may differ on different platforms, may differ
    200 // depending on NDEBUG.
    201 template <typename Str>
    202 inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
    203   assert(sizeof(s[0]) == 1);
    204   return Hash64WithSeed(s.data(), s.length(), seed);
    205 }
    206 
    207 // Hash function for a byte array.  For convenience, two seeds are also
    208 // hashed into the result.
    209 // May change from time to time, may differ on different platforms, may differ
    210 // depending on NDEBUG.
    211 template <typename Str>
    212 inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
    213   assert(sizeof(s[0]) == 1);
    214   return Hash64WithSeeds(s.data(), s.length(), seed0, seed1);
    215 }
    216 
    217 // Hash function for a byte array.
    218 // May change from time to time, may differ on different platforms, may differ
    219 // depending on NDEBUG.
    220 template <typename Str>
    221 inline uint128_t Hash128(const Str& s) {
    222   assert(sizeof(s[0]) == 1);
    223   return Hash128(s.data(), s.length());
    224 }
    225 
    226 // Hash function for a byte array.  For convenience, a 128-bit seed is also
    227 // hashed into the result.
    228 // May change from time to time, may differ on different platforms, may differ
    229 // depending on NDEBUG.
    230 template <typename Str>
    231 inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
    232   assert(sizeof(s[0]) == 1);
    233   return Hash128(s.data(), s.length(), seed);
    234 }
    235 
    236 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
    237 
    238 // Fingerprint function for a byte array.  Most useful in 32-bit binaries.
    239 template <typename Str>
    240 inline uint32_t Fingerprint32(const Str& s) {
    241   assert(sizeof(s[0]) == 1);
    242   return Fingerprint32(s.data(), s.length());
    243 }
    244 
    245 // Fingerprint 128 input bits down to 64 bits of output.
    246 // Fingerprint function for a byte array.
    247 template <typename Str>
    248 inline uint64_t Fingerprint64(const Str& s) {
    249   assert(sizeof(s[0]) == 1);
    250   return Fingerprint64(s.data(), s.length());
    251 }
    252 
    253 // Fingerprint function for a byte array.
    254 template <typename Str>
    255 inline uint128_t Fingerprint128(const Str& s) {
    256   assert(sizeof(s[0]) == 1);
    257   return Fingerprint128(s.data(), s.length());
    258 }
    259 
    260 #endif
    261 
    262 }  // namespace NAMESPACE_FOR_HASH_FUNCTIONS
    263 
    264 #endif  // LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_
    265