1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_ 18 #define LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_ 19 20 #include <assert.h> 21 #include <stdint.h> 22 #include <stdlib.h> 23 #include <string.h> // for memcpy and memset 24 #include <utility> 25 26 #ifndef NAMESPACE_FOR_HASH_FUNCTIONS 27 #define NAMESPACE_FOR_HASH_FUNCTIONS tc2farmhash 28 #endif 29 30 namespace NAMESPACE_FOR_HASH_FUNCTIONS { 31 32 #if defined(FARMHASH_UINT128_T_DEFINED) 33 inline uint64_t Uint128Low64(const uint128_t x) { 34 return static_cast<uint64_t>(x); 35 } 36 inline uint64_t Uint128High64(const uint128_t x) { 37 return static_cast<uint64_t>(x >> 64); 38 } 39 inline uint128_t Uint128(uint64_t lo, uint64_t hi) { 40 return lo + (((uint128_t)hi) << 64); 41 } 42 #else 43 typedef std::pair<uint64_t, uint64_t> uint128_t; 44 inline uint64_t Uint128Low64(const uint128_t x) { return x.first; } 45 inline uint64_t Uint128High64(const uint128_t x) { return x.second; } 46 inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); } 47 #endif 48 49 50 // BASIC STRING HASHING 51 52 // Hash function for a byte array. 53 // May change from time to time, may differ on different platforms, may differ 54 // depending on NDEBUG. 55 size_t Hash(const char* s, size_t len); 56 57 // Hash function for a byte array. Most useful in 32-bit binaries. 58 // May change from time to time, may differ on different platforms, may differ 59 // depending on NDEBUG. 60 uint32_t Hash32(const char* s, size_t len); 61 62 // Hash function for a byte array. For convenience, a 32-bit seed is also 63 // hashed into the result. 64 // May change from time to time, may differ on different platforms, may differ 65 // depending on NDEBUG. 66 uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed); 67 68 // Hash 128 input bits down to 64 bits of output. 69 // Hash function for a byte array. 70 // May change from time to time, may differ on different platforms, may differ 71 // depending on NDEBUG. 72 uint64_t Hash64(const char* s, size_t len); 73 74 // Hash function for a byte array. For convenience, a 64-bit seed is also 75 // hashed into the result. 76 // May change from time to time, may differ on different platforms, may differ 77 // depending on NDEBUG. 78 uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed); 79 80 // Hash function for a byte array. For convenience, two seeds are also 81 // hashed into the result. 82 // May change from time to time, may differ on different platforms, may differ 83 // depending on NDEBUG. 84 uint64_t Hash64WithSeeds(const char* s, size_t len, 85 uint64_t seed0, uint64_t seed1); 86 87 // Hash function for a byte array. 88 // May change from time to time, may differ on different platforms, may differ 89 // depending on NDEBUG. 90 uint128_t Hash128(const char* s, size_t len); 91 92 // Hash function for a byte array. For convenience, a 128-bit seed is also 93 // hashed into the result. 94 // May change from time to time, may differ on different platforms, may differ 95 // depending on NDEBUG. 96 uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed); 97 98 // BASIC NON-STRING HASHING 99 100 // This is intended to be a reasonably good hash function. 101 // May change from time to time, may differ on different platforms, may differ 102 // depending on NDEBUG. 103 inline uint64_t Hash128to64(uint128_t x) { 104 // Murmur-inspired hashing. 105 const uint64_t kMul = 0x9ddfea08eb382d69ULL; 106 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 107 a ^= (a >> 47); 108 uint64_t b = (Uint128High64(x) ^ a) * kMul; 109 b ^= (b >> 47); 110 b *= kMul; 111 return b; 112 } 113 114 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions) 115 116 // Fingerprint function for a byte array. Most useful in 32-bit binaries. 117 uint32_t Fingerprint32(const char* s, size_t len); 118 119 // Fingerprint function for a byte array. 120 uint64_t Fingerprint64(const char* s, size_t len); 121 122 // Fingerprint function for a byte array. 123 uint128_t Fingerprint128(const char* s, size_t len); 124 125 // This is intended to be a good fingerprinting primitive. 126 // See below for more overloads. 127 inline uint64_t Fingerprint(uint128_t x) { 128 // Murmur-inspired hashing. 129 const uint64_t kMul = 0x9ddfea08eb382d69ULL; 130 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 131 a ^= (a >> 47); 132 uint64_t b = (Uint128High64(x) ^ a) * kMul; 133 b ^= (b >> 44); 134 b *= kMul; 135 b ^= (b >> 41); 136 b *= kMul; 137 return b; 138 } 139 140 // This is intended to be a good fingerprinting primitive. 141 inline uint64_t Fingerprint(uint64_t x) { 142 // Murmur-inspired hashing. 143 const uint64_t kMul = 0x9ddfea08eb382d69ULL; 144 uint64_t b = x * kMul; 145 b ^= (b >> 44); 146 b *= kMul; 147 b ^= (b >> 41); 148 b *= kMul; 149 return b; 150 } 151 152 #ifndef FARMHASH_NO_CXX_STRING 153 154 // Convenience functions to hash or fingerprint C++ strings. 155 // These require that Str::data() return a pointer to the first char 156 // (as a const char*) and that Str::length() return the string's length; 157 // they work with std::string, for example. 158 159 // Hash function for a byte array. Most useful in 32-bit binaries. 160 // May change from time to time, may differ on different platforms, may differ 161 // depending on NDEBUG. 162 template <typename Str> 163 inline size_t Hash(const Str& s) { 164 assert(sizeof(s[0]) == 1); 165 return Hash(s.data(), s.length()); 166 } 167 168 // Hash function for a byte array. Most useful in 32-bit binaries. 169 // May change from time to time, may differ on different platforms, may differ 170 // depending on NDEBUG. 171 template <typename Str> 172 inline uint32_t Hash32(const Str& s) { 173 assert(sizeof(s[0]) == 1); 174 return Hash32(s.data(), s.length()); 175 } 176 177 // Hash function for a byte array. For convenience, a 32-bit seed is also 178 // hashed into the result. 179 // May change from time to time, may differ on different platforms, may differ 180 // depending on NDEBUG. 181 template <typename Str> 182 inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) { 183 assert(sizeof(s[0]) == 1); 184 return Hash32WithSeed(s.data(), s.length(), seed); 185 } 186 187 // Hash 128 input bits down to 64 bits of output. 188 // Hash function for a byte array. 189 // May change from time to time, may differ on different platforms, may differ 190 // depending on NDEBUG. 191 template <typename Str> 192 inline uint64_t Hash64(const Str& s) { 193 assert(sizeof(s[0]) == 1); 194 return Hash64(s.data(), s.length()); 195 } 196 197 // Hash function for a byte array. For convenience, a 64-bit seed is also 198 // hashed into the result. 199 // May change from time to time, may differ on different platforms, may differ 200 // depending on NDEBUG. 201 template <typename Str> 202 inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) { 203 assert(sizeof(s[0]) == 1); 204 return Hash64WithSeed(s.data(), s.length(), seed); 205 } 206 207 // Hash function for a byte array. For convenience, two seeds are also 208 // hashed into the result. 209 // May change from time to time, may differ on different platforms, may differ 210 // depending on NDEBUG. 211 template <typename Str> 212 inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) { 213 assert(sizeof(s[0]) == 1); 214 return Hash64WithSeeds(s.data(), s.length(), seed0, seed1); 215 } 216 217 // Hash function for a byte array. 218 // May change from time to time, may differ on different platforms, may differ 219 // depending on NDEBUG. 220 template <typename Str> 221 inline uint128_t Hash128(const Str& s) { 222 assert(sizeof(s[0]) == 1); 223 return Hash128(s.data(), s.length()); 224 } 225 226 // Hash function for a byte array. For convenience, a 128-bit seed is also 227 // hashed into the result. 228 // May change from time to time, may differ on different platforms, may differ 229 // depending on NDEBUG. 230 template <typename Str> 231 inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) { 232 assert(sizeof(s[0]) == 1); 233 return Hash128(s.data(), s.length(), seed); 234 } 235 236 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions) 237 238 // Fingerprint function for a byte array. Most useful in 32-bit binaries. 239 template <typename Str> 240 inline uint32_t Fingerprint32(const Str& s) { 241 assert(sizeof(s[0]) == 1); 242 return Fingerprint32(s.data(), s.length()); 243 } 244 245 // Fingerprint 128 input bits down to 64 bits of output. 246 // Fingerprint function for a byte array. 247 template <typename Str> 248 inline uint64_t Fingerprint64(const Str& s) { 249 assert(sizeof(s[0]) == 1); 250 return Fingerprint64(s.data(), s.length()); 251 } 252 253 // Fingerprint function for a byte array. 254 template <typename Str> 255 inline uint128_t Fingerprint128(const Str& s) { 256 assert(sizeof(s[0]) == 1); 257 return Fingerprint128(s.data(), s.length()); 258 } 259 260 #endif 261 262 } // namespace NAMESPACE_FOR_HASH_FUNCTIONS 263 264 #endif // LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_ 265