1 //----------------------------------------------------------------------------- 2 // Keyset tests generate various sorts of difficult-to-hash keysets and compare 3 // the distribution and collision frequency of the hash results against an 4 // ideal random distribution 5 6 // The sanity checks are also in this cpp/h 7 8 #pragma once 9 10 #include "Types.h" 11 #include "Stats.h" 12 #include "Random.h" // for rand_p 13 14 #include <algorithm> // for std::swap 15 #include <assert.h> 16 17 //----------------------------------------------------------------------------- 18 // Sanity tests 19 20 bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose ); 21 bool SanityTest ( pfHash hash, const int hashbits ); 22 void AppendedZeroesTest ( pfHash hash, const int hashbits ); 23 24 //----------------------------------------------------------------------------- 25 // Keyset 'Combination' - all possible combinations of input blocks 26 27 template< typename hashtype > 28 void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, 29 uint32_t * blocks, int blockcount, 30 pfHash hash, std::vector<hashtype> & hashes ) 31 { 32 if(len == maxlen) return; 33 34 for(int i = 0; i < blockcount; i++) 35 { 36 key[len] = blocks[i]; 37 38 //if(len == maxlen-1) 39 { 40 hashtype h; 41 hash(key,(len+1) * sizeof(uint32_t),0,&h); 42 hashes.push_back(h); 43 } 44 45 //else 46 { 47 CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes); 48 } 49 } 50 } 51 52 template< typename hashtype > 53 bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram ) 54 { 55 printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount); 56 57 //---------- 58 59 std::vector<hashtype> hashes; 60 61 uint32_t * key = new uint32_t[maxlen]; 62 63 CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes); 64 65 delete [] key; 66 67 printf("%d keys\n",(int)hashes.size()); 68 69 //---------- 70 71 bool result = true; 72 73 result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram); 74 75 printf("\n"); 76 77 return result; 78 } 79 80 //---------------------------------------------------------------------------- 81 // Keyset 'Permutation' - given a set of 32-bit blocks, generate keys 82 // consisting of all possible permutations of those blocks 83 84 template< typename hashtype > 85 void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes ) 86 { 87 if(k == blockcount-1) 88 { 89 hashtype h; 90 91 hash(blocks,blockcount * sizeof(uint32_t),0,&h); 92 93 hashes.push_back(h); 94 95 return; 96 } 97 98 for(int i = k; i < blockcount; i++) 99 { 100 std::swap(blocks[k],blocks[i]); 101 102 PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes); 103 104 std::swap(blocks[k],blocks[i]); 105 } 106 } 107 108 template< typename hashtype > 109 bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram ) 110 { 111 printf("Keyset 'Permutation' - %d blocks - ",blockcount); 112 113 //---------- 114 115 std::vector<hashtype> hashes; 116 117 PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes); 118 119 printf("%d keys\n",(int)hashes.size()); 120 121 //---------- 122 123 bool result = true; 124 125 result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram); 126 127 printf("\n"); 128 129 return result; 130 } 131 132 //----------------------------------------------------------------------------- 133 // Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set 134 135 template < typename keytype, typename hashtype > 136 void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes ) 137 { 138 const int nbytes = sizeof(keytype); 139 const int nbits = nbytes * 8; 140 141 hashtype h; 142 143 for(int i = start; i < nbits; i++) 144 { 145 flipbit(&k,nbytes,i); 146 147 if(inclusive || (bitsleft == 1)) 148 { 149 hash(&k,sizeof(keytype),0,&h); 150 hashes.push_back(h); 151 } 152 153 if(bitsleft > 1) 154 { 155 SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes); 156 } 157 158 flipbit(&k,nbytes,i); 159 } 160 } 161 162 //---------- 163 164 template < int keybits, typename hashtype > 165 bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram ) 166 { 167 printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits); 168 169 typedef Blob<keybits> keytype; 170 171 std::vector<hashtype> hashes; 172 173 keytype k; 174 memset(&k,0,sizeof(k)); 175 176 if(inclusive) 177 { 178 hashtype h; 179 180 hash(&k,sizeof(keytype),0,&h); 181 182 hashes.push_back(h); 183 } 184 185 SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes); 186 187 printf("%d keys\n",(int)hashes.size()); 188 189 bool result = true; 190 191 result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram); 192 193 printf("\n"); 194 195 return result; 196 } 197 198 //----------------------------------------------------------------------------- 199 // Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate 200 // all possible keys with bits set in that window 201 202 template < typename keytype, typename hashtype > 203 bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram ) 204 { 205 const int keybits = sizeof(keytype) * 8; 206 const int keycount = 1 << windowbits; 207 208 std::vector<hashtype> hashes; 209 hashes.resize(keycount); 210 211 bool result = true; 212 213 int testcount = keybits; 214 215 printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount); 216 217 for(int j = 0; j <= testcount; j++) 218 { 219 int minbit = j; 220 221 keytype key; 222 223 for(int i = 0; i < keycount; i++) 224 { 225 key = i; 226 //key = key << minbit; 227 228 lrot(&key,sizeof(keytype),minbit); 229 230 hash(&key,sizeof(keytype),0,&hashes[i]); 231 } 232 233 printf("Window at %3d - ",j); 234 235 result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram); 236 237 //printf("\n"); 238 } 239 240 return result; 241 } 242 243 //----------------------------------------------------------------------------- 244 // Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M 245 // bytes. 246 247 // (This keyset type is designed to make MurmurHash2 fail) 248 249 template < typename hashtype > 250 bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram ) 251 { 252 printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount); 253 254 Rand r(483723); 255 256 std::vector<hashtype> hashes; 257 hashes.resize(keycount); 258 259 int keyLen = cycleLen * cycleReps; 260 261 uint8_t * cycle = new uint8_t[cycleLen + 16]; 262 uint8_t * key = new uint8_t[keyLen]; 263 264 //---------- 265 266 for(int i = 0; i < keycount; i++) 267 { 268 r.rand_p(cycle,cycleLen); 269 270 *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1); 271 272 for(int j = 0; j < keyLen; j++) 273 { 274 key[j] = cycle[j % cycleLen]; 275 } 276 277 hash(key,keyLen,0,&hashes[i]); 278 } 279 280 //---------- 281 282 bool result = true; 283 284 result &= TestHashList(hashes,true,true,drawDiagram); 285 printf("\n"); 286 287 delete [] cycle; 288 delete [] key; 289 290 return result; 291 } 292 293 //----------------------------------------------------------------------------- 294 // Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes 295 296 void TwoBytesKeygen ( int maxlen, KeyCallback & c ); 297 298 template < typename hashtype > 299 bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram ) 300 { 301 std::vector<hashtype> hashes; 302 303 HashCallback<hashtype> c(hash,hashes); 304 305 TwoBytesKeygen(maxlen,c); 306 307 bool result = true; 308 309 result &= TestHashList(hashes,true,true,drawDiagram); 310 printf("\n"); 311 312 return result; 313 } 314 315 //----------------------------------------------------------------------------- 316 // Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix", 317 // where "core" consists of all possible combinations of the given character 318 // set of length N. 319 320 template < typename hashtype > 321 bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram ) 322 { 323 const int prefixlen = (int)strlen(prefix); 324 const int suffixlen = (int)strlen(suffix); 325 const int corecount = (int)strlen(coreset); 326 327 const int keybytes = prefixlen + corelen + suffixlen; 328 const int keycount = (int)pow(double(corecount),double(corelen)); 329 330 printf("Keyset 'Text' - keys of form \"%s[",prefix); 331 for(int i = 0; i < corelen; i++) printf("X"); 332 printf("]%s\" - %d keys\n",suffix,keycount); 333 334 uint8_t * key = new uint8_t[keybytes+1]; 335 336 key[keybytes] = 0; 337 338 memcpy(key,prefix,prefixlen); 339 memcpy(key+prefixlen+corelen,suffix,suffixlen); 340 341 //---------- 342 343 std::vector<hashtype> hashes; 344 hashes.resize(keycount); 345 346 for(int i = 0; i < keycount; i++) 347 { 348 int t = i; 349 350 for(int j = 0; j < corelen; j++) 351 { 352 key[prefixlen+j] = coreset[t % corecount]; t /= corecount; 353 } 354 355 hash(key,keybytes,0,&hashes[i]); 356 } 357 358 //---------- 359 360 bool result = true; 361 362 result &= TestHashList(hashes,true,true,drawDiagram); 363 364 printf("\n"); 365 366 delete [] key; 367 368 return result; 369 } 370 371 //----------------------------------------------------------------------------- 372 // Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length 373 374 // We reuse one block of empty bytes, otherwise the RAM cost is enormous. 375 376 template < typename hashtype > 377 bool ZeroKeyTest ( pfHash hash, bool drawDiagram ) 378 { 379 int keycount = 64*1024; 380 381 printf("Keyset 'Zeroes' - %d keys\n",keycount); 382 383 unsigned char * nullblock = new unsigned char[keycount]; 384 memset(nullblock,0,keycount); 385 386 //---------- 387 388 std::vector<hashtype> hashes; 389 390 hashes.resize(keycount); 391 392 for(int i = 0; i < keycount; i++) 393 { 394 hash(nullblock,i,0,&hashes[i]); 395 } 396 397 bool result = true; 398 399 result &= TestHashList(hashes,true,true,drawDiagram); 400 401 printf("\n"); 402 403 delete [] nullblock; 404 405 return result; 406 } 407 408 //----------------------------------------------------------------------------- 409 // Keyset 'Seed' - hash "the quick brown fox..." using different seeds 410 411 template < typename hashtype > 412 bool SeedTest ( pfHash hash, int keycount, bool drawDiagram ) 413 { 414 printf("Keyset 'Seed' - %d keys\n",keycount); 415 416 const char * text = "The quick brown fox jumps over the lazy dog"; 417 const int len = (int)strlen(text); 418 419 //---------- 420 421 std::vector<hashtype> hashes; 422 423 hashes.resize(keycount); 424 425 for(int i = 0; i < keycount; i++) 426 { 427 hash(text,len,i,&hashes[i]); 428 } 429 430 bool result = true; 431 432 result &= TestHashList(hashes,true,true,drawDiagram); 433 434 printf("\n"); 435 436 return result; 437 } 438 439 //----------------------------------------------------------------------------- 440