Home | History | Annotate | Download | only in src
      1 //-----------------------------------------------------------------------------
      2 // Keyset tests generate various sorts of difficult-to-hash keysets and compare
      3 // the distribution and collision frequency of the hash results against an
      4 // ideal random distribution
      5 
      6 // The sanity checks are also in this cpp/h
      7 
      8 #pragma once
      9 
     10 #include "Types.h"
     11 #include "Stats.h"
     12 #include "Random.h"   // for rand_p
     13 
     14 #include <algorithm>  // for std::swap
     15 #include <assert.h>
     16 
     17 //-----------------------------------------------------------------------------
     18 // Sanity tests
     19 
     20 bool VerificationTest   ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
     21 bool SanityTest         ( pfHash hash, const int hashbits );
     22 void AppendedZeroesTest ( pfHash hash, const int hashbits );
     23 
     24 //-----------------------------------------------------------------------------
     25 // Keyset 'Combination' - all possible combinations of input blocks
     26 
     27 template< typename hashtype >
     28 void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen,
     29                   uint32_t * blocks, int blockcount,
     30                 pfHash hash, std::vector<hashtype> & hashes )
     31 {
     32   if(len == maxlen) return;
     33 
     34   for(int i = 0; i < blockcount; i++)
     35   {
     36     key[len] = blocks[i];
     37 
     38     //if(len == maxlen-1)
     39     {
     40       hashtype h;
     41       hash(key,(len+1) * sizeof(uint32_t),0,&h);
     42       hashes.push_back(h);
     43     }
     44 
     45     //else
     46     {
     47       CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
     48     }
     49   }
     50 }
     51 
     52 template< typename hashtype >
     53 bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
     54 {
     55   printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
     56 
     57   //----------
     58 
     59   std::vector<hashtype> hashes;
     60 
     61   uint32_t * key = new uint32_t[maxlen];
     62 
     63   CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
     64 
     65   delete [] key;
     66 
     67   printf("%d keys\n",(int)hashes.size());
     68 
     69   //----------
     70 
     71   bool result = true;
     72 
     73   result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
     74 
     75   printf("\n");
     76 
     77   return result;
     78 }
     79 
     80 //----------------------------------------------------------------------------
     81 // Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
     82 // consisting of all possible permutations of those blocks
     83 
     84 template< typename hashtype >
     85 void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
     86 {
     87   if(k == blockcount-1)
     88   {
     89     hashtype h;
     90 
     91     hash(blocks,blockcount * sizeof(uint32_t),0,&h);
     92 
     93     hashes.push_back(h);
     94 
     95     return;
     96   }
     97 
     98   for(int i = k; i < blockcount; i++)
     99   {
    100     std::swap(blocks[k],blocks[i]);
    101 
    102     PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
    103 
    104     std::swap(blocks[k],blocks[i]);
    105   }
    106 }
    107 
    108 template< typename hashtype >
    109 bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
    110 {
    111   printf("Keyset 'Permutation' - %d blocks - ",blockcount);
    112 
    113   //----------
    114 
    115   std::vector<hashtype> hashes;
    116 
    117   PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
    118 
    119   printf("%d keys\n",(int)hashes.size());
    120 
    121   //----------
    122 
    123   bool result = true;
    124 
    125   result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
    126 
    127   printf("\n");
    128 
    129   return result;
    130 }
    131 
    132 //-----------------------------------------------------------------------------
    133 // Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
    134 
    135 template < typename keytype, typename hashtype >
    136 void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
    137 {
    138   const int nbytes = sizeof(keytype);
    139   const int nbits = nbytes * 8;
    140 
    141   hashtype h;
    142 
    143   for(int i = start; i < nbits; i++)
    144   {
    145     flipbit(&k,nbytes,i);
    146 
    147     if(inclusive || (bitsleft == 1))
    148     {
    149       hash(&k,sizeof(keytype),0,&h);
    150       hashes.push_back(h);
    151     }
    152 
    153     if(bitsleft > 1)
    154     {
    155       SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
    156     }
    157 
    158     flipbit(&k,nbytes,i);
    159   }
    160 }
    161 
    162 //----------
    163 
    164 template < int keybits, typename hashtype >
    165 bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
    166 {
    167   printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
    168 
    169   typedef Blob<keybits> keytype;
    170 
    171   std::vector<hashtype> hashes;
    172 
    173   keytype k;
    174   memset(&k,0,sizeof(k));
    175 
    176   if(inclusive)
    177   {
    178     hashtype h;
    179 
    180     hash(&k,sizeof(keytype),0,&h);
    181 
    182     hashes.push_back(h);
    183   }
    184 
    185   SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
    186 
    187   printf("%d keys\n",(int)hashes.size());
    188 
    189   bool result = true;
    190 
    191   result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
    192 
    193   printf("\n");
    194 
    195   return result;
    196 }
    197 
    198 //-----------------------------------------------------------------------------
    199 // Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
    200 // all possible keys with bits set in that window
    201 
    202 template < typename keytype, typename hashtype >
    203 bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
    204 {
    205   const int keybits = sizeof(keytype) * 8;
    206   const int keycount = 1 << windowbits;
    207 
    208   std::vector<hashtype> hashes;
    209   hashes.resize(keycount);
    210 
    211   bool result = true;
    212 
    213   int testcount = keybits;
    214 
    215   printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
    216 
    217   for(int j = 0; j <= testcount; j++)
    218   {
    219     int minbit = j;
    220 
    221     keytype key;
    222 
    223     for(int i = 0; i < keycount; i++)
    224     {
    225       key = i;
    226       //key = key << minbit;
    227 
    228       lrot(&key,sizeof(keytype),minbit);
    229 
    230       hash(&key,sizeof(keytype),0,&hashes[i]);
    231     }
    232 
    233     printf("Window at %3d - ",j);
    234 
    235     result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
    236 
    237     //printf("\n");
    238   }
    239 
    240   return result;
    241 }
    242 
    243 //-----------------------------------------------------------------------------
    244 // Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
    245 // bytes.
    246 
    247 // (This keyset type is designed to make MurmurHash2 fail)
    248 
    249 template < typename hashtype >
    250 bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
    251 {
    252   printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
    253 
    254   Rand r(483723);
    255 
    256   std::vector<hashtype> hashes;
    257   hashes.resize(keycount);
    258 
    259   int keyLen = cycleLen * cycleReps;
    260 
    261   uint8_t * cycle = new uint8_t[cycleLen + 16];
    262   uint8_t * key = new uint8_t[keyLen];
    263 
    264   //----------
    265 
    266   for(int i = 0; i < keycount; i++)
    267   {
    268     r.rand_p(cycle,cycleLen);
    269 
    270     *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
    271 
    272     for(int j = 0; j < keyLen; j++)
    273     {
    274       key[j] = cycle[j % cycleLen];
    275     }
    276 
    277     hash(key,keyLen,0,&hashes[i]);
    278   }
    279 
    280   //----------
    281 
    282   bool result = true;
    283 
    284   result &= TestHashList(hashes,true,true,drawDiagram);
    285   printf("\n");
    286 
    287   delete [] cycle;
    288   delete [] key;
    289 
    290   return result;
    291 }
    292 
    293 //-----------------------------------------------------------------------------
    294 // Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
    295 
    296 void TwoBytesKeygen ( int maxlen, KeyCallback & c );
    297 
    298 template < typename hashtype >
    299 bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
    300 {
    301   std::vector<hashtype> hashes;
    302 
    303   HashCallback<hashtype> c(hash,hashes);
    304 
    305   TwoBytesKeygen(maxlen,c);
    306 
    307   bool result = true;
    308 
    309   result &= TestHashList(hashes,true,true,drawDiagram);
    310   printf("\n");
    311 
    312   return result;
    313 }
    314 
    315 //-----------------------------------------------------------------------------
    316 // Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
    317 // where "core" consists of all possible combinations of the given character
    318 // set of length N.
    319 
    320 template < typename hashtype >
    321 bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
    322 {
    323   const int prefixlen = (int)strlen(prefix);
    324   const int suffixlen = (int)strlen(suffix);
    325   const int corecount = (int)strlen(coreset);
    326 
    327   const int keybytes = prefixlen + corelen + suffixlen;
    328   const int keycount = (int)pow(double(corecount),double(corelen));
    329 
    330   printf("Keyset 'Text' - keys of form \"%s[",prefix);
    331   for(int i = 0; i < corelen; i++) printf("X");
    332   printf("]%s\" - %d keys\n",suffix,keycount);
    333 
    334   uint8_t * key = new uint8_t[keybytes+1];
    335 
    336   key[keybytes] = 0;
    337 
    338   memcpy(key,prefix,prefixlen);
    339   memcpy(key+prefixlen+corelen,suffix,suffixlen);
    340 
    341   //----------
    342 
    343   std::vector<hashtype> hashes;
    344   hashes.resize(keycount);
    345 
    346   for(int i = 0; i < keycount; i++)
    347   {
    348     int t = i;
    349 
    350     for(int j = 0; j < corelen; j++)
    351     {
    352       key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
    353     }
    354 
    355     hash(key,keybytes,0,&hashes[i]);
    356   }
    357 
    358   //----------
    359 
    360   bool result = true;
    361 
    362   result &= TestHashList(hashes,true,true,drawDiagram);
    363 
    364   printf("\n");
    365 
    366   delete [] key;
    367 
    368   return result;
    369 }
    370 
    371 //-----------------------------------------------------------------------------
    372 // Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
    373 
    374 // We reuse one block of empty bytes, otherwise the RAM cost is enormous.
    375 
    376 template < typename hashtype >
    377 bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
    378 {
    379   int keycount = 64*1024;
    380 
    381   printf("Keyset 'Zeroes' - %d keys\n",keycount);
    382 
    383   unsigned char * nullblock = new unsigned char[keycount];
    384   memset(nullblock,0,keycount);
    385 
    386   //----------
    387 
    388   std::vector<hashtype> hashes;
    389 
    390   hashes.resize(keycount);
    391 
    392   for(int i = 0; i < keycount; i++)
    393   {
    394     hash(nullblock,i,0,&hashes[i]);
    395   }
    396 
    397   bool result = true;
    398 
    399   result &= TestHashList(hashes,true,true,drawDiagram);
    400 
    401   printf("\n");
    402 
    403   delete [] nullblock;
    404 
    405   return result;
    406 }
    407 
    408 //-----------------------------------------------------------------------------
    409 // Keyset 'Seed' - hash "the quick brown fox..." using different seeds
    410 
    411 template < typename hashtype >
    412 bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
    413 {
    414   printf("Keyset 'Seed' - %d keys\n",keycount);
    415 
    416   const char * text = "The quick brown fox jumps over the lazy dog";
    417   const int len = (int)strlen(text);
    418 
    419   //----------
    420 
    421   std::vector<hashtype> hashes;
    422 
    423   hashes.resize(keycount);
    424 
    425   for(int i = 0; i < keycount; i++)
    426   {
    427     hash(text,len,i,&hashes[i]);
    428   }
    429 
    430   bool result = true;
    431 
    432   result &= TestHashList(hashes,true,true,drawDiagram);
    433 
    434   printf("\n");
    435 
    436   return result;
    437 }
    438 
    439 //-----------------------------------------------------------------------------
    440