Home | History | Annotate | Download | only in src
      1 //-----------------------------------------------------------------------------
      2 // Differential collision & distribution tests - generate a bunch of random keys,
      3 // see what happens to the hash value when we flip a few bits of the key.
      4 
      5 #pragma once
      6 
      7 #include "Types.h"
      8 #include "Stats.h"      // for chooseUpToK
      9 #include "KeysetTest.h" // for SparseKeygenRecurse
     10 #include "Random.h"
     11 
     12 #include <vector>
     13 #include <algorithm>
     14 #include <stdio.h>
     15 
     16 //-----------------------------------------------------------------------------
     17 // Sort through the differentials, ignoring collisions that only occured once
     18 // (these could be false positives). If we find collisions of 3 or more, the
     19 // differential test fails.
     20 
     21 template < class keytype >
     22 bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
     23 {
     24   std::sort(diffs.begin(), diffs.end());
     25 
     26   int count = 1;
     27   int ignore = 0;
     28 
     29   bool result = true;
     30 
     31   if(diffs.size())
     32   {
     33     keytype kp = diffs[0];
     34 
     35     for(int i = 1; i < (int)diffs.size(); i++)
     36     {
     37       if(diffs[i] == kp)
     38       {
     39         count++;
     40         continue;
     41       }
     42       else
     43       {
     44         if(count > 1)
     45         {
     46           result = false;
     47 
     48           double pct = 100 * (double(count) / double(reps));
     49 
     50           if(dumpCollisions)
     51           {
     52             printbits((unsigned char*)&kp,sizeof(kp));
     53             printf(" - %4.2f%%\n", pct );
     54           }
     55         }
     56         else
     57         {
     58           ignore++;
     59         }
     60 
     61         kp = diffs[i];
     62         count = 1;
     63       }
     64     }
     65 
     66     if(count > 1)
     67     {
     68       double pct = 100 * (double(count) / double(reps));
     69 
     70       if(dumpCollisions)
     71       {
     72         printbits((unsigned char*)&kp,sizeof(kp));
     73         printf(" - %4.2f%%\n", pct );
     74       }
     75     }
     76     else
     77     {
     78       ignore++;
     79     }
     80   }
     81 
     82   printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
     83 
     84   if(result == false)
     85   {
     86     printf(" !!!!! ");
     87   }
     88 
     89   printf("\n");
     90   printf("\n");
     91 
     92   return result;
     93 }
     94 
     95 //-----------------------------------------------------------------------------
     96 // Check all possible keybits-choose-N differentials for collisions, report
     97 // ones that occur significantly more often than expected.
     98 
     99 // Random collisions can happen with probability 1 in 2^32 - if we do more than
    100 // 2^32 tests, we'll probably see some spurious random collisions, so don't report
    101 // them.
    102 
    103 template < typename keytype, typename hashtype >
    104 void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
    105 {
    106   const int bits = sizeof(keytype)*8;
    107 
    108   for(int i = start; i < bits; i++)
    109   {
    110     flipbit(&k2,sizeof(k2),i);
    111     bitsleft--;
    112 
    113     hash(&k2,sizeof(k2),0,&h2);
    114 
    115     if(h1 == h2)
    116     {
    117       diffs.push_back(k1 ^ k2);
    118     }
    119 
    120     if(bitsleft)
    121     {
    122       DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);
    123     }
    124 
    125     flipbit(&k2,sizeof(k2),i);
    126     bitsleft++;
    127   }
    128 }
    129 
    130 //----------
    131 
    132 template < typename keytype, typename hashtype >
    133 bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
    134 {
    135   const int keybits = sizeof(keytype) * 8;
    136   const int hashbits = sizeof(hashtype) * 8;
    137 
    138   double diffcount = chooseUpToK(keybits,diffbits);
    139   double testcount = (diffcount * double(reps));
    140   double expected  = testcount / pow(2.0,double(hashbits));
    141 
    142   Rand r(100);
    143 
    144   std::vector<keytype> diffs;
    145 
    146   keytype k1,k2;
    147   hashtype h1,h2;
    148 
    149   printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
    150   printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
    151 
    152   for(int i = 0; i < reps; i++)
    153   {
    154     if(i % (reps/10) == 0) printf(".");
    155 
    156     r.rand_p(&k1,sizeof(keytype));
    157     k2 = k1;
    158 
    159     hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
    160 
    161     DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
    162   }
    163   printf("\n");
    164 
    165   bool result = true;
    166 
    167   result &= ProcessDifferentials(diffs,reps,dumpCollisions);
    168 
    169   return result;
    170 }
    171 
    172 //-----------------------------------------------------------------------------
    173 // Differential distribution test - for each N-bit input differential, generate
    174 // a large set of differential key pairs, hash them, and test the output
    175 // differentials using our distribution test code.
    176 
    177 // This is a very hard test to pass - even if the hash values are well-distributed,
    178 // the differences between hash values may not be. It's also not entirely relevant
    179 // for testing hash functions, but it's still interesting.
    180 
    181 // This test is a _lot_ of work, as it's essentially a full keyset test for
    182 // each of a potentially huge number of input differentials. To speed things
    183 // along, we do only a few distribution tests per keyset instead of the full
    184 // grid.
    185 
    186 // #TODO - put diagram drawing back on
    187 
    188 template < typename keytype, typename hashtype >
    189 void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
    190 {
    191   std::vector<keytype>  keys(trials);
    192   std::vector<hashtype> A(trials),B(trials);
    193 
    194   for(int i = 0; i < trials; i++)
    195   {
    196     rand_p(&keys[i],sizeof(keytype));
    197 
    198     hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
    199   }
    200 
    201   //----------
    202 
    203   std::vector<keytype> diffs;
    204 
    205   keytype temp(0);
    206 
    207   SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
    208 
    209   //----------
    210 
    211   worst = 0;
    212   avg = 0;
    213 
    214   hashtype h2;
    215 
    216   for(size_t j = 0; j < diffs.size(); j++)
    217   {
    218     keytype & d = diffs[j];
    219 
    220     for(int i = 0; i < trials; i++)
    221     {
    222       keytype k2 = keys[i] ^ d;
    223 
    224       hash(&k2,sizeof(k2),0,&h2);
    225 
    226       B[i] = A[i] ^ h2;
    227     }
    228 
    229     double dworst,davg;
    230 
    231     TestDistributionFast(B,dworst,davg);
    232 
    233     avg += davg;
    234     worst = (dworst > worst) ? dworst : worst;
    235   }
    236 
    237   avg /= double(diffs.size());
    238 }
    239 
    240 //-----------------------------------------------------------------------------
    241 // Simpler differential-distribution test - for all 1-bit differentials,
    242 // generate random key pairs and run full distribution/collision tests on the
    243 // hash differentials
    244 
    245 template < typename keytype, typename hashtype >
    246 bool DiffDistTest2 ( pfHash hash  )
    247 {
    248   Rand r(857374);
    249 
    250   int keybits = sizeof(keytype) * 8;
    251   const int keycount = 256*256*32;
    252   keytype k;
    253 
    254   std::vector<hashtype> hashes(keycount);
    255   hashtype h1,h2;
    256 
    257   bool result = true;
    258 
    259   for(int keybit = 0; keybit < keybits; keybit++)
    260   {
    261     printf("Testing bit %d\n",keybit);
    262 
    263     for(int i = 0; i < keycount; i++)
    264     {
    265       r.rand_p(&k,sizeof(keytype));
    266 
    267       hash(&k,sizeof(keytype),0,&h1);
    268       flipbit(&k,sizeof(keytype),keybit);
    269       hash(&k,sizeof(keytype),0,&h2);
    270 
    271       hashes[i] = h1 ^ h2;
    272     }
    273 
    274     result &= TestHashList<hashtype>(hashes,true,true,true);
    275     printf("\n");
    276   }
    277 
    278   return result;
    279 }
    280 
    281 //----------------------------------------------------------------------------
    282