Home | History | Annotate | Download | only in src
      1 #include "SpeedTest.h"
      2 
      3 #include "Random.h"
      4 
      5 #include <stdio.h>   // for printf
      6 #include <memory.h>  // for memset
      7 #include <math.h>    // for sqrt
      8 #include <algorithm> // for sort
      9 
     10 //-----------------------------------------------------------------------------
     11 // We view our timing values as a series of random variables V that has been
     12 // contaminated with occasional outliers due to cache misses, thread
     13 // preemption, etcetera. To filter out the outliers, we search for the largest
     14 // subset of V such that all its values are within three standard deviations
     15 // of the mean.
     16 
     17 double CalcMean ( std::vector<double> & v )
     18 {
     19   double mean = 0;
     20 
     21   for(int i = 0; i < (int)v.size(); i++)
     22   {
     23     mean += v[i];
     24   }
     25 
     26   mean /= double(v.size());
     27 
     28   return mean;
     29 }
     30 
     31 double CalcMean ( std::vector<double> & v, int a, int b )
     32 {
     33   double mean = 0;
     34 
     35   for(int i = a; i <= b; i++)
     36   {
     37     mean += v[i];
     38   }
     39 
     40   mean /= (b-a+1);
     41 
     42   return mean;
     43 }
     44 
     45 double CalcStdv ( std::vector<double> & v, int a, int b )
     46 {
     47   double mean = CalcMean(v,a,b);
     48 
     49   double stdv = 0;
     50 
     51   for(int i = a; i <= b; i++)
     52   {
     53     double x = v[i] - mean;
     54 
     55     stdv += x*x;
     56   }
     57 
     58   stdv = sqrt(stdv / (b-a+1));
     59 
     60   return stdv;
     61 }
     62 
     63 // Return true if the largest value in v[0,len) is more than three
     64 // standard deviations from the mean
     65 
     66 bool ContainsOutlier ( std::vector<double> & v, size_t len )
     67 {
     68   double mean = 0;
     69 
     70   for(size_t i = 0; i < len; i++)
     71   {
     72     mean += v[i];
     73   }
     74 
     75   mean /= double(len);
     76 
     77   double stdv = 0;
     78 
     79   for(size_t i = 0; i < len; i++)
     80   {
     81     double x = v[i] - mean;
     82     stdv += x*x;
     83   }
     84 
     85   stdv = sqrt(stdv / double(len));
     86 
     87   double cutoff = mean + stdv*3;
     88 
     89   return v[len-1] > cutoff;
     90 }
     91 
     92 // Do a binary search to find the largest subset of v that does not contain
     93 // outliers.
     94 
     95 void FilterOutliers ( std::vector<double> & v )
     96 {
     97   std::sort(v.begin(),v.end());
     98 
     99   size_t len = 0;
    100 
    101   for(size_t x = 0x40000000; x; x = x >> 1 )
    102   {
    103     if((len | x) >= v.size()) continue;
    104 
    105     if(!ContainsOutlier(v,len | x))
    106     {
    107       len |= x;
    108     }
    109   }
    110 
    111   v.resize(len);
    112 }
    113 
    114 // Iteratively tighten the set to find a subset that does not contain
    115 // outliers. I'm not positive this works correctly in all cases.
    116 
    117 void FilterOutliers2 ( std::vector<double> & v )
    118 {
    119   std::sort(v.begin(),v.end());
    120 
    121   int a = 0;
    122   int b = (int)(v.size() - 1);
    123 
    124   for(int i = 0; i < 10; i++)
    125   {
    126     //printf("%d %d\n",a,b);
    127 
    128     double mean = CalcMean(v,a,b);
    129     double stdv = CalcStdv(v,a,b);
    130 
    131     double cutA = mean - stdv*3;
    132     double cutB = mean + stdv*3;
    133 
    134     while((a < b) && (v[a] < cutA)) a++;
    135     while((b > a) && (v[b] > cutB)) b--;
    136   }
    137 
    138   std::vector<double> v2;
    139 
    140   v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);
    141 
    142   v.swap(v2);
    143 }
    144 
    145 //-----------------------------------------------------------------------------
    146 // We really want the rdtsc() calls to bracket the function call as tightly
    147 // as possible, but that's hard to do portably. We'll try and get as close as
    148 // possible by marking the function as NEVER_INLINE (to keep the optimizer from
    149 // moving it) and marking the timing variables as "volatile register".
    150 
    151 NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
    152 {
    153   volatile register int64_t begin,end;
    154 
    155   uint32_t temp[16];
    156 
    157   begin = rdtsc();
    158 
    159   hash(key,len,seed,temp);
    160 
    161   end = rdtsc();
    162 
    163   return end-begin;
    164 }
    165 
    166 //-----------------------------------------------------------------------------
    167 
    168 double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
    169 {
    170   Rand r(seed);
    171 
    172   uint8_t * buf = new uint8_t[blocksize + 512];
    173 
    174   uint64_t t1 = reinterpret_cast<uint64_t>(buf);
    175 
    176   t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
    177   t1 += align;
    178 
    179   uint8_t * block = reinterpret_cast<uint8_t*>(t1);
    180 
    181   r.rand_p(block,blocksize);
    182 
    183   //----------
    184 
    185   std::vector<double> times;
    186   times.reserve(trials);
    187 
    188   for(int itrial = 0; itrial < trials; itrial++)
    189   {
    190     r.rand_p(block,blocksize);
    191 
    192     double t = (double)timehash(hash,block,blocksize,itrial);
    193 
    194     if(t > 0) times.push_back(t);
    195   }
    196 
    197   //----------
    198 
    199   std::sort(times.begin(),times.end());
    200 
    201   FilterOutliers(times);
    202 
    203   delete [] buf;
    204 
    205   return CalcMean(times);
    206 }
    207 
    208 //-----------------------------------------------------------------------------
    209 // 256k blocks seem to give the best results.
    210 
    211 void BulkSpeedTest ( pfHash hash, uint32_t seed )
    212 {
    213   const int trials = 2999;
    214   const int blocksize = 256 * 1024;
    215 
    216   printf("Bulk speed test - %d-byte keys\n",blocksize);
    217 
    218   for(int align = 0; align < 8; align++)
    219   {
    220     double cycles = SpeedTest(hash,seed,trials,blocksize,align);
    221 
    222     double bestbpc = double(blocksize)/cycles;
    223 
    224     double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
    225     printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
    226   }
    227 }
    228 
    229 //-----------------------------------------------------------------------------
    230 
    231 void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /*outCycles*/ )
    232 {
    233   const int trials = 999999;
    234 
    235   if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
    236 
    237   double cycles = SpeedTest(hash,seed,trials,keysize,0);
    238 
    239   printf("%8.2f cycles/hash\n",cycles);
    240 }
    241 
    242 //-----------------------------------------------------------------------------
    243