Home | History | Annotate | Download | only in bench
      1 
      2 #include <iostream>
      3 #include <Eigen/Core>
      4 #include <bench/BenchTimer.h>
      5 using namespace Eigen;
      6 
      7 #ifndef SIZE
      8 #define SIZE 50
      9 #endif
     10 
     11 #ifndef REPEAT
     12 #define REPEAT 10000
     13 #endif
     14 
     15 typedef float Scalar;
     16 
     17 __attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
     18 __attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
     19 __attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
     20 
     21 int main(int argc, char* argv[])
     22 {
     23     int size = SIZE * 8;
     24     int size2 = size * size;
     25     Scalar* a = internal::aligned_new<Scalar>(size2);
     26     Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
     27     Scalar* c = internal::aligned_new<Scalar>(size2);
     28 
     29     for (int i=0; i<size; ++i)
     30     {
     31         a[i] = b[i] = c[i] = 0;
     32     }
     33 
     34     BenchTimer timer;
     35 
     36     timer.reset();
     37     for (int k=0; k<10; ++k)
     38     {
     39         timer.start();
     40         benchVec(a, b, c, size2);
     41         timer.stop();
     42     }
     43     std::cout << timer.value() << "s  " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
     44     return 0;
     45     for (int innersize = size; innersize>2 ; --innersize)
     46     {
     47         if (size2%innersize==0)
     48         {
     49             int outersize = size2/innersize;
     50             MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
     51             MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
     52             MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
     53             timer.reset();
     54             for (int k=0; k<3; ++k)
     55             {
     56                 timer.start();
     57                 benchVec(ma, mb, mc);
     58                 timer.stop();
     59             }
     60             std::cout << innersize << " x " << outersize << "  " << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
     61         }
     62     }
     63 
     64     VectorXf va = Map<VectorXf>(a, size2);
     65     VectorXf vb = Map<VectorXf>(b, size2);
     66     VectorXf vc = Map<VectorXf>(c, size2);
     67     timer.reset();
     68     for (int k=0; k<3; ++k)
     69     {
     70         timer.start();
     71         benchVec(va, vb, vc);
     72         timer.stop();
     73     }
     74     std::cout << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
     75 
     76     return 0;
     77 }
     78 
     79 void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
     80 {
     81     for (int k=0; k<REPEAT; ++k)
     82         a = a + b;
     83 }
     84 
     85 void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
     86 {
     87     for (int k=0; k<REPEAT; ++k)
     88         a = a + b;
     89 }
     90 
     91 void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
     92 {
     93     typedef internal::packet_traits<Scalar>::type PacketScalar;
     94     const int PacketSize = internal::packet_traits<Scalar>::size;
     95     PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
     96     for (int k=0; k<REPEAT; ++k)
     97         for (int i=0; i<size; i+=PacketSize*8)
     98         {
     99 //             a0 = internal::pload(&a[i]);
    100 //             b0 = internal::pload(&b[i]);
    101 //             a1 = internal::pload(&a[i+1*PacketSize]);
    102 //             b1 = internal::pload(&b[i+1*PacketSize]);
    103 //             a2 = internal::pload(&a[i+2*PacketSize]);
    104 //             b2 = internal::pload(&b[i+2*PacketSize]);
    105 //             a3 = internal::pload(&a[i+3*PacketSize]);
    106 //             b3 = internal::pload(&b[i+3*PacketSize]);
    107 //             internal::pstore(&a[i], internal::padd(a0, b0));
    108 //             a0 = internal::pload(&a[i+4*PacketSize]);
    109 //             b0 = internal::pload(&b[i+4*PacketSize]);
    110 //
    111 //             internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
    112 //             a1 = internal::pload(&a[i+5*PacketSize]);
    113 //             b1 = internal::pload(&b[i+5*PacketSize]);
    114 //
    115 //             internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
    116 //             a2 = internal::pload(&a[i+6*PacketSize]);
    117 //             b2 = internal::pload(&b[i+6*PacketSize]);
    118 //
    119 //             internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
    120 //             a3 = internal::pload(&a[i+7*PacketSize]);
    121 //             b3 = internal::pload(&b[i+7*PacketSize]);
    122 //
    123 //             internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
    124 //             internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
    125 //             internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
    126 //             internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
    127 
    128             internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize])));
    129             internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize])));
    130             internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize])));
    131             internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize])));
    132             internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize])));
    133             internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize])));
    134         }
    135 }
    136