Home | History | Annotate | Download | only in bench
      1 // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
      2 // possible options:
      3 //    -DEIGEN_DONT_VECTORIZE
      4 //    -msse2
      5 
      6 // #define EIGEN_DEFAULT_TO_ROW_MAJOR
      7 #define _FLOAT
      8 
      9 #include <iostream>
     10 
     11 #include <Eigen/Core>
     12 #include "BenchTimer.h"
     13 
     14 // include the BLAS headers
     15 extern "C" {
     16 #include <cblas.h>
     17 }
     18 #include <string>
     19 
     20 #ifdef _FLOAT
     21 typedef float Scalar;
     22 #define CBLAS_GEMM cblas_sgemm
     23 #else
     24 typedef double Scalar;
     25 #define CBLAS_GEMM cblas_dgemm
     26 #endif
     27 
     28 
     29 typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
     30 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
     31 void check_product(int M, int N, int K);
     32 void check_product(void);
     33 
     34 int main(int argc, char *argv[])
     35 {
     36   // disable SSE exceptions
     37   #ifdef __GNUC__
     38   {
     39     int aux;
     40     asm(
     41     "stmxcsr   %[aux]           \n\t"
     42     "orl       $32832, %[aux]   \n\t"
     43     "ldmxcsr   %[aux]           \n\t"
     44     : : [aux] "m" (aux));
     45   }
     46   #endif
     47 
     48   int nbtries=1, nbloops=1, M, N, K;
     49 
     50   if (argc==2)
     51   {
     52     if (std::string(argv[1])=="check")
     53       check_product();
     54     else
     55       M = N = K = atoi(argv[1]);
     56   }
     57   else if ((argc==3) && (std::string(argv[1])=="auto"))
     58   {
     59     M = N = K = atoi(argv[2]);
     60     nbloops = 1000000000/(M*M*M);
     61     if (nbloops<1)
     62       nbloops = 1;
     63     nbtries = 6;
     64   }
     65   else if (argc==4)
     66   {
     67     M = N = K = atoi(argv[1]);
     68     nbloops = atoi(argv[2]);
     69     nbtries = atoi(argv[3]);
     70   }
     71   else if (argc==6)
     72   {
     73     M = atoi(argv[1]);
     74     N = atoi(argv[2]);
     75     K = atoi(argv[3]);
     76     nbloops = atoi(argv[4]);
     77     nbtries = atoi(argv[5]);
     78   }
     79   else
     80   {
     81     std::cout << "Usage: " << argv[0] << " size  \n";
     82     std::cout << "Usage: " << argv[0] << " auto size\n";
     83     std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
     84     std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
     85     std::cout << "Usage: " << argv[0] << " check\n";
     86     std::cout << "Options:\n";
     87     std::cout << "    size       unique size of the 2 matrices (integer)\n";
     88     std::cout << "    auto       automatically set the number of repetitions and tries\n";
     89     std::cout << "    nbloops    number of times the GEMM routines is executed\n";
     90     std::cout << "    nbtries    number of times the loop is benched (return the best try)\n";
     91     std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n";
     92     std::cout << "    check      check eigen product using cblas as a reference\n";
     93     exit(1);
     94   }
     95 
     96   double nbmad = double(M) * double(N) * double(K) * double(nbloops);
     97 
     98   if (!(std::string(argv[1])=="auto"))
     99     std::cout << M << " x " << N << " x " << K << "\n";
    100 
    101   Scalar alpha, beta;
    102   MyMatrix ma(M,K), mb(K,N), mc(M,N);
    103   ma = MyMatrix::Random(M,K);
    104   mb = MyMatrix::Random(K,N);
    105   mc = MyMatrix::Random(M,N);
    106 
    107   Eigen::BenchTimer timer;
    108 
    109   // we simply compute c += a*b, so:
    110   alpha = 1;
    111   beta = 1;
    112 
    113   // bench cblas
    114   // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
    115   if (!(std::string(argv[1])=="auto"))
    116   {
    117     timer.reset();
    118     for (uint k=0 ; k<nbtries ; ++k)
    119     {
    120         timer.start();
    121         for (uint j=0 ; j<nbloops ; ++j)
    122               #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
    123               CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
    124               #else
    125               CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
    126               #endif
    127         timer.stop();
    128     }
    129     if (!(std::string(argv[1])=="auto"))
    130       std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
    131     else
    132         std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
    133   }
    134 
    135   // clear
    136   ma = MyMatrix::Random(M,K);
    137   mb = MyMatrix::Random(K,N);
    138   mc = MyMatrix::Random(M,N);
    139 
    140   // eigen
    141 //   if (!(std::string(argv[1])=="auto"))
    142   {
    143       timer.reset();
    144       for (uint k=0 ; k<nbtries ; ++k)
    145       {
    146           timer.start();
    147           bench_eigengemm(mc, ma, mb, nbloops);
    148           timer.stop();
    149       }
    150       if (!(std::string(argv[1])=="auto"))
    151         std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
    152       else
    153         std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
    154   }
    155 
    156   std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
    157   std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
    158 
    159 
    160   return 0;
    161 }
    162 
    163 using namespace Eigen;
    164 
    165 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
    166 {
    167   for (uint j=0 ; j<nbloops ; ++j)
    168       mc.noalias() += ma * mb;
    169 }
    170 
    171 #define MYVERIFY(A,M) if (!(A)) { \
    172     std::cout << "FAIL: " << M << "\n"; \
    173   }
    174 void check_product(int M, int N, int K)
    175 {
    176   MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
    177   ma = MyMatrix::Random(M,K);
    178   mb = MyMatrix::Random(K,N);
    179   maT = ma.transpose();
    180   mbT = mb.transpose();
    181   mc = MyMatrix::Random(M,N);
    182 
    183   MyMatrix::Scalar eps = 1e-4;
    184 
    185   meigen = mref = mc;
    186   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
    187   meigen += ma * mb;
    188   MYVERIFY(meigen.isApprox(mref, eps),". * .");
    189 
    190   meigen = mref = mc;
    191   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
    192   meigen += maT.transpose() * mb;
    193   MYVERIFY(meigen.isApprox(mref, eps),"T * .");
    194 
    195   meigen = mref = mc;
    196   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
    197   meigen += (maT.transpose()) * (mbT.transpose());
    198   MYVERIFY(meigen.isApprox(mref, eps),"T * T");
    199 
    200   meigen = mref = mc;
    201   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
    202   meigen += ma * mbT.transpose();
    203   MYVERIFY(meigen.isApprox(mref, eps),". * T");
    204 }
    205 
    206 void check_product(void)
    207 {
    208   int M, N, K;
    209   for (uint i=0; i<1000; ++i)
    210   {
    211     M = internal::random<int>(1,64);
    212     N = internal::random<int>(1,768);
    213     K = internal::random<int>(1,768);
    214     M = (0 + M) * 1;
    215     std::cout << M << " x " << N << " x " << K << "\n";
    216     check_product(M, N, K);
    217   }
    218 }
    219 
    220