Home | History | Annotate | Download | only in performance
      1 #include <iomanip>
      2 #include <stdexcept>
      3 #include <string>
      4 #include "performance.h"
      5 #include "opencv2/core/cuda.hpp"
      6 
      7 using namespace std;
      8 using namespace cv;
      9 using namespace cv::cuda;
     10 
     11 void TestSystem::run()
     12 {
     13     if (is_list_mode_)
     14     {
     15         for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
     16             cout << (*it)->name() << endl;
     17 
     18         return;
     19     }
     20 
     21     // Run test initializers
     22     for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
     23     {
     24         if ((*it)->name().find(test_filter_, 0) != string::npos)
     25             (*it)->run();
     26     }
     27 
     28     printHeading();
     29 
     30     // Run tests
     31     for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
     32     {
     33         try
     34         {
     35             if ((*it)->name().find(test_filter_, 0) != string::npos)
     36             {
     37                 cout << endl << (*it)->name() << ":\n";
     38                 (*it)->run();
     39                 finishCurrentSubtest();
     40             }
     41         }
     42         catch (const Exception&)
     43         {
     44             // Message is printed via callback
     45             resetCurrentSubtest();
     46         }
     47         catch (const runtime_error& e)
     48         {
     49             printError(e.what());
     50             resetCurrentSubtest();
     51         }
     52     }
     53 
     54     printSummary();
     55 }
     56 
     57 
     58 void TestSystem::finishCurrentSubtest()
     59 {
     60     if (cur_subtest_is_empty_)
     61         // There is no need to print subtest statistics
     62         return;
     63 
     64     double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
     65     double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
     66 
     67     double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_);
     68     speedup_total_ += speedup;
     69 
     70     printMetrics(cpu_time, gpu_time, speedup);
     71 
     72     num_subtests_called_++;
     73     resetCurrentSubtest();
     74 }
     75 
     76 
     77 double TestSystem::meanTime(const vector<int64> &samples)
     78 {
     79     double sum = accumulate(samples.begin(), samples.end(), 0.);
     80     if (samples.size() > 1)
     81         return (sum - samples[0]) / (samples.size() - 1);
     82     return sum;
     83 }
     84 
     85 
     86 void TestSystem::printHeading()
     87 {
     88     cout << endl;
     89     cout << setiosflags(ios_base::left);
     90     cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
     91         << setw(14) << "SPEEDUP"
     92         << "DESCRIPTION\n";
     93     cout << resetiosflags(ios_base::left);
     94 }
     95 
     96 
     97 void TestSystem::printSummary()
     98 {
     99     cout << setiosflags(ios_base::fixed);
    100     cout << "\naverage GPU speedup: x"
    101         << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
    102         << endl;
    103     cout << resetiosflags(ios_base::fixed);
    104 }
    105 
    106 
    107 void TestSystem::printMetrics(double cpu_time, double gpu_time, double speedup)
    108 {
    109     cout << TAB << setiosflags(ios_base::left);
    110     stringstream stream;
    111 
    112     stream << cpu_time;
    113     cout << setw(10) << stream.str();
    114 
    115     stream.str("");
    116     stream << gpu_time;
    117     cout << setw(10) << stream.str();
    118 
    119     stream.str("");
    120     stream << "x" << setprecision(3) << speedup;
    121     cout << setw(14) << stream.str();
    122 
    123     cout << cur_subtest_description_.str();
    124     cout << resetiosflags(ios_base::left) << endl;
    125 }
    126 
    127 
    128 void TestSystem::printError(const std::string& msg)
    129 {
    130     cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
    131 }
    132 
    133 
    134 void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high)
    135 {
    136     mat.create(rows, cols, type);
    137     RNG rng(0);
    138     rng.fill(mat, RNG::UNIFORM, low, high);
    139 }
    140 
    141 
    142 string abspath(const string& relpath)
    143 {
    144     return TestSystem::instance().workingDir() + relpath;
    145 }
    146 
    147 
    148 static int cvErrorCallback(int /*status*/, const char* /*func_name*/,
    149                              const char* err_msg, const char* /*file_name*/,
    150                              int /*line*/, void* /*userdata*/)
    151 {
    152     TestSystem::instance().printError(err_msg);
    153     return 0;
    154 }
    155 
    156 
    157 int main(int argc, const char* argv[])
    158 {
    159     int num_devices = getCudaEnabledDeviceCount();
    160     if (num_devices == 0)
    161     {
    162         cerr << "No GPU found or the library was compiled without CUDA support";
    163         return -1;
    164     }
    165 
    166     redirectError(cvErrorCallback);
    167 
    168     const char* keys =
    169        "{ h  help    |       | print help message }"
    170        "{ f  filter  |       | filter for test }"
    171        "{ w  workdir |       | set working directory }"
    172        "{ l  list    |       | show all tests }"
    173        "{ d  device  | 0     | device id }"
    174        "{ i  iters   | 10    | iteration count }";
    175 
    176     CommandLineParser cmd(argc, argv, keys);
    177 
    178     if (cmd.has("help") || !cmd.check())
    179     {
    180         cmd.printMessage();
    181         cmd.printErrors();
    182         return 0;
    183     }
    184 
    185 
    186     int device = cmd.get<int>("device");
    187     if (device < 0 || device >= num_devices)
    188     {
    189         cerr << "Invalid device ID" << endl;
    190         return -1;
    191     }
    192     DeviceInfo dev_info(device);
    193     if (!dev_info.isCompatible())
    194     {
    195         cerr << "CUDA module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
    196         return -1;
    197     }
    198     setDevice(device);
    199     printShortCudaDeviceInfo(device);
    200 
    201     string filter = cmd.get<string>("filter");
    202     string workdir = cmd.get<string>("workdir");
    203     bool list = cmd.has("list");
    204     int iters = cmd.get<int>("iters");
    205 
    206     if (!filter.empty())
    207         TestSystem::instance().setTestFilter(filter);
    208 
    209     if (!workdir.empty())
    210     {
    211         if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\')
    212             workdir += '/';
    213 
    214         TestSystem::instance().setWorkingDir(workdir);
    215     }
    216 
    217     if (list)
    218         TestSystem::instance().setListMode(true);
    219 
    220     TestSystem::instance().setNumIters(iters);
    221 
    222     cout << "\nNote: the timings for GPU don't include data transfer" << endl;
    223 
    224     TestSystem::instance().run();
    225 
    226     return 0;
    227 }
    228