1 #include <iomanip> 2 #include <stdexcept> 3 #include <string> 4 #include "performance.h" 5 #include "opencv2/core/cuda.hpp" 6 7 using namespace std; 8 using namespace cv; 9 using namespace cv::cuda; 10 11 void TestSystem::run() 12 { 13 if (is_list_mode_) 14 { 15 for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it) 16 cout << (*it)->name() << endl; 17 18 return; 19 } 20 21 // Run test initializers 22 for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it) 23 { 24 if ((*it)->name().find(test_filter_, 0) != string::npos) 25 (*it)->run(); 26 } 27 28 printHeading(); 29 30 // Run tests 31 for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it) 32 { 33 try 34 { 35 if ((*it)->name().find(test_filter_, 0) != string::npos) 36 { 37 cout << endl << (*it)->name() << ":\n"; 38 (*it)->run(); 39 finishCurrentSubtest(); 40 } 41 } 42 catch (const Exception&) 43 { 44 // Message is printed via callback 45 resetCurrentSubtest(); 46 } 47 catch (const runtime_error& e) 48 { 49 printError(e.what()); 50 resetCurrentSubtest(); 51 } 52 } 53 54 printSummary(); 55 } 56 57 58 void TestSystem::finishCurrentSubtest() 59 { 60 if (cur_subtest_is_empty_) 61 // There is no need to print subtest statistics 62 return; 63 64 double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; 65 double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0; 66 67 double speedup = static_cast<double>(cpu_elapsed_) / std::max(1.0, gpu_elapsed_); 68 speedup_total_ += speedup; 69 70 printMetrics(cpu_time, gpu_time, speedup); 71 72 num_subtests_called_++; 73 resetCurrentSubtest(); 74 } 75 76 77 double TestSystem::meanTime(const vector<int64> &samples) 78 { 79 double sum = accumulate(samples.begin(), samples.end(), 0.); 80 if (samples.size() > 1) 81 return (sum - samples[0]) / (samples.size() - 1); 82 return sum; 83 } 84 85 86 void TestSystem::printHeading() 87 { 88 cout << endl; 89 cout << setiosflags(ios_base::left); 90 cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms" 91 << setw(14) << "SPEEDUP" 92 << "DESCRIPTION\n"; 93 cout << resetiosflags(ios_base::left); 94 } 95 96 97 void TestSystem::printSummary() 98 { 99 cout << setiosflags(ios_base::fixed); 100 cout << "\naverage GPU speedup: x" 101 << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_) 102 << endl; 103 cout << resetiosflags(ios_base::fixed); 104 } 105 106 107 void TestSystem::printMetrics(double cpu_time, double gpu_time, double speedup) 108 { 109 cout << TAB << setiosflags(ios_base::left); 110 stringstream stream; 111 112 stream << cpu_time; 113 cout << setw(10) << stream.str(); 114 115 stream.str(""); 116 stream << gpu_time; 117 cout << setw(10) << stream.str(); 118 119 stream.str(""); 120 stream << "x" << setprecision(3) << speedup; 121 cout << setw(14) << stream.str(); 122 123 cout << cur_subtest_description_.str(); 124 cout << resetiosflags(ios_base::left) << endl; 125 } 126 127 128 void TestSystem::printError(const std::string& msg) 129 { 130 cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl; 131 } 132 133 134 void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high) 135 { 136 mat.create(rows, cols, type); 137 RNG rng(0); 138 rng.fill(mat, RNG::UNIFORM, low, high); 139 } 140 141 142 string abspath(const string& relpath) 143 { 144 return TestSystem::instance().workingDir() + relpath; 145 } 146 147 148 static int cvErrorCallback(int /*status*/, const char* /*func_name*/, 149 const char* err_msg, const char* /*file_name*/, 150 int /*line*/, void* /*userdata*/) 151 { 152 TestSystem::instance().printError(err_msg); 153 return 0; 154 } 155 156 157 int main(int argc, const char* argv[]) 158 { 159 int num_devices = getCudaEnabledDeviceCount(); 160 if (num_devices == 0) 161 { 162 cerr << "No GPU found or the library was compiled without CUDA support"; 163 return -1; 164 } 165 166 redirectError(cvErrorCallback); 167 168 const char* keys = 169 "{ h help | | print help message }" 170 "{ f filter | | filter for test }" 171 "{ w workdir | | set working directory }" 172 "{ l list | | show all tests }" 173 "{ d device | 0 | device id }" 174 "{ i iters | 10 | iteration count }"; 175 176 CommandLineParser cmd(argc, argv, keys); 177 178 if (cmd.has("help") || !cmd.check()) 179 { 180 cmd.printMessage(); 181 cmd.printErrors(); 182 return 0; 183 } 184 185 186 int device = cmd.get<int>("device"); 187 if (device < 0 || device >= num_devices) 188 { 189 cerr << "Invalid device ID" << endl; 190 return -1; 191 } 192 DeviceInfo dev_info(device); 193 if (!dev_info.isCompatible()) 194 { 195 cerr << "CUDA module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl; 196 return -1; 197 } 198 setDevice(device); 199 printShortCudaDeviceInfo(device); 200 201 string filter = cmd.get<string>("filter"); 202 string workdir = cmd.get<string>("workdir"); 203 bool list = cmd.has("list"); 204 int iters = cmd.get<int>("iters"); 205 206 if (!filter.empty()) 207 TestSystem::instance().setTestFilter(filter); 208 209 if (!workdir.empty()) 210 { 211 if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\') 212 workdir += '/'; 213 214 TestSystem::instance().setWorkingDir(workdir); 215 } 216 217 if (list) 218 TestSystem::instance().setListMode(true); 219 220 TestSystem::instance().setNumIters(iters); 221 222 cout << "\nNote: the timings for GPU don't include data transfer" << endl; 223 224 TestSystem::instance().run(); 225 226 return 0; 227 } 228