1 /* This sample demonstrates the way you can perform independed tasks 2 on the different GPUs */ 3 4 // Disable some warnings which are caused with CUDA headers 5 #if defined(_MSC_VER) 6 #pragma warning(disable: 4201 4408 4100) 7 #endif 8 9 #include <iostream> 10 #include "cvconfig.h" 11 #include "opencv2/core/core.hpp" 12 #include "opencv2/cudaarithm.hpp" 13 14 #ifdef HAVE_TBB 15 # include "tbb/tbb_stddef.h" 16 # if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 17 # include "tbb/tbb.h" 18 # include "tbb/task.h" 19 # undef min 20 # undef max 21 # else 22 # undef HAVE_TBB 23 # endif 24 #endif 25 26 #if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__) 27 28 int main() 29 { 30 #if !defined(HAVE_CUDA) 31 std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n"; 32 #endif 33 34 #if !defined(HAVE_TBB) 35 std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n"; 36 #endif 37 38 #if defined(__arm__) 39 std::cout << "Unsupported for ARM CUDA library." << std::endl; 40 #endif 41 42 return 0; 43 } 44 45 #else 46 47 #include <cuda.h> 48 #include <cuda_runtime.h> 49 50 using namespace std; 51 using namespace cv; 52 using namespace cv::cuda; 53 54 struct Worker { void operator()(int device_id) const; }; 55 void destroyContexts(); 56 57 #define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__) 58 inline void safeCall_(int code, const char* expr, const char* file, int line) 59 { 60 if (code != CUDA_SUCCESS) 61 { 62 std::cout << "CUDA driver API error: code " << code << ", expr " << expr 63 << ", file " << file << ", line " << line << endl; 64 destroyContexts(); 65 exit(-1); 66 } 67 } 68 69 // Each GPU is associated with its own context 70 CUcontext contexts[2]; 71 72 int main() 73 { 74 int num_devices = getCudaEnabledDeviceCount(); 75 if (num_devices < 2) 76 { 77 std::cout << "Two or more GPUs are required\n"; 78 return -1; 79 } 80 81 for (int i = 0; i < num_devices; ++i) 82 { 83 cv::cuda::printShortCudaDeviceInfo(i); 84 85 DeviceInfo dev_info(i); 86 if (!dev_info.isCompatible()) 87 { 88 std::cout << "CUDA module isn't built for GPU #" << i << " (" 89 << dev_info.name() << ", CC " << dev_info.majorVersion() 90 << dev_info.minorVersion() << "\n"; 91 return -1; 92 } 93 } 94 95 // Init CUDA Driver API 96 safeCall(cuInit(0)); 97 98 // Create context for GPU #0 99 CUdevice device; 100 safeCall(cuDeviceGet(&device, 0)); 101 safeCall(cuCtxCreate(&contexts[0], 0, device)); 102 103 CUcontext prev_context; 104 safeCall(cuCtxPopCurrent(&prev_context)); 105 106 // Create context for GPU #1 107 safeCall(cuDeviceGet(&device, 1)); 108 safeCall(cuCtxCreate(&contexts[1], 0, device)); 109 110 safeCall(cuCtxPopCurrent(&prev_context)); 111 112 // Execute calculation in two threads using two GPUs 113 int devices[] = {0, 1}; 114 tbb::parallel_do(devices, devices + 2, Worker()); 115 116 destroyContexts(); 117 return 0; 118 } 119 120 121 void Worker::operator()(int device_id) const 122 { 123 // Set the proper context 124 safeCall(cuCtxPushCurrent(contexts[device_id])); 125 126 Mat src(1000, 1000, CV_32F); 127 Mat dst; 128 129 RNG rng(0); 130 rng.fill(src, RNG::UNIFORM, 0, 1); 131 132 // CPU works 133 cv::transpose(src, dst); 134 135 // GPU works 136 GpuMat d_src(src); 137 GpuMat d_dst; 138 cuda::transpose(d_src, d_dst); 139 140 // Check results 141 bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3; 142 std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): " 143 << (passed ? "passed" : "FAILED") << endl; 144 145 // Deallocate data here, otherwise deallocation will be performed 146 // after context is extracted from the stack 147 d_src.release(); 148 d_dst.release(); 149 150 CUcontext prev_context; 151 safeCall(cuCtxPopCurrent(&prev_context)); 152 } 153 154 155 void destroyContexts() 156 { 157 safeCall(cuCtxDestroy(contexts[0])); 158 safeCall(cuCtxDestroy(contexts[1])); 159 } 160 161 #endif 162