Home | History | Annotate | Download | only in gpu
      1 /* This sample demonstrates how to perform independent tasks
      2    on different GPUs */
      3 
      4 // Disable some warnings which are caused by CUDA headers
      5 #if defined(_MSC_VER)
      6 #pragma warning(disable: 4201 4408 4100)
      7 #endif
      8 
      9 #include <iostream>
     10 #include "cvconfig.h"
     11 #include "opencv2/core/core.hpp"
     12 #include "opencv2/cudaarithm.hpp"
     13 
     14 #ifdef HAVE_TBB
     15 #  include "tbb/tbb_stddef.h"
     16 #  if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
     17 #    include "tbb/tbb.h"
     18 #    include "tbb/task.h"
     19 #    undef min
     20 #    undef max
     21 #  else
     22 #    undef HAVE_TBB
     23 #  endif
     24 #endif
     25 
     26 #if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)
     27 
     28 int main()
     29 {
     30 #if !defined(HAVE_CUDA)
     31     std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
     32 #endif
     33 
     34 #if !defined(HAVE_TBB)
     35     std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
     36 #endif
     37 
     38 #if defined(__arm__)
     39     std::cout << "Unsupported for ARM CUDA library." << std::endl;
     40 #endif
     41 
     42     return 0;
     43 }
     44 
     45 #else
     46 
     47 #include <cuda.h>
     48 #include <cuda_runtime.h>
     49 
     50 using namespace std;
     51 using namespace cv;
     52 using namespace cv::cuda;
     53 
     54 struct Worker { void operator()(int device_id) const; };
     55 void destroyContexts();
     56 
     57 #define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__)
     58 inline void safeCall_(int code, const char* expr, const char* file, int line)
     59 {
     60     if (code != CUDA_SUCCESS)
     61     {
     62         std::cout << "CUDA driver API error: code " << code << ", expr " << expr
     63             << ", file " << file << ", line " << line << endl;
     64         destroyContexts();
     65         exit(-1);
     66     }
     67 }
     68 
     69 // Each GPU is associated with its own context
     70 CUcontext contexts[2];
     71 
     72 int main()
     73 {
     74     int num_devices = getCudaEnabledDeviceCount();
     75     if (num_devices < 2)
     76     {
     77         std::cout << "Two or more GPUs are required\n";
     78         return -1;
     79     }
     80 
     81     for (int i = 0; i < num_devices; ++i)
     82     {
     83         cv::cuda::printShortCudaDeviceInfo(i);
     84 
     85         DeviceInfo dev_info(i);
     86         if (!dev_info.isCompatible())
     87         {
     88             std::cout << "CUDA module isn't built for GPU #" << i << " ("
     89                  << dev_info.name() << ", CC " << dev_info.majorVersion()
     90                  << dev_info.minorVersion() << "\n";
     91             return -1;
     92         }
     93     }
     94 
     95     // Init CUDA Driver API
     96     safeCall(cuInit(0));
     97 
     98     // Create context for GPU #0
     99     CUdevice device;
    100     safeCall(cuDeviceGet(&device, 0));
    101     safeCall(cuCtxCreate(&contexts[0], 0, device));
    102 
    103     CUcontext prev_context;
    104     safeCall(cuCtxPopCurrent(&prev_context));
    105 
    106     // Create context for GPU #1
    107     safeCall(cuDeviceGet(&device, 1));
    108     safeCall(cuCtxCreate(&contexts[1], 0, device));
    109 
    110     safeCall(cuCtxPopCurrent(&prev_context));
    111 
    112     // Execute calculation in two threads using two GPUs
    113     int devices[] = {0, 1};
    114     tbb::parallel_do(devices, devices + 2, Worker());
    115 
    116     destroyContexts();
    117     return 0;
    118 }
    119 
    120 
    121 void Worker::operator()(int device_id) const
    122 {
    123     // Set the proper context
    124     safeCall(cuCtxPushCurrent(contexts[device_id]));
    125 
    126     Mat src(1000, 1000, CV_32F);
    127     Mat dst;
    128 
    129     RNG rng(0);
    130     rng.fill(src, RNG::UNIFORM, 0, 1);
    131 
    132     // CPU works
    133     cv::transpose(src, dst);
    134 
    135     // GPU works
    136     GpuMat d_src(src);
    137     GpuMat d_dst;
    138     cuda::transpose(d_src, d_dst);
    139 
    140     // Check results
    141     bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
    142     std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
    143         << (passed ? "passed" : "FAILED") << endl;
    144 
    145     // Deallocate data here, otherwise deallocation will be performed
    146     // after context is extracted from the stack
    147     d_src.release();
    148     d_dst.release();
    149 
    150     CUcontext prev_context;
    151     safeCall(cuCtxPopCurrent(&prev_context));
    152 }
    153 
    154 
    155 void destroyContexts()
    156 {
    157     safeCall(cuCtxDestroy(contexts[0]));
    158     safeCall(cuCtxDestroy(contexts[1]));
    159 }
    160 
    161 #endif
    162