// OpenCV core module header: opencv2/core/cuda.hpp
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                          License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
     16 // Third party copyrights are property of their respective owners.
     17 //
     18 // Redistribution and use in source and binary forms, with or without modification,
     19 // are permitted provided that the following conditions are met:
     20 //
     21 //   * Redistribution's of source code must retain the above copyright notice,
     22 //     this list of conditions and the following disclaimer.
     23 //
     24 //   * Redistribution's in binary form must reproduce the above copyright notice,
     25 //     this list of conditions and the following disclaimer in the documentation
     26 //     and/or other materials provided with the distribution.
     27 //
     28 //   * The name of the copyright holders may not be used to endorse or promote products
     29 //     derived from this software without specific prior written permission.
     30 //
     31 // This software is provided by the copyright holders and contributors "as is" and
     32 // any express or implied warranties, including, but not limited to, the implied
     33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     34 // In no event shall the Intel Corporation or contributors be liable for any direct,
     35 // indirect, incidental, special, exemplary, or consequential damages
     36 // (including, but not limited to, procurement of substitute goods or services;
     37 // loss of use, data, or profits; or business interruption) however caused
     38 // and on any theory of liability, whether in contract, strict liability,
     39 // or tort (including negligence or otherwise) arising in any way out of
     40 // the use of this software, even if advised of the possibility of such damage.
     41 //
     42 //M*/
     43 
     44 #ifndef __OPENCV_CORE_CUDA_HPP__
     45 #define __OPENCV_CORE_CUDA_HPP__
     46 
     47 #ifndef __cplusplus
     48 #  error cuda.hpp header must be compiled as C++
     49 #endif
     50 
     51 #include "opencv2/core.hpp"
     52 #include "opencv2/core/cuda_types.hpp"
     53 
     54 /**
     55   @defgroup cuda CUDA-accelerated Computer Vision
     56   @{
     57     @defgroup cudacore Core part
     58     @{
      @defgroup cudacore_init Initialization and Information
     60       @defgroup cudacore_struct Data Structures
     61     @}
     62   @}
     63  */
     64 
     65 namespace cv { namespace cuda {
     66 
     67 //! @addtogroup cudacore_struct
     68 //! @{
     69 
     70 //===================================================================================
     71 // GpuMat
     72 //===================================================================================
     73 
/** @brief Base storage class for GPU memory with reference counting.

Its interface matches the Mat interface with the following limitations:

-   no arbitrary dimensions support (only 2D)
-   no functions that return references to their data (because references on GPU are not valid for
    CPU)
-   no expression templates technique support

Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.

@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.

@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely
on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
release function returns error if the CUDA context has been destroyed before.

@sa Mat
 */
class CV_EXPORTS GpuMat
{
public:
    //! Pluggable interface for custom device-memory allocation strategies.
    class CV_EXPORTS Allocator
    {
    public:
        virtual ~Allocator() {}

        // allocator must fill data, step and refcount fields
        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
        virtual void free(GpuMat* mat) = 0;
    };

    //! default allocator
    static Allocator* defaultAllocator();
    static void setDefaultAllocator(Allocator* allocator);

    //! default constructor
    explicit GpuMat(Allocator* allocator = defaultAllocator());

    //! constructs GpuMat of the specified size and type
    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());

    //! constructs GpuMat and fills it with the specified value s
    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());

    //! copy constructor
    GpuMat(const GpuMat& m);

    //! constructor for GpuMat headers pointing to user-allocated data
    //! (no reference counting is performed for such data; see refcount below)
    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

    //! creates a GpuMat header for a part of the bigger matrix
    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
    GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from host memory (Blocking call)
    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());

    //! destructor - calls release()
    ~GpuMat();

    //! assignment operator
    GpuMat& operator =(const GpuMat& m);

    //! allocates new GpuMat data unless the GpuMat already has specified size and type
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! decreases reference counter, deallocate the data when reference counter reaches 0
    void release();

    //! swaps with other smart pointer
    void swap(GpuMat& mat);

    //! performs upload of data from host memory to GpuMat (Blocking call)
    void upload(InputArray arr);

    //! performs upload of data from host memory to GpuMat (Non-Blocking call)
    void upload(InputArray arr, Stream& stream);

    //! performs download of data from device to host memory (Blocking call)
    void download(OutputArray dst) const;

    //! performs download of data from device to host memory (Non-Blocking call)
    void download(OutputArray dst, Stream& stream) const;

    //! returns deep copy of the GpuMat, i.e. the data is copied
    GpuMat clone() const;

    //! copies the GpuMat content to device memory (Blocking call)
    void copyTo(OutputArray dst) const;

    //! copies the GpuMat content to device memory (Non-Blocking call)
    void copyTo(OutputArray dst, Stream& stream) const;

    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
    void copyTo(OutputArray dst, InputArray mask) const;

    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
    void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;

    //! sets some of the GpuMat elements to s (Blocking call)
    GpuMat& setTo(Scalar s);

    //! sets some of the GpuMat elements to s (Non-Blocking call)
    GpuMat& setTo(Scalar s, Stream& stream);

    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask);

    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);

    //! converts GpuMat to another datatype (Blocking call)
    void convertTo(OutputArray dst, int rtype) const;

    //! converts GpuMat to another datatype (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;

    //! copies this GpuMat to m, optionally converting to the given type (-1 keeps the current type)
    void assignTo(GpuMat& m, int type=-1) const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    //! conversions to the device-side helper types, so a GpuMat can be passed directly to a kernel
    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    //! returns a new GpuMat header for the specified row
    GpuMat row(int y) const;

    //! returns a new GpuMat header for the specified column
    GpuMat col(int x) const;

    //! ... for the specified row span
    GpuMat rowRange(int startrow, int endrow) const;
    GpuMat rowRange(Range r) const;

    //! ... for the specified column span
    GpuMat colRange(int startcol, int endcol) const;
    GpuMat colRange(Range r) const;

    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
    GpuMat operator ()(Range rowRange, Range colRange) const;
    GpuMat operator ()(Rect roi) const;

    //! creates alternative GpuMat header for the same data, with different
    //! number of channels and/or different number of rows
    GpuMat reshape(int cn, int rows = 0) const;

    //! locates GpuMat header within a parent GpuMat
    void locateROI(Size& wholeSize, Point& ofs) const;

    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);

    //! returns true iff the GpuMat data is continuous
    //! (i.e. when there are no gaps between successive rows)
    bool isContinuous() const;

    //! returns element size in bytes
    size_t elemSize() const;

    //! returns the size of element channel in bytes
    size_t elemSize1() const;

    //! returns element type
    int type() const;

    //! returns element depth (the type of a single channel)
    int depth() const;

    //! returns number of channels
    int channels() const;

    //! returns step/elemSize1()
    size_t step1() const;

    //! returns GpuMat size : width == number of columns, height == number of rows
    Size size() const;

    //! returns true if GpuMat data is NULL
    bool empty() const;

    /*! includes several bit-fields:
    - the magic signature
    - continuity flag
    - depth
    - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    //! when GpuMat points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    const uchar* dataend;

    //! allocator
    Allocator* allocator;
};
    305 
/** @brief Creates a continuous matrix.

@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).

Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
 */
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);

/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.

@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.

The function does not reallocate memory if the matrix has proper attributes already.
 */
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);

//! BufferPool management (must be called before Stream creation)
CV_EXPORTS void setBufferPoolUsage(bool on);
CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
    333 
    334 //===================================================================================
    335 // HostMem
    336 //===================================================================================
    337 
/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.

Its interface is also Mat-like but with additional memory type parameters.

-   **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
    uploading/downloading data from/to GPU.
-   **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
    address space, if supported.
-   **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
    used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
    utilization.

@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
Pinned Memory APIs* document or *CUDA C Programming Guide*.
 */
class CV_EXPORTS HostMem
{
public:
    //! type of the host allocation (see the class description above)
    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };

    //! returns a MatAllocator that allocates host memory of the given type
    static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);

    explicit HostMem(AllocType alloc_type = PAGE_LOCKED);

    HostMem(const HostMem& m);

    HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
    HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);

    //! creates from host memory, copying the data
    explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);

    ~HostMem();

    HostMem& operator =(const HostMem& m);

    //! swaps with other smart pointer
    void swap(HostMem& b);

    //! returns deep copy of the matrix, i.e. the data is copied
    HostMem clone() const;

    //! allocates new matrix data unless the matrix already has specified size and type.
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! creates alternative HostMem header for the same data, with different
    //! number of channels and/or different number of rows
    HostMem reshape(int cn, int rows = 0) const;

    //! decrements reference counter and releases memory if needed.
    void release();

    //! returns matrix header with disabled reference counting for HostMem data.
    Mat createMatHeader() const;

    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
    for it.

    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
    eliminates an extra copy.
     */
    GpuMat createGpuMatHeader() const;

    // Please see cv::Mat for descriptions
    bool isContinuous() const;
    size_t elemSize() const;
    size_t elemSize1() const;
    int type() const;
    int depth() const;
    int channels() const;
    size_t step1() const;
    Size size() const;
    bool empty() const;

    // Please see cv::Mat for descriptions
    int flags;
    int rows, cols;
    size_t step;

    uchar* data;
    int* refcount;

    uchar* datastart;
    const uchar* dataend;

    //! memory type this object was allocated with
    AllocType alloc_type;
};
    427 
/** @brief Page-locks the memory of matrix and maps it for the device(s).

@param m Input matrix.
 */
CV_EXPORTS void registerPageLocked(Mat& m);

/** @brief Unmaps the memory of matrix and makes it pageable again.

@param m Input matrix.
 */
CV_EXPORTS void unregisterPageLocked(Mat& m);
    439 
    440 //===================================================================================
    441 // Stream
    442 //===================================================================================
    443 
/** @brief This class encapsulates a queue of asynchronous calls.

@note Currently, you may face problems if an operation is enqueued twice with different data. Some
functions use the constant GPU memory, and next call may update the memory before the previous one
has been finished. But calling different operations asynchronously is safe because each operation
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
also safe. :
 */
class CV_EXPORTS Stream
{
    // safe-bool idiom: allows "if (stream)" without enabling accidental comparisons
    typedef void (Stream::*bool_type)() const;
    void this_type_does_not_support_comparisons() const {}

public:
    //! callback signature for enqueueHostCallback
    typedef void (*StreamCallback)(int status, void* userData);

    //! creates a new asynchronous stream
    Stream();

    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
    */
    bool queryIfComplete() const;

    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
    */
    void waitForCompletion();

    /** @brief Makes a compute stream wait on an event.
    */
    void waitEvent(const Event& event);

    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
    completed.

    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
    serialized.
     */
    void enqueueHostCallback(StreamCallback callback, void* userData);

    //! return Stream object for default CUDA stream
    static Stream& Null();

    //! returns true if stream object is not default (!= 0)
    operator bool_type() const;

    class Impl;

private:
    Ptr<Impl> impl_;
    Stream(const Ptr<Impl>& impl);

    friend struct StreamAccessor;
    friend class BufferPool;
    friend class DefaultDeviceInitializer;
};
    501 
//! Wrapper for a CUDA event, used for synchronization and timing of stream operations.
class CV_EXPORTS Event
{
public:
    enum CreateFlags
    {
        DEFAULT        = 0x00,  /**< Default event flag */
        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DisableTiming must be set */
    };

    explicit Event(CreateFlags flags = DEFAULT);

    //! records an event
    void record(Stream& stream = Stream::Null());

    //! queries an event's status
    bool queryIfComplete() const;

    //! waits for an event to complete
    void waitForCompletion();

    //! computes the elapsed time between events
    static float elapsedTime(const Event& start, const Event& end);

    class Impl;

private:
    Ptr<Impl> impl_;

    friend struct EventAccessor;
};
    534 
    535 //! @} cudacore_struct
    536 
    537 //===================================================================================
    538 // Initialization & Info
    539 //===================================================================================
    540 
    541 //! @addtogroup cudacore_init
    542 //! @{
    543 
/** @brief Returns the number of installed CUDA-enabled devices.

Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
this function returns 0.
 */
CV_EXPORTS int getCudaEnabledDeviceCount();

/** @brief Sets a device and initializes it for the current thread.

@param device System index of a CUDA device starting with 0.

If the call of this function is omitted, a default device is initialized at the first CUDA usage.
 */
CV_EXPORTS void setDevice(int device);

/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
 */
CV_EXPORTS int getDevice();

/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.

Any subsequent API call to this device will reinitialize the device.
 */
CV_EXPORTS void resetDevice();
    569 
/** @brief Enumeration providing CUDA computing features.

Numeric values encode the compute capability (major * 10 + minor); named aliases map
specific hardware features to the first compute capability that supports them.
 */
enum FeatureSet
{
    FEATURE_SET_COMPUTE_10 = 10,
    FEATURE_SET_COMPUTE_11 = 11,
    FEATURE_SET_COMPUTE_12 = 12,
    FEATURE_SET_COMPUTE_13 = 13,
    FEATURE_SET_COMPUTE_20 = 20,
    FEATURE_SET_COMPUTE_21 = 21,
    FEATURE_SET_COMPUTE_30 = 30,
    FEATURE_SET_COMPUTE_32 = 32,
    FEATURE_SET_COMPUTE_35 = 35,
    FEATURE_SET_COMPUTE_50 = 50,

    GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
    SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
    NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};

//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
    594 
/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
built for.

According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
capability can always be compiled to binary code of greater or equal compute capability".
 */
class CV_EXPORTS TargetArchs
{
public:
    /** @brief The following method checks whether the module was built with the support of the given feature:

    @param feature_set Features to be checked. See :ocvcuda::FeatureSet.
     */
    static bool builtWith(FeatureSet feature_set);

    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
    code for the given architecture(s):

    @param major Major compute capability version.
    @param minor Minor compute capability version.
     */
    static bool has(int major, int minor);
    static bool hasPtx(int major, int minor);
    static bool hasBin(int major, int minor);

    //! same checks against a range of compute capabilities rather than an exact version
    static bool hasEqualOrLessPtx(int major, int minor);
    static bool hasEqualOrGreater(int major, int minor);
    static bool hasEqualOrGreaterPtx(int major, int minor);
    static bool hasEqualOrGreaterBin(int major, int minor);
};
    625 
/** @brief Class providing functionality for querying the specified GPU properties.
 */
class CV_EXPORTS DeviceInfo
{
public:
    //! creates DeviceInfo object for the current GPU
    DeviceInfo();

    /** @brief The constructors.

    @param device_id System index of the CUDA device starting with 0.

    Constructs the DeviceInfo object for the specified device. If device_id parameter is missed, it
    constructs an object for the current device.
     */
    DeviceInfo(int device_id);

    /** @brief Returns system index of the CUDA device starting with 0.
    */
    int deviceID() const;

    //! ASCII string identifying device
    const char* name() const;

    //! global memory available on device in bytes
    size_t totalGlobalMem() const;

    //! shared memory available per block in bytes
    size_t sharedMemPerBlock() const;

    //! 32-bit registers available per block
    int regsPerBlock() const;

    //! warp size in threads
    int warpSize() const;

    //! maximum pitch in bytes allowed by memory copies
    size_t memPitch() const;

    //! maximum number of threads per block
    int maxThreadsPerBlock() const;

    //! maximum size of each dimension of a block
    Vec3i maxThreadsDim() const;

    //! maximum size of each dimension of a grid
    Vec3i maxGridSize() const;

    //! clock frequency in kilohertz
    int clockRate() const;

    //! constant memory available on device in bytes
    size_t totalConstMem() const;

    //! major compute capability
    int majorVersion() const;

    //! minor compute capability
    int minorVersion() const;

    //! alignment requirement for textures
    size_t textureAlignment() const;

    //! pitch alignment requirement for texture references bound to pitched memory
    size_t texturePitchAlignment() const;

    //! number of multiprocessors on device
    int multiProcessorCount() const;

    //! specifies whether there is a run time limit on kernels
    bool kernelExecTimeoutEnabled() const;

    //! device is integrated as opposed to discrete
    bool integrated() const;

    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
    bool canMapHostMemory() const;

    enum ComputeMode
    {
        ComputeModeDefault,         /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
    };

    //! compute mode
    ComputeMode computeMode() const;

    //! maximum 1D texture size
    int maxTexture1D() const;

    //! maximum 1D mipmapped texture size
    int maxTexture1DMipmap() const;

    //! maximum size for 1D textures bound to linear memory
    int maxTexture1DLinear() const;

    //! maximum 2D texture dimensions
    Vec2i maxTexture2D() const;

    //! maximum 2D mipmapped texture dimensions
    Vec2i maxTexture2DMipmap() const;

    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
    Vec3i maxTexture2DLinear() const;

    //! maximum 2D texture dimensions if texture gather operations have to be performed
    Vec2i maxTexture2DGather() const;

    //! maximum 3D texture dimensions
    Vec3i maxTexture3D() const;

    //! maximum Cubemap texture dimensions
    int maxTextureCubemap() const;

    //! maximum 1D layered texture dimensions
    Vec2i maxTexture1DLayered() const;

    //! maximum 2D layered texture dimensions
    Vec3i maxTexture2DLayered() const;

    //! maximum Cubemap layered texture dimensions
    Vec2i maxTextureCubemapLayered() const;

    //! maximum 1D surface size
    int maxSurface1D() const;

    //! maximum 2D surface dimensions
    Vec2i maxSurface2D() const;

    //! maximum 3D surface dimensions
    Vec3i maxSurface3D() const;

    //! maximum 1D layered surface dimensions
    Vec2i maxSurface1DLayered() const;

    //! maximum 2D layered surface dimensions
    Vec3i maxSurface2DLayered() const;

    //! maximum Cubemap surface dimensions
    int maxSurfaceCubemap() const;

    //! maximum Cubemap layered surface dimensions
    Vec2i maxSurfaceCubemapLayered() const;

    //! alignment requirements for surfaces
    size_t surfaceAlignment() const;

    //! device can possibly execute multiple kernels concurrently
    bool concurrentKernels() const;

    //! device has ECC support enabled
    bool ECCEnabled() const;

    //! PCI bus ID of the device
    int pciBusID() const;

    //! PCI device ID of the device
    int pciDeviceID() const;

    //! PCI domain ID of the device
    int pciDomainID() const;

    //! true if device is a Tesla device using TCC driver, false otherwise
    bool tccDriver() const;

    //! number of asynchronous engines
    int asyncEngineCount() const;

    //! device shares a unified address space with the host
    bool unifiedAddressing() const;

    //! peak memory clock frequency in kilohertz
    int memoryClockRate() const;

    //! global memory bus width in bits
    int memoryBusWidth() const;

    //! size of L2 cache in bytes
    int l2CacheSize() const;

    //! maximum resident threads per multiprocessor
    int maxThreadsPerMultiProcessor() const;

    //! gets free and total device memory
    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    size_t freeMemory() const;
    size_t totalMemory() const;

    /** @brief Provides information on CUDA feature support.

    @param feature_set Features to be checked. See cuda::FeatureSet.

    This function returns true if the device has the specified CUDA feature. Otherwise, it returns false
     */
    bool supports(FeatureSet feature_set) const;

    /** @brief Checks the CUDA module and device compatibility.

    This function returns true if the CUDA module can be run on the specified device. Otherwise, it
    returns false .
     */
    bool isCompatible() const;

private:
    //! system index of the device this object describes
    int device_id_;
};
    834 
//! prints detailed information about the given CUDA device
CV_EXPORTS void printCudaDeviceInfo(int device);
//! prints a short summary of information about the given CUDA device
CV_EXPORTS void printShortCudaDeviceInfo(int device);
    837 
    838 //! @} cudacore_init
    839 
    840 }} // namespace cv { namespace cuda {
    841 
    842 
    843 #include "opencv2/core/cuda.inl.hpp"
    844 
    845 #endif /* __OPENCV_CORE_CUDA_HPP__ */
    846