Home | History | Annotate | Download | only in cudalegacy
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #ifndef _ncv_hpp_
     44 #define _ncv_hpp_
     45 
     46 #include "opencv2/core/cvdef.h"
     47 
     48 #ifdef _WIN32
     49     #define WIN32_LEAN_AND_MEAN
     50 #endif
     51 
     52 #include <cuda_runtime.h>
     53 #include "opencv2/core/cvstd.hpp"
     54 #include "opencv2/core/utility.hpp"
     55 
     56 
     57 //==============================================================================
     58 //
     59 // Compile-time assert functionality
     60 //
     61 //==============================================================================
     62 
     63 //! @addtogroup cudalegacy
     64 //! @{
     65 
     66 /**
     67 * Compile-time assert namespace
     68 */
     69 namespace NcvCTprep
     70 {
     71     template <bool x>
     72     struct CT_ASSERT_FAILURE;
     73 
     74     template <>
     75     struct CT_ASSERT_FAILURE<true> {};
     76 
     77     template <int x>
     78     struct assertTest{};
     79 }
     80 
     81 
     82 #define NCV_CT_PREP_PASTE_AUX(a,b)      a##b                         ///< Concatenation indirection macro
     83 #define NCV_CT_PREP_PASTE(a,b)          NCV_CT_PREP_PASTE_AUX(a, b)  ///< Concatenation macro
     84 
     85 
     86 /**
     87 * Performs compile-time assertion of a condition on the file scope
     88 */
     89 #define NCV_CT_ASSERT(X) \
     90     typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \
     91     NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)
     92 
     93 
     94 
     95 //==============================================================================
     96 //
     97 // Alignment macros
     98 //
     99 //==============================================================================
    100 
    101 
    102 #if !defined(__align__) && !defined(__CUDACC__)
    103     #if defined(_WIN32) || defined(_WIN64)
    104         #define __align__(n)         __declspec(align(n))
    105     #elif defined(__unix__)
    106         #define __align__(n)         __attribute__((__aligned__(n)))
    107     #endif
    108 #endif
    109 
    110 
    111 //==============================================================================
    112 //
    113 // Integral and compound types of guaranteed size
    114 //
    115 //==============================================================================
    116 
    117 
    118 typedef               bool NcvBool;
    119 typedef          long long Ncv64s;
    120 
    121 #if defined(__APPLE__) && !defined(__CUDACC__)
    122     typedef uint64_t Ncv64u;
    123 #else
    124     typedef unsigned long long Ncv64u;
    125 #endif
    126 
    127 typedef                int Ncv32s;
    128 typedef       unsigned int Ncv32u;
    129 typedef              short Ncv16s;
    130 typedef     unsigned short Ncv16u;
    131 typedef        signed char Ncv8s;
    132 typedef      unsigned char Ncv8u;
    133 typedef              float Ncv32f;
    134 typedef             double Ncv64f;
    135 
    136 
    137 struct NcvRect8u
    138 {
    139     Ncv8u x;
    140     Ncv8u y;
    141     Ncv8u width;
    142     Ncv8u height;
    143     __host__ __device__ NcvRect8u() : x(0), y(0), width(0), height(0) {};
    144     __host__ __device__ NcvRect8u(Ncv8u x_, Ncv8u y_, Ncv8u width_, Ncv8u height_) : x(x_), y(y_), width(width_), height(height_) {}
    145 };
    146 
    147 
    148 struct NcvRect32s
    149 {
    150     Ncv32s x;          ///< x-coordinate of upper left corner.
    151     Ncv32s y;          ///< y-coordinate of upper left corner.
    152     Ncv32s width;      ///< Rectangle width.
    153     Ncv32s height;     ///< Rectangle height.
    154     __host__ __device__ NcvRect32s() : x(0), y(0), width(0), height(0) {};
    155     __host__ __device__ NcvRect32s(Ncv32s x_, Ncv32s y_, Ncv32s width_, Ncv32s height_)
    156         : x(x_), y(y_), width(width_), height(height_) {}
    157 };
    158 
    159 
    160 struct NcvRect32u
    161 {
    162     Ncv32u x;          ///< x-coordinate of upper left corner.
    163     Ncv32u y;          ///< y-coordinate of upper left corner.
    164     Ncv32u width;      ///< Rectangle width.
    165     Ncv32u height;     ///< Rectangle height.
    166     __host__ __device__ NcvRect32u() : x(0), y(0), width(0), height(0) {};
    167     __host__ __device__ NcvRect32u(Ncv32u x_, Ncv32u y_, Ncv32u width_, Ncv32u height_)
    168         : x(x_), y(y_), width(width_), height(height_) {}
    169 };
    170 
    171 
    172 struct NcvSize32s
    173 {
    174     Ncv32s width;  ///< Rectangle width.
    175     Ncv32s height; ///< Rectangle height.
    176     __host__ __device__ NcvSize32s() : width(0), height(0) {};
    177     __host__ __device__ NcvSize32s(Ncv32s width_, Ncv32s height_) : width(width_), height(height_) {}
    178 };
    179 
    180 
    181 struct NcvSize32u
    182 {
    183     Ncv32u width;  ///< Rectangle width.
    184     Ncv32u height; ///< Rectangle height.
    185     __host__ __device__ NcvSize32u() : width(0), height(0) {};
    186     __host__ __device__ NcvSize32u(Ncv32u width_, Ncv32u height_) : width(width_), height(height_) {}
    187     __host__ __device__ bool operator == (const NcvSize32u &another) const {return this->width == another.width && this->height == another.height;}
    188 };
    189 
    190 
    191 struct NcvPoint2D32s
    192 {
    193     Ncv32s x; ///< Point X.
    194     Ncv32s y; ///< Point Y.
    195     __host__ __device__ NcvPoint2D32s() : x(0), y(0) {};
    196     __host__ __device__ NcvPoint2D32s(Ncv32s x_, Ncv32s y_) : x(x_), y(y_) {}
    197 };
    198 
    199 
    200 struct NcvPoint2D32u
    201 {
    202     Ncv32u x; ///< Point X.
    203     Ncv32u y; ///< Point Y.
    204     __host__ __device__ NcvPoint2D32u() : x(0), y(0) {};
    205     __host__ __device__ NcvPoint2D32u(Ncv32u x_, Ncv32u y_) : x(x_), y(y_) {}
    206 };
    207 
    208 //! @cond IGNORED
    209 
    210 NCV_CT_ASSERT(sizeof(NcvBool) <= 4);
    211 NCV_CT_ASSERT(sizeof(Ncv64s) == 8);
    212 NCV_CT_ASSERT(sizeof(Ncv64u) == 8);
    213 NCV_CT_ASSERT(sizeof(Ncv32s) == 4);
    214 NCV_CT_ASSERT(sizeof(Ncv32u) == 4);
    215 NCV_CT_ASSERT(sizeof(Ncv16s) == 2);
    216 NCV_CT_ASSERT(sizeof(Ncv16u) == 2);
    217 NCV_CT_ASSERT(sizeof(Ncv8s) == 1);
    218 NCV_CT_ASSERT(sizeof(Ncv8u) == 1);
    219 NCV_CT_ASSERT(sizeof(Ncv32f) == 4);
    220 NCV_CT_ASSERT(sizeof(Ncv64f) == 8);
    221 NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));
    222 NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));
    223 NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));
    224 NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));
    225 NCV_CT_ASSERT(sizeof(NcvPoint2D32u) == 2 * sizeof(Ncv32u));
    226 
    227 //! @endcond
    228 
    229 //==============================================================================
    230 //
    231 // Persistent constants
    232 //
    233 //==============================================================================
    234 
    235 
    236 const Ncv32u K_WARP_SIZE = 32;
    237 const Ncv32u K_LOG2_WARP_SIZE = 5;
    238 
    239 
    240 //==============================================================================
    241 //
    242 // Error handling
    243 //
    244 //==============================================================================
    245 
    246 
    247 CV_EXPORTS void ncvDebugOutput(const cv::String &msg);
    248 
    249 
    250 typedef void NCVDebugOutputHandler(const cv::String &msg);
    251 
    252 
    253 CV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);
    254 
    255 
    256 #define ncvAssertPrintCheck(pred, msg) \
    257     do \
    258     { \
    259         if (!(pred)) \
    260         { \
    261             cv::String str = cv::format("NCV Assertion Failed: %s, file=%s, line=%d", msg, __FILE__, __LINE__); \
    262             ncvDebugOutput(str); \
    263         } \
    264     } while (0)
    265 
    266 
    267 #define ncvAssertPrintReturn(pred, msg, err) \
    268     do \
    269     { \
    270         ncvAssertPrintCheck(pred, msg); \
    271         if (!(pred)) return err; \
    272     } while (0)
    273 
    274 
    275 #define ncvAssertReturn(pred, err) \
    276     do \
    277     { \
    278         cv::String msg = cv::format("retcode=%d", (int)err); \
    279         ncvAssertPrintReturn(pred, msg.c_str(), err); \
    280     } while (0)
    281 
    282 
    283 #define ncvAssertReturnNcvStat(ncvOp) \
    284     do \
    285     { \
    286         NCVStatus _ncvStat = ncvOp; \
    287         cv::String msg = cv::format("NcvStat=%d", (int)_ncvStat); \
    288         ncvAssertPrintReturn(NCV_SUCCESS==_ncvStat, msg.c_str(), _ncvStat); \
    289     } while (0)
    290 
    291 
    292 #define ncvAssertCUDAReturn(cudacall, errCode) \
    293     do \
    294     { \
    295         cudaError_t res = cudacall; \
    296         cv::String msg = cv::format("cudaError_t=%d", (int)res); \
    297         ncvAssertPrintReturn(cudaSuccess==res, msg.c_str(), errCode); \
    298     } while (0)
    299 
    300 
    301 #define ncvAssertCUDALastErrorReturn(errCode) \
    302     do \
    303     { \
    304         cudaError_t res = cudaGetLastError(); \
    305         cv::String msg = cv::format("cudaError_t=%d", (int)res); \
    306         ncvAssertPrintReturn(cudaSuccess==res, msg.c_str(), errCode); \
    307     } while (0)
    308 
    309 
    310 /**
    311 * Return-codes for status notification, errors and warnings
    312 */
    313 enum
    314 {
    315     //NCV statuses
    316     NCV_SUCCESS,
    317     NCV_UNKNOWN_ERROR,
    318 
    319     NCV_CUDA_ERROR,
    320     NCV_NPP_ERROR,
    321     NCV_FILE_ERROR,
    322 
    323     NCV_NULL_PTR,
    324     NCV_INCONSISTENT_INPUT,
    325     NCV_TEXTURE_BIND_ERROR,
    326     NCV_DIMENSIONS_INVALID,
    327 
    328     NCV_INVALID_ROI,
    329     NCV_INVALID_STEP,
    330     NCV_INVALID_SCALE,
    331 
    332     NCV_ALLOCATOR_NOT_INITIALIZED,
    333     NCV_ALLOCATOR_BAD_ALLOC,
    334     NCV_ALLOCATOR_BAD_DEALLOC,
    335     NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,
    336     NCV_ALLOCATOR_DEALLOC_ORDER,
    337     NCV_ALLOCATOR_BAD_REUSE,
    338 
    339     NCV_MEM_COPY_ERROR,
    340     NCV_MEM_RESIDENCE_ERROR,
    341     NCV_MEM_INSUFFICIENT_CAPACITY,
    342 
    343     NCV_HAAR_INVALID_PIXEL_STEP,
    344     NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,
    345     NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,
    346     NCV_HAAR_TOO_LARGE_FEATURES,
    347     NCV_HAAR_XML_LOADING_EXCEPTION,
    348 
    349     NCV_NOIMPL_HAAR_TILTED_FEATURES,
    350     NCV_NOT_IMPLEMENTED,
    351 
    352     NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
    353 
    354     //NPP statuses
    355     NPPST_SUCCESS = NCV_SUCCESS,              ///< Successful operation (same as NPP_NO_ERROR)
    356     NPPST_ERROR,                              ///< Unknown error
    357     NPPST_CUDA_KERNEL_EXECUTION_ERROR,        ///< CUDA kernel execution error
    358     NPPST_NULL_POINTER_ERROR,                 ///< NULL pointer argument error
    359     NPPST_TEXTURE_BIND_ERROR,                 ///< CUDA texture binding error or non-zero offset returned
    360     NPPST_MEMCPY_ERROR,                       ///< CUDA memory copy error
    361     NPPST_MEM_ALLOC_ERR,                      ///< CUDA memory allocation error
    362     NPPST_MEMFREE_ERR,                        ///< CUDA memory deallocation error
    363 
    364     //NPPST statuses
    365     NPPST_INVALID_ROI,                        ///< Invalid region of interest argument
    366     NPPST_INVALID_STEP,                       ///< Invalid image lines step argument (check sign, alignment, relation to image width)
    367     NPPST_INVALID_SCALE,                      ///< Invalid scale parameter passed
    368     NPPST_MEM_INSUFFICIENT_BUFFER,            ///< Insufficient user-allocated buffer
    369     NPPST_MEM_RESIDENCE_ERROR,                ///< Memory residence error detected (check if pointers should be device or pinned)
    370     NPPST_MEM_INTERNAL_ERROR,                 ///< Internal memory management error
    371 
    372     NCV_LAST_STATUS                           ///< Marker to continue error numeration in other files
    373 };
    374 
    375 
    376 typedef Ncv32u NCVStatus;
    377 
    378 
    379 #define NCV_SET_SKIP_COND(x) \
    380     bool __ncv_skip_cond = x
    381 
    382 
    383 #define NCV_RESET_SKIP_COND(x) \
    384     __ncv_skip_cond = x
    385 
    386 
    387 #define NCV_SKIP_COND_BEGIN \
    388     if (!__ncv_skip_cond) {
    389 
    390 
    391 #define NCV_SKIP_COND_END \
    392     }
    393 
    394 
    395 //==============================================================================
    396 //
    397 // Timer
    398 //
    399 //==============================================================================
    400 
    401 
    402 typedef struct _NcvTimer *NcvTimer;
    403 
    404 CV_EXPORTS NcvTimer ncvStartTimer(void);
    405 
    406 CV_EXPORTS double ncvEndQueryTimerUs(NcvTimer t);
    407 
    408 CV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t);
    409 
    410 
    411 //==============================================================================
    412 //
    413 // Memory management classes template compound types
    414 //
    415 //==============================================================================
    416 
    417 
    418 /**
    419 * Calculates the aligned top bound value
    420 */
    421 CV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);
    422 
    423 
    424 /**
    425 * NCVMemoryType
    426 */
    427 enum NCVMemoryType
    428 {
    429     NCVMemoryTypeNone,
    430     NCVMemoryTypeHostPageable,
    431     NCVMemoryTypeHostPinned,
    432     NCVMemoryTypeDevice
    433 };
    434 
    435 
    436 /**
    437 * NCVMemPtr
    438 */
    439 struct CV_EXPORTS NCVMemPtr
    440 {
    441     void *ptr;
    442     NCVMemoryType memtype;
    443     void clear();
    444 };
    445 
    446 
    447 /**
    448 * NCVMemSegment
    449 */
    450 struct CV_EXPORTS NCVMemSegment
    451 {
    452     NCVMemPtr begin;
    453     size_t size;
    454     void clear();
    455 };
    456 
    457 
    458 /**
    459 * INCVMemAllocator (Interface)
    460 */
    461 class CV_EXPORTS INCVMemAllocator
    462 {
    463 public:
    464     virtual ~INCVMemAllocator() = 0;
    465 
    466     virtual NCVStatus alloc(NCVMemSegment &seg, size_t size) = 0;
    467     virtual NCVStatus dealloc(NCVMemSegment &seg) = 0;
    468 
    469     virtual NcvBool isInitialized(void) const = 0;
    470     virtual NcvBool isCounting(void) const = 0;
    471 
    472     virtual NCVMemoryType memType(void) const = 0;
    473     virtual Ncv32u alignment(void) const = 0;
    474     virtual size_t maxSize(void) const = 0;
    475 };
    476 
    477 inline INCVMemAllocator::~INCVMemAllocator() {}
    478 
    479 
    480 /**
    481 * NCVMemStackAllocator
    482 */
    483 class CV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator
    484 {
    485     NCVMemStackAllocator();
    486     NCVMemStackAllocator(const NCVMemStackAllocator &);
    487 
    488 public:
    489 
    490     explicit NCVMemStackAllocator(Ncv32u alignment);
    491     NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr=NULL);
    492     virtual ~NCVMemStackAllocator();
    493 
    494     virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
    495     virtual NCVStatus dealloc(NCVMemSegment &seg);
    496 
    497     virtual NcvBool isInitialized(void) const;
    498     virtual NcvBool isCounting(void) const;
    499 
    500     virtual NCVMemoryType memType(void) const;
    501     virtual Ncv32u alignment(void) const;
    502     virtual size_t maxSize(void) const;
    503 
    504 private:
    505 
    506     NCVMemoryType _memType;
    507     Ncv32u _alignment;
    508     Ncv8u *allocBegin;
    509     Ncv8u *begin;
    510     Ncv8u *end;
    511     size_t currentSize;
    512     size_t _maxSize;
    513     NcvBool bReusesMemory;
    514 };
    515 
    516 
    517 /**
    518 * NCVMemNativeAllocator
    519 */
    520 class CV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator
    521 {
    522 public:
    523 
    524     NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment);
    525     virtual ~NCVMemNativeAllocator();
    526 
    527     virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);
    528     virtual NCVStatus dealloc(NCVMemSegment &seg);
    529 
    530     virtual NcvBool isInitialized(void) const;
    531     virtual NcvBool isCounting(void) const;
    532 
    533     virtual NCVMemoryType memType(void) const;
    534     virtual Ncv32u alignment(void) const;
    535     virtual size_t maxSize(void) const;
    536 
    537 private:
    538 
    539     NCVMemNativeAllocator();
    540     NCVMemNativeAllocator(const NCVMemNativeAllocator &);
    541 
    542     NCVMemoryType _memType;
    543     Ncv32u _alignment;
    544     size_t currentSize;
    545     size_t _maxSize;
    546 };
    547 
    548 
    549 /**
    550 * Copy dispatchers
    551 */
    552 CV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,
    553                                        const void *src, NCVMemoryType srcType,
    554                                        size_t sz, cudaStream_t cuStream);
    555 
    556 
    557 CV_EXPORTS NCVStatus memSegCopyHelper2D(void *dst, Ncv32u dstPitch, NCVMemoryType dstType,
    558                                          const void *src, Ncv32u srcPitch, NCVMemoryType srcType,
    559                                          Ncv32u widthbytes, Ncv32u height, cudaStream_t cuStream);
    560 
    561 
    562 /**
    563 * NCVVector (1D)
    564 */
    565 template <class T>
    566 class NCVVector
    567 {
    568     NCVVector(const NCVVector &);
    569 
    570 public:
    571 
    572     NCVVector()
    573     {
    574         clear();
    575     }
    576 
    577     virtual ~NCVVector() {}
    578 
    579     void clear()
    580     {
    581         _ptr = NULL;
    582         _length = 0;
    583         _memtype = NCVMemoryTypeNone;
    584     }
    585 
    586     NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0) const
    587     {
    588         if (howMuch == 0)
    589         {
    590             ncvAssertReturn(dst._length == this->_length, NCV_MEM_COPY_ERROR);
    591             howMuch = this->_length * sizeof(T);
    592         }
    593         else
    594         {
    595             ncvAssertReturn(dst._length * sizeof(T) >= howMuch &&
    596                 this->_length * sizeof(T) >= howMuch &&
    597                 howMuch > 0, NCV_MEM_COPY_ERROR);
    598         }
    599         ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
    600                         (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
    601 
    602         NCVStatus ncvStat = NCV_SUCCESS;
    603         if (this->_memtype != NCVMemoryTypeNone)
    604         {
    605             ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
    606                                        this->_ptr, this->_memtype,
    607                                        howMuch, cuStream);
    608         }
    609 
    610         return ncvStat;
    611     }
    612 
    613     T *ptr() const {return this->_ptr;}
    614     size_t length() const {return this->_length;}
    615     NCVMemoryType memType() const {return this->_memtype;}
    616 
    617 protected:
    618 
    619     T *_ptr;
    620     size_t _length;
    621     NCVMemoryType _memtype;
    622 };
    623 
    624 
    625 /**
    626 * NCVVectorAlloc
    627 */
    628 template <class T>
    629 class NCVVectorAlloc : public NCVVector<T>
    630 {
    631     NCVVectorAlloc();
    632     NCVVectorAlloc(const NCVVectorAlloc &);
    633     NCVVectorAlloc& operator=(const NCVVectorAlloc<T>&);
    634 
    635 public:
    636 
    637     NCVVectorAlloc(INCVMemAllocator &allocator_, Ncv32u length_)
    638         :
    639         allocator(allocator_)
    640     {
    641         NCVStatus ncvStat;
    642 
    643         this->clear();
    644         this->allocatedMem.clear();
    645 
    646         ncvStat = allocator.alloc(this->allocatedMem, length_ * sizeof(T));
    647         ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", );
    648 
    649         this->_ptr = (T *)this->allocatedMem.begin.ptr;
    650         this->_length = length_;
    651         this->_memtype = this->allocatedMem.begin.memtype;
    652     }
    653 
    654     ~NCVVectorAlloc()
    655     {
    656         NCVStatus ncvStat;
    657 
    658         ncvStat = allocator.dealloc(this->allocatedMem);
    659         ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVVectorAlloc dtor:: dealloc failed");
    660 
    661         this->clear();
    662     }
    663 
    664     NcvBool isMemAllocated() const
    665     {
    666         return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
    667     }
    668 
    669     Ncv32u getAllocatorsAlignment() const
    670     {
    671         return allocator.alignment();
    672     }
    673 
    674     NCVMemSegment getSegment() const
    675     {
    676         return allocatedMem;
    677     }
    678 
    679 private:
    680     INCVMemAllocator &allocator;
    681     NCVMemSegment allocatedMem;
    682 };
    683 
    684 
    685 /**
    686 * NCVVectorReuse
    687 */
    688 template <class T>
    689 class NCVVectorReuse : public NCVVector<T>
    690 {
    691     NCVVectorReuse();
    692     NCVVectorReuse(const NCVVectorReuse &);
    693 
    694 public:
    695 
    696     explicit NCVVectorReuse(const NCVMemSegment &memSegment)
    697     {
    698         this->bReused = false;
    699         this->clear();
    700 
    701         this->_length = memSegment.size / sizeof(T);
    702         this->_ptr = (T *)memSegment.begin.ptr;
    703         this->_memtype = memSegment.begin.memtype;
    704 
    705         this->bReused = true;
    706     }
    707 
    708     NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length_)
    709     {
    710         this->bReused = false;
    711         this->clear();
    712 
    713         ncvAssertPrintReturn(length_ * sizeof(T) <= memSegment.size, \
    714             "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );
    715 
    716         this->_length = length_;
    717         this->_ptr = (T *)memSegment.begin.ptr;
    718         this->_memtype = memSegment.begin.memtype;
    719 
    720         this->bReused = true;
    721     }
    722 
    723     NcvBool isMemReused() const
    724     {
    725         return this->bReused;
    726     }
    727 
    728 private:
    729 
    730     NcvBool bReused;
    731 };
    732 
    733 
    734 /**
    735 * NCVMatrix (2D)
    736 */
    737 template <class T>
    738 class NCVMatrix
    739 {
    740     NCVMatrix(const NCVMatrix &);
    741 
    742 public:
    743 
    744     NCVMatrix()
    745     {
    746         clear();
    747     }
    748 
    749     virtual ~NCVMatrix() {}
    750 
    751     void clear()
    752     {
    753         _ptr = NULL;
    754         _pitch = 0;
    755         _width = 0;
    756         _height = 0;
    757         _memtype = NCVMemoryTypeNone;
    758     }
    759 
    760     Ncv32u stride() const
    761     {
    762         return _pitch / sizeof(T);
    763     }
    764 
    765     //a side effect of this function is that it copies everything in a single chunk, so the "padding" will be overwritten
    766     NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0) const
    767     {
    768         if (howMuch == 0)
    769         {
    770             ncvAssertReturn(dst._pitch == this->_pitch &&
    771                             dst._height == this->_height, NCV_MEM_COPY_ERROR);
    772             howMuch = this->_pitch * this->_height;
    773         }
    774         else
    775         {
    776             ncvAssertReturn(dst._pitch * dst._height >= howMuch &&
    777                             this->_pitch * this->_height >= howMuch &&
    778                             howMuch > 0, NCV_MEM_COPY_ERROR);
    779         }
    780         ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
    781                         (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
    782 
    783         NCVStatus ncvStat = NCV_SUCCESS;
    784         if (this->_memtype != NCVMemoryTypeNone)
    785         {
    786             ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,
    787                                        this->_ptr, this->_memtype,
    788                                        howMuch, cuStream);
    789         }
    790 
    791         return ncvStat;
    792     }
    793 
    794     NCVStatus copy2D(NCVMatrix<T> &dst, NcvSize32u roi, cudaStream_t cuStream) const
    795     {
    796         ncvAssertReturn(this->width() >= roi.width && this->height() >= roi.height &&
    797                         dst.width() >= roi.width && dst.height() >= roi.height, NCV_MEM_COPY_ERROR);
    798         ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) &&
    799                         (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);
    800 
    801         NCVStatus ncvStat = NCV_SUCCESS;
    802         if (this->_memtype != NCVMemoryTypeNone)
    803         {
    804             ncvStat = memSegCopyHelper2D(dst._ptr, dst._pitch, dst._memtype,
    805                                          this->_ptr, this->_pitch, this->_memtype,
    806                                          roi.width * sizeof(T), roi.height, cuStream);
    807         }
    808 
    809         return ncvStat;
    810     }
    811 
    812     T& at(Ncv32u x, Ncv32u y) const
    813     {
    814         NcvBool bOutRange = (x >= this->_width || y >= this->_height);
    815         ncvAssertPrintCheck(!bOutRange, "Error addressing matrix");
    816         if (bOutRange)
    817         {
    818             return *this->_ptr;
    819         }
    820         return ((T *)((Ncv8u *)this->_ptr + y * this->_pitch))[x];
    821     }
    822 
    823     T *ptr() const {return this->_ptr;}
    824     Ncv32u width() const {return this->_width;}
    825     Ncv32u height() const {return this->_height;}
    826     NcvSize32u size() const {return NcvSize32u(this->_width, this->_height);}
    827     Ncv32u pitch() const {return this->_pitch;}
    828     NCVMemoryType memType() const {return this->_memtype;}
    829 
    830 protected:
    831 
    832     T *_ptr;
    833     Ncv32u _width;
    834     Ncv32u _height;
    835     Ncv32u _pitch;
    836     NCVMemoryType _memtype;
    837 };
    838 
    839 
    840 /**
    841 * NCVMatrixAlloc
    842 */
    843 template <class T>
    844 class NCVMatrixAlloc : public NCVMatrix<T>
    845 {
    846     NCVMatrixAlloc();
    847     NCVMatrixAlloc(const NCVMatrixAlloc &);
    848     NCVMatrixAlloc& operator=(const NCVMatrixAlloc &);
    849 public:
    850 
    851     NCVMatrixAlloc(INCVMemAllocator &allocator_, Ncv32u width_, Ncv32u height_, Ncv32u pitch_=0)
    852         :
    853         allocator(allocator_)
    854     {
    855         NCVStatus ncvStat;
    856 
    857         this->clear();
    858         this->allocatedMem.clear();
    859 
    860         Ncv32u widthBytes = width_ * sizeof(T);
    861         Ncv32u pitchBytes = alignUp(widthBytes, allocator.alignment());
    862 
    863         if (pitch_ != 0)
    864         {
    865             ncvAssertPrintReturn(pitch_ >= pitchBytes &&
    866                 (pitch_ & (allocator.alignment() - 1)) == 0,
    867                 "NCVMatrixAlloc ctor:: incorrect pitch passed", );
    868             pitchBytes = pitch_;
    869         }
    870 
    871         Ncv32u requiredAllocSize = pitchBytes * height_;
    872 
    873         ncvStat = allocator.alloc(this->allocatedMem, requiredAllocSize);
    874         ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", );
    875 
    876         this->_ptr = (T *)this->allocatedMem.begin.ptr;
    877         this->_width = width_;
    878         this->_height = height_;
    879         this->_pitch = pitchBytes;
    880         this->_memtype = this->allocatedMem.begin.memtype;
    881     }
    882 
    883     ~NCVMatrixAlloc()
    884     {
    885         NCVStatus ncvStat;
    886 
    887         ncvStat = allocator.dealloc(this->allocatedMem);
    888         ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc dtor:: dealloc failed");
    889 
    890         this->clear();
    891     }
    892 
    893     NcvBool isMemAllocated() const
    894     {
    895         return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());
    896     }
    897 
    898     Ncv32u getAllocatorsAlignment() const
    899     {
    900         return allocator.alignment();
    901     }
    902 
    903     NCVMemSegment getSegment() const
    904     {
    905         return allocatedMem;
    906     }
    907 
    908 private:
    909 
    910     INCVMemAllocator &allocator;
    911     NCVMemSegment allocatedMem;
    912 };
    913 
    914 
    915 /**
    916 * NCVMatrixReuse
    917 */
    918 template <class T>
    919 class NCVMatrixReuse : public NCVMatrix<T>
    920 {
    921     NCVMatrixReuse();
    922     NCVMatrixReuse(const NCVMatrixReuse &);
    923 
    924 public:
    925 
    926     NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width_, Ncv32u height_, Ncv32u pitch_=0, NcvBool bSkipPitchCheck=false)
    927     {
    928         this->bReused = false;
    929         this->clear();
    930 
    931         Ncv32u widthBytes = width_ * sizeof(T);
    932         Ncv32u pitchBytes = alignUp(widthBytes, alignment);
    933 
    934         if (pitch_ != 0)
    935         {
    936             if (!bSkipPitchCheck)
    937             {
    938                 ncvAssertPrintReturn(pitch_ >= pitchBytes &&
    939                     (pitch_ & (alignment - 1)) == 0,
    940                     "NCVMatrixReuse ctor:: incorrect pitch passed", );
    941             }
    942             else
    943             {
    944                 ncvAssertPrintReturn(pitch_ >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", );
    945             }
    946             pitchBytes = pitch_;
    947         }
    948 
    949         ncvAssertPrintReturn(pitchBytes * height_ <= memSegment.size, \
    950             "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );
    951 
    952         this->_width = width_;
    953         this->_height = height_;
    954         this->_pitch = pitchBytes;
    955         this->_ptr = (T *)memSegment.begin.ptr;
    956         this->_memtype = memSegment.begin.memtype;
    957 
    958         this->bReused = true;
    959     }
    960 
    961     NCVMatrixReuse(const NCVMatrix<T> &mat, NcvRect32u roi)
    962     {
    963         this->bReused = false;
    964         this->clear();
    965 
    966         ncvAssertPrintReturn(roi.x < mat.width() && roi.y < mat.height() && \
    967             roi.x + roi.width <= mat.width() && roi.y + roi.height <= mat.height(),
    968             "NCVMatrixReuse ctor:: memory binding failed due to mismatching ROI and source matrix dims", );
    969 
    970         this->_width = roi.width;
    971         this->_height = roi.height;
    972         this->_pitch = mat.pitch();
    973         this->_ptr = &mat.at(roi.x, roi.y);
    974         this->_memtype = mat.memType();
    975 
    976         this->bReused = true;
    977     }
    978 
    979     NcvBool isMemReused() const
    980     {
    981         return this->bReused;
    982     }
    983 
    984 private:
    985 
    986     NcvBool bReused;
    987 };
    988 
    989 
    990 /**
    991 * Operations with rectangles
    992 */
    993 CV_EXPORTS NCVStatus ncvGroupRectangles_host(NCVVector<NcvRect32u> &hypotheses, Ncv32u &numHypotheses,
    994                                               Ncv32u minNeighbors, Ncv32f intersectEps, NCVVector<Ncv32u> *hypothesesWeights);
    995 
    996 
    997 CV_EXPORTS NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
    998                                            NcvRect32u *h_rects, Ncv32u numRects, Ncv8u color);
    999 
   1000 
   1001 CV_EXPORTS NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
   1002                                             NcvRect32u *h_rects, Ncv32u numRects, Ncv32u color);
   1003 
   1004 
   1005 CV_EXPORTS NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
   1006                                              NcvRect32u *d_rects, Ncv32u numRects, Ncv8u color, cudaStream_t cuStream);
   1007 
   1008 
   1009 CV_EXPORTS NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst, Ncv32u dstStride, Ncv32u dstWidth, Ncv32u dstHeight,
   1010                                               NcvRect32u *d_rects, Ncv32u numRects, Ncv32u color, cudaStream_t cuStream);
   1011 
   1012 
   1013 #define CLAMP(x,a,b)        ( (x) > (b) ? (b) : ( (x) < (a) ? (a) : (x) ) )
   1014 #define CLAMP_TOP(x, a)     (((x) > (a)) ? (a) : (x))
   1015 #define CLAMP_BOTTOM(x, a)  (((x) < (a)) ? (a) : (x))
   1016 #define CLAMP_0_255(x)      CLAMP(x,0,255)
   1017 
   1018 
   1019 #define SUB_BEGIN(type, name)    struct { __inline type name
   1020 #define SUB_END(name)            } name;
   1021 #define SUB_CALL(name)           name.name
   1022 
   1023 #define SQR(x)              ((x)*(x))
   1024 
   1025 
   1026 #define ncvSafeMatAlloc(name, type, alloc, width, height, err) \
   1027     NCVMatrixAlloc<type> name(alloc, width, height); \
   1028     ncvAssertReturn(name.isMemAllocated(), err);
   1029 
   1030 //! @}
   1031 
   1032 #endif // _ncv_hpp_
   1033