/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "opencv2/opencv_modules.hpp"

#ifndef HAVE_OPENCV_CUDEV

#error "opencv_cudev is required"

#else

#include "opencv2/cudev.hpp"

using namespace cv::cudev;

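// Element-wise matrix comparison kernels for the CUDA arithm module (the
// backend dispatched to by cv::cuda::compare). The entry point declared
// below is defined at the bottom of this translation unit.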
void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);

namespace
{
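    // Adapts a comparison functor to OpenCV's mask convention: the functor
    // yields 0 or 1, and negating that value as uchar maps true to 0xFF
    // (255) and false to 0x00.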
    template <class Op, typename T> struct CmpOp : binary_function<T, T, uchar>
    {
        __device__ __forceinline__ uchar operator()(T a, T b) const
        {
            Op op;
            return -op(a, b);
        }
    };

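    // The default grid-transform policy lets each thread process a batch of
    // elements via vectorized loads; for 64-bit depths the specialization
    // below sets shift = 1 so doubles are handled one element per thread.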
    template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
    {
    };
    template <> struct TransformPolicy<double> : DefaultTransformPolicy
    {
        enum {
            shift = 1
        };
    };

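    // Generic per-element path: one kernel instantiation per (operation,
    // depth) pair, launched through cudev's binary grid transform.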
    template <template <typename> class Op, typename T>
    void cmpMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
    {
        CmpOp<Op<T>, T> op;
        gridTransformBinary_< TransformPolicy<T> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<uchar>(dst), op, stream);
    }

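    // SIMD-within-a-word functors for CV_8U: cudev's vcmpeq4 / vcmpne4 /
    // vcmplt4 / vcmple4 compare four packed bytes per 32-bit word and
    // return a per-byte 0x00/0xFF mask.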
    struct VCmpEq4 : binary_function<uint, uint, uint>
    {
        __device__ __forceinline__ uint operator ()(uint a, uint b) const
        {
            return vcmpeq4(a, b);
        }
    };
    struct VCmpNe4 : binary_function<uint, uint, uint>
    {
        __device__ __forceinline__ uint operator ()(uint a, uint b) const
        {
            return vcmpne4(a, b);
        }
    };
    struct VCmpLt4 : binary_function<uint, uint, uint>
    {
        __device__ __forceinline__ uint operator ()(uint a, uint b) const
        {
            return vcmplt4(a, b);
        }
    };
    struct VCmpLe4 : binary_function<uint, uint, uint>
    {
        __device__ __forceinline__ uint operator ()(uint a, uint b) const
        {
            return vcmple4(a, b);
        }
    };

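    // Vectorized CV_8U kernels: each row is reinterpreted as 32-bit words
    // (vcols = cols / 4), so one thread compares four pixels at a time.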
    void cmpMatEq_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
    {
        const int vcols = src1.cols >> 2;

        GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
        GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
        GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);

        gridTransformBinary(src1_, src2_, dst_, VCmpEq4(), stream);
    }
    void cmpMatNe_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
    {
        const int vcols = src1.cols >> 2;

        GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
        GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
        GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);

        gridTransformBinary(src1_, src2_, dst_, VCmpNe4(), stream);
    }
    void cmpMatLt_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
    {
        const int vcols = src1.cols >> 2;

        GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
        GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
        GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);

        gridTransformBinary(src1_, src2_, dst_, VCmpLt4(), stream);
    }
    void cmpMatLe_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
    {
        const int vcols = src1.cols >> 2;

        GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
        GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
        GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);

        gridTransformBinary(src1_, src2_, dst_, VCmpLe4(), stream);
    }
}

void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop)
{
    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
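    // Dispatch table indexed by [source depth][internal code], where the
    // internal codes are 0 = EQ, 1 = NE, 2 = LT, 3 = LE.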
    static const func_t funcs[7][4] =
    {
        {cmpMat_v1<equal_to, uchar> , cmpMat_v1<not_equal_to, uchar> , cmpMat_v1<less, uchar> , cmpMat_v1<less_equal, uchar> },
        {cmpMat_v1<equal_to, schar> , cmpMat_v1<not_equal_to, schar> , cmpMat_v1<less, schar> , cmpMat_v1<less_equal, schar> },
        {cmpMat_v1<equal_to, ushort>, cmpMat_v1<not_equal_to, ushort>, cmpMat_v1<less, ushort>, cmpMat_v1<less_equal, ushort>},
        {cmpMat_v1<equal_to, short> , cmpMat_v1<not_equal_to, short> , cmpMat_v1<less, short> , cmpMat_v1<less_equal, short> },
        {cmpMat_v1<equal_to, int>   , cmpMat_v1<not_equal_to, int>   , cmpMat_v1<less, int>   , cmpMat_v1<less_equal, int>   },
        {cmpMat_v1<equal_to, float> , cmpMat_v1<not_equal_to, float> , cmpMat_v1<less, float> , cmpMat_v1<less_equal, float> },
        {cmpMat_v1<equal_to, double>, cmpMat_v1<not_equal_to, double>, cmpMat_v1<less, double>, cmpMat_v1<less_equal, double>}
    };

    typedef void (*func_v4_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
    static const func_v4_t funcs_v4[] =
    {
        cmpMatEq_v4, cmpMatNe_v4, cmpMatLt_v4, cmpMatLe_v4
    };

    const int depth = src1.depth();

    CV_DbgAssert( depth <= CV_64F );

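    // cmpop is a cv::CmpTypes value (CMP_EQ=0, CMP_GT=1, CMP_GE=2,
    // CMP_LT=3, CMP_LE=4, CMP_NE=5). Only EQ/NE/LT/LE kernels exist, so
    // GT and GE are rewritten as LT and LE with the operands swapped via
    // the psrc1/psrc2 tables below.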
    static const int codes[] =
    {
        0, 2, 3, 2, 3, 1
    };
    const GpuMat* psrc1[] =
    {
        &src1, &src2, &src2, &src1, &src1, &src1
    };
    const GpuMat* psrc2[] =
    {
        &src2, &src1, &src1, &src2, &src2, &src2
    };

    const int code = codes[cmpop];

    GpuMat src1_ = psrc1[cmpop]->reshape(1);
    GpuMat src2_ = psrc2[cmpop]->reshape(1);
    GpuMat dst_ = dst.reshape(1);

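    // Fast path: for CV_8U data whose (single-channel) row width is a
    // multiple of four and whose buffers are 32-byte aligned, use the
    // four-pixels-per-word kernels; otherwise fall through to the generic
    // per-element kernel.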
    if (depth == CV_8U && (src1_.cols & 3) == 0)
    {
        const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
        const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
        const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);

        const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;

        if (isAllAligned)
        {
            funcs_v4[code](src1_, src2_, dst_, stream);
            return;
        }
    }

    const func_t func = funcs[depth][code];

    func(src1_, src2_, dst_, stream);
}

#endif