/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
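//
// Overview: element-wise mat-vs-mat comparison kernels (==, !=, <, <=)
// producing a CV_8U mask (255 where the predicate holds, 0 elsewhere).
// CV_8U inputs whose row width is divisible by four take a vectorized
// per-uint path built on the vcmp*4 SIMD intrinsics; all other depths go
// through a per-element binary transform.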
//
//M*/

#include "opencv2/opencv_modules.hpp"

#ifndef HAVE_OPENCV_CUDEV

#error "opencv_cudev is required"

#else

#include "opencv2/cudev.hpp"

using namespace cv::cudev;

void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);

namespace
{
    template <class Op, typename T> struct CmpOp : binary_function<T, T, uchar>
    {
        __device__ __forceinline__ uchar operator()(T a, T b) const
        {
            Op op;
            // Op returns 0 or 1; unary minus maps 1 to 0xFF (all bits set),
            // producing the 0/255 mask convention OpenCV comparisons use.
            return -op(a, b);
        }
    };

    template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
    {
    };
    // For double input, lower the per-thread unroll (shift) to 1.
    template <> struct TransformPolicy<double> : DefaultTransformPolicy
    {
        enum {
            shift = 1
        };
    };

    template <template <typename> class Op, typename T>
    void cmpMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
    {
        CmpOp<Op<T>, T> op;
        gridTransformBinary_< TransformPolicy<T> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<uchar>(dst), op, stream);
    }

    // SIMD-in-a-word variants: each vcmp*4 intrinsic compares four packed
    // bytes at once, e.g. vcmpeq4(0x01020304, 0x01FF0304) == 0xFF00FFFF.
    struct VCmpEq4 : binary_function<uint, uint, uint>
    {
        __device__ __forceinline__ uint operator ()(uint a, uint b) const
        {
            return vcmpeq4(a, b);
        }
    };
    struct VCmpNe4 : binary_function<uint, uint, uint>
    {
        __device__ __forceinline__ uint operator ()(uint a, uint b) const
        {
            return vcmpne4(a, b);
        }
    };
    struct VCmpLt4 : binary_function<uint, uint, uint>
    {
        __device__ __forceinline__ uint operator ()(uint a, uint b) const
        {
            return vcmplt4(a, b);
        }
    };
    struct VCmpLe4 : binary_function<uint, uint, uint>
    {
        __device__ __forceinline__ uint operator ()(uint a, uint b) const
        {
            return vcmple4(a, b);
        }
    };

    void cmpMatEq_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
    {
        // Reinterpret each row as uints so one thread compares four uchars.
        const int vcols = src1.cols >> 2;

        GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
        GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
        GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);

        gridTransformBinary(src1_, src2_, dst_, VCmpEq4(), stream);
    }
    void cmpMatNe_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
    {
        const int vcols = src1.cols >> 2;

        GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
        GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
        GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);

        gridTransformBinary(src1_, src2_, dst_, VCmpNe4(), stream);
    }
    void cmpMatLt_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
    {
        const int vcols = src1.cols >> 2;

        GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
        GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
        GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);

        gridTransformBinary(src1_, src2_, dst_, VCmpLt4(), stream);
    }
    void cmpMatLe_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
    {
        const int vcols = src1.cols >> 2;

        GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
        GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
        GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);

        gridTransformBinary(src1_, src2_, dst_, VCmpLe4(), stream);
    }
}
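// Dispatch notes: cmpop is assumed to follow the cv::CmpTypes convention
// (CMP_EQ=0, CMP_GT=1, CMP_GE=2, CMP_LT=3, CMP_LE=4, CMP_NE=5). Only the
// EQ/NE/LT/LE kernels are instantiated; GT and GE are obtained by reusing
// LT and LE with the source operands swapped, which is what the codes[] /
// psrc1[] / psrc2[] tables below encode.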
void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop)
{
    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
    static const func_t funcs[7][4] =
    {
        {cmpMat_v1<equal_to, uchar> , cmpMat_v1<not_equal_to, uchar> , cmpMat_v1<less, uchar> , cmpMat_v1<less_equal, uchar> },
        {cmpMat_v1<equal_to, schar> , cmpMat_v1<not_equal_to, schar> , cmpMat_v1<less, schar> , cmpMat_v1<less_equal, schar> },
        {cmpMat_v1<equal_to, ushort>, cmpMat_v1<not_equal_to, ushort>, cmpMat_v1<less, ushort>, cmpMat_v1<less_equal, ushort>},
        {cmpMat_v1<equal_to, short> , cmpMat_v1<not_equal_to, short> , cmpMat_v1<less, short> , cmpMat_v1<less_equal, short> },
        {cmpMat_v1<equal_to, int>   , cmpMat_v1<not_equal_to, int>   , cmpMat_v1<less, int>   , cmpMat_v1<less_equal, int>   },
        {cmpMat_v1<equal_to, float> , cmpMat_v1<not_equal_to, float> , cmpMat_v1<less, float> , cmpMat_v1<less_equal, float> },
        {cmpMat_v1<equal_to, double>, cmpMat_v1<not_equal_to, double>, cmpMat_v1<less, double>, cmpMat_v1<less_equal, double>}
    };

    typedef void (*func_v4_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
    static const func_v4_t funcs_v4[] =
    {
        cmpMatEq_v4, cmpMatNe_v4, cmpMatLt_v4, cmpMatLe_v4
    };

    const int depth = src1.depth();

    CV_DbgAssert( depth <= CV_64F );

    // Map cmpop onto the {EQ, NE, LT, LE} kernel columns; GT and GE reuse
    // LT and LE with the source operands swapped.
    static const int codes[] =
    {
        0, 2, 3, 2, 3, 1
    };
    const GpuMat* psrc1[] =
    {
        &src1, &src2, &src2, &src1, &src1, &src1
    };
    const GpuMat* psrc2[] =
    {
        &src2, &src1, &src1, &src2, &src2, &src2
    };

    const int code = codes[cmpop];

    // Flatten multi-channel images to single-channel so the kernels can
    // operate per element.
    GpuMat src1_ = psrc1[cmpop]->reshape(1);
    GpuMat src2_ = psrc2[cmpop]->reshape(1);
    GpuMat dst_ = dst.reshape(1);

    if (depth == CV_8U && (src1_.cols & 3) == 0)
    {
        const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
        const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
        const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);

        // Vectorized path additionally requires all three buffers to be
        // 32-byte aligned.
        const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;

        if (isAllAligned)
        {
            funcs_v4[code](src1_, src2_, dst_, stream);
            return;
        }
    }

    const func_t func = funcs[depth][code];

    func(src1_, src2_, dst_, stream);
}

#endif
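
// Usage sketch (illustrative, not part of this translation unit): cmpMat is
// an internal entry point; user code would normally reach these kernels
// through the public opencv_cudaarithm API, roughly as below. The snippet
// assumes opencv_cudaarithm is built and linked.
//
//   #include <opencv2/cudaarithm.hpp>
//
//   cv::cuda::GpuMat a, b, mask;
//   a.upload(cv::Mat(4, 4, CV_8UC1, cv::Scalar(1)));
//   b.upload(cv::Mat(4, 4, CV_8UC1, cv::Scalar(0)));
//   cv::cuda::compare(a, b, mask, cv::CMP_GT);  // mask: CV_8U, 255 where a > b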