1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/saturate_cast.hpp" 47 #include "opencv2/core/cuda/border_interpolate.hpp" 48 49 namespace cv { namespace cuda { namespace device 50 { 51 template <class SrcPtr, typename D> 52 __global__ void filter2D(const SrcPtr src, PtrStepSz<D> dst, 53 const float* __restrict__ kernel, 54 const int kWidth, const int kHeight, 55 const int anchorX, const int anchorY) 56 { 57 typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t; 58 59 const int x = blockIdx.x * blockDim.x + threadIdx.x; 60 const int y = blockIdx.y * blockDim.y + threadIdx.y; 61 62 if (x >= dst.cols || y >= dst.rows) 63 return; 64 65 sum_t res = VecTraits<sum_t>::all(0); 66 int kInd = 0; 67 68 for (int i = 0; i < kHeight; ++i) 69 { 70 for (int j = 0; j < kWidth; ++j) 71 res = res + src(y - anchorY + i, x - anchorX + j) * kernel[kInd++]; 72 } 73 74 dst(y, x) = saturate_cast<D>(res); 75 } 76 77 template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller; 78 79 #define IMPLEMENT_FILTER2D_TEX_READER(type) \ 80 texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \ 81 struct tex_filter2D_ ## type ## _reader \ 82 { \ 83 typedef type elem_type; \ 84 typedef int index_type; \ 85 const int xoff; \ 86 const int yoff; \ 87 tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \ 88 __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \ 89 { \ 90 return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \ 91 } \ 92 }; \ 93 template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \ 94 { \ 95 static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, const float* kernel, \ 96 int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \ 97 { \ 98 typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \ 99 dim3 block(16, 16); \ 100 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \ 101 bindTexture(&tex_filter2D_ ## type , srcWhole); \ 102 tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \ 103 Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \ 104 BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \ 105 filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kernel, kWidth, kHeight, anchorX, anchorY); \ 106 cudaSafeCall( cudaGetLastError() ); \ 107 if (stream == 0) \ 108 cudaSafeCall( cudaDeviceSynchronize() ); \ 109 } \ 110 }; 111 112 IMPLEMENT_FILTER2D_TEX_READER(uchar); 113 IMPLEMENT_FILTER2D_TEX_READER(uchar4); 114 115 IMPLEMENT_FILTER2D_TEX_READER(ushort); 116 IMPLEMENT_FILTER2D_TEX_READER(ushort4); 117 118 IMPLEMENT_FILTER2D_TEX_READER(float); 119 IMPLEMENT_FILTER2D_TEX_READER(float4); 120 121 #undef IMPLEMENT_FILTER2D_TEX_READER 122 123 template <typename T, typename D> 124 void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, 125 int kWidth, int kHeight, int anchorX, int anchorY, 126 int borderMode, const float* borderValue, cudaStream_t stream) 127 { 128 typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, const float* kernel, 129 int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream); 130 static const func_t funcs[] = 131 { 132 Filter2DCaller<T, D, BrdConstant>::call, 133 Filter2DCaller<T, D, BrdReplicate>::call, 134 Filter2DCaller<T, D, BrdReflect>::call, 135 Filter2DCaller<T, D, BrdWrap>::call, 136 Filter2DCaller<T, D, BrdReflect101>::call 137 }; 138 139 funcs[borderMode]((PtrStepSz<T>) srcWhole, ofsX, ofsY, (PtrStepSz<D>) dst, kernel, 140 kWidth, kHeight, anchorX, anchorY, borderValue, stream); 141 } 142 143 template void filter2D<uchar , uchar >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); 144 template void filter2D<uchar4 , uchar4 >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); 145 template void filter2D<ushort , ushort >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); 146 template void filter2D<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); 147 template void filter2D<float , float >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); 148 template void filter2D<float4 , float4 >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); 149 }}} 150 151 #endif // CUDA_DISABLER 152