Home | History | Annotate | Download | only in cuda
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #if !defined CUDA_DISABLER
     44 
     45 #include "opencv2/core/cuda/common.hpp"
     46 #include "opencv2/core/cuda/saturate_cast.hpp"
     47 #include "opencv2/core/cuda/border_interpolate.hpp"
     48 
     49 namespace cv { namespace cuda { namespace device
     50 {
     51     template <class SrcPtr, typename D>
     52     __global__ void filter2D(const SrcPtr src, PtrStepSz<D> dst,
     53                              const float* __restrict__ kernel,
     54                              const int kWidth, const int kHeight,
     55                              const int anchorX, const int anchorY)
     56     {
     57         typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
     58 
     59         const int x = blockIdx.x * blockDim.x + threadIdx.x;
     60         const int y = blockIdx.y * blockDim.y + threadIdx.y;
     61 
     62         if (x >= dst.cols || y >= dst.rows)
     63             return;
     64 
     65         sum_t res = VecTraits<sum_t>::all(0);
     66         int kInd = 0;
     67 
     68         for (int i = 0; i < kHeight; ++i)
     69         {
     70             for (int j = 0; j < kWidth; ++j)
     71                 res = res + src(y - anchorY + i, x - anchorX + j) * kernel[kInd++];
     72         }
     73 
     74         dst(y, x) = saturate_cast<D>(res);
     75     }
     76 
     77     template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller;
     78 
     79     #define IMPLEMENT_FILTER2D_TEX_READER(type) \
     80         texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
     81         struct tex_filter2D_ ## type ## _reader \
     82         { \
     83             typedef type elem_type; \
     84             typedef int index_type; \
     85             const int xoff; \
     86             const int yoff; \
     87             tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
     88             __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
     89             { \
     90                 return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \
     91             } \
     92         }; \
     93         template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
     94         { \
     95             static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, const float* kernel, \
     96                 int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
     97             { \
     98                 typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
     99                 dim3 block(16, 16); \
    100                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
    101                 bindTexture(&tex_filter2D_ ## type , srcWhole); \
    102                 tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
    103                 Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
    104                 BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
    105                 filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kernel, kWidth, kHeight, anchorX, anchorY); \
    106                 cudaSafeCall( cudaGetLastError() ); \
    107                 if (stream == 0) \
    108                     cudaSafeCall( cudaDeviceSynchronize() ); \
    109             } \
    110         };
    111 
    112     IMPLEMENT_FILTER2D_TEX_READER(uchar);
    113     IMPLEMENT_FILTER2D_TEX_READER(uchar4);
    114 
    115     IMPLEMENT_FILTER2D_TEX_READER(ushort);
    116     IMPLEMENT_FILTER2D_TEX_READER(ushort4);
    117 
    118     IMPLEMENT_FILTER2D_TEX_READER(float);
    119     IMPLEMENT_FILTER2D_TEX_READER(float4);
    120 
    121     #undef IMPLEMENT_FILTER2D_TEX_READER
    122 
    123     template <typename T, typename D>
    124     void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
    125                   int kWidth, int kHeight, int anchorX, int anchorY,
    126                   int borderMode, const float* borderValue, cudaStream_t stream)
    127     {
    128         typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, const float* kernel,
    129                                int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
    130         static const func_t funcs[] =
    131         {
    132             Filter2DCaller<T, D, BrdConstant>::call,
    133             Filter2DCaller<T, D, BrdReplicate>::call,
    134             Filter2DCaller<T, D, BrdReflect>::call,
    135             Filter2DCaller<T, D, BrdWrap>::call,
    136             Filter2DCaller<T, D, BrdReflect101>::call
    137         };
    138 
    139         funcs[borderMode]((PtrStepSz<T>) srcWhole, ofsX, ofsY, (PtrStepSz<D>) dst, kernel,
    140                           kWidth, kHeight, anchorX, anchorY, borderValue, stream);
    141     }
    142 
    143     template void filter2D<uchar  , uchar  >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
    144     template void filter2D<uchar4 , uchar4 >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
    145     template void filter2D<ushort , ushort >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
    146     template void filter2D<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
    147     template void filter2D<float  , float  >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
    148     template void filter2D<float4 , float4 >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
    149 }}}
    150 
    151 #endif // CUDA_DISABLER
    152