Home | History | Annotate | Download | only in detail
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                          License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
     16 // Third party copyrights are property of their respective owners.
     17 //
     18 // Redistribution and use in source and binary forms, with or without modification,
     19 // are permitted provided that the following conditions are met:
     20 //
     21 //   * Redistribution's of source code must retain the above copyright notice,
     22 //     this list of conditions and the following disclaimer.
     23 //
     24 //   * Redistribution's in binary form must reproduce the above copyright notice,
     25 //     this list of conditions and the following disclaimer in the documentation
     26 //     and/or other materials provided with the distribution.
     27 //
     28 //   * The name of the copyright holders may not be used to endorse or promote products
     29 //     derived from this software without specific prior written permission.
     30 //
     31 // This software is provided by the copyright holders and contributors "as is" and
     32 // any express or implied warranties, including, but not limited to, the implied
     33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     34 // In no event shall the Intel Corporation or contributors be liable for any direct,
     35 // indirect, incidental, special, exemplary, or consequential damages
     36 // (including, but not limited to, procurement of substitute goods or services;
     37 // loss of use, data, or profits; or business interruption) however caused
     38 // and on any theory of liability, whether in contract, strict liability,
     39 // or tort (including negligence or otherwise) arising in any way out of
     40 // the use of this software, even if advised of the possibility of such damage.
     41 //
     42 //M*/
     43 
     44 #pragma once
     45 
     46 #ifndef __OPENCV_CUDEV_GRID_TRANSFORM_DETAIL_HPP__
     47 #define __OPENCV_CUDEV_GRID_TRANSFORM_DETAIL_HPP__
     48 
     49 #include "../../common.hpp"
     50 #include "../../util/tuple.hpp"
     51 #include "../../util/saturate_cast.hpp"
     52 #include "../../util/vec_traits.hpp"
     53 #include "../../ptr2d/glob.hpp"
     54 #include "../../ptr2d/traits.hpp"
     55 
     56 namespace cv { namespace cudev {
     57 
     58 namespace grid_transform_detail
     59 {
     60     // OpUnroller
     61 
     62     template <int cn> struct OpUnroller;
     63 
     64     template <> struct OpUnroller<1>
     65     {
     66         template <typename T, typename D, class UnOp, class MaskPtr>
     67         __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
     68         {
     69             if (mask(y, x_shifted))
     70                 dst.x = op(src.x);
     71         }
     72 
     73         template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
     74         __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
     75         {
     76             if (mask(y, x_shifted))
     77                 dst.x = op(src1.x, src2.x);
     78         }
     79     };
     80 
     81     template <> struct OpUnroller<2>
     82     {
     83         template <typename T, typename D, class UnOp, class MaskPtr>
     84         __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
     85         {
     86             if (mask(y, x_shifted))
     87                 dst.x = op(src.x);
     88             if (mask(y, x_shifted + 1))
     89                 dst.y = op(src.y);
     90         }
     91 
     92         template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
     93         __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
     94         {
     95             if (mask(y, x_shifted))
     96                 dst.x = op(src1.x, src2.x);
     97             if (mask(y, x_shifted + 1))
     98                 dst.y = op(src1.y, src2.y);
     99         }
    100     };
    101 
    102     template <> struct OpUnroller<3>
    103     {
    104         template <typename T, typename D, class UnOp, class MaskPtr>
    105         __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
    106         {
    107             if (mask(y, x_shifted))
    108                 dst.x = op(src.x);
    109             if (mask(y, x_shifted + 1))
    110                 dst.y = op(src.y);
    111             if (mask(y, x_shifted + 2))
    112                 dst.z = op(src.z);
    113         }
    114 
    115         template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
    116         __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
    117         {
    118             if (mask(y, x_shifted))
    119                 dst.x = op(src1.x, src2.x);
    120             if (mask(y, x_shifted + 1))
    121                 dst.y = op(src1.y, src2.y);
    122             if (mask(y, x_shifted + 2))
    123                 dst.z = op(src1.z, src2.z);
    124         }
    125     };
    126 
    127     template <> struct OpUnroller<4>
    128     {
    129         template <typename T, typename D, class UnOp, class MaskPtr>
    130         __device__ __forceinline__ static void unroll(const T& src, D& dst, const UnOp& op, const MaskPtr& mask, int x_shifted, int y)
    131         {
    132             if (mask(y, x_shifted))
    133                 dst.x = op(src.x);
    134             if (mask(y, x_shifted + 1))
    135                 dst.y = op(src.y);
    136             if (mask(y, x_shifted + 2))
    137                 dst.z = op(src.z);
    138             if (mask(y, x_shifted + 3))
    139                 dst.w = op(src.w);
    140         }
    141 
    142         template <typename T1, typename T2, typename D, class BinOp, class MaskPtr>
    143         __device__ __forceinline__ static void unroll(const T1& src1, const T2& src2, D& dst, const BinOp& op, const MaskPtr& mask, int x_shifted, int y)
    144         {
    145             if (mask(y, x_shifted))
    146                 dst.x = op(src1.x, src2.x);
    147             if (mask(y, x_shifted + 1))
    148                 dst.y = op(src1.y, src2.y);
    149             if (mask(y, x_shifted + 2))
    150                 dst.z = op(src1.z, src2.z);
    151             if (mask(y, x_shifted + 3))
    152                 dst.w = op(src1.w, src2.w);
    153         }
    154     };
    155 
    156     // transformSimple
    157 
    158     template <class SrcPtr, typename DstType, class UnOp, class MaskPtr>
    159     __global__ void transformSimple(const SrcPtr src, GlobPtr<DstType> dst, const UnOp op, const MaskPtr mask, const int rows, const int cols)
    160     {
    161         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    162         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    163 
    164         if (x >= cols || y >= rows || !mask(y, x))
    165             return;
    166 
    167         dst(y, x) = saturate_cast<DstType>(op(src(y, x)));
    168     }
    169 
    170     template <class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
    171     __global__ void transformSimple(const SrcPtr1 src1, const SrcPtr2 src2, GlobPtr<DstType> dst, const BinOp op, const MaskPtr mask, const int rows, const int cols)
    172     {
    173         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    174         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    175 
    176         if (x >= cols || y >= rows || !mask(y, x))
    177             return;
    178 
    179         dst(y, x) = saturate_cast<DstType>(op(src1(y, x), src2(y, x)));
    180     }
    181 
    182     // transformSmart
    183 
    184     template <int SHIFT, typename SrcType, typename DstType, class UnOp, class MaskPtr>
    185     __global__ void transformSmart(const GlobPtr<SrcType> src_, GlobPtr<DstType> dst_, const UnOp op, const MaskPtr mask, const int rows, const int cols)
    186     {
    187         typedef typename MakeVec<SrcType, SHIFT>::type read_type;
    188         typedef typename MakeVec<DstType, SHIFT>::type write_type;
    189 
    190         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    191         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    192         const int x_shifted = x * SHIFT;
    193 
    194         if (y < rows)
    195         {
    196             const SrcType* src = src_.row(y);
    197             DstType* dst = dst_.row(y);
    198 
    199             if (x_shifted + SHIFT - 1 < cols)
    200             {
    201                 const read_type src_n_el = ((const read_type*)src)[x];
    202                 write_type dst_n_el = ((const write_type*)dst)[x];
    203 
    204                 OpUnroller<SHIFT>::unroll(src_n_el, dst_n_el, op, mask, x_shifted, y);
    205 
    206                 ((write_type*)dst)[x] = dst_n_el;
    207             }
    208             else
    209             {
    210                 for (int real_x = x_shifted; real_x < cols; ++real_x)
    211                 {
    212                     if (mask(y, real_x))
    213                         dst[real_x] = op(src[real_x]);
    214                 }
    215             }
    216         }
    217     }
    218 
    219     template <int SHIFT, typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
    220     __global__ void transformSmart(const GlobPtr<SrcType1> src1_, const GlobPtr<SrcType2> src2_, GlobPtr<DstType> dst_, const BinOp op, const MaskPtr mask, const int rows, const int cols)
    221     {
    222         typedef typename MakeVec<SrcType1, SHIFT>::type read_type1;
    223         typedef typename MakeVec<SrcType2, SHIFT>::type read_type2;
    224         typedef typename MakeVec<DstType, SHIFT>::type write_type;
    225 
    226         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    227         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    228         const int x_shifted = x * SHIFT;
    229 
    230         if (y < rows)
    231         {
    232             const SrcType1* src1 = src1_.row(y);
    233             const SrcType2* src2 = src2_.row(y);
    234             DstType* dst = dst_.row(y);
    235 
    236             if (x_shifted + SHIFT - 1 < cols)
    237             {
    238                 const read_type1 src1_n_el = ((const read_type1*)src1)[x];
    239                 const read_type2 src2_n_el = ((const read_type2*)src2)[x];
    240                 write_type dst_n_el = ((const write_type*)dst)[x];
    241 
    242                 OpUnroller<SHIFT>::unroll(src1_n_el, src2_n_el, dst_n_el, op, mask, x_shifted, y);
    243 
    244                 ((write_type*)dst)[x] = dst_n_el;
    245             }
    246             else
    247             {
    248                 for (int real_x = x_shifted; real_x < cols; ++real_x)
    249                 {
    250                     if (mask(y, real_x))
    251                         dst[real_x] = op(src1[real_x], src2[real_x]);
    252                 }
    253             }
    254         }
    255     }
    256 
    257     // TransformDispatcher
    258 
    259     template <bool UseSmart, class Policy> struct TransformDispatcher;
    260 
    261     template <class Policy> struct TransformDispatcher<false, Policy>
    262     {
    263         template <class SrcPtr, typename DstType, class UnOp, class MaskPtr>
    264         __host__ static void call(const SrcPtr& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    265         {
    266             const dim3 block(Policy::block_size_x, Policy::block_size_y);
    267             const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
    268 
    269             transformSimple<<<grid, block, 0, stream>>>(src, dst, op, mask, rows, cols);
    270             CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    271 
    272             if (stream == 0)
    273                 CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    274         }
    275 
    276         template <class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
    277         __host__ static void call(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    278         {
    279             const dim3 block(Policy::block_size_x, Policy::block_size_y);
    280             const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
    281 
    282             transformSimple<<<grid, block, 0, stream>>>(src1, src2, dst, op, mask, rows, cols);
    283             CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    284 
    285             if (stream == 0)
    286                 CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    287         }
    288     };
    289 
    290     template <class Policy> struct TransformDispatcher<true, Policy>
    291     {
    292         template <typename T>
    293         __host__ static bool isAligned(const T* ptr, size_t size)
    294         {
    295             return reinterpret_cast<size_t>(ptr) % size == 0;
    296         }
    297 
    298         __host__ static bool isAligned(size_t step, size_t size)
    299         {
    300             return step % size == 0;
    301         }
    302 
    303         template <typename SrcType, typename DstType, class UnOp, class MaskPtr>
    304         __host__ static void call(const GlobPtr<SrcType>& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    305         {
    306             if (Policy::shift == 1 ||
    307                 !isAligned(src.data, Policy::shift * sizeof(SrcType)) || !isAligned(src.step, Policy::shift * sizeof(SrcType)) ||
    308                 !isAligned(dst.data, Policy::shift * sizeof(DstType)) || !isAligned(dst.step, Policy::shift * sizeof(DstType)))
    309             {
    310                 TransformDispatcher<false, Policy>::call(src, dst, op, mask, rows, cols, stream);
    311                 return;
    312             }
    313 
    314             const dim3 block(Policy::block_size_x, Policy::block_size_y);
    315             const dim3 grid(divUp(cols, block.x * Policy::shift), divUp(rows, block.y));
    316 
    317             transformSmart<Policy::shift><<<grid, block, 0, stream>>>(src, dst, op, mask, rows, cols);
    318             CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    319 
    320             if (stream == 0)
    321                 CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    322         }
    323 
    324         template <typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
    325         __host__ static void call(const GlobPtr<SrcType1>& src1, const GlobPtr<SrcType2>& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    326         {
    327             if (Policy::shift == 1 ||
    328                 !isAligned(src1.data, Policy::shift * sizeof(SrcType1)) || !isAligned(src1.step, Policy::shift * sizeof(SrcType1)) ||
    329                 !isAligned(src2.data, Policy::shift * sizeof(SrcType2)) || !isAligned(src2.step, Policy::shift * sizeof(SrcType2)) ||
    330                 !isAligned(dst.data,  Policy::shift * sizeof(DstType))  || !isAligned(dst.step,  Policy::shift * sizeof(DstType)))
    331             {
    332                 TransformDispatcher<false, Policy>::call(src1, src2, dst, op, mask, rows, cols, stream);
    333                 return;
    334             }
    335 
    336             const dim3 block(Policy::block_size_x, Policy::block_size_y);
    337             const dim3 grid(divUp(cols, block.x * Policy::shift), divUp(rows, block.y));
    338 
    339             transformSmart<Policy::shift><<<grid, block, 0, stream>>>(src1, src2, dst, op, mask, rows, cols);
    340             CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    341 
    342             if (stream == 0)
    343                 CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    344         }
    345     };
    346 
    347     template <class Policy, class SrcPtr, typename DstType, class UnOp, class MaskPtr>
    348     __host__ void transform_unary(const SrcPtr& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    349     {
    350         TransformDispatcher<false, Policy>::call(src, dst, op, mask, rows, cols, stream);
    351     }
    352 
    353     template <class Policy, class SrcPtr1, class SrcPtr2, typename DstType, class BinOp, class MaskPtr>
    354     __host__ void transform_binary(const SrcPtr1& src1, const SrcPtr2& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    355     {
    356         TransformDispatcher<false, Policy>::call(src1, src2, dst, op, mask, rows, cols, stream);
    357     }
    358 
    359     template <class Policy, typename SrcType, typename DstType, class UnOp, class MaskPtr>
    360     __host__ void transform_unary(const GlobPtr<SrcType>& src, const GlobPtr<DstType>& dst, const UnOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    361     {
    362         TransformDispatcher<VecTraits<SrcType>::cn == 1 && VecTraits<DstType>::cn == 1 && Policy::shift != 1, Policy>::call(src, dst, op, mask, rows, cols, stream);
    363     }
    364 
    365     template <class Policy, typename SrcType1, typename SrcType2, typename DstType, class BinOp, class MaskPtr>
    366     __host__ void transform_binary(const GlobPtr<SrcType1>& src1, const GlobPtr<SrcType2>& src2, const GlobPtr<DstType>& dst, const BinOp& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    367     {
    368         TransformDispatcher<VecTraits<SrcType1>::cn == 1 && VecTraits<SrcType2>::cn == 1 && VecTraits<DstType>::cn == 1 && Policy::shift != 1, Policy>::call(src1, src2, dst, op, mask, rows, cols, stream);
    369     }
    370 
    371     // transform_tuple
    372 
    373     template <int count> struct Unroll
    374     {
    375         template <class SrcVal, class DstPtrTuple, class OpTuple>
    376         __device__ static void transform(const SrcVal& srcVal, DstPtrTuple& dst, const OpTuple& op, int y, int x)
    377         {
    378             typedef typename tuple_element<count - 1, DstPtrTuple>::type dst_ptr_type;
    379             typedef typename PtrTraits<dst_ptr_type>::value_type dst_type;
    380 
    381             get<count - 1>(dst)(y, x) = saturate_cast<dst_type>(get<count - 1>(op)(srcVal));
    382             Unroll<count - 1>::transform(srcVal, dst, op, y, x);
    383         }
    384     };
    385     template <> struct Unroll<0>
    386     {
    387         template <class SrcVal, class DstPtrTuple, class OpTuple>
    388         __device__ __forceinline__ static void transform(const SrcVal&, DstPtrTuple&, const OpTuple&, int, int)
    389         {
    390         }
    391     };
    392 
    393     template <class SrcPtr, class DstPtrTuple, class OpTuple, class MaskPtr>
    394     __global__ void transform_tuple(const SrcPtr src, DstPtrTuple dst, const OpTuple op, const MaskPtr mask, const int rows, const int cols)
    395     {
    396         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    397         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    398 
    399         if (x >= cols || y >= rows || !mask(y, x))
    400             return;
    401 
    402         typename PtrTraits<SrcPtr>::value_type srcVal = src(y, x);
    403 
    404         Unroll<tuple_size<DstPtrTuple>::value>::transform(srcVal, dst, op, y, x);
    405     }
    406 
    407     template <class Policy, class SrcPtrTuple, class DstPtrTuple, class OpTuple, class MaskPtr>
    408     __host__ void transform_tuple(const SrcPtrTuple& src, const DstPtrTuple& dst, const OpTuple& op, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    409     {
    410         const dim3 block(Policy::block_size_x, Policy::block_size_y);
    411         const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
    412 
    413         transform_tuple<<<grid, block, 0, stream>>>(src, dst, op, mask, rows, cols);
    414         CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    415 
    416         if (stream == 0)
    417             CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    418     }
    419 }
    420 
    421 }}
    422 
    423 #endif
    424