Home | History | Annotate | Download | only in detail
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                          License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
     16 // Third party copyrights are property of their respective owners.
     17 //
     18 // Redistribution and use in source and binary forms, with or without modification,
     19 // are permitted provided that the following conditions are met:
     20 //
     21 //   * Redistribution's of source code must retain the above copyright notice,
     22 //     this list of conditions and the following disclaimer.
     23 //
     24 //   * Redistribution's in binary form must reproduce the above copyright notice,
     25 //     this list of conditions and the following disclaimer in the documentation
     26 //     and/or other materials provided with the distribution.
     27 //
     28 //   * The name of the copyright holders may not be used to endorse or promote products
     29 //     derived from this software without specific prior written permission.
     30 //
     31 // This software is provided by the copyright holders and contributors "as is" and
     32 // any express or implied warranties, including, but not limited to, the implied
     33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     34 // In no event shall the Intel Corporation or contributors be liable for any direct,
     35 // indirect, incidental, special, exemplary, or consequential damages
     36 // (including, but not limited to, procurement of substitute goods or services;
     37 // loss of use, data, or profits; or business interruption) however caused
     38 // and on any theory of liability, whether in contract, strict liability,
     39 // or tort (including negligence or otherwise) arising in any way out of
     40 // the use of this software, even if advised of the possibility of such damage.
     41 //
     42 //M*/
     43 
     44 #pragma once
     45 
     46 #ifndef __OPENCV_CUDEV_GRID_SPLIT_MERGE_DETAIL_HPP__
     47 #define __OPENCV_CUDEV_GRID_SPLIT_MERGE_DETAIL_HPP__
     48 
     49 #include "../../common.hpp"
     50 #include "../../util/saturate_cast.hpp"
     51 #include "../../util/tuple.hpp"
     52 #include "../../util/vec_traits.hpp"
     53 #include "../../ptr2d/glob.hpp"
     54 #include "../../ptr2d/traits.hpp"
     55 
     56 namespace cv { namespace cudev {
     57 
     58 namespace grid_split_merge_detail
     59 {
     60     // merge
     61 
     62     template <class Src1Ptr, class Src2Ptr, typename DstType, class MaskPtr>
     63     __global__ void mergeC2(const Src1Ptr src1, const Src2Ptr src2, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
     64     {
     65         typedef typename VecTraits<DstType>::elem_type dst_elem_type;
     66 
     67         const int x = blockIdx.x * blockDim.x + threadIdx.x;
     68         const int y = blockIdx.y * blockDim.y + threadIdx.y;
     69 
     70         if (x >= cols || y >= rows || !mask(y, x))
     71             return;
     72 
     73         dst(y, x) = VecTraits<DstType>::make(
     74                     saturate_cast<dst_elem_type>(src1(y, x)),
     75                     saturate_cast<dst_elem_type>(src2(y, x))
     76                     );
     77     }
     78 
     79     template <class Policy, class Src1Ptr, class Src2Ptr, typename DstType, class MaskPtr>
     80     __host__ void mergeC2(const Src1Ptr& src1, const Src2Ptr& src2, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
     81     {
     82         const dim3 block(Policy::block_size_x, Policy::block_size_y);
     83         const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
     84 
     85         mergeC2<<<grid, block, 0, stream>>>(src1, src2, dst, mask, rows, cols);
     86         CV_CUDEV_SAFE_CALL( cudaGetLastError() );
     87 
     88         if (stream == 0)
     89             CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
     90     }
     91 
     92     template <class Src1Ptr, class Src2Ptr, class Src3Ptr, typename DstType, class MaskPtr>
     93     __global__ void mergeC3(const Src1Ptr src1, const Src2Ptr src2, const Src3Ptr src3, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
     94     {
     95         typedef typename VecTraits<DstType>::elem_type dst_elem_type;
     96 
     97         const int x = blockIdx.x * blockDim.x + threadIdx.x;
     98         const int y = blockIdx.y * blockDim.y + threadIdx.y;
     99 
    100         if (x >= cols || y >= rows || !mask(y, x))
    101             return;
    102 
    103         dst(y, x) = VecTraits<DstType>::make(
    104                     saturate_cast<dst_elem_type>(src1(y, x)),
    105                     saturate_cast<dst_elem_type>(src2(y, x)),
    106                     saturate_cast<dst_elem_type>(src3(y, x))
    107                     );
    108     }
    109 
    110     template <class Policy, class Src1Ptr, class Src2Ptr, class Src3Ptr, typename DstType, class MaskPtr>
    111     __host__ void mergeC3(const Src1Ptr& src1, const Src2Ptr& src2, const Src3Ptr& src3, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    112     {
    113         const dim3 block(Policy::block_size_x, Policy::block_size_y);
    114         const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
    115 
    116         mergeC3<<<grid, block, 0, stream>>>(src1, src2, src3, dst, mask, rows, cols);
    117         CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    118 
    119         if (stream == 0)
    120             CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    121     }
    122 
    123     template <class Src1Ptr, class Src2Ptr, class Src3Ptr, class Src4Ptr, typename DstType, class MaskPtr>
    124     __global__ void mergeC4(const Src1Ptr src1, const Src2Ptr src2, const Src3Ptr src3, const Src4Ptr src4, GlobPtr<DstType> dst, const MaskPtr mask, const int rows, const int cols)
    125     {
    126         typedef typename VecTraits<DstType>::elem_type dst_elem_type;
    127 
    128         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    129         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    130 
    131         if (x >= cols || y >= rows || !mask(y, x))
    132             return;
    133 
    134         dst(y, x) = VecTraits<DstType>::make(
    135                     saturate_cast<dst_elem_type>(src1(y, x)),
    136                     saturate_cast<dst_elem_type>(src2(y, x)),
    137                     saturate_cast<dst_elem_type>(src3(y, x)),
    138                     saturate_cast<dst_elem_type>(src4(y, x))
    139                     );
    140     }
    141 
    142     template <class Policy, class Src1Ptr, class Src2Ptr, class Src3Ptr, class Src4Ptr, typename DstType, class MaskPtr>
    143     __host__ void mergeC4(const Src1Ptr& src1, const Src2Ptr& src2, const Src3Ptr& src3, const Src4Ptr& src4, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    144     {
    145         const dim3 block(Policy::block_size_x, Policy::block_size_y);
    146         const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
    147 
    148         mergeC4<<<grid, block, 0, stream>>>(src1, src2, src3, src4, dst, mask, rows, cols);
    149         CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    150 
    151         if (stream == 0)
    152             CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    153     }
    154 
    155     template <int cn, class Policy> struct MergeImpl;
    156 
    157     template <class Policy> struct MergeImpl<2, Policy>
    158     {
    159         template <class SrcPtrTuple, typename DstType, class MaskPtr>
    160         __host__ static void merge(const SrcPtrTuple& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    161         {
    162             mergeC2<Policy>(get<0>(src), get<1>(src), dst, mask, rows, cols, stream);
    163         }
    164     };
    165 
    166     template <class Policy> struct MergeImpl<3, Policy>
    167     {
    168         template <class SrcPtrTuple, typename DstType, class MaskPtr>
    169         __host__ static void merge(const SrcPtrTuple& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    170         {
    171             mergeC3<Policy>(get<0>(src), get<1>(src), get<2>(src), dst, mask, rows, cols, stream);
    172         }
    173     };
    174 
    175     template <class Policy> struct MergeImpl<4, Policy>
    176     {
    177         template <class SrcPtrTuple, typename DstType, class MaskPtr>
    178         __host__ static void merge(const SrcPtrTuple& src, const GlobPtr<DstType>& dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    179         {
    180             mergeC4<Policy>(get<0>(src), get<1>(src), get<2>(src), get<3>(src), dst, mask, rows, cols, stream);
    181         }
    182     };
    183 
    184     // split
    185 
    186     template <class SrcPtr, typename DstType, class MaskPtr>
    187     __global__ void split(const SrcPtr src, GlobPtr<DstType> dst1, GlobPtr<DstType> dst2, const MaskPtr mask, const int rows, const int cols)
    188     {
    189         typedef typename PtrTraits<SrcPtr>::value_type src_type;
    190 
    191         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    192         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    193 
    194         if (x >= cols || y >= rows || !mask(y, x))
    195             return;
    196 
    197         const src_type src_value = src(y, x);
    198 
    199         dst1(y, x) = src_value.x;
    200         dst2(y, x) = src_value.y;
    201     }
    202 
    203     template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
    204     __host__ void split(const SrcPtr& src, const GlobPtr<DstType>& dst1, const GlobPtr<DstType>& dst2, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    205     {
    206         const dim3 block(Policy::block_size_x, Policy::block_size_y);
    207         const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
    208 
    209         split<<<grid, block, 0, stream>>>(src, dst1, dst2, mask, rows, cols);
    210         CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    211 
    212         if (stream == 0)
    213             CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    214     }
    215 
    216     template <class SrcPtr, typename DstType, class MaskPtr>
    217     __global__ void split(const SrcPtr src, GlobPtr<DstType> dst1, GlobPtr<DstType> dst2, GlobPtr<DstType> dst3, const MaskPtr mask, const int rows, const int cols)
    218     {
    219         typedef typename PtrTraits<SrcPtr>::value_type src_type;
    220 
    221         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    222         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    223 
    224         if (x >= cols || y >= rows || !mask(y, x))
    225             return;
    226 
    227         const src_type src_value = src(y, x);
    228 
    229         dst1(y, x) = src_value.x;
    230         dst2(y, x) = src_value.y;
    231         dst3(y, x) = src_value.z;
    232     }
    233 
    234     template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
    235     __host__ void split(const SrcPtr& src, const GlobPtr<DstType>& dst1, const GlobPtr<DstType>& dst2, const GlobPtr<DstType>& dst3, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    236     {
    237         const dim3 block(Policy::block_size_x, Policy::block_size_y);
    238         const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
    239 
    240         split<<<grid, block, 0, stream>>>(src, dst1, dst2, dst3, mask, rows, cols);
    241         CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    242 
    243         if (stream == 0)
    244             CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    245     }
    246 
    247     template <class SrcPtr, typename DstType, class MaskPtr>
    248     __global__ void split(const SrcPtr src, GlobPtr<DstType> dst1, GlobPtr<DstType> dst2, GlobPtr<DstType> dst3, GlobPtr<DstType> dst4, const MaskPtr mask, const int rows, const int cols)
    249     {
    250         typedef typename PtrTraits<SrcPtr>::value_type src_type;
    251 
    252         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    253         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    254 
    255         if (x >= cols || y >= rows || !mask(y, x))
    256             return;
    257 
    258         const src_type src_value = src(y, x);
    259 
    260         dst1(y, x) = src_value.x;
    261         dst2(y, x) = src_value.y;
    262         dst3(y, x) = src_value.z;
    263         dst4(y, x) = src_value.w;
    264     }
    265 
    266     template <class Policy, class SrcPtr, typename DstType, class MaskPtr>
    267     __host__ void split(const SrcPtr& src, const GlobPtr<DstType>& dst1, const GlobPtr<DstType>& dst2, const GlobPtr<DstType>& dst3, const GlobPtr<DstType>& dst4, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
    268     {
    269         const dim3 block(Policy::block_size_x, Policy::block_size_y);
    270         const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
    271 
    272         split<<<grid, block, 0, stream>>>(src, dst1, dst2, dst3, dst4, mask, rows, cols);
    273         CV_CUDEV_SAFE_CALL( cudaGetLastError() );
    274 
    275         if (stream == 0)
    276             CV_CUDEV_SAFE_CALL(cudaDeviceSynchronize());
    277     }
    278 }
    279 
    280 }}
    281 
    282 #endif
    283