Home | History | Annotate | Download | only in detail
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                          License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
     16 // Third party copyrights are property of their respective owners.
     17 //
     18 // Redistribution and use in source and binary forms, with or without modification,
     19 // are permitted provided that the following conditions are met:
     20 //
     21 //   * Redistribution's of source code must retain the above copyright notice,
     22 //     this list of conditions and the following disclaimer.
     23 //
     24 //   * Redistribution's in binary form must reproduce the above copyright notice,
     25 //     this list of conditions and the following disclaimer in the documentation
     26 //     and/or other materials provided with the distribution.
     27 //
     28 //   * The name of the copyright holders may not be used to endorse or promote products
     29 //     derived from this software without specific prior written permission.
     30 //
     31 // This software is provided by the copyright holders and contributors "as is" and
     32 // any express or implied warranties, including, but not limited to, the implied
     33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     34 // In no event shall the Intel Corporation or contributors be liable for any direct,
     35 // indirect, incidental, special, exemplary, or consequential damages
     36 // (including, but not limited to, procurement of substitute goods or services;
     37 // loss of use, data, or profits; or business interruption) however caused
     38 // and on any theory of liability, whether in contract, strict liability,
     39 // or tort (including negligence or otherwise) arising in any way out of
     40 // the use of this software, even if advised of the possibility of such damage.
     41 //
     42 //M*/
     43 
     44 #pragma once
     45 
     46 #ifndef __OPENCV_CUDEV_GRID_PYR_DOWN_DETAIL_HPP__
     47 #define __OPENCV_CUDEV_GRID_PYR_DOWN_DETAIL_HPP__
     48 
     49 #include "../../common.hpp"
     50 #include "../../util/vec_traits.hpp"
     51 #include "../../util/saturate_cast.hpp"
     52 #include "../../util/type_traits.hpp"
     53 #include "../../ptr2d/glob.hpp"
     54 #include "../../ptr2d/traits.hpp"
     55 
     56 namespace cv { namespace cudev {
     57 
     58 namespace pyramids_detail
     59 {
     60     template <class Brd, class SrcPtr, typename DstType>
     61     __global__ void pyrDown(const SrcPtr src, GlobPtr<DstType> dst, const int src_rows, const int src_cols, const int dst_cols)
     62     {
     63         typedef typename PtrTraits<SrcPtr>::value_type src_type;
     64         typedef typename VecTraits<src_type>::elem_type src_elem_type;
     65         typedef typename LargerType<float, src_elem_type>::type work_elem_type;
     66         typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;
     67 
     68         __shared__ work_type smem[256 + 4];
     69 
     70         const int x = blockIdx.x * blockDim.x + threadIdx.x;
     71         const int y = blockIdx.y;
     72 
     73         const int src_y = 2 * y;
     74 
     75         if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2)
     76         {
     77             {
     78                 work_type sum;
     79 
     80                 sum =       0.0625f * src(src_y - 2, x);
     81                 sum = sum + 0.25f   * src(src_y - 1, x);
     82                 sum = sum + 0.375f  * src(src_y    , x);
     83                 sum = sum + 0.25f   * src(src_y + 1, x);
     84                 sum = sum + 0.0625f * src(src_y + 2, x);
     85 
     86                 smem[2 + threadIdx.x] = sum;
     87             }
     88 
     89             if (threadIdx.x < 2)
     90             {
     91                 const int left_x = x - 2;
     92 
     93                 work_type sum;
     94 
     95                 sum =       0.0625f * src(src_y - 2, left_x);
     96                 sum = sum + 0.25f   * src(src_y - 1, left_x);
     97                 sum = sum + 0.375f  * src(src_y    , left_x);
     98                 sum = sum + 0.25f   * src(src_y + 1, left_x);
     99                 sum = sum + 0.0625f * src(src_y + 2, left_x);
    100 
    101                 smem[threadIdx.x] = sum;
    102             }
    103 
    104             if (threadIdx.x > 253)
    105             {
    106                 const int right_x = x + 2;
    107 
    108                 work_type sum;
    109 
    110                 sum =       0.0625f * src(src_y - 2, right_x);
    111                 sum = sum + 0.25f   * src(src_y - 1, right_x);
    112                 sum = sum + 0.375f  * src(src_y    , right_x);
    113                 sum = sum + 0.25f   * src(src_y + 1, right_x);
    114                 sum = sum + 0.0625f * src(src_y + 2, right_x);
    115 
    116                 smem[4 + threadIdx.x] = sum;
    117             }
    118         }
    119         else
    120         {
    121             {
    122                 work_type sum;
    123 
    124                 sum =       0.0625f * src(Brd::idx_low(src_y - 2, src_rows) , Brd::idx_high(x, src_cols));
    125                 sum = sum + 0.25f   * src(Brd::idx_low(src_y - 1, src_rows) , Brd::idx_high(x, src_cols));
    126                 sum = sum + 0.375f  * src(src_y                             , Brd::idx_high(x, src_cols));
    127                 sum = sum + 0.25f   * src(Brd::idx_high(src_y + 1, src_rows), Brd::idx_high(x, src_cols));
    128                 sum = sum + 0.0625f * src(Brd::idx_high(src_y + 2, src_rows), Brd::idx_high(x, src_cols));
    129 
    130                 smem[2 + threadIdx.x] = sum;
    131             }
    132 
    133             if (threadIdx.x < 2)
    134             {
    135                 const int left_x = x - 2;
    136 
    137                 work_type sum;
    138 
    139                 sum =       0.0625f * src(Brd::idx_low(src_y - 2, src_rows) , Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
    140                 sum = sum + 0.25f   * src(Brd::idx_low(src_y - 1, src_rows) , Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
    141                 sum = sum + 0.375f  * src(src_y                             , Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
    142                 sum = sum + 0.25f   * src(Brd::idx_high(src_y + 1, src_rows), Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
    143                 sum = sum + 0.0625f * src(Brd::idx_high(src_y + 2, src_rows), Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols));
    144 
    145                 smem[threadIdx.x] = sum;
    146             }
    147 
    148             if (threadIdx.x > 253)
    149             {
    150                 const int right_x = x + 2;
    151 
    152                 work_type sum;
    153 
    154                 sum =       0.0625f * src(Brd::idx_low(src_y - 2, src_rows) , Brd::idx_high(right_x, src_cols));
    155                 sum = sum + 0.25f   * src(Brd::idx_low(src_y - 1, src_rows) , Brd::idx_high(right_x, src_cols));
    156                 sum = sum + 0.375f  * src(src_y                             , Brd::idx_high(right_x, src_cols));
    157                 sum = sum + 0.25f   * src(Brd::idx_high(src_y + 1, src_rows), Brd::idx_high(right_x, src_cols));
    158                 sum = sum + 0.0625f * src(Brd::idx_high(src_y + 2, src_rows), Brd::idx_high(right_x, src_cols));
    159 
    160                 smem[4 + threadIdx.x] = sum;
    161             }
    162         }
    163 
    164         __syncthreads();
    165 
    166         if (threadIdx.x < 128)
    167         {
    168             const int tid2 = threadIdx.x * 2;
    169 
    170             work_type sum;
    171 
    172             sum =       0.0625f * smem[2 + tid2 - 2];
    173             sum = sum + 0.25f   * smem[2 + tid2 - 1];
    174             sum = sum + 0.375f  * smem[2 + tid2    ];
    175             sum = sum + 0.25f   * smem[2 + tid2 + 1];
    176             sum = sum + 0.0625f * smem[2 + tid2 + 2];
    177 
    178             const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;
    179 
    180             if (dst_x < dst_cols)
    181                 dst(y, dst_x) = saturate_cast<DstType>(sum);
    182         }
    183     }
    184 
    /**
     * Host-side launcher for the pyrDown kernel.
     *
     * Launches one block row per destination row and enough 256-thread blocks
     * along x to cover src_cols source columns. The block width is fixed at 256
     * because the kernel's shared buffer and lane constants are written for it.
     *
     * The launch is checked with cudaGetLastError(). When stream is the default
     * stream (0) the call also waits for the device to finish; for any other
     * stream it returns right after the launch check.
     */
    template <class Brd, class SrcPtr, typename DstType>
    __host__ void pyrDown(const SrcPtr& src, const GlobPtr<DstType>& dst, int src_rows, int src_cols, int dst_rows, int dst_cols, cudaStream_t stream)
    {
        const dim3 threads(256);
        const dim3 blocks(divUp(src_cols, threads.x), dst_rows);

        pyrDown<Brd><<<blocks, threads, 0, stream>>>(src, dst, src_rows, src_cols, dst_cols);
        CV_CUDEV_SAFE_CALL( cudaGetLastError() );

        // Work queued on a user stream is left asynchronous.
        if (stream != 0)
            return;

        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    }
    197 }
    198 
    199 }}
    200 
    201 #endif
    202