1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/border_interpolate.hpp" 47 #include "opencv2/core/cuda/vec_traits.hpp" 48 #include "opencv2/core/cuda/vec_math.hpp" 49 #include "opencv2/core/cuda/saturate_cast.hpp" 50 51 namespace cv { namespace cuda { namespace device 52 { 53 namespace imgproc 54 { 55 template <typename T, typename B> __global__ void pyrDown(const PtrStepSz<T> src, PtrStep<T> dst, const B b, int dst_cols) 56 { 57 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_t; 58 59 __shared__ work_t smem[256 + 4]; 60 61 const int x = blockIdx.x * blockDim.x + threadIdx.x; 62 const int y = blockIdx.y; 63 64 const int src_y = 2 * y; 65 66 if (src_y >= 2 && src_y < src.rows - 2 && x >= 2 && x < src.cols - 2) 67 { 68 { 69 work_t sum; 70 71 sum = 0.0625f * src(src_y - 2, x); 72 sum = sum + 0.25f * src(src_y - 1, x); 73 sum = sum + 0.375f * src(src_y , x); 74 sum = sum + 0.25f * src(src_y + 1, x); 75 sum = sum + 0.0625f * src(src_y + 2, x); 76 77 smem[2 + threadIdx.x] = sum; 78 } 79 80 if (threadIdx.x < 2) 81 { 82 const int left_x = x - 2; 83 84 work_t sum; 85 86 sum = 0.0625f * src(src_y - 2, left_x); 87 sum = sum + 0.25f * src(src_y - 1, left_x); 88 sum = sum + 0.375f * src(src_y , left_x); 89 sum = sum + 0.25f * src(src_y + 1, left_x); 90 sum = sum + 0.0625f * src(src_y + 2, left_x); 91 92 smem[threadIdx.x] = sum; 93 } 94 95 if (threadIdx.x > 253) 96 { 97 const int right_x = x + 2; 98 99 work_t sum; 100 101 sum = 0.0625f * src(src_y - 2, right_x); 102 sum = sum + 0.25f * src(src_y - 1, right_x); 103 sum = sum + 0.375f * src(src_y , right_x); 104 sum = sum + 0.25f * src(src_y + 1, right_x); 105 sum = sum + 0.0625f * src(src_y + 2, right_x); 106 107 smem[4 + threadIdx.x] = sum; 108 } 109 } 110 else 111 { 112 { 113 work_t sum; 114 115 sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(x)); 116 sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(x)); 117 sum = sum + 0.375f * src(src_y , b.idx_col_high(x)); 118 sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(x)); 119 sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(x)); 120 121 smem[2 + threadIdx.x] = sum; 122 } 123 124 if (threadIdx.x < 2) 125 { 126 const int left_x = x - 2; 127 128 work_t sum; 129 130 sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col(left_x)); 131 sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col(left_x)); 132 sum = sum + 0.375f * src(src_y , b.idx_col(left_x)); 133 sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col(left_x)); 134 sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col(left_x)); 135 136 smem[threadIdx.x] = sum; 137 } 138 139 if (threadIdx.x > 253) 140 { 141 const int right_x = x + 2; 142 143 work_t sum; 144 145 sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(right_x)); 146 sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(right_x)); 147 sum = sum + 0.375f * src(src_y , b.idx_col_high(right_x)); 148 sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(right_x)); 149 sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(right_x)); 150 151 smem[4 + threadIdx.x] = sum; 152 } 153 } 154 155 __syncthreads(); 156 157 if (threadIdx.x < 128) 158 { 159 const int tid2 = threadIdx.x * 2; 160 161 work_t sum; 162 163 sum = 0.0625f * smem[2 + tid2 - 2]; 164 sum = sum + 0.25f * smem[2 + tid2 - 1]; 165 sum = sum + 0.375f * smem[2 + tid2 ]; 166 sum = sum + 0.25f * smem[2 + tid2 + 1]; 167 sum = sum + 0.0625f * smem[2 + tid2 + 2]; 168 169 const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2; 170 171 if (dst_x < dst_cols) 172 dst.ptr(y)[dst_x] = saturate_cast<T>(sum); 173 } 174 } 175 176 template <typename T, template <typename> class B> void pyrDown_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream) 177 { 178 const dim3 block(256); 179 const dim3 grid(divUp(src.cols, block.x), dst.rows); 180 181 B<T> b(src.rows, src.cols); 182 183 pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols); 184 cudaSafeCall( cudaGetLastError() ); 185 186 if (stream == 0) 187 cudaSafeCall( cudaDeviceSynchronize() ); 188 } 189 190 template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) 191 { 192 pyrDown_caller<T, BrdReflect101>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream); 193 } 194 195 template void pyrDown_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 196 //template void pyrDown_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 197 template void pyrDown_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 198 template void pyrDown_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 199 200 //template void pyrDown_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 201 //template void pyrDown_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 202 //template void pyrDown_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 203 //template void pyrDown_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 204 205 template void pyrDown_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 206 //template void pyrDown_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 207 template void pyrDown_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 208 template void pyrDown_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 209 210 template void pyrDown_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 211 //template void pyrDown_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 212 template void pyrDown_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 213 template void pyrDown_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 214 215 //template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 216 //template void pyrDown_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 217 //template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 218 //template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 219 220 template void pyrDown_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 221 //template void pyrDown_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 222 template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 223 template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 224 } // namespace imgproc 225 }}} // namespace cv { namespace cuda { namespace cudev 226 227 228 #endif /* CUDA_DISABLER */ 229