1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/border_interpolate.hpp" 47 #include "opencv2/core/cuda/vec_traits.hpp" 48 #include "opencv2/core/cuda/vec_math.hpp" 49 #include "opencv2/core/cuda/saturate_cast.hpp" 50 51 namespace cv { namespace cuda { namespace device 52 { 53 namespace imgproc 54 { 55 template <typename T> __global__ void pyrUp(const PtrStepSz<T> src, PtrStepSz<T> dst) 56 { 57 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t; 58 59 const int x = blockIdx.x * blockDim.x + threadIdx.x; 60 const int y = blockIdx.y * blockDim.y + threadIdx.y; 61 62 __shared__ sum_t s_srcPatch[10][10]; 63 __shared__ sum_t s_dstPatch[20][16]; 64 65 if (threadIdx.x < 10 && threadIdx.y < 10) 66 { 67 int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1; 68 int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1; 69 70 srcx = ::abs(srcx); 71 srcx = ::min(src.cols - 1, srcx); 72 73 srcy = ::abs(srcy); 74 srcy = ::min(src.rows - 1, srcy); 75 76 s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<sum_t>(src(srcy, srcx)); 77 } 78 79 __syncthreads(); 80 81 sum_t sum = VecTraits<sum_t>::all(0); 82 83 const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0); 84 const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0); 85 const bool eveny = ((threadIdx.y & 1) == 0); 86 const int tidx = threadIdx.x; 87 88 if (eveny) 89 { 90 sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)]; 91 sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)]; 92 sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx ) >> 1)]; 93 sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)]; 94 sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)]; 95 } 96 97 s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum; 98 99 if (threadIdx.y < 2) 100 { 101 sum = VecTraits<sum_t>::all(0); 102 103 if (eveny) 104 { 105 sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; 106 sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; 107 sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; 108 sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; 109 sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; 110 } 111 112 s_dstPatch[threadIdx.y][threadIdx.x] = sum; 113 } 114 115 if (threadIdx.y > 13) 116 { 117 sum = VecTraits<sum_t>::all(0); 118 119 if (eveny) 120 { 121 sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; 122 sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; 123 sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; 124 sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; 125 sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; 126 } 127 128 s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum; 129 } 130 131 __syncthreads(); 132 133 sum = VecTraits<sum_t>::all(0); 134 135 const int tidy = threadIdx.y; 136 137 sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x]; 138 sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][threadIdx.x]; 139 sum = sum + 0.375f * s_dstPatch[2 + tidy ][threadIdx.x]; 140 sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][threadIdx.x]; 141 sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x]; 142 143 if (x < dst.cols && y < dst.rows) 144 dst(y, x) = saturate_cast<T>(4.0f * sum); 145 } 146 147 template <typename T> void pyrUp_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream) 148 { 149 const dim3 block(16, 16); 150 const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); 151 152 pyrUp<<<grid, block, 0, stream>>>(src, dst); 153 cudaSafeCall( cudaGetLastError() ); 154 155 if (stream == 0) 156 cudaSafeCall( cudaDeviceSynchronize() ); 157 } 158 159 template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) 160 { 161 pyrUp_caller<T>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream); 162 } 163 164 template void pyrUp_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 165 //template void pyrUp_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 166 template void pyrUp_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 167 template void pyrUp_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 168 169 //template void pyrUp_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 170 //template void pyrUp_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 171 //template void pyrUp_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 172 //template void pyrUp_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 173 174 template void pyrUp_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 175 //template void pyrUp_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 176 template void pyrUp_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 177 template void pyrUp_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 178 179 template void pyrUp_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 180 //template void pyrUp_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 181 template void pyrUp_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 182 template void pyrUp_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 183 184 //template void pyrUp_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 185 //template void pyrUp_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 186 //template void pyrUp_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 187 //template void pyrUp_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 188 189 template void pyrUp_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 190 //template void pyrUp_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 191 template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 192 template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); 193 } // namespace imgproc 194 }}} // namespace cv { namespace cuda { namespace cudev 195 196 #endif /* CUDA_DISABLER */ 197