1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include <cfloat> 46 #include "opencv2/core/cuda/common.hpp" 47 #include "opencv2/core/cuda/border_interpolate.hpp" 48 #include "opencv2/core/cuda/vec_traits.hpp" 49 #include "opencv2/core/cuda/vec_math.hpp" 50 #include "opencv2/core/cuda/saturate_cast.hpp" 51 #include "opencv2/core/cuda/filters.hpp" 52 53 namespace cv { namespace cuda { namespace device 54 { 55 // kernels 56 57 template <typename T> __global__ void resize_nearest(const PtrStep<T> src, PtrStepSz<T> dst, const float fy, const float fx) 58 { 59 const int dst_x = blockDim.x * blockIdx.x + threadIdx.x; 60 const int dst_y = blockDim.y * blockIdx.y + threadIdx.y; 61 62 if (dst_x < dst.cols && dst_y < dst.rows) 63 { 64 const float src_x = dst_x * fx; 65 const float src_y = dst_y * fy; 66 67 dst(dst_y, dst_x) = src(__float2int_rz(src_y), __float2int_rz(src_x)); 68 } 69 } 70 71 template <typename T> __global__ void resize_linear(const PtrStepSz<T> src, PtrStepSz<T> dst, const float fy, const float fx) 72 { 73 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type; 74 75 const int dst_x = blockDim.x * blockIdx.x + threadIdx.x; 76 const int dst_y = blockDim.y * blockIdx.y + threadIdx.y; 77 78 if (dst_x < dst.cols && dst_y < dst.rows) 79 { 80 const float src_x = dst_x * fx; 81 const float src_y = dst_y * fy; 82 83 work_type out = VecTraits<work_type>::all(0); 84 85 const int x1 = __float2int_rd(src_x); 86 const int y1 = __float2int_rd(src_y); 87 const int x2 = x1 + 1; 88 const int y2 = y1 + 1; 89 const int x2_read = ::min(x2, src.cols - 1); 90 const int y2_read = ::min(y2, src.rows - 1); 91 92 T src_reg = src(y1, x1); 93 out = out + src_reg * ((x2 - src_x) * (y2 - src_y)); 94 95 src_reg = src(y1, x2_read); 96 out = out + src_reg * ((src_x - x1) * (y2 - src_y)); 97 98 src_reg = src(y2_read, x1); 99 out = out + src_reg * ((x2 - src_x) * (src_y - y1)); 100 101 src_reg = src(y2_read, x2_read); 102 out = out + src_reg * ((src_x - x1) * (src_y - y1)); 103 104 dst(dst_y, dst_x) = saturate_cast<T>(out); 105 } 106 } 107 108 template <class Ptr2D, typename T> __global__ void resize(const Ptr2D src, PtrStepSz<T> dst, const float fy, const float fx) 109 { 110 const int dst_x = blockDim.x * blockIdx.x + threadIdx.x; 111 const int dst_y = blockDim.y * blockIdx.y + threadIdx.y; 112 113 if (dst_x < dst.cols && dst_y < dst.rows) 114 { 115 const float src_x = dst_x * fx; 116 const float src_y = dst_y * fy; 117 118 dst(dst_y, dst_x) = src(src_y, src_x); 119 } 120 } 121 122 template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, PtrStepSz<T> dst) 123 { 124 const int x = blockDim.x * blockIdx.x + threadIdx.x; 125 const int y = blockDim.y * blockIdx.y + threadIdx.y; 126 127 if (x < dst.cols && y < dst.rows) 128 { 129 dst(y, x) = src(y, x); 130 } 131 } 132 133 // textures 134 135 template <typename T> struct TextureAccessor; 136 137 #define OPENCV_CUDA_IMPLEMENT_RESIZE_TEX(type) \ 138 texture<type, cudaTextureType2D, cudaReadModeElementType> tex_resize_##type (0, cudaFilterModePoint, cudaAddressModeClamp); \ 139 template <> struct TextureAccessor<type> \ 140 { \ 141 typedef type elem_type; \ 142 typedef int index_type; \ 143 int xoff; \ 144 int yoff; \ 145 __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \ 146 { \ 147 return tex2D(tex_resize_##type, x + xoff, y + yoff); \ 148 } \ 149 __host__ static void bind(const PtrStepSz<type>& mat) \ 150 { \ 151 bindTexture(&tex_resize_##type, mat); \ 152 } \ 153 }; 154 155 OPENCV_CUDA_IMPLEMENT_RESIZE_TEX(uchar) 156 OPENCV_CUDA_IMPLEMENT_RESIZE_TEX(uchar4) 157 158 OPENCV_CUDA_IMPLEMENT_RESIZE_TEX(ushort) 159 OPENCV_CUDA_IMPLEMENT_RESIZE_TEX(ushort4) 160 161 OPENCV_CUDA_IMPLEMENT_RESIZE_TEX(short) 162 OPENCV_CUDA_IMPLEMENT_RESIZE_TEX(short4) 163 164 OPENCV_CUDA_IMPLEMENT_RESIZE_TEX(float) 165 OPENCV_CUDA_IMPLEMENT_RESIZE_TEX(float4) 166 167 #undef OPENCV_CUDA_IMPLEMENT_RESIZE_TEX 168 169 template <typename T> 170 TextureAccessor<T> texAccessor(const PtrStepSz<T>& mat, int yoff, int xoff) 171 { 172 TextureAccessor<T>::bind(mat); 173 174 TextureAccessor<T> t; 175 t.xoff = xoff; 176 t.yoff = yoff; 177 178 return t; 179 } 180 181 // callers for nearest interpolation 182 183 template <typename T> 184 void call_resize_nearest_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream) 185 { 186 const dim3 block(32, 8); 187 const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); 188 189 resize_nearest<<<grid, block, 0, stream>>>(src, dst, fy, fx); 190 cudaSafeCall( cudaGetLastError() ); 191 192 if (stream == 0) 193 cudaSafeCall( cudaDeviceSynchronize() ); 194 } 195 196 template <typename T> 197 void call_resize_nearest_tex(const PtrStepSz<T>& /*src*/, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx) 198 { 199 const dim3 block(32, 8); 200 const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); 201 202 resize<<<grid, block>>>(texAccessor(srcWhole, yoff, xoff), dst, fy, fx); 203 cudaSafeCall( cudaGetLastError() ); 204 205 cudaSafeCall( cudaDeviceSynchronize() ); 206 } 207 208 // callers for linear interpolation 209 210 template <typename T> 211 void call_resize_linear_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream) 212 { 213 const dim3 block(32, 8); 214 const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); 215 216 resize_linear<<<grid, block, 0, stream>>>(src, dst, fy, fx); 217 cudaSafeCall( cudaGetLastError() ); 218 219 if (stream == 0) 220 cudaSafeCall( cudaDeviceSynchronize() ); 221 } 222 223 template <typename T> 224 void call_resize_linear_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx) 225 { 226 const dim3 block(32, 8); 227 const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); 228 229 if (srcWhole.data == src.data) 230 { 231 TextureAccessor<T> texSrc = texAccessor(src, 0, 0); 232 LinearFilter< TextureAccessor<T> > filteredSrc(texSrc); 233 234 resize<<<grid, block>>>(filteredSrc, dst, fy, fx); 235 } 236 else 237 { 238 TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff); 239 240 BrdReplicate<T> brd(src.rows, src.cols); 241 BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd); 242 LinearFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc); 243 244 resize<<<grid, block>>>(filteredSrc, dst, fy, fx); 245 } 246 247 cudaSafeCall( cudaGetLastError() ); 248 249 cudaSafeCall( cudaDeviceSynchronize() ); 250 } 251 252 // callers for cubic interpolation 253 254 template <typename T> 255 void call_resize_cubic_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream) 256 { 257 const dim3 block(32, 8); 258 const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); 259 260 BrdReplicate<T> brd(src.rows, src.cols); 261 BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd); 262 CubicFilter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc); 263 264 resize<<<grid, block, 0, stream>>>(filteredSrc, dst, fy, fx); 265 cudaSafeCall( cudaGetLastError() ); 266 267 if (stream == 0) 268 cudaSafeCall( cudaDeviceSynchronize() ); 269 } 270 271 template <typename T> 272 void call_resize_cubic_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx) 273 { 274 const dim3 block(32, 8); 275 const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); 276 277 if (srcWhole.data == src.data) 278 { 279 TextureAccessor<T> texSrc = texAccessor(src, 0, 0); 280 CubicFilter< TextureAccessor<T> > filteredSrc(texSrc); 281 282 resize<<<grid, block>>>(filteredSrc, dst, fy, fx); 283 } 284 else 285 { 286 TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff); 287 288 BrdReplicate<T> brd(src.rows, src.cols); 289 BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd); 290 CubicFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc); 291 292 resize<<<grid, block>>>(filteredSrc, dst, fy, fx); 293 } 294 295 cudaSafeCall( cudaGetLastError() ); 296 297 cudaSafeCall( cudaDeviceSynchronize() ); 298 } 299 300 // ResizeNearestDispatcher 301 302 template <typename T> struct ResizeNearestDispatcher 303 { 304 static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& /*srcWhole*/, int /*yoff*/, int /*xoff*/, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream) 305 { 306 call_resize_nearest_glob(src, dst, fy, fx, stream); 307 } 308 }; 309 310 template <typename T> struct SelectImplForNearest 311 { 312 static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream) 313 { 314 if (stream) 315 call_resize_nearest_glob(src, dst, fy, fx, stream); 316 else 317 { 318 if (fx > 1 || fy > 1) 319 call_resize_nearest_glob(src, dst, fy, fx, 0); 320 else 321 call_resize_nearest_tex(src, srcWhole, yoff, xoff, dst, fy, fx); 322 } 323 } 324 }; 325 326 template <> struct ResizeNearestDispatcher<uchar> : SelectImplForNearest<uchar> {}; 327 template <> struct ResizeNearestDispatcher<uchar4> : SelectImplForNearest<uchar4> {}; 328 329 template <> struct ResizeNearestDispatcher<ushort> : SelectImplForNearest<ushort> {}; 330 template <> struct ResizeNearestDispatcher<ushort4> : SelectImplForNearest<ushort4> {}; 331 332 template <> struct ResizeNearestDispatcher<short> : SelectImplForNearest<short> {}; 333 template <> struct ResizeNearestDispatcher<short4> : SelectImplForNearest<short4> {}; 334 335 template <> struct ResizeNearestDispatcher<float> : SelectImplForNearest<float> {}; 336 template <> struct ResizeNearestDispatcher<float4> : SelectImplForNearest<float4> {}; 337 338 // ResizeLinearDispatcher 339 340 template <typename T> struct ResizeLinearDispatcher 341 { 342 static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& /*srcWhole*/, int /*yoff*/, int /*xoff*/, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream) 343 { 344 call_resize_linear_glob(src, dst, fy, fx, stream); 345 } 346 }; 347 348 template <typename T> struct SelectImplForLinear 349 { 350 static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream) 351 { 352 if (stream) 353 call_resize_linear_glob(src, dst, fy, fx, stream); 354 else 355 { 356 if (fx > 1 || fy > 1) 357 call_resize_linear_glob(src, dst, fy, fx, 0); 358 else 359 call_resize_linear_tex(src, srcWhole, yoff, xoff, dst, fy, fx); 360 } 361 } 362 }; 363 364 template <> struct ResizeLinearDispatcher<uchar> : SelectImplForLinear<uchar> {}; 365 template <> struct ResizeLinearDispatcher<uchar4> : SelectImplForLinear<uchar4> {}; 366 367 template <> struct ResizeLinearDispatcher<ushort> : SelectImplForLinear<ushort> {}; 368 template <> struct ResizeLinearDispatcher<ushort4> : SelectImplForLinear<ushort4> {}; 369 370 template <> struct ResizeLinearDispatcher<short> : SelectImplForLinear<short> {}; 371 template <> struct ResizeLinearDispatcher<short4> : SelectImplForLinear<short4> {}; 372 373 template <> struct ResizeLinearDispatcher<float> : SelectImplForLinear<float> {}; 374 template <> struct ResizeLinearDispatcher<float4> : SelectImplForLinear<float4> {}; 375 376 // ResizeCubicDispatcher 377 378 template <typename T> struct ResizeCubicDispatcher 379 { 380 static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& /*srcWhole*/, int /*yoff*/, int /*xoff*/, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream) 381 { 382 call_resize_cubic_glob(src, dst, fy, fx, stream); 383 } 384 }; 385 386 template <typename T> struct SelectImplForCubic 387 { 388 static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream) 389 { 390 if (stream) 391 call_resize_cubic_glob(src, dst, fy, fx, stream); 392 else 393 call_resize_cubic_tex(src, srcWhole, yoff, xoff, dst, fy, fx); 394 } 395 }; 396 397 template <> struct ResizeCubicDispatcher<uchar> : SelectImplForCubic<uchar> {}; 398 template <> struct ResizeCubicDispatcher<uchar4> : SelectImplForCubic<uchar4> {}; 399 400 template <> struct ResizeCubicDispatcher<ushort> : SelectImplForCubic<ushort> {}; 401 template <> struct ResizeCubicDispatcher<ushort4> : SelectImplForCubic<ushort4> {}; 402 403 template <> struct ResizeCubicDispatcher<short> : SelectImplForCubic<short> {}; 404 template <> struct ResizeCubicDispatcher<short4> : SelectImplForCubic<short4> {}; 405 406 template <> struct ResizeCubicDispatcher<float> : SelectImplForCubic<float> {}; 407 template <> struct ResizeCubicDispatcher<float4> : SelectImplForCubic<float4> {}; 408 409 // ResizeAreaDispatcher 410 411 template <typename T> struct ResizeAreaDispatcher 412 { 413 static void call(const PtrStepSz<T>& src, const PtrStepSz<T>&, int, int, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream) 414 { 415 const int iscale_x = (int) round(fx); 416 const int iscale_y = (int) round(fy); 417 418 const dim3 block(32, 8); 419 const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); 420 421 if (std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN) 422 { 423 BrdConstant<T> brd(src.rows, src.cols); 424 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd); 425 IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy); 426 427 resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst); 428 } 429 else 430 { 431 BrdConstant<T> brd(src.rows, src.cols); 432 BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd); 433 AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy); 434 435 resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst); 436 } 437 438 cudaSafeCall( cudaGetLastError() ); 439 440 if (stream == 0) 441 cudaSafeCall( cudaDeviceSynchronize() ); 442 } 443 }; 444 445 // resize 446 447 template <typename T> void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream) 448 { 449 typedef void (*func_t)(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream); 450 static const func_t funcs[4] = 451 { 452 ResizeNearestDispatcher<T>::call, 453 ResizeLinearDispatcher<T>::call, 454 ResizeCubicDispatcher<T>::call, 455 ResizeAreaDispatcher<T>::call 456 }; 457 458 // change to linear if area interpolation upscaling 459 if (interpolation == 3 && (fx <= 1.f || fy <= 1.f)) 460 interpolation = 1; 461 462 funcs[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), yoff, xoff, static_cast< PtrStepSz<T> >(dst), fy, fx, stream); 463 } 464 465 template void resize<uchar >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 466 template void resize<uchar3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 467 template void resize<uchar4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 468 469 template void resize<ushort >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 470 template void resize<ushort3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 471 template void resize<ushort4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 472 473 template void resize<short >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 474 template void resize<short3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 475 template void resize<short4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 476 477 template void resize<float >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 478 template void resize<float3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 479 template void resize<float4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream); 480 }}} 481 482 #endif /* CUDA_DISABLER */ 483