Home | History | Annotate | Download | only in cuda
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #if !defined CUDA_DISABLER
     44 
     45 #include "opencv2/core/cuda/common.hpp"
     46 #include "opencv2/core/cuda/border_interpolate.hpp"
     47 #include "opencv2/core/cuda/vec_traits.hpp"
     48 #include "opencv2/core/cuda/vec_math.hpp"
     49 #include "opencv2/core/cuda/saturate_cast.hpp"
     50 #include "opencv2/core/cuda/filters.hpp"
     51 
     52 namespace cv { namespace cuda { namespace device
     53 {
     54     namespace imgproc
     55     {
     56         __constant__ float c_warpMat[3 * 3];
     57 
     58         struct AffineTransform
     59         {
     60             static __device__ __forceinline__ float2 calcCoord(int x, int y)
     61             {
     62                 const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
     63                 const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
     64 
     65                 return make_float2(xcoo, ycoo);
     66             }
     67         };
     68 
     69         struct PerspectiveTransform
     70         {
     71             static __device__ __forceinline__ float2 calcCoord(int x, int y)
     72             {
     73                 const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
     74 
     75                 const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
     76                 const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
     77 
     78                 return make_float2(xcoo, ycoo);
     79             }
     80         };
     81 
     82         ///////////////////////////////////////////////////////////////////
     83         // Build Maps
     84 
     85         template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
     86         {
     87             const int x = blockDim.x * blockIdx.x + threadIdx.x;
     88             const int y = blockDim.y * blockIdx.y + threadIdx.y;
     89 
     90             if (x < xmap.cols && y < xmap.rows)
     91             {
     92                 const float2 coord = Transform::calcCoord(x, y);
     93 
     94                 xmap(y, x) = coord.x;
     95                 ymap(y, x) = coord.y;
     96             }
     97         }
     98 
     99         template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
    100         {
    101             dim3 block(32, 8);
    102             dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
    103 
    104             buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
    105             cudaSafeCall( cudaGetLastError() );
    106 
    107             if (stream == 0)
    108                 cudaSafeCall( cudaDeviceSynchronize() );
    109         }
    110 
    111         void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
    112         {
    113             cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
    114 
    115             buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
    116         }
    117 
    118         void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
    119         {
    120             cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
    121 
    122             buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
    123         }
    124 
    125         ///////////////////////////////////////////////////////////////////
    126         // Warp
    127 
    128         template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
    129         {
    130             const int x = blockDim.x * blockIdx.x + threadIdx.x;
    131             const int y = blockDim.y * blockIdx.y + threadIdx.y;
    132 
    133             if (x < dst.cols && y < dst.rows)
    134             {
    135                 const float2 coord = Transform::calcCoord(x, y);
    136 
    137                 dst.ptr(y)[x] = saturate_cast<T>(src(coord.y, coord.x));
    138             }
    139         }
    140 
    141         template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
    142         {
    143             static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
    144             {
    145                 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
    146 
    147                 dim3 block(32, 8);
    148                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
    149 
    150                 B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
    151                 BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
    152                 Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
    153 
    154                 warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
    155                 cudaSafeCall( cudaGetLastError() );
    156             }
    157         };
    158 
    159         template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
    160         {
    161             static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, bool)
    162             {
    163                 (void)xoff;
    164                 (void)yoff;
    165                 (void)srcWhole;
    166 
    167                 typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
    168 
    169                 dim3 block(32, 8);
    170                 dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
    171 
    172                 B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
    173                 BorderReader< PtrStep<T>, B<work_type> > brdSrc(src, brd);
    174                 Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);
    175 
    176                 warp<Transform><<<grid, block>>>(filter_src, dst);
    177                 cudaSafeCall( cudaGetLastError() );
    178 
    179                 cudaSafeCall( cudaDeviceSynchronize() );
    180             }
    181         };
    182 
    183         #define OPENCV_CUDA_IMPLEMENT_WARP_TEX(type) \
    184             texture< type , cudaTextureType2D > tex_warp_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
    185             struct tex_warp_ ## type ## _reader \
    186             { \
    187                 typedef type elem_type; \
    188                 typedef int index_type; \
    189                 int xoff, yoff; \
    190                 tex_warp_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
    191                 __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
    192                 { \
    193                     return tex2D(tex_warp_ ## type , x + xoff, y + yoff); \
    194                 } \
    195             }; \
    196             template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
    197             { \
    198                 static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, bool cc20) \
    199                 { \
    200                     typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
    201                     dim3 block(32, cc20 ? 8 : 4); \
    202                     dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
    203                     bindTexture(&tex_warp_ ## type , srcWhole); \
    204                     tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
    205                     B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
    206                     BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
    207                     Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
    208                     warp<Transform><<<grid, block>>>(filter_src, dst); \
    209                     cudaSafeCall( cudaGetLastError() ); \
    210                     cudaSafeCall( cudaDeviceSynchronize() ); \
    211                 } \
    212             }; \
    213             template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
    214             { \
    215                 static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, bool) \
    216                 { \
    217                     dim3 block(32, 8); \
    218                     dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
    219                     bindTexture(&tex_warp_ ## type , srcWhole); \
    220                     tex_warp_ ## type ##_reader texSrc(xoff, yoff); \
    221                     if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
    222                     { \
    223                         Filter< tex_warp_ ## type ##_reader > filter_src(texSrc); \
    224                         warp<Transform><<<grid, block>>>(filter_src, dst); \
    225                     } \
    226                     else \
    227                     { \
    228                         BrdReplicate<type> brd(src.rows, src.cols); \
    229                         BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
    230                         Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
    231                         warp<Transform><<<grid, block>>>(filter_src, dst); \
    232                     } \
    233                     cudaSafeCall( cudaGetLastError() ); \
    234                     cudaSafeCall( cudaDeviceSynchronize() ); \
    235                 } \
    236             };
    237 
    238         OPENCV_CUDA_IMPLEMENT_WARP_TEX(uchar)
    239         //OPENCV_CUDA_IMPLEMENT_WARP_TEX(uchar2)
    240         OPENCV_CUDA_IMPLEMENT_WARP_TEX(uchar4)
    241 
    242         //OPENCV_CUDA_IMPLEMENT_WARP_TEX(schar)
    243         //OPENCV_CUDA_IMPLEMENT_WARP_TEX(char2)
    244         //OPENCV_CUDA_IMPLEMENT_WARP_TEX(char4)
    245 
    246         OPENCV_CUDA_IMPLEMENT_WARP_TEX(ushort)
    247         //OPENCV_CUDA_IMPLEMENT_WARP_TEX(ushort2)
    248         OPENCV_CUDA_IMPLEMENT_WARP_TEX(ushort4)
    249 
    250         OPENCV_CUDA_IMPLEMENT_WARP_TEX(short)
    251         //OPENCV_CUDA_IMPLEMENT_WARP_TEX(short2)
    252         OPENCV_CUDA_IMPLEMENT_WARP_TEX(short4)
    253 
    254         //OPENCV_CUDA_IMPLEMENT_WARP_TEX(int)
    255         //OPENCV_CUDA_IMPLEMENT_WARP_TEX(int2)
    256         //OPENCV_CUDA_IMPLEMENT_WARP_TEX(int4)
    257 
    258         OPENCV_CUDA_IMPLEMENT_WARP_TEX(float)
    259         //OPENCV_CUDA_IMPLEMENT_WARP_TEX(float2)
    260         OPENCV_CUDA_IMPLEMENT_WARP_TEX(float4)
    261 
    262         #undef OPENCV_CUDA_IMPLEMENT_WARP_TEX
    263 
    264         template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
    265         {
    266             static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
    267             {
    268                 if (stream == 0)
    269                     WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc20);
    270                 else
    271                     WarpDispatcherStream<Transform, Filter, B, T>::call(src, dst, borderValue, stream, cc20);
    272             }
    273         };
    274 
    275         template <class Transform, typename T>
    276         void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
    277                          int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
    278         {
    279             typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
    280 
    281             static const func_t funcs[3][5] =
    282             {
    283                 {
    284                     WarpDispatcher<Transform, PointFilter, BrdConstant, T>::call,
    285                     WarpDispatcher<Transform, PointFilter, BrdReplicate, T>::call,
    286                     WarpDispatcher<Transform, PointFilter, BrdReflect, T>::call,
    287                     WarpDispatcher<Transform, PointFilter, BrdWrap, T>::call,
    288                     WarpDispatcher<Transform, PointFilter, BrdReflect101, T>::call
    289                 },
    290                 {
    291                     WarpDispatcher<Transform, LinearFilter, BrdConstant, T>::call,
    292                     WarpDispatcher<Transform, LinearFilter, BrdReplicate, T>::call,
    293                     WarpDispatcher<Transform, LinearFilter, BrdReflect, T>::call,
    294                     WarpDispatcher<Transform, LinearFilter, BrdWrap, T>::call,
    295                     WarpDispatcher<Transform, LinearFilter, BrdReflect101, T>::call
    296                 },
    297                 {
    298                     WarpDispatcher<Transform, CubicFilter, BrdConstant, T>::call,
    299                     WarpDispatcher<Transform, CubicFilter, BrdReplicate, T>::call,
    300                     WarpDispatcher<Transform, CubicFilter, BrdReflect, T>::call,
    301                     WarpDispatcher<Transform, CubicFilter, BrdWrap, T>::call,
    302                     WarpDispatcher<Transform, CubicFilter, BrdReflect101, T>::call
    303                 }
    304             };
    305 
    306             funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
    307                 static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc20);
    308         }
    309 
    310         template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
    311                                                   int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
    312         {
    313             cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
    314 
    315             warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
    316         }
    317 
    318         template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    319         //template void warpAffine_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    320         template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    321         template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    322 
    323         //template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    324         //template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    325         //template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    326         //template void warpAffine_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    327 
    328         template void warpAffine_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    329         //template void warpAffine_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    330         template void warpAffine_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    331         template void warpAffine_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    332 
    333         template void warpAffine_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    334         //template void warpAffine_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    335         template void warpAffine_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    336         template void warpAffine_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    337 
    338         //template void warpAffine_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    339         //template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    340         //template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    341         //template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    342 
    343         template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    344         //template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    345         template void warpAffine_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    346         template void warpAffine_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    347 
    348         template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
    349                                                   int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
    350         {
    351             cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
    352 
    353             warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
    354         }
    355 
    356         template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    357         //template void warpPerspective_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    358         template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    359         template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    360 
    361         //template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    362         //template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    363         //template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    364         //template void warpPerspective_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    365 
    366         template void warpPerspective_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    367         //template void warpPerspective_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    368         template void warpPerspective_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    369         template void warpPerspective_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    370 
    371         template void warpPerspective_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    372         //template void warpPerspective_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    373         template void warpPerspective_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    374         template void warpPerspective_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    375 
    376         //template void warpPerspective_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    377         //template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    378         //template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    379         //template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    380 
    381         template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    382         //template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    383         template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    384         template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    385     } // namespace imgproc
    386 }}} // namespace cv { namespace cuda { namespace cudev
    387 
    388 
    389 #endif /* CUDA_DISABLER */
    390