Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 
     45 using namespace cv;
     46 using namespace cv::cuda;
     47 
     48 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
     49 
     50 void cv::cuda::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
     51 void cv::cuda::buildWarpAffineMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
     52 
     53 void cv::cuda::warpPerspective(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
     54 void cv::cuda::buildWarpPerspectiveMaps(InputArray, bool, Size, OutputArray, OutputArray, Stream&) { throw_no_cuda(); }
     55 
     56 void cv::cuda::rotate(InputArray, OutputArray, Size, double, double, double, int, Stream&) { throw_no_cuda(); }
     57 
     58 #else // HAVE_CUDA
     59 
     60 namespace cv { namespace cuda { namespace device
     61 {
     62     namespace imgproc
     63     {
     64         void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);
     65 
     66         template <typename T>
     67         void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
     68                             int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
     69 
     70         void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);
     71 
     72         template <typename T>
     73         void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
     74                             int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
     75     }
     76 }}}
     77 
     78 void cv::cuda::buildWarpAffineMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
     79 {
     80     using namespace cv::cuda::device::imgproc;
     81 
     82     Mat M = _M.getMat();
     83 
     84     CV_Assert( M.rows == 2 && M.cols == 3 );
     85 
     86     _xmap.create(dsize, CV_32FC1);
     87     _ymap.create(dsize, CV_32FC1);
     88 
     89     GpuMat xmap = _xmap.getGpuMat();
     90     GpuMat ymap = _ymap.getGpuMat();
     91 
     92     float coeffs[2 * 3];
     93     Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);
     94 
     95     if (inverse)
     96         M.convertTo(coeffsMat, coeffsMat.type());
     97     else
     98     {
     99         cv::Mat iM;
    100         invertAffineTransform(M, iM);
    101         iM.convertTo(coeffsMat, coeffsMat.type());
    102     }
    103 
    104     buildWarpAffineMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
    105 }
    106 
    107 void cv::cuda::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize, OutputArray _xmap, OutputArray _ymap, Stream& stream)
    108 {
    109     using namespace cv::cuda::device::imgproc;
    110 
    111     Mat M = _M.getMat();
    112 
    113     CV_Assert( M.rows == 3 && M.cols == 3 );
    114 
    115     _xmap.create(dsize, CV_32FC1);
    116     _ymap.create(dsize, CV_32FC1);
    117 
    118     GpuMat xmap = _xmap.getGpuMat();
    119     GpuMat ymap = _ymap.getGpuMat();
    120 
    121     float coeffs[3 * 3];
    122     Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);
    123 
    124     if (inverse)
    125         M.convertTo(coeffsMat, coeffsMat.type());
    126     else
    127     {
    128         cv::Mat iM;
    129         invert(M, iM);
    130         iM.convertTo(coeffsMat, coeffsMat.type());
    131     }
    132 
    133     buildWarpPerspectiveMaps_gpu(coeffs, xmap, ymap, StreamAccessor::getStream(stream));
    134 }
    135 
    136 namespace
    137 {
    138     template <int DEPTH> struct NppWarpFunc
    139     {
    140         typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
    141 
    142         typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst,
    143                                     int dstStep, NppiRect dstRoi, const double coeffs[][3],
    144                                     int interpolation);
    145     };
    146 
    147     template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp
    148     {
    149         typedef typename NppWarpFunc<DEPTH>::npp_type npp_type;
    150 
    151         static void call(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int interpolation, cudaStream_t stream)
    152         {
    153             static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
    154 
    155             NppiSize srcsz;
    156             srcsz.height = src.rows;
    157             srcsz.width = src.cols;
    158 
    159             NppiRect srcroi;
    160             srcroi.x = 0;
    161             srcroi.y = 0;
    162             srcroi.height = src.rows;
    163             srcroi.width = src.cols;
    164 
    165             NppiRect dstroi;
    166             dstroi.x = 0;
    167             dstroi.y = 0;
    168             dstroi.height = dst.rows;
    169             dstroi.width = dst.cols;
    170 
    171             cv::cuda::NppStreamHandler h(stream);
    172 
    173             nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
    174                               dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi,
    175                               coeffs, npp_inter[interpolation]) );
    176 
    177             if (stream == 0)
    178                 cudaSafeCall( cudaDeviceSynchronize() );
    179         }
    180     };
    181 }
    182 
    183 void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
    184 {
    185     GpuMat src = _src.getGpuMat();
    186     Mat M = _M.getMat();
    187 
    188     CV_Assert( M.rows == 2 && M.cols == 3 );
    189 
    190     const int interpolation = flags & INTER_MAX;
    191 
    192     CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    193     CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
    194     CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
    195 
    196     _dst.create(dsize, src.type());
    197     GpuMat dst = _dst.getGpuMat();
    198 
    199     Size wholeSize;
    200     Point ofs;
    201     src.locateROI(wholeSize, ofs);
    202 
    203     static const bool useNppTab[6][4][3] =
    204     {
    205         {
    206             {false, false, true},
    207             {false, false, false},
    208             {false, true, true},
    209             {false, false, false}
    210         },
    211         {
    212             {false, false, false},
    213             {false, false, false},
    214             {false, false, false},
    215             {false, false, false}
    216         },
    217         {
    218             {false, true, true},
    219             {false, false, false},
    220             {false, true, true},
    221             {false, false, false}
    222         },
    223         {
    224             {false, false, false},
    225             {false, false, false},
    226             {false, false, false},
    227             {false, false, false}
    228         },
    229         {
    230             {false, true, true},
    231             {false, false, false},
    232             {false, true, true},
    233             {false, false, true}
    234         },
    235         {
    236             {false, true, true},
    237             {false, false, false},
    238             {false, true, true},
    239             {false, false, true}
    240         }
    241     };
    242 
    243     bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
    244     // NPP bug on float data
    245     useNpp = useNpp && src.depth() != CV_32F;
    246 
    247     if (useNpp)
    248     {
    249         typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
    250 
    251         static const func_t funcs[2][6][4] =
    252         {
    253             {
    254                 {NppWarp<CV_8U, nppiWarpAffine_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffine_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffine_8u_C4R>::call},
    255                 {0, 0, 0, 0},
    256                 {NppWarp<CV_16U, nppiWarpAffine_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffine_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffine_16u_C4R>::call},
    257                 {0, 0, 0, 0},
    258                 {NppWarp<CV_32S, nppiWarpAffine_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffine_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffine_32s_C4R>::call},
    259                 {NppWarp<CV_32F, nppiWarpAffine_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffine_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffine_32f_C4R>::call}
    260             },
    261             {
    262                 {NppWarp<CV_8U, nppiWarpAffineBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffineBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffineBack_8u_C4R>::call},
    263                 {0, 0, 0, 0},
    264                 {NppWarp<CV_16U, nppiWarpAffineBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffineBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffineBack_16u_C4R>::call},
    265                 {0, 0, 0, 0},
    266                 {NppWarp<CV_32S, nppiWarpAffineBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffineBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffineBack_32s_C4R>::call},
    267                 {NppWarp<CV_32F, nppiWarpAffineBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffineBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffineBack_32f_C4R>::call}
    268             }
    269         };
    270 
    271         dst.setTo(borderValue, stream);
    272 
    273         double coeffs[2][3];
    274         Mat coeffsMat(2, 3, CV_64F, (void*)coeffs);
    275         M.convertTo(coeffsMat, coeffsMat.type());
    276 
    277         const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
    278         CV_Assert(func != 0);
    279 
    280         func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
    281     }
    282     else
    283     {
    284         using namespace cv::cuda::device::imgproc;
    285 
    286         typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
    287             int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    288 
    289         static const func_t funcs[6][4] =
    290         {
    291             {warpAffine_gpu<uchar>      , 0 /*warpAffine_gpu<uchar2>*/ , warpAffine_gpu<uchar3>     , warpAffine_gpu<uchar4>     },
    292             {0 /*warpAffine_gpu<schar>*/, 0 /*warpAffine_gpu<char2>*/  , 0 /*warpAffine_gpu<char3>*/, 0 /*warpAffine_gpu<char4>*/},
    293             {warpAffine_gpu<ushort>     , 0 /*warpAffine_gpu<ushort2>*/, warpAffine_gpu<ushort3>    , warpAffine_gpu<ushort4>    },
    294             {warpAffine_gpu<short>      , 0 /*warpAffine_gpu<short2>*/ , warpAffine_gpu<short3>     , warpAffine_gpu<short4>     },
    295             {0 /*warpAffine_gpu<int>*/  , 0 /*warpAffine_gpu<int2>*/   , 0 /*warpAffine_gpu<int3>*/ , 0 /*warpAffine_gpu<int4>*/ },
    296             {warpAffine_gpu<float>      , 0 /*warpAffine_gpu<float2>*/ , warpAffine_gpu<float3>     , warpAffine_gpu<float4>     }
    297         };
    298 
    299         const func_t func = funcs[src.depth()][src.channels() - 1];
    300         CV_Assert(func != 0);
    301 
    302         float coeffs[2 * 3];
    303         Mat coeffsMat(2, 3, CV_32F, (void*)coeffs);
    304 
    305         if (flags & WARP_INVERSE_MAP)
    306             M.convertTo(coeffsMat, coeffsMat.type());
    307         else
    308         {
    309             cv::Mat iM;
    310             invertAffineTransform(M, iM);
    311             iM.convertTo(coeffsMat, coeffsMat.type());
    312         }
    313 
    314         Scalar_<float> borderValueFloat;
    315         borderValueFloat = borderValue;
    316 
    317         func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
    318             dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
    319     }
    320 }
    321 
    322 void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
    323 {
    324     GpuMat src = _src.getGpuMat();
    325     Mat M = _M.getMat();
    326 
    327     CV_Assert( M.rows == 3 && M.cols == 3 );
    328 
    329     const int interpolation = flags & INTER_MAX;
    330 
    331     CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
    332     CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
    333     CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP) ;
    334 
    335     _dst.create(dsize, src.type());
    336     GpuMat dst = _dst.getGpuMat();
    337 
    338     Size wholeSize;
    339     Point ofs;
    340     src.locateROI(wholeSize, ofs);
    341 
    342     static const bool useNppTab[6][4][3] =
    343     {
    344         {
    345             {false, false, true},
    346             {false, false, false},
    347             {false, true, true},
    348             {false, false, false}
    349         },
    350         {
    351             {false, false, false},
    352             {false, false, false},
    353             {false, false, false},
    354             {false, false, false}
    355         },
    356         {
    357             {false, true, true},
    358             {false, false, false},
    359             {false, true, true},
    360             {false, false, false}
    361         },
    362         {
    363             {false, false, false},
    364             {false, false, false},
    365             {false, false, false},
    366             {false, false, false}
    367         },
    368         {
    369             {false, true, true},
    370             {false, false, false},
    371             {false, true, true},
    372             {false, false, true}
    373         },
    374         {
    375             {false, true, true},
    376             {false, false, false},
    377             {false, true, true},
    378             {false, false, true}
    379         }
    380     };
    381 
    382     bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
    383     // NPP bug on float data
    384     useNpp = useNpp && src.depth() != CV_32F;
    385 
    386     if (useNpp)
    387     {
    388         typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
    389 
    390         static const func_t funcs[2][6][4] =
    391         {
    392             {
    393                 {NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
    394                 {0, 0, 0, 0},
    395                 {NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
    396                 {0, 0, 0, 0},
    397                 {NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
    398                 {NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
    399             },
    400             {
    401                 {NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
    402                 {0, 0, 0, 0},
    403                 {NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
    404                 {0, 0, 0, 0},
    405                 {NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
    406                 {NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
    407             }
    408         };
    409 
    410         dst.setTo(borderValue, stream);
    411 
    412         double coeffs[3][3];
    413         Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
    414         M.convertTo(coeffsMat, coeffsMat.type());
    415 
    416         const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
    417         CV_Assert(func != 0);
    418 
    419         func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
    420     }
    421     else
    422     {
    423         using namespace cv::cuda::device::imgproc;
    424 
    425         typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
    426             int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
    427 
    428         static const func_t funcs[6][4] =
    429         {
    430             {warpPerspective_gpu<uchar>      , 0 /*warpPerspective_gpu<uchar2>*/ , warpPerspective_gpu<uchar3>     , warpPerspective_gpu<uchar4>     },
    431             {0 /*warpPerspective_gpu<schar>*/, 0 /*warpPerspective_gpu<char2>*/  , 0 /*warpPerspective_gpu<char3>*/, 0 /*warpPerspective_gpu<char4>*/},
    432             {warpPerspective_gpu<ushort>     , 0 /*warpPerspective_gpu<ushort2>*/, warpPerspective_gpu<ushort3>    , warpPerspective_gpu<ushort4>    },
    433             {warpPerspective_gpu<short>      , 0 /*warpPerspective_gpu<short2>*/ , warpPerspective_gpu<short3>     , warpPerspective_gpu<short4>     },
    434             {0 /*warpPerspective_gpu<int>*/  , 0 /*warpPerspective_gpu<int2>*/   , 0 /*warpPerspective_gpu<int3>*/ , 0 /*warpPerspective_gpu<int4>*/ },
    435             {warpPerspective_gpu<float>      , 0 /*warpPerspective_gpu<float2>*/ , warpPerspective_gpu<float3>     , warpPerspective_gpu<float4>     }
    436         };
    437 
    438         const func_t func = funcs[src.depth()][src.channels() - 1];
    439         CV_Assert(func != 0);
    440 
    441         float coeffs[3 * 3];
    442         Mat coeffsMat(3, 3, CV_32F, (void*)coeffs);
    443 
    444         if (flags & WARP_INVERSE_MAP)
    445             M.convertTo(coeffsMat, coeffsMat.type());
    446         else
    447         {
    448             cv::Mat iM;
    449             invert(M, iM);
    450             iM.convertTo(coeffsMat, coeffsMat.type());
    451         }
    452 
    453         Scalar_<float> borderValueFloat;
    454         borderValueFloat = borderValue;
    455 
    456         func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
    457             dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
    458     }
    459 }
    460 
    461 ////////////////////////////////////////////////////////////////////////
    462 // rotate
    463 
    464 namespace
    465 {
    466     template <int DEPTH> struct NppRotateFunc
    467     {
    468         typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
    469 
    470         typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI,
    471                                     npp_type* pDst, int nDstStep, NppiRect oDstROI,
    472                                     double nAngle, double nShiftX, double nShiftY, int eInterpolation);
    473     };
    474 
    475     template <int DEPTH, typename NppRotateFunc<DEPTH>::func_t func> struct NppRotate
    476     {
    477         typedef typename NppRotateFunc<DEPTH>::npp_type npp_type;
    478 
    479         static void call(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream)
    480         {
    481             (void)dsize;
    482             static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC};
    483 
    484             NppStreamHandler h(stream);
    485 
    486             NppiSize srcsz;
    487             srcsz.height = src.rows;
    488             srcsz.width = src.cols;
    489             NppiRect srcroi;
    490             srcroi.x = srcroi.y = 0;
    491             srcroi.height = src.rows;
    492             srcroi.width = src.cols;
    493             NppiRect dstroi;
    494             dstroi.x = dstroi.y = 0;
    495             dstroi.height = dst.rows;
    496             dstroi.width = dst.cols;
    497 
    498             nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
    499                 dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );
    500 
    501             if (stream == 0)
    502                 cudaSafeCall( cudaDeviceSynchronize() );
    503         }
    504     };
    505 }
    506 
    507 void cv::cuda::rotate(InputArray _src, OutputArray _dst, Size dsize, double angle, double xShift, double yShift, int interpolation, Stream& stream)
    508 {
    509     typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift, double yShift, int interpolation, cudaStream_t stream);
    510     static const func_t funcs[6][4] =
    511     {
    512         {NppRotate<CV_8U, nppiRotate_8u_C1R>::call, 0, NppRotate<CV_8U, nppiRotate_8u_C3R>::call, NppRotate<CV_8U, nppiRotate_8u_C4R>::call},
    513         {0,0,0,0},
    514         {NppRotate<CV_16U, nppiRotate_16u_C1R>::call, 0, NppRotate<CV_16U, nppiRotate_16u_C3R>::call, NppRotate<CV_16U, nppiRotate_16u_C4R>::call},
    515         {0,0,0,0},
    516         {0,0,0,0},
    517         {NppRotate<CV_32F, nppiRotate_32f_C1R>::call, 0, NppRotate<CV_32F, nppiRotate_32f_C3R>::call, NppRotate<CV_32F, nppiRotate_32f_C4R>::call}
    518     };
    519 
    520     GpuMat src = _src.getGpuMat();
    521 
    522     CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F );
    523     CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
    524     CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
    525 
    526     _dst.create(dsize, src.type());
    527     GpuMat dst = _dst.getGpuMat();
    528 
    529     dst.setTo(Scalar::all(0), stream);
    530 
    531     funcs[src.depth()][src.channels() - 1](src, dst, dsize, angle, xShift, yShift, interpolation, StreamAccessor::getStream(stream));
    532 }
    533 
    534 #endif // HAVE_CUDA
    535