/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

void cv::cuda::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
void cv::cuda::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
void cv::cuda::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
void cv::cuda::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
void cv::cuda::absdiff(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::abs(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::sqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::sqrt(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::exp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::log(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }

void cv::cuda::bitwise_not(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::rshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::lshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::min(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::max(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::addWeighted(InputArray, double, InputArray, double, double, OutputArray, int, Stream&) { throw_no_cuda(); }

double cv::cuda::threshold(InputArray, OutputArray, double, double, int, Stream&) {throw_no_cuda(); return 0.0;}

void cv::cuda::magnitude(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::magnitude(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::magnitudeSqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::magnitudeSqr(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::phase(InputArray, InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::cartToPolar(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::polarToCart(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }

#else

////////////////////////////////////////////////////////////////////////
// arithm_op

namespace
{
    typedef void (*mat_mat_func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int op);
    typedef void (*mat_scalar_func_t)(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int op);

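    // Shared dispatcher for the binary element-wise wrappers below. It normalizes the
    // (matrix, matrix) and (matrix, scalar) argument combinations, validates types, sizes
    // and the optional CV_8UC1 mask, allocates the destination, and then forwards to either
    // mat_mat_func or mat_scalar_func. The 'inv' flag passed to mat_scalar_func is set when
    // the scalar is the first operand (e.g. subtract(Scalar, GpuMat)).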
    void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, double scale, int dtype, Stream& stream,
                   mat_mat_func_t mat_mat_func, mat_scalar_func_t mat_scalar_func, int op = 0)
    {
        const int kind1 = _src1.kind();
        const int kind2 = _src2.kind();

        const bool isScalar1 = (kind1 == _InputArray::MATX);
        const bool isScalar2 = (kind2 == _InputArray::MATX);
        CV_Assert( !isScalar1 || !isScalar2 );

        GpuMat src1;
        if (!isScalar1)
            src1 = getInputMat(_src1, stream);

        GpuMat src2;
        if (!isScalar2)
            src2 = getInputMat(_src2, stream);

        Mat scalar;
        if (isScalar1)
            scalar = _src1.getMat();
        else if (isScalar2)
            scalar = _src2.getMat();

        Scalar val;
        if (!scalar.empty())
        {
            CV_Assert( scalar.total() <= 4 );
            scalar.convertTo(Mat_<double>(scalar.rows, scalar.cols, &val[0]), CV_64F);
        }

        GpuMat mask = getInputMat(_mask, stream);

        const int sdepth = src1.empty() ? src2.depth() : src1.depth();
        const int cn = src1.empty() ? src2.channels() : src1.channels();
        const Size size = src1.empty() ? src2.size() : src1.size();

        if (dtype < 0)
            dtype = sdepth;

        const int ddepth = CV_MAT_DEPTH(dtype);

        CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F );
        CV_Assert( !scalar.empty() || (src2.type() == src1.type() && src2.size() == src1.size()) );
        CV_Assert( mask.empty() || (cn == 1 && mask.size() == size && mask.type() == CV_8UC1) );

        if (sdepth == CV_64F || ddepth == CV_64F)
        {
            if (!deviceSupports(NATIVE_DOUBLE))
                CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double");
        }

        GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(ddepth, cn), stream);

        if (isScalar1)
            mat_scalar_func(src2, val, true, dst, mask, scale, stream, op);
        else if (isScalar2)
            mat_scalar_func(src1, val, false, dst, mask, scale, stream, op);
        else
            mat_mat_func(src1, src2, dst, mask, scale, stream, op);

        syncOutput(dst, _dst, stream);
    }
}

////////////////////////////////////////////////////////////////////////
// add

void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);

void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);

void cv::cuda::add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
{
    arithm_op(src1, src2, dst, mask, 1.0, dtype, stream, addMat, addScalar);
}
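
// Usage sketch (host-side caller, not part of this translation unit); hypothetical variable
// names, assuming two same-sized, same-type GpuMats already uploaded and an optional
// CV_8UC1 mask:
//
//   cv::cuda::GpuMat a, b, sum;
//   a.upload(hostA);
//   b.upload(hostB);
//   cv::cuda::add(a, b, sum);                          // saturating element-wise add
//   cv::cuda::add(a, cv::Scalar(10), sum);             // matrix + per-channel scalar
//   cv::cuda::add(a, b, sum, mask);                    // masked (single-channel sources only)
//   cv::cuda::add(a, b, sum, cv::noArray(), CV_32F);   // widen the output depth
//
// subtract() below mirrors the same signature and dispatch path.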

////////////////////////////////////////////////////////////////////////
// subtract

void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);

void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);

void cv::cuda::subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
{
    arithm_op(src1, src2, dst, mask, 1.0, dtype, stream, subMat, subScalar);
}

////////////////////////////////////////////////////////////////////////
// multiply

void mulMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int);
void mulMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
void mulMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);

void mulScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int);

void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
{
    if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
    {
        GpuMat src1 = getInputMat(_src1, stream);
        GpuMat src2 = getInputMat(_src2, stream);

        CV_Assert( src1.size() == src2.size() );

        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);

        mulMat_8uc4_32f(src1, src2, dst, stream);

        syncOutput(dst, _dst, stream);
    }
    else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
    {
        GpuMat src1 = getInputMat(_src1, stream);
        GpuMat src2 = getInputMat(_src2, stream);

        CV_Assert( src1.size() == src2.size() );

        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);

        mulMat_16sc4_32f(src1, src2, dst, stream);

        syncOutput(dst, _dst, stream);
    }
    else
    {
        arithm_op(_src1, _src2, _dst, GpuMat(), scale, dtype, stream, mulMat, mulScalar);
    }
}
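
// Usage sketch for the specialized paths above (hypothetical variable names): multiplying a
// CV_8UC4 image by a CV_32FC1 weight map keeps the CV_8UC4 type, which is handy for per-pixel
// intensity scaling of packed RGBA frames. Note that these specialized paths ignore 'scale'
// and 'dtype':
//
//   cv::cuda::GpuMat rgba;                       // CV_8UC4
//   cv::cuda::GpuMat weights;                    // CV_32FC1, same size
//   cv::cuda::GpuMat scaled;
//   cv::cuda::multiply(rgba, weights, scaled);   // dst stays CV_8UC4
//
// Every other type combination falls through to arithm_op(), where the generic rule
// dst = saturate(scale * src1 * src2) applies.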

////////////////////////////////////////////////////////////////////////
// divide

void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int);
void divMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
void divMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);

void divScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int);

void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
{
    if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
    {
        GpuMat src1 = getInputMat(_src1, stream);
        GpuMat src2 = getInputMat(_src2, stream);

        CV_Assert( src1.size() == src2.size() );

        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);

        divMat_8uc4_32f(src1, src2, dst, stream);

        syncOutput(dst, _dst, stream);
    }
    else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
    {
        GpuMat src1 = getInputMat(_src1, stream);
        GpuMat src2 = getInputMat(_src2, stream);

        CV_Assert( src1.size() == src2.size() );

        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);

        divMat_16sc4_32f(src1, src2, dst, stream);

        syncOutput(dst, _dst, stream);
    }
    else
    {
        arithm_op(_src1, _src2, _dst, GpuMat(), scale, dtype, stream, divMat, divScalar);
    }
}
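
// Usage sketch (hypothetical names): in the generic path dst = saturate(scale * src1 / src2),
// and a scalar numerator is also accepted (handled by divScalar with inv == true):
//
//   cv::cuda::GpuMat num, den, ratio;
//   cv::cuda::divide(num, den, ratio, 1.0, CV_32F);   // element-wise ratio in float
//   cv::cuda::divide(cv::Scalar(1), den, ratio);      // per-element reciprocal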

//////////////////////////////////////////////////////////////////////////////
// absdiff

void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int);

void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int);

void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
    arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, absDiffMat, absDiffScalar);
}

//////////////////////////////////////////////////////////////////////////////
// compare

void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);

void cmpScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);

void cv::cuda::compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream)
{
    arithm_op(src1, src2, dst, noArray(), 1.0, CV_8U, stream, cmpMat, cmpScalar, cmpop);
}
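
// Usage sketch (hypothetical names): compare() always produces a CV_8U result (255 where the
// predicate holds, 0 elsewhere), which pairs naturally with the masked variants of
// add()/subtract() above:
//
//   cv::cuda::GpuMat gray, aboveThresh;          // gray is CV_8UC1
//   cv::cuda::compare(gray, cv::Scalar(128), aboveThresh, cv::CMP_GT);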

//////////////////////////////////////////////////////////////////////////////
// Binary bitwise logical operations

namespace
{
    enum
    {
        BIT_OP_AND,
        BIT_OP_OR,
        BIT_OP_XOR
    };
}

void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);

void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);

void cv::cuda::bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
{
    arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_OR);
}

void cv::cuda::bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
{
    arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_AND);
}

void cv::cuda::bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
{
    arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_XOR);
}
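
// Usage sketch (hypothetical names): the three wrappers share bitMat/bitScalar and differ
// only in the op tag, so calls look identical apart from the function name:
//
//   cv::cuda::GpuMat img, pattern, out;                  // same size and type
//   cv::cuda::bitwise_and(img, pattern, out);
//   cv::cuda::bitwise_xor(img, cv::Scalar(0xFF), out);   // per-channel constant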

//////////////////////////////////////////////////////////////////////////////
// shift

namespace
{
    template <int DEPTH, int cn> struct NppShiftFunc
    {
        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;

        typedef NppStatus (*func_t)(const npp_type* pSrc1, int nSrc1Step, const Npp32u* pConstants, npp_type* pDst,  int nDstStep,  NppiSize oSizeROI);
    };
    template <int DEPTH> struct NppShiftFunc<DEPTH, 1>
    {
        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;

        typedef NppStatus (*func_t)(const npp_type* pSrc1, int nSrc1Step, const Npp32u pConstants, npp_type* pDst,  int nDstStep,  NppiSize oSizeROI);
    };

    template <int DEPTH, int cn, typename NppShiftFunc<DEPTH, cn>::func_t func> struct NppShift
    {
        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;

        static void call(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream)
        {
            NppStreamHandler h(stream);

            NppiSize oSizeROI;
            oSizeROI.width = src.cols;
            oSizeROI.height = src.rows;

            nppSafeCall( func(src.ptr<npp_type>(), static_cast<int>(src.step), sc.val, dst.ptr<npp_type>(), static_cast<int>(dst.step), oSizeROI) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template <int DEPTH, typename NppShiftFunc<DEPTH, 1>::func_t func> struct NppShift<DEPTH, 1, func>
    {
        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;

        static void call(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream)
        {
            NppStreamHandler h(stream);

            NppiSize oSizeROI;
            oSizeROI.width = src.cols;
            oSizeROI.height = src.rows;

            nppSafeCall( func(src.ptr<npp_type>(), static_cast<int>(src.step), sc.val[0], dst.ptr<npp_type>(), static_cast<int>(dst.step), oSizeROI) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
}
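
// The NppShift wrappers above adapt NPP's shift-by-constant primitives to a single call()
// signature: the generic version passes the per-channel constants as a pointer (the C3R/C4R
// variants), while the <DEPTH, 1> specialization passes a single Npp32u (the C1R variants).
// Both issue the NPP call on the caller's stream and synchronize only when running on the
// default stream.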

void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
    static const func_t funcs[5][4] =
    {
        {NppShift<CV_8U , 1, nppiRShiftC_8u_C1R >::call, 0, NppShift<CV_8U , 3, nppiRShiftC_8u_C3R >::call, NppShift<CV_8U , 4, nppiRShiftC_8u_C4R>::call },
        {NppShift<CV_8S , 1, nppiRShiftC_8s_C1R >::call, 0, NppShift<CV_8S , 3, nppiRShiftC_8s_C3R >::call, NppShift<CV_8S , 4, nppiRShiftC_8s_C4R>::call },
        {NppShift<CV_16U, 1, nppiRShiftC_16u_C1R>::call, 0, NppShift<CV_16U, 3, nppiRShiftC_16u_C3R>::call, NppShift<CV_16U, 4, nppiRShiftC_16u_C4R>::call},
        {NppShift<CV_16S, 1, nppiRShiftC_16s_C1R>::call, 0, NppShift<CV_16S, 3, nppiRShiftC_16s_C3R>::call, NppShift<CV_16S, 4, nppiRShiftC_16s_C4R>::call},
        {NppShift<CV_32S, 1, nppiRShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiRShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiRShiftC_32s_C4R>::call},
    };

    GpuMat src = getInputMat(_src, stream);

    CV_Assert( src.depth() < CV_32F );
    CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );

    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);

    funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));

    syncOutput(dst, _dst, stream);
}

void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
    static const func_t funcs[5][4] =
    {
        {NppShift<CV_8U , 1, nppiLShiftC_8u_C1R>::call , 0, NppShift<CV_8U , 3, nppiLShiftC_8u_C3R>::call , NppShift<CV_8U , 4, nppiLShiftC_8u_C4R>::call },
        {0                                             , 0, 0                                             , 0                                             },
        {NppShift<CV_16U, 1, nppiLShiftC_16u_C1R>::call, 0, NppShift<CV_16U, 3, nppiLShiftC_16u_C3R>::call, NppShift<CV_16U, 4, nppiLShiftC_16u_C4R>::call},
        {0                                             , 0, 0                                             , 0                                             },
        {NppShift<CV_32S, 1, nppiLShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiLShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiLShiftC_32s_C4R>::call},
    };

    GpuMat src = getInputMat(_src, stream);

    CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S );
    CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );

    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);

    funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));

    syncOutput(dst, _dst, stream);
}
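
// Usage sketch (hypothetical names): the Scalar_<int> carries one shift amount per channel.
// rshift() accepts any integer depth below CV_32F, while lshift() is limited to the depths
// covered by its table (CV_8U, CV_16U, CV_32S), as enforced by the assert above:
//
//   cv::cuda::GpuMat pix, halved;                               // e.g. CV_8UC3
//   cv::cuda::rshift(pix, cv::Scalar_<int>(1, 1, 1), halved);   // divide each channel by 2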

//////////////////////////////////////////////////////////////////////////////
// Minimum and maximum operations

namespace
{
    enum
    {
        MIN_OP,
        MAX_OP
    };
}

void minMaxMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int op);

void minMaxScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int op);

void cv::cuda::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
    arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MIN_OP);
}

void cv::cuda::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
    arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MAX_OP);
}
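
// Usage sketch (hypothetical names): element-wise minimum/maximum of two matrices, or of a
// matrix and a per-channel scalar, e.g. clamping from below:
//
//   cv::cuda::GpuMat depthMap, clamped;
//   cv::cuda::max(depthMap, cv::Scalar(0), clamped);   // clamp negatives to zero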

////////////////////////////////////////////////////////////////////////
// NPP magnitude

namespace
{
    typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);

    void npp_magnitude(const GpuMat& src, GpuMat& dst, nppMagnitude_t func, cudaStream_t stream)
    {
        CV_Assert(src.type() == CV_32FC2);

        NppiSize sz;
        sz.width = src.cols;
        sz.height = src.rows;

        NppStreamHandler h(stream);

        nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
}

void cv::cuda::magnitude(InputArray _src, OutputArray _dst, Stream& stream)
{
    GpuMat src = getInputMat(_src, stream);

    GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);

    npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));

    syncOutput(dst, _dst, stream);
}

void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
{
    GpuMat src = getInputMat(_src, stream);

    GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);

    npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));

    syncOutput(dst, _dst, stream);
}
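
// Usage sketch (hypothetical names): these single-input overloads expect an interleaved
// complex CV_32FC2 matrix (e.g. a DFT result) and write a CV_32FC1 magnitude map:
//
//   cv::cuda::GpuMat spectrum;                   // CV_32FC2
//   cv::cuda::GpuMat mag;
//   cv::cuda::magnitude(spectrum, mag);          // sqrt(re^2 + im^2)
//   cv::cuda::magnitudeSqr(spectrum, mag);       // re^2 + im^2, skips the sqrt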

#endif