1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

// Build without CUDA: every public entry point keeps its signature but
// immediately reports that CUDA support is unavailable.
void cv::cuda::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
void cv::cuda::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); }
void cv::cuda::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
void cv::cuda::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); }
void cv::cuda::absdiff(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::abs(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::sqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::sqrt(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::exp(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::log(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }

void cv::cuda::bitwise_not(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::rshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::lshift(InputArray, Scalar_<int>, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::min(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::max(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }

void cv::cuda::addWeighted(InputArray, double, InputArray, double, double, OutputArray, int, Stream&) { throw_no_cuda(); }

double cv::cuda::threshold(InputArray, OutputArray, double, double, int, Stream&) {throw_no_cuda(); return 0.0;}

void cv::cuda::magnitude(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::magnitude(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::magnitudeSqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::magnitudeSqr(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::phase(InputArray, InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::cartToPolar(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::polarToCart(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); }

#else

////////////////////////////////////////////////////////////////////////
// arithm_op

namespace
{
    // Launcher for a mat-(op)-mat kernel. `op` selects the concrete operation
    // for launchers that serve several ops (bit ops, compare, min/max); it is
    // 0 when the launcher implements only one operation.
    typedef void (*mat_mat_func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int op);
    // Launcher for a mat-(op)-scalar kernel. `inv` is true when the scalar was
    // the FIRST operand (matters for non-commutative ops such as subtract).
    typedef void (*mat_scalar_func_t)(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int op);

    // Shared front-end for the binary element-wise operations: validates the
    // operands, normalizes a scalar operand to a double-precision cv::Scalar,
    // allocates the destination and dispatches to the appropriate launcher.
    void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, double scale, int dtype, Stream& stream,
                   mat_mat_func_t mat_mat_func, mat_scalar_func_t mat_scalar_func, int op = 0)
    {
        const int kind1 = _src1.kind();
        const int kind2 = _src2.kind();

        // A cv::Scalar handed to InputArray arrives with kind MATX.
        const bool isScalar1 = (kind1 == _InputArray::MATX);
        const bool isScalar2 = (kind2 == _InputArray::MATX);
        CV_Assert( !isScalar1 || !isScalar2 );  // at most one operand may be a scalar

        GpuMat src1;
        if (!isScalar1)
            src1 = getInputMat(_src1, stream);

        GpuMat src2;
        if (!isScalar2)
            src2 = getInputMat(_src2, stream);

        Mat scalar;
        if (isScalar1)
            scalar = _src1.getMat();
        else if (isScalar2)
            scalar = _src2.getMat();

        // Convert the (at most 4-element) scalar operand to CV_64F, writing
        // straight into val's storage.
        Scalar val;
        if (!scalar.empty())
        {
            CV_Assert( scalar.total() <= 4 );
            scalar.convertTo(Mat_<double>(scalar.rows, scalar.cols, &val[0]), CV_64F);
        }

        GpuMat mask = getInputMat(_mask, stream);

        // Geometry and type are taken from whichever operand is a real matrix.
        const int sdepth = src1.empty() ? src2.depth() : src1.depth();
        const int cn = src1.empty() ? src2.channels() : src1.channels();
        const Size size = src1.empty() ? src2.size() : src1.size();

        if (dtype < 0)
            dtype = sdepth;  // default destination depth: same as the source

        const int ddepth = CV_MAT_DEPTH(dtype);

        CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F );
        // For mat/mat the operands must match in type and size.
        CV_Assert( !scalar.empty() || (src2.type() == src1.type() && src2.size() == src1.size()) );
        // Masked operation is only supported for single-channel sources.
        CV_Assert( mask.empty() || (cn == 1 && mask.size() == size && mask.type() == CV_8UC1) );

        // CV_64F requires native double support on the device.
        if (sdepth == CV_64F || ddepth == CV_64F)
        {
            if (!deviceSupports(NATIVE_DOUBLE))
                CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double");
        }

        GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(ddepth, cn), stream);

        if (isScalar1)
            mat_scalar_func(src2, val, true, dst, mask, scale, stream, op);   // scalar (op) mat
        else if (isScalar2)
            mat_scalar_func(src1, val, false, dst, mask, scale, stream, op);  // mat (op) scalar
        else
            mat_mat_func(src1, src2, dst, mask, scale, stream, op);

        syncOutput(dst, _dst, stream);
    }
}

////////////////////////////////////////////////////////////////////////
// add

// Kernel launchers implemented elsewhere (not in this translation unit).
void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);

void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);

void cv::cuda::add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
{
    arithm_op(src1, src2, dst, mask, 1.0, dtype, stream, addMat, addScalar);
}

////////////////////////////////////////////////////////////////////////
// subtract

void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);

void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);

void cv::cuda::subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
{
    arithm_op(src1, src2, dst, mask, 1.0, dtype, stream, subMat, subScalar);
}

////////////////////////////////////////////////////////////////////////
// multiply

void mulMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int);
// Dedicated kernels for the CV_8UC4 x CV_32FC1 and CV_16SC4 x CV_32FC1
// combinations (4-channel image times a single-channel float matrix).
void mulMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
void mulMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);

void mulScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int);

void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
{
    if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
    {
        // Special case: bypass arithm_op and call the dedicated kernel.
        GpuMat src1 = getInputMat(_src1, stream);
        GpuMat src2 = getInputMat(_src2, stream);

        CV_Assert( src1.size() == src2.size() );

        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);

        mulMat_8uc4_32f(src1, src2, dst, stream);

        syncOutput(dst, _dst, stream);
    }
    else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
    {
        GpuMat src1 = getInputMat(_src1, stream);
        GpuMat src2 = getInputMat(_src2, stream);

        CV_Assert( src1.size() == src2.size() );

        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);

        mulMat_16sc4_32f(src1, src2, dst, stream);

        syncOutput(dst, _dst, stream);
    }
    else
    {
        // Generic path: no mask supported for multiply.
        arithm_op(_src1, _src2, _dst, GpuMat(), scale, dtype, stream, mulMat, mulScalar);
    }
}

////////////////////////////////////////////////////////////////////////
// divide

void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int);
// Dedicated kernels mirroring the multiply special cases above.
void divMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
void divMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);

void divScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int);

void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
{
    if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
    {
        GpuMat src1 = getInputMat(_src1, stream);
        GpuMat src2 = getInputMat(_src2, stream);

        CV_Assert( src1.size() == src2.size() );

        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);

        divMat_8uc4_32f(src1, src2, dst, stream);

        syncOutput(dst, _dst, stream);
    }
    else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
    {
        GpuMat src1 = getInputMat(_src1, stream);
        GpuMat src2 = getInputMat(_src2, stream);

        CV_Assert( src1.size() == src2.size() );

        GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);

        divMat_16sc4_32f(src1, src2, dst, stream);

        syncOutput(dst, _dst, stream);
    }
    else
    {
        arithm_op(_src1, _src2, _dst, GpuMat(), scale, dtype, stream, divMat, divScalar);
    }
}

//////////////////////////////////////////////////////////////////////////////
// absdiff

void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int);

void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int);

void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
    // No mask, destination keeps the source depth (dtype = -1).
    arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, absDiffMat, absDiffScalar);
}

//////////////////////////////////////////////////////////////////////////////
// compare

void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);

void cmpScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);

void cv::cuda::compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream)
{
    // Comparison always produces an 8-bit mask; cmpop is forwarded via `op`.
    arithm_op(src1, src2, dst, noArray(), 1.0, CV_8U, stream, cmpMat, cmpScalar, cmpop);
}

//////////////////////////////////////////////////////////////////////////////
// Binary bitwise logical operations

namespace
{
    // Operation codes forwarded to bitMat/bitScalar through arithm_op's `op`.
    enum
    {
        BIT_OP_AND,
        BIT_OP_OR,
        BIT_OP_XOR
    };
}

void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);

void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);

void cv::cuda::bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
{
    arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_OR);
}

void cv::cuda::bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
{
    arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_AND);
}

void cv::cuda::bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream)
{
    arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_XOR);
}

//////////////////////////////////////////////////////////////////////////////
// shift

namespace
{
    // Signature of the multi-channel NPP ShiftC functions: the shift constants
    // are passed as a pointer to one Npp32u per channel.
    template <int DEPTH, int cn> struct NppShiftFunc
    {
        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;

        typedef NppStatus (*func_t)(const npp_type* pSrc1, int nSrc1Step, const Npp32u* pConstants, npp_type* pDst, int nDstStep, NppiSize oSizeROI);
    };
    // Single-channel variant: NPP takes the shift constant by value.
    template <int DEPTH> struct NppShiftFunc<DEPTH, 1>
    {
        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;

        typedef NppStatus (*func_t)(const npp_type* pSrc1, int nSrc1Step, const Npp32u pConstants, npp_type* pDst, int nDstStep, NppiSize oSizeROI);
    };

    // Wraps one NPP ShiftC function into the common `call` signature used by
    // the dispatch tables in rshift/lshift below.
    template <int DEPTH, int cn, typename NppShiftFunc<DEPTH, cn>::func_t func> struct NppShift
    {
        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;

        static void call(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream)
        {
            NppStreamHandler h(stream);

            NppiSize oSizeROI;
            oSizeROI.width = src.cols;
            oSizeROI.height = src.rows;

            // sc.val decays to the per-channel constants pointer.
            nppSafeCall( func(src.ptr<npp_type>(), static_cast<int>(src.step), sc.val, dst.ptr<npp_type>(), static_cast<int>(dst.step), oSizeROI) );

            // Synchronize when running on the default (0) stream.
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    // Single-channel specialization: passes sc.val[0] by value.
    template <int DEPTH, typename NppShiftFunc<DEPTH, 1>::func_t func> struct NppShift<DEPTH, 1, func>
    {
        typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;

        static void call(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream)
        {
            NppStreamHandler h(stream);

            NppiSize oSizeROI;
            oSizeROI.width = src.cols;
            oSizeROI.height = src.rows;

            nppSafeCall( func(src.ptr<npp_type>(), static_cast<int>(src.step), sc.val[0], dst.ptr<npp_type>(), static_cast<int>(dst.step), oSizeROI) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
}

void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
    // Indexed by [depth][channels - 1]; the zero entries (2-channel) are
    // unsupported and guarded by the channel assert below.
    static const func_t funcs[5][4] =
    {
        {NppShift<CV_8U , 1, nppiRShiftC_8u_C1R >::call, 0, NppShift<CV_8U , 3, nppiRShiftC_8u_C3R >::call, NppShift<CV_8U , 4, nppiRShiftC_8u_C4R>::call },
        {NppShift<CV_8S , 1, nppiRShiftC_8s_C1R >::call, 0, NppShift<CV_8S , 3, nppiRShiftC_8s_C3R >::call, NppShift<CV_8S , 4, nppiRShiftC_8s_C4R>::call },
        {NppShift<CV_16U, 1, nppiRShiftC_16u_C1R>::call, 0, NppShift<CV_16U, 3, nppiRShiftC_16u_C3R>::call, NppShift<CV_16U, 4, nppiRShiftC_16u_C4R>::call},
        {NppShift<CV_16S, 1, nppiRShiftC_16s_C1R>::call, 0, NppShift<CV_16S, 3, nppiRShiftC_16s_C3R>::call, NppShift<CV_16S, 4, nppiRShiftC_16s_C4R>::call},
        {NppShift<CV_32S, 1, nppiRShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiRShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiRShiftC_32s_C4R>::call},
    };

    GpuMat src = getInputMat(_src, stream);

    CV_Assert( src.depth() < CV_32F );  // integer depths only
    CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );

    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);

    funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));

    syncOutput(dst, _dst, stream);
}

void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
    // Indexed by [depth][channels - 1]; signed depths (CV_8S, CV_16S) have no
    // left-shift support and are rejected by the depth assert below.
    static const func_t funcs[5][4] =
    {
        {NppShift<CV_8U , 1, nppiLShiftC_8u_C1R>::call , 0, NppShift<CV_8U , 3, nppiLShiftC_8u_C3R>::call , NppShift<CV_8U , 4, nppiLShiftC_8u_C4R>::call },
        {0                                             , 0, 0                                             , 0                                             },
        {NppShift<CV_16U, 1, nppiLShiftC_16u_C1R>::call, 0, NppShift<CV_16U, 3, nppiLShiftC_16u_C3R>::call, NppShift<CV_16U, 4, nppiLShiftC_16u_C4R>::call},
        {0                                             , 0, 0                                             , 0                                             },
        {NppShift<CV_32S, 1, nppiLShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiLShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiLShiftC_32s_C4R>::call},
    };

    GpuMat src = getInputMat(_src, stream);

    CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S );
    CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );

    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);

    funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));

    syncOutput(dst, _dst, stream);
}

////////////////////////////////////////////////////////////////////////////// 434 // Minimum and maximum operations 435 436 namespace 437 { 438 enum 439 { 440 MIN_OP, 441 MAX_OP 442 }; 443 } 444 445 void minMaxMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int op); 446 447 void minMaxScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int op); 448 449 void cv::cuda::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream) 450 { 451 arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MIN_OP); 452 } 453 454 void cv::cuda::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream) 455 { 456 arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MAX_OP); 457 } 458 459 //////////////////////////////////////////////////////////////////////// 460 // NPP magnitide 461 462 namespace 463 { 464 typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI); 465 466 void npp_magnitude(const GpuMat& src, GpuMat& dst, nppMagnitude_t func, cudaStream_t stream) 467 { 468 CV_Assert(src.type() == CV_32FC2); 469 470 NppiSize sz; 471 sz.width = src.cols; 472 sz.height = src.rows; 473 474 NppStreamHandler h(stream); 475 476 nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); 477 478 if (stream == 0) 479 cudaSafeCall( cudaDeviceSynchronize() ); 480 } 481 } 482 483 void cv::cuda::magnitude(InputArray _src, OutputArray _dst, Stream& stream) 484 { 485 GpuMat src = getInputMat(_src, stream); 486 487 GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream); 488 489 npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream)); 490 491 syncOutput(dst, _dst, stream); 492 } 493 494 void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream) 495 { 496 
GpuMat src = getInputMat(_src, stream); 497 498 GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream); 499 500 npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream)); 501 502 syncOutput(dst, _dst, stream); 503 } 504 505 #endif 506