1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #include "precomp.hpp" 44 45 using namespace cv; 46 using namespace cv::cuda; 47 48 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) 49 50 double cv::cuda::norm(InputArray, int, InputArray) { throw_no_cuda(); return 0.0; } 51 void cv::cuda::calcNorm(InputArray, OutputArray, int, InputArray, Stream&) { throw_no_cuda(); } 52 double cv::cuda::norm(InputArray, InputArray, int) { throw_no_cuda(); return 0.0; } 53 void cv::cuda::calcNormDiff(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } 54 55 Scalar cv::cuda::sum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); } 56 void cv::cuda::calcSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } 57 Scalar cv::cuda::absSum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); } 58 void cv::cuda::calcAbsSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } 59 Scalar cv::cuda::sqrSum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); } 60 void cv::cuda::calcSqrSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } 61 62 void cv::cuda::minMax(InputArray, double*, double*, InputArray) { throw_no_cuda(); } 63 void cv::cuda::findMinMax(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } 64 void cv::cuda::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray) { throw_no_cuda(); } 65 void cv::cuda::findMinMaxLoc(InputArray, OutputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } 66 67 int cv::cuda::countNonZero(InputArray) { throw_no_cuda(); return 0; } 68 void cv::cuda::countNonZero(InputArray, OutputArray, Stream&) { throw_no_cuda(); } 69 70 void cv::cuda::reduce(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); } 71 72 void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&) { throw_no_cuda(); } 73 void cv::cuda::meanStdDev(InputArray, OutputArray, Stream&) { throw_no_cuda(); } 74 75 void cv::cuda::rectStdDev(InputArray, InputArray, OutputArray, Rect, Stream&) { throw_no_cuda(); } 76 77 void cv::cuda::normalize(InputArray, OutputArray, double, double, int, int, InputArray, Stream&) { throw_no_cuda(); } 78 79 void cv::cuda::integral(InputArray, OutputArray, Stream&) { throw_no_cuda(); } 80 void cv::cuda::sqrIntegral(InputArray, OutputArray, Stream&) { throw_no_cuda(); } 81 82 #else 83 84 //////////////////////////////////////////////////////////////////////// 85 // norm 86 87 namespace cv { namespace cuda { namespace device { 88 89 void normL2(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream); 90 91 void findMaxAbs(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream); 92 93 }}} 94 95 void cv::cuda::calcNorm(InputArray _src, OutputArray dst, int normType, InputArray mask, Stream& stream) 96 { 97 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 ); 98 99 GpuMat src = getInputMat(_src, stream); 100 101 GpuMat src_single_channel = src.reshape(1); 102 103 if (normType == NORM_L1) 104 { 105 calcAbsSum(src_single_channel, dst, mask, stream); 106 } 107 else if (normType == NORM_L2) 108 { 109 cv::cuda::device::normL2(src_single_channel, dst, mask, stream); 110 } 111 else // NORM_INF 112 { 113 cv::cuda::device::findMaxAbs(src_single_channel, dst, mask, stream); 114 } 115 } 116 117 double cv::cuda::norm(InputArray _src, int normType, InputArray _mask) 118 { 119 Stream& stream = Stream::Null(); 120 121 HostMem dst; 122 calcNorm(_src, dst, normType, _mask, stream); 123 124 stream.waitForCompletion(); 125 126 double val; 127 dst.createMatHeader().convertTo(Mat(1, 1, CV_64FC1, &val), CV_64F); 128 129 return val; 130 } 131 132 //////////////////////////////////////////////////////////////////////// 133 // meanStdDev 134 135 void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream) 136 { 137 if (!deviceSupports(FEATURE_SET_COMPUTE_13)) 138 CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility"); 139 140 const GpuMat src = getInputMat(_src, stream); 141 142 CV_Assert( src.type() == CV_8UC1 ); 143 144 GpuMat dst = getOutputMat(_dst, 1, 2, CV_64FC1, stream); 145 146 NppiSize sz; 147 sz.width = src.cols; 148 sz.height = src.rows; 149 150 int bufSize; 151 #if (CUDA_VERSION <= 4020) 152 nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) ); 153 #else 154 nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) ); 155 #endif 156 157 BufferPool pool(stream); 158 GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1); 159 160 NppStreamHandler h(StreamAccessor::getStream(stream)); 161 162 nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dst.ptr<Npp64f>(), dst.ptr<Npp64f>() + 1) ); 163 164 syncOutput(dst, _dst, stream); 165 } 166 167 void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev) 168 { 169 Stream& stream = Stream::Null(); 170 171 HostMem dst; 172 meanStdDev(_src, dst, stream); 173 174 stream.waitForCompletion(); 175 176 double vals[2]; 177 dst.createMatHeader().copyTo(Mat(1, 2, CV_64FC1, &vals[0])); 178 179 mean = Scalar(vals[0]); 180 stddev = Scalar(vals[1]); 181 } 182 183 ////////////////////////////////////////////////////////////////////////////// 184 // rectStdDev 185 186 void cv::cuda::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Rect rect, Stream& _stream) 187 { 188 GpuMat src = getInputMat(_src, _stream); 189 GpuMat sqr = getInputMat(_sqr, _stream); 190 191 CV_Assert( src.type() == CV_32SC1 && sqr.type() == CV_64FC1 ); 192 193 GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, _stream); 194 195 NppiSize sz; 196 sz.width = src.cols; 197 sz.height = src.rows; 198 199 NppiRect nppRect; 200 nppRect.height = rect.height; 201 nppRect.width = rect.width; 202 nppRect.x = rect.x; 203 nppRect.y = rect.y; 204 205 cudaStream_t stream = StreamAccessor::getStream(_stream); 206 207 NppStreamHandler h(stream); 208 209 nppSafeCall( nppiRectStdDev_32s32f_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), sqr.ptr<Npp64f>(), static_cast<int>(sqr.step), 210 dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) ); 211 212 if (stream == 0) 213 cudaSafeCall( cudaDeviceSynchronize() ); 214 215 syncOutput(dst, _dst, _stream); 216 } 217 218 #endif 219