Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 
     45 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
     46 
     47 void cv::cuda::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
     48 void cv::cuda::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
     49 
     50 void cv::cuda::connectivityMask(const GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_no_cuda(); }
     51 void cv::cuda::labelComponents(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
     52 
     53 #else /* !defined (HAVE_CUDA) */
     54 
     55 namespace cv { namespace cuda { namespace device
     56 {
     57     namespace ccl
     58     {
     59         void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream);
     60 
     61         template<typename T>
     62         void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
     63     }
     64 }}}
     65 
     66 static float4 scalarToCudaType(const cv::Scalar& in)
     67 {
     68   return make_float4((float)in[0], (float)in[1], (float)in[2], (float)in[3]);
     69 }
     70 
     71 void cv::cuda::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s)
     72 {
     73     CV_Assert(!image.empty());
     74 
     75     int ch = image.channels();
     76     CV_Assert(ch <= 4);
     77 
     78     int depth = image.depth();
     79 
     80     typedef void (*func_t)(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
     81 
     82     static const func_t suppotLookup[8][4] =
     83     {   //    1,    2,     3,     4
     84         { device::ccl::computeEdges<uchar>,  0,  device::ccl::computeEdges<uchar3>,  device::ccl::computeEdges<uchar4>  },// CV_8U
     85         { 0,                                 0,  0,                                  0                                  },// CV_16U
     86         { device::ccl::computeEdges<ushort>, 0,  device::ccl::computeEdges<ushort3>, device::ccl::computeEdges<ushort4> },// CV_8S
     87         { 0,                                 0,  0,                                  0                                  },// CV_16S
     88         { device::ccl::computeEdges<int>,    0,  0,                                  0                                  },// CV_32S
     89         { device::ccl::computeEdges<float>,  0,  0,                                  0                                  },// CV_32F
     90         { 0,                                 0,  0,                                  0                                  },// CV_64F
     91         { 0,                                 0,  0,                                  0                                  } // CV_USRTYPE1
     92     };
     93 
     94     func_t f = suppotLookup[depth][ch - 1];
     95     CV_Assert(f);
     96 
     97     if (image.size() != mask.size() || mask.type() != CV_8UC1)
     98         mask.create(image.size(), CV_8UC1);
     99 
    100     cudaStream_t stream = StreamAccessor::getStream(s);
    101     float4 culo = scalarToCudaType(lo), cuhi = scalarToCudaType(hi);
    102     f(image, mask, culo, cuhi, stream);
    103 }
    104 
    105 void cv::cuda::labelComponents(const GpuMat& mask, GpuMat& components, int flags, Stream& s)
    106 {
    107     CV_Assert(!mask.empty() && mask.type() == CV_8U);
    108 
    109     if (!deviceSupports(SHARED_ATOMICS))
    110         CV_Error(cv::Error::StsNotImplemented, "The device doesn't support shared atomics and communicative synchronization!");
    111 
    112     components.create(mask.size(), CV_32SC1);
    113 
    114     cudaStream_t stream = StreamAccessor::getStream(s);
    115     device::ccl::labelComponents(mask, components, flags, stream);
    116 }
    117 
    118 namespace
    119 {
    120     typedef NppStatus (*init_func_t)(NppiSize oSize, NppiGraphcutState** ppState, Npp8u* pDeviceMem);
    121 
    122     class NppiGraphcutStateHandler
    123     {
    124     public:
    125         NppiGraphcutStateHandler(NppiSize sznpp, Npp8u* pDeviceMem, const init_func_t func)
    126         {
    127             nppSafeCall( func(sznpp, &pState, pDeviceMem) );
    128         }
    129 
    130         ~NppiGraphcutStateHandler()
    131         {
    132             nppSafeCall( nppiGraphcutFree(pState) );
    133         }
    134 
    135         operator NppiGraphcutState*()
    136         {
    137             return pState;
    138         }
    139 
    140     private:
    141         NppiGraphcutState* pState;
    142     };
    143 }
    144 
    145 void cv::cuda::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
    146 {
    147 #if (CUDA_VERSION < 5000)
    148     CV_Assert(terminals.type() == CV_32S);
    149 #else
    150     CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F);
    151 #endif
    152 
    153     Size src_size = terminals.size();
    154 
    155     CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
    156     CV_Assert(leftTransp.type() == terminals.type());
    157 
    158     CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
    159     CV_Assert(rightTransp.type() == terminals.type());
    160 
    161     CV_Assert(top.size() == src_size);
    162     CV_Assert(top.type() == terminals.type());
    163 
    164     CV_Assert(bottom.size() == src_size);
    165     CV_Assert(bottom.type() == terminals.type());
    166 
    167     labels.create(src_size, CV_8U);
    168 
    169     NppiSize sznpp;
    170     sznpp.width = src_size.width;
    171     sznpp.height = src_size.height;
    172 
    173     int bufsz;
    174     nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) );
    175 
    176     ensureSizeIsEnough(1, bufsz, CV_8U, buf);
    177 
    178     cudaStream_t stream = StreamAccessor::getStream(s);
    179 
    180     NppStreamHandler h(stream);
    181 
    182     NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcutInitAlloc);
    183 
    184 #if (CUDA_VERSION < 5000)
    185     nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
    186         static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    187 #else
    188     if (terminals.type() == CV_32S)
    189     {
    190         nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
    191             static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    192     }
    193     else
    194     {
    195         nppSafeCall( nppiGraphcut_32f8u(terminals.ptr<Npp32f>(), leftTransp.ptr<Npp32f>(), rightTransp.ptr<Npp32f>(), top.ptr<Npp32f>(), bottom.ptr<Npp32f>(),
    196             static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    197     }
    198 #endif
    199 
    200     if (stream == 0)
    201         cudaSafeCall( cudaDeviceSynchronize() );
    202 }
    203 
    204 void cv::cuda::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,
    205               GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, GpuMat& labels, GpuMat& buf, Stream& s)
    206 {
    207 #if (CUDA_VERSION < 5000)
    208     CV_Assert(terminals.type() == CV_32S);
    209 #else
    210     CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F);
    211 #endif
    212 
    213     Size src_size = terminals.size();
    214 
    215     CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
    216     CV_Assert(leftTransp.type() == terminals.type());
    217 
    218     CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
    219     CV_Assert(rightTransp.type() == terminals.type());
    220 
    221     CV_Assert(top.size() == src_size);
    222     CV_Assert(top.type() == terminals.type());
    223 
    224     CV_Assert(topLeft.size() == src_size);
    225     CV_Assert(topLeft.type() == terminals.type());
    226 
    227     CV_Assert(topRight.size() == src_size);
    228     CV_Assert(topRight.type() == terminals.type());
    229 
    230     CV_Assert(bottom.size() == src_size);
    231     CV_Assert(bottom.type() == terminals.type());
    232 
    233     CV_Assert(bottomLeft.size() == src_size);
    234     CV_Assert(bottomLeft.type() == terminals.type());
    235 
    236     CV_Assert(bottomRight.size() == src_size);
    237     CV_Assert(bottomRight.type() == terminals.type());
    238 
    239     labels.create(src_size, CV_8U);
    240 
    241     NppiSize sznpp;
    242     sznpp.width = src_size.width;
    243     sznpp.height = src_size.height;
    244 
    245     int bufsz;
    246     nppSafeCall( nppiGraphcut8GetSize(sznpp, &bufsz) );
    247 
    248     ensureSizeIsEnough(1, bufsz, CV_8U, buf);
    249 
    250     cudaStream_t stream = StreamAccessor::getStream(s);
    251 
    252     NppStreamHandler h(stream);
    253 
    254     NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcut8InitAlloc);
    255 
    256 #if (CUDA_VERSION < 5000)
    257     nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
    258         top.ptr<Npp32s>(), topLeft.ptr<Npp32s>(), topRight.ptr<Npp32s>(),
    259         bottom.ptr<Npp32s>(), bottomLeft.ptr<Npp32s>(), bottomRight.ptr<Npp32s>(),
    260         static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    261 #else
    262     if (terminals.type() == CV_32S)
    263     {
    264         nppSafeCall( nppiGraphcut8_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(),
    265             top.ptr<Npp32s>(), topLeft.ptr<Npp32s>(), topRight.ptr<Npp32s>(),
    266             bottom.ptr<Npp32s>(), bottomLeft.ptr<Npp32s>(), bottomRight.ptr<Npp32s>(),
    267             static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    268     }
    269     else
    270     {
    271         nppSafeCall( nppiGraphcut8_32f8u(terminals.ptr<Npp32f>(), leftTransp.ptr<Npp32f>(), rightTransp.ptr<Npp32f>(),
    272             top.ptr<Npp32f>(), topLeft.ptr<Npp32f>(), topRight.ptr<Npp32f>(),
    273             bottom.ptr<Npp32f>(), bottomLeft.ptr<Npp32f>(), bottomRight.ptr<Npp32f>(),
    274             static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    275     }
    276 #endif
    277 
    278     if (stream == 0)
    279         cudaSafeCall( cudaDeviceSynchronize() );
    280 }
    281 
    282 #endif /* !defined (HAVE_CUDA) */
    283