Home | History | Annotate | Download | only in cuda
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "opencv2/opencv_modules.hpp"
     44 
     45 #ifndef HAVE_OPENCV_CUDEV
     46 
     47 #error "opencv_cudev is required"
     48 
     49 #else
     50 
     51 #include "opencv2/cudaarithm.hpp"
     52 #include "opencv2/cudev.hpp"
     53 #include "opencv2/core/private.cuda.hpp"
     54 
     55 using namespace cv;
     56 using namespace cv::cuda;
     57 using namespace cv::cudev;
     58 
     59 void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
     60 
     61 //////////////////////////////////////////////////////////////////////////////
     62 /// bitwise_not
     63 
     64 void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
     65 {
     66     GpuMat src = getInputMat(_src, stream);
     67     GpuMat mask = getInputMat(_mask, stream);
     68 
     69     const int depth = src.depth();
     70 
     71     CV_DbgAssert( depth <= CV_32F );
     72     CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
     73 
     74     GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
     75 
     76     if (mask.empty())
     77     {
     78         const int bcols = (int) (src.cols * src.elemSize());
     79 
     80         if ((bcols & 3) == 0)
     81         {
     82             const int vcols = bcols >> 2;
     83 
     84             GlobPtrSz<uint> vsrc = globPtr((uint*) src.data, src.step, src.rows, vcols);
     85             GlobPtrSz<uint> vdst = globPtr((uint*) dst.data, dst.step, src.rows, vcols);
     86 
     87             gridTransformUnary(vsrc, vdst, bit_not<uint>(), stream);
     88         }
     89         else if ((bcols & 1) == 0)
     90         {
     91             const int vcols = bcols >> 1;
     92 
     93             GlobPtrSz<ushort> vsrc = globPtr((ushort*) src.data, src.step, src.rows, vcols);
     94             GlobPtrSz<ushort> vdst = globPtr((ushort*) dst.data, dst.step, src.rows, vcols);
     95 
     96             gridTransformUnary(vsrc, vdst, bit_not<ushort>(), stream);
     97         }
     98         else
     99         {
    100             GlobPtrSz<uchar> vsrc = globPtr((uchar*) src.data, src.step, src.rows, bcols);
    101             GlobPtrSz<uchar> vdst = globPtr((uchar*) dst.data, dst.step, src.rows, bcols);
    102 
    103             gridTransformUnary(vsrc, vdst, bit_not<uchar>(), stream);
    104         }
    105     }
    106     else
    107     {
    108         if (depth == CV_32F || depth == CV_32S)
    109         {
    110             GlobPtrSz<uint> vsrc = globPtr((uint*) src.data, src.step, src.rows, src.cols * src.channels());
    111             GlobPtrSz<uint> vdst = globPtr((uint*) dst.data, dst.step, src.rows, src.cols * src.channels());
    112 
    113             gridTransformUnary(vsrc, vdst, bit_not<uint>(), singleMaskChannels(globPtr<uchar>(mask), src.channels()), stream);
    114         }
    115         else if (depth == CV_16S || depth == CV_16U)
    116         {
    117             GlobPtrSz<ushort> vsrc = globPtr((ushort*) src.data, src.step, src.rows, src.cols * src.channels());
    118             GlobPtrSz<ushort> vdst = globPtr((ushort*) dst.data, dst.step, src.rows, src.cols * src.channels());
    119 
    120             gridTransformUnary(vsrc, vdst, bit_not<ushort>(), singleMaskChannels(globPtr<uchar>(mask), src.channels()), stream);
    121         }
    122         else
    123         {
    124             GlobPtrSz<uchar> vsrc = globPtr((uchar*) src.data, src.step, src.rows, src.cols * src.channels());
    125             GlobPtrSz<uchar> vdst = globPtr((uchar*) dst.data, dst.step, src.rows, src.cols * src.channels());
    126 
    127             gridTransformUnary(vsrc, vdst, bit_not<uchar>(), singleMaskChannels(globPtr<uchar>(mask), src.channels()), stream);
    128         }
    129     }
    130 
    131     syncOutput(dst, _dst, stream);
    132 }
    133 
    134 //////////////////////////////////////////////////////////////////////////////
    135 /// Binary bitwise logical operations
    136 
    137 namespace
    138 {
    139     template <template <typename> class Op, typename T>
    140     void bitMatOp(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
    141     {
    142         GlobPtrSz<T> vsrc1 = globPtr((T*) src1.data, src1.step, src1.rows, src1.cols * src1.channels());
    143         GlobPtrSz<T> vsrc2 = globPtr((T*) src2.data, src2.step, src1.rows, src1.cols * src1.channels());
    144         GlobPtrSz<T> vdst = globPtr((T*) dst.data, dst.step, src1.rows, src1.cols * src1.channels());
    145 
    146         if (mask.data)
    147             gridTransformBinary(vsrc1, vsrc2, vdst, Op<T>(), singleMaskChannels(globPtr<uchar>(mask), src1.channels()), stream);
    148         else
    149             gridTransformBinary(vsrc1, vsrc2, vdst, Op<T>(), stream);
    150     }
    151 }
    152 
    153 void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op)
    154 {
    155     typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream);
    156     static const func_t funcs32[] =
    157     {
    158         bitMatOp<bit_and, uint>,
    159         bitMatOp<bit_or, uint>,
    160         bitMatOp<bit_xor, uint>
    161     };
    162     static const func_t funcs16[] =
    163     {
    164         bitMatOp<bit_and, ushort>,
    165         bitMatOp<bit_or, ushort>,
    166         bitMatOp<bit_xor, ushort>
    167     };
    168     static const func_t funcs8[] =
    169     {
    170         bitMatOp<bit_and, uchar>,
    171         bitMatOp<bit_or, uchar>,
    172         bitMatOp<bit_xor, uchar>
    173     };
    174 
    175     const int depth = src1.depth();
    176 
    177     CV_DbgAssert( depth <= CV_32F );
    178     CV_DbgAssert( op >= 0 && op < 3 );
    179 
    180     if (mask.empty())
    181     {
    182         const int bcols = (int) (src1.cols * src1.elemSize());
    183 
    184         if ((bcols & 3) == 0)
    185         {
    186             const int vcols = bcols >> 2;
    187 
    188             GpuMat vsrc1(src1.rows, vcols, CV_32SC1, src1.data, src1.step);
    189             GpuMat vsrc2(src1.rows, vcols, CV_32SC1, src2.data, src2.step);
    190             GpuMat vdst(src1.rows, vcols, CV_32SC1, dst.data, dst.step);
    191 
    192             funcs32[op](vsrc1, vsrc2, vdst, GpuMat(), stream);
    193         }
    194         else if ((bcols & 1) == 0)
    195         {
    196             const int vcols = bcols >> 1;
    197 
    198             GpuMat vsrc1(src1.rows, vcols, CV_16UC1, src1.data, src1.step);
    199             GpuMat vsrc2(src1.rows, vcols, CV_16UC1, src2.data, src2.step);
    200             GpuMat vdst(src1.rows, vcols, CV_16UC1, dst.data, dst.step);
    201 
    202             funcs16[op](vsrc1, vsrc2, vdst, GpuMat(), stream);
    203         }
    204         else
    205         {
    206             GpuMat vsrc1(src1.rows, bcols, CV_8UC1, src1.data, src1.step);
    207             GpuMat vsrc2(src1.rows, bcols, CV_8UC1, src2.data, src2.step);
    208             GpuMat vdst(src1.rows, bcols, CV_8UC1, dst.data, dst.step);
    209 
    210             funcs8[op](vsrc1, vsrc2, vdst, GpuMat(), stream);
    211         }
    212     }
    213     else
    214     {
    215         if (depth == CV_32F || depth == CV_32S)
    216         {
    217             funcs32[op](src1, src2, dst, mask, stream);
    218         }
    219         else if (depth == CV_16S || depth == CV_16U)
    220         {
    221             funcs16[op](src1, src2, dst, mask, stream);
    222         }
    223         else
    224         {
    225             funcs8[op](src1, src2, dst, mask, stream);
    226         }
    227     }
    228 }
    229 
    230 #endif
    231