Home | History | Annotate | Download | only in cuda
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "opencv2/opencv_modules.hpp"
     44 
     45 #ifndef HAVE_OPENCV_CUDEV
     46 
     47 #error "opencv_cudev is required"
     48 
     49 #else
     50 
     51 #include "opencv2/cudev.hpp"
     52 
     53 using namespace cv::cudev;
     54 
     55 void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);
     56 
     // File-local building blocks for element-wise matrix addition: the device functors
     // and the host-side launchers that hand them to gridTransformBinary.
     namespace
     {
         // Generic adder: sums two T operands and converts the sum to D with saturate_cast.
         template <typename T, typename D> struct AddOp1 : binary_function<T, T, D>
         {
             __device__ __forceinline__ D operator ()(T lhs, T rhs) const
             {
                 return saturate_cast<D>(lhs + rhs);
             }
         };

         // Launches the generic per-element addition of src1 and src2 (elements of type T)
         // into dst (elements of type D) on the given stream.
         // A non-null mask.data selects the masked gridTransformBinary overload, with the
         // mask viewed as uchar; otherwise the unmasked overload is used.
         template <typename T, typename D>
         void addMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
         {
             if (!mask.data)
             {
                 gridTransformBinary(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), AddOp1<T, D>(), stream);
                 return;
             }

             gridTransformBinary(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), AddOp1<T, D>(), globPtr<uchar>(mask), stream);
         }

         // Views a matrix buffer as rows x wordCols unsigned 32-bit words while keeping the
         // matrix's own row step. The row count is passed explicitly because the launchers
         // below size every view from the first source matrix.
         inline GlobPtrSz<uint> asUintView(const GpuMat& mat, int rows, int wordCols)
         {
             return globPtr(reinterpret_cast<uint*>(mat.data), mat.step, rows, wordCols);
         }

         // Adder for two values packed into one uint; forwards to vadd2.
         // NOTE(review): vadd2 is defined outside this file - presumably a per-halfword add;
         // confirm its overflow behaviour against the cudev SIMD helpers.
         struct AddOp2 : binary_function<uint, uint, uint>
         {
             __device__ __forceinline__ uint operator ()(uint lhs, uint rhs) const
             {
                 return vadd2(lhs, rhs);
             }
         };

         // Packed addition that processes two source columns per uint word.
         // The caller in this file uses it only for CV_16U data whose column count is even.
         void addMat_v2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
         {
             const int rows = src1.rows;
             const int pairCols = src1.cols >> 1;   // number of uint words per row

             GlobPtrSz<uint> lhsWords = asUintView(src1, rows, pairCols);
             GlobPtrSz<uint> rhsWords = asUintView(src2, rows, pairCols);
             GlobPtrSz<uint> outWords = asUintView(dst, rows, pairCols);

             gridTransformBinary(lhsWords, rhsWords, outWords, AddOp2(), stream);
         }

         // Adder for four values packed into one uint; forwards to vadd4.
         // NOTE(review): vadd4 is defined outside this file - presumably a per-byte add;
         // confirm its overflow behaviour against the cudev SIMD helpers.
         struct AddOp4 : binary_function<uint, uint, uint>
         {
             __device__ __forceinline__ uint operator ()(uint lhs, uint rhs) const
             {
                 return vadd4(lhs, rhs);
             }
         };

         // Packed addition that processes four source columns per uint word.
         // The caller in this file uses it only for CV_8U data whose column count is a
         // multiple of four.
         void addMat_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
         {
             const int rows = src1.rows;
             const int quadCols = src1.cols >> 2;   // number of uint words per row

             GlobPtrSz<uint> lhsWords = asUintView(src1, rows, quadCols);
             GlobPtrSz<uint> rhsWords = asUintView(src2, rows, quadCols);
             GlobPtrSz<uint> outWords = asUintView(dst, rows, quadCols);

             gridTransformBinary(lhsWords, rhsWords, outWords, AddOp4(), stream);
         }
     }
    114 
    // Adds src1 and src2 element by element and stores the result in dst.
    //
    // Parameters:
    //   src1, src2 - input matrices; only src1's depth is used to select the kernel.
    //   dst        - output matrix; its depth selects the destination element type.
    //   mask       - optional mask; an empty mask enables the packed fast paths below.
    //   (double)   - unnamed and unused in this function.
    //   stream     - stream on which the work is enqueued.
    //   (int)      - unnamed and unused in this function.
    //   NOTE(review): the unused parameters presumably keep the signature aligned with the
    //   other arithmetic entry points - confirm against the caller's dispatch table.
    //
    // Raises (via CV_Assert / CV_Error) when a depth is outside the table or when the
    // (source depth, destination depth) pair has no kernel instantiated.
    void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int)
    {
        typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream);

        // Dispatch table indexed as funcs[source depth][destination depth].
        // A null entry marks a combination that is deliberately not instantiated.
        static const func_t funcs[7][7] =
        {
            {
                addMat_v1<uchar, uchar>,
                addMat_v1<uchar, schar>,
                addMat_v1<uchar, ushort>,
                addMat_v1<uchar, short>,
                addMat_v1<uchar, int>,
                addMat_v1<uchar, float>,
                addMat_v1<uchar, double>
            },
            {
                addMat_v1<schar, uchar>,
                addMat_v1<schar, schar>,
                addMat_v1<schar, ushort>,
                addMat_v1<schar, short>,
                addMat_v1<schar, int>,
                addMat_v1<schar, float>,
                addMat_v1<schar, double>
            },
            {
                0 /*addMat_v1<ushort, uchar>*/,
                0 /*addMat_v1<ushort, schar>*/,
                addMat_v1<ushort, ushort>,
                addMat_v1<ushort, short>,
                addMat_v1<ushort, int>,
                addMat_v1<ushort, float>,
                addMat_v1<ushort, double>
            },
            {
                0 /*addMat_v1<short, uchar>*/,
                0 /*addMat_v1<short, schar>*/,
                addMat_v1<short, ushort>,
                addMat_v1<short, short>,
                addMat_v1<short, int>,
                addMat_v1<short, float>,
                addMat_v1<short, double>
            },
            {
                0 /*addMat_v1<int, uchar>*/,
                0 /*addMat_v1<int, schar>*/,
                0 /*addMat_v1<int, ushort>*/,
                0 /*addMat_v1<int, short>*/,
                addMat_v1<int, int>,
                addMat_v1<int, float>,
                addMat_v1<int, double>
            },
            {
                0 /*addMat_v1<float, uchar>*/,
                0 /*addMat_v1<float, schar>*/,
                0 /*addMat_v1<float, ushort>*/,
                0 /*addMat_v1<float, short>*/,
                0 /*addMat_v1<float, int>*/,
                addMat_v1<float, float>,
                addMat_v1<float, double>
            },
            {
                0 /*addMat_v1<double, uchar>*/,
                0 /*addMat_v1<double, schar>*/,
                0 /*addMat_v1<double, ushort>*/,
                0 /*addMat_v1<double, short>*/,
                0 /*addMat_v1<double, int>*/,
                0 /*addMat_v1<double, float>*/,
                addMat_v1<double, double>
            }
        };

        const int sdepth = src1.depth();
        const int ddepth = dst.depth();

        // Both depths index the 7x7 table above, so the bound has to be enforced in release
        // builds as well; a debug-only assertion would allow an out-of-range table read.
        CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F );

        // Work on single-channel views so every channel value is handled as one element.
        GpuMat src1_ = src1.reshape(1);
        GpuMat src2_ = src2.reshape(1);
        GpuMat dst_ = dst.reshape(1);

        // Fast paths: unmasked 8-bit / 16-bit unsigned addition into the same depth can be
        // done on packed 32-bit words by addMat_v4 / addMat_v2.
        if (mask.empty() && (sdepth == CV_8U || sdepth == CV_16U) && ddepth == sdepth)
        {
            const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
            const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
            const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);

            // All three base pointers must sit on a 32-byte boundary.
            const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;

            // The packed launchers reinterpret each buffer as uint words with the original
            // row step. Assuming rows are addressed as data + y * step, every step must be a
            // multiple of 4 bytes once there is more than one row, otherwise later rows would
            // be read and written as misaligned words. Such inputs use the generic path.
            const bool rowsWordAligned = src1_.rows <= 1 ||
                ((src1_.step | src2_.step | dst_.step) & 3) == 0;

            if (isAllAligned && rowsWordAligned)
            {
                if (sdepth == CV_8U && (src1_.cols & 3) == 0)
                {
                    // Four 8-bit columns per word: column count must be a multiple of 4.
                    addMat_v4(src1_, src2_, dst_, stream);
                    return;
                }
                else if (sdepth == CV_16U && (src1_.cols & 1) == 0)
                {
                    // Two 16-bit columns per word: column count must be even.
                    addMat_v2(src1_, src2_, dst_, stream);
                    return;
                }
            }
        }

        // Generic path: pick the kernel for this (source, destination) depth pair.
        const func_t func = funcs[sdepth][ddepth];

        if (!func)
            CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");

        func(src1_, src2_, dst_, mask, stream);
    }
    224 
    225 #endif
    226