Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
     14 // Copyright (C) 2014, Itseez Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the copyright holders or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 #include "opencl_kernels_imgproc.hpp"
     45 
     46 // ----------------------------------------------------------------------
     47 // CLAHE
     48 
     49 #ifdef HAVE_OPENCL
     50 
     51 namespace clahe
     52 {
     53     static bool calcLut(cv::InputArray _src, cv::OutputArray _dst,
     54         const int tilesX, const int tilesY, const cv::Size tileSize,
     55         const int clipLimit, const float lutScale)
     56     {
     57         cv::ocl::Kernel _k("calcLut", cv::ocl::imgproc::clahe_oclsrc);
     58 
     59         bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
     60         cv::String opts;
     61         if(is_cpu)
     62             opts = "-D CPU ";
     63         else
     64             opts = cv::format("-D WAVE_SIZE=%d", _k.preferedWorkGroupSizeMultiple());
     65 
     66         cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc, opts);
     67         if(k.empty())
     68             return false;
     69 
     70         cv::UMat src = _src.getUMat();
     71         _dst.create(tilesX * tilesY, 256, CV_8UC1);
     72         cv::UMat dst = _dst.getUMat();
     73 
     74         int tile_size[2];
     75         tile_size[0] = tileSize.width;
     76         tile_size[1] = tileSize.height;
     77 
     78         size_t localThreads[3]  = { 32, 8, 1 };
     79         size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
     80 
     81         int idx = 0;
     82         idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
     83         idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
     84         idx = k.set(idx, tile_size);
     85         idx = k.set(idx, tilesX);
     86         idx = k.set(idx, clipLimit);
     87         k.set(idx, lutScale);
     88 
     89         return k.run(2, globalThreads, localThreads, false);
     90     }
     91 
     92     static bool transform(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _lut,
     93         const int tilesX, const int tilesY, const cv::Size & tileSize)
     94     {
     95 
     96         cv::ocl::Kernel k("transform", cv::ocl::imgproc::clahe_oclsrc);
     97         if(k.empty())
     98             return false;
     99 
    100         int tile_size[2];
    101         tile_size[0] = tileSize.width;
    102         tile_size[1] = tileSize.height;
    103 
    104         cv::UMat src = _src.getUMat();
    105         _dst.create(src.size(), src.type());
    106         cv::UMat dst = _dst.getUMat();
    107         cv::UMat lut = _lut.getUMat();
    108 
    109         size_t localThreads[3]  = { 32, 8, 1 };
    110         size_t globalThreads[3] = { src.cols, src.rows, 1 };
    111 
    112         int idx = 0;
    113         idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
    114         idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
    115         idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(lut));
    116         idx = k.set(idx, src.cols);
    117         idx = k.set(idx, src.rows);
    118         idx = k.set(idx, tile_size);
    119         idx = k.set(idx, tilesX);
    120         k.set(idx, tilesY);
    121 
    122         return k.run(2, globalThreads, localThreads, false);
    123     }
    124 }
    125 
    126 #endif
    127 
    128 namespace
    129 {
    130     template <class T, int histSize, int shift>
    131     class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
    132     {
    133     public:
    134         CLAHE_CalcLut_Body(const cv::Mat& src, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& clipLimit, const float& lutScale) :
    135             src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), clipLimit_(clipLimit), lutScale_(lutScale)
    136         {
    137         }
    138 
    139         void operator ()(const cv::Range& range) const;
    140 
    141     private:
    142         cv::Mat src_;
    143         mutable cv::Mat lut_;
    144 
    145         cv::Size tileSize_;
    146         int tilesX_;
    147         int clipLimit_;
    148         float lutScale_;
    149     };
    150 
    151     template <class T, int histSize, int shift>
    152     void CLAHE_CalcLut_Body<T,histSize,shift>::operator ()(const cv::Range& range) const
    153     {
    154         T* tileLut = lut_.ptr<T>(range.start);
    155         const size_t lut_step = lut_.step / sizeof(T);
    156 
    157         for (int k = range.start; k < range.end; ++k, tileLut += lut_step)
    158         {
    159             const int ty = k / tilesX_;
    160             const int tx = k % tilesX_;
    161 
    162             // retrieve tile submatrix
    163 
    164             cv::Rect tileROI;
    165             tileROI.x = tx * tileSize_.width;
    166             tileROI.y = ty * tileSize_.height;
    167             tileROI.width = tileSize_.width;
    168             tileROI.height = tileSize_.height;
    169 
    170             const cv::Mat tile = src_(tileROI);
    171 
    172             // calc histogram
    173 
    174             int tileHist[histSize] = {0, };
    175 
    176             int height = tileROI.height;
    177             const size_t sstep = src_.step / sizeof(T);
    178             for (const T* ptr = tile.ptr<T>(0); height--; ptr += sstep)
    179             {
    180                 int x = 0;
    181                 for (; x <= tileROI.width - 4; x += 4)
    182                 {
    183                     int t0 = ptr[x], t1 = ptr[x+1];
    184                     tileHist[t0 >> shift]++; tileHist[t1 >> shift]++;
    185                     t0 = ptr[x+2]; t1 = ptr[x+3];
    186                     tileHist[t0 >> shift]++; tileHist[t1 >> shift]++;
    187                 }
    188 
    189                 for (; x < tileROI.width; ++x)
    190                     tileHist[ptr[x] >> shift]++;
    191             }
    192 
    193             // clip histogram
    194 
    195             if (clipLimit_ > 0)
    196             {
    197                 // how many pixels were clipped
    198                 int clipped = 0;
    199                 for (int i = 0; i < histSize; ++i)
    200                 {
    201                     if (tileHist[i] > clipLimit_)
    202                     {
    203                         clipped += tileHist[i] - clipLimit_;
    204                         tileHist[i] = clipLimit_;
    205                     }
    206                 }
    207 
    208                 // redistribute clipped pixels
    209                 int redistBatch = clipped / histSize;
    210                 int residual = clipped - redistBatch * histSize;
    211 
    212                 for (int i = 0; i < histSize; ++i)
    213                     tileHist[i] += redistBatch;
    214 
    215                 for (int i = 0; i < residual; ++i)
    216                     tileHist[i]++;
    217             }
    218 
    219             // calc Lut
    220 
    221             int sum = 0;
    222             for (int i = 0; i < histSize; ++i)
    223             {
    224                 sum += tileHist[i];
    225                 tileLut[i] = cv::saturate_cast<T>(sum * lutScale_);
    226             }
    227         }
    228     }
    229 
    230     template <class T>
    231     class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
    232     {
    233     public:
    234         CLAHE_Interpolation_Body(const cv::Mat& src, const cv::Mat& dst, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& tilesY) :
    235             src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
    236         {
    237             buf.allocate(src.cols << 2);
    238             ind1_p = (int *)buf;
    239             ind2_p = ind1_p + src.cols;
    240             xa_p = (float *)(ind2_p + src.cols);
    241             xa1_p = xa_p + src.cols;
    242 
    243             int lut_step = static_cast<int>(lut_.step / sizeof(T));
    244             float inv_tw = 1.0f / tileSize_.width;
    245 
    246             for (int x = 0; x < src.cols; ++x)
    247             {
    248                 float txf = x * inv_tw - 0.5f;
    249 
    250                 int tx1 = cvFloor(txf);
    251                 int tx2 = tx1 + 1;
    252 
    253                 xa_p[x] = txf - tx1;
    254                 xa1_p[x] = 1.0f - xa_p[x];
    255 
    256                 tx1 = std::max(tx1, 0);
    257                 tx2 = std::min(tx2, tilesX_ - 1);
    258 
    259                 ind1_p[x] = tx1 * lut_step;
    260                 ind2_p[x] = tx2 * lut_step;
    261             }
    262         }
    263 
    264         void operator ()(const cv::Range& range) const;
    265 
    266     private:
    267         cv::Mat src_;
    268         mutable cv::Mat dst_;
    269         cv::Mat lut_;
    270 
    271         cv::Size tileSize_;
    272         int tilesX_;
    273         int tilesY_;
    274 
    275         cv::AutoBuffer<int> buf;
    276         int * ind1_p, * ind2_p;
    277         float * xa_p, * xa1_p;
    278     };
    279 
    280     template <class T>
    281     void CLAHE_Interpolation_Body<T>::operator ()(const cv::Range& range) const
    282     {
    283         float inv_th = 1.0f / tileSize_.height;
    284 
    285         for (int y = range.start; y < range.end; ++y)
    286         {
    287             const T* srcRow = src_.ptr<T>(y);
    288             T* dstRow = dst_.ptr<T>(y);
    289 
    290             float tyf = y * inv_th - 0.5f;
    291 
    292             int ty1 = cvFloor(tyf);
    293             int ty2 = ty1 + 1;
    294 
    295             float ya = tyf - ty1, ya1 = 1.0f - ya;
    296 
    297             ty1 = std::max(ty1, 0);
    298             ty2 = std::min(ty2, tilesY_ - 1);
    299 
    300             const T* lutPlane1 = lut_.ptr<T>(ty1 * tilesX_);
    301             const T* lutPlane2 = lut_.ptr<T>(ty2 * tilesX_);
    302 
    303             for (int x = 0; x < src_.cols; ++x)
    304             {
    305                 int srcVal = srcRow[x];
    306 
    307                 int ind1 = ind1_p[x] + srcVal;
    308                 int ind2 = ind2_p[x] + srcVal;
    309 
    310                 float res = (lutPlane1[ind1] * xa1_p[x] + lutPlane1[ind2] * xa_p[x]) * ya1 +
    311                             (lutPlane2[ind1] * xa1_p[x] + lutPlane2[ind2] * xa_p[x]) * ya;
    312 
    313                 dstRow[x] = cv::saturate_cast<T>(res);
    314             }
    315         }
    316     }
    317 
    318     class CLAHE_Impl : public cv::CLAHE
    319     {
    320     public:
    321         CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
    322 
    323         void apply(cv::InputArray src, cv::OutputArray dst);
    324 
    325         void setClipLimit(double clipLimit);
    326         double getClipLimit() const;
    327 
    328         void setTilesGridSize(cv::Size tileGridSize);
    329         cv::Size getTilesGridSize() const;
    330 
    331         void collectGarbage();
    332 
    333     private:
    334         double clipLimit_;
    335         int tilesX_;
    336         int tilesY_;
    337 
    338         cv::Mat srcExt_;
    339         cv::Mat lut_;
    340 
    341 #ifdef HAVE_OPENCL
    342         cv::UMat usrcExt_;
    343         cv::UMat ulut_;
    344 #endif
    345     };
    346 
    347     CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
    348         clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
    349     {
    350     }
    351 
    352     void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
    353     {
    354         CV_Assert( _src.type() == CV_8UC1 || _src.type() == CV_16UC1 );
    355 
    356 #ifdef HAVE_OPENCL
    357         bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.dims()<=2 && _src.type() == CV_8UC1;
    358 #endif
    359 
    360         int histSize = _src.type() == CV_8UC1 ? 256 : 4096;
    361 
    362         cv::Size tileSize;
    363         cv::_InputArray _srcForLut;
    364 
    365         if (_src.size().width % tilesX_ == 0 && _src.size().height % tilesY_ == 0)
    366         {
    367             tileSize = cv::Size(_src.size().width / tilesX_, _src.size().height / tilesY_);
    368             _srcForLut = _src;
    369         }
    370         else
    371         {
    372 #ifdef HAVE_OPENCL
    373             if(useOpenCL)
    374             {
    375                 cv::copyMakeBorder(_src, usrcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
    376                 tileSize = cv::Size(usrcExt_.size().width / tilesX_, usrcExt_.size().height / tilesY_);
    377                 _srcForLut = usrcExt_;
    378             }
    379             else
    380 #endif
    381             {
    382                 cv::copyMakeBorder(_src, srcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
    383                 tileSize = cv::Size(srcExt_.size().width / tilesX_, srcExt_.size().height / tilesY_);
    384                 _srcForLut = srcExt_;
    385             }
    386         }
    387 
    388         const int tileSizeTotal = tileSize.area();
    389         const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
    390 
    391         int clipLimit = 0;
    392         if (clipLimit_ > 0.0)
    393         {
    394             clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
    395             clipLimit = std::max(clipLimit, 1);
    396         }
    397 
    398 #ifdef HAVE_OPENCL
    399         if (useOpenCL && clahe::calcLut(_srcForLut, ulut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale) )
    400             if( clahe::transform(_src, _dst, ulut_, tilesX_, tilesY_, tileSize) )
    401             {
    402                 CV_IMPL_ADD(CV_IMPL_OCL);
    403                 return;
    404             }
    405 #endif
    406 
    407         cv::Mat src = _src.getMat();
    408         _dst.create( src.size(), src.type() );
    409         cv::Mat dst = _dst.getMat();
    410         cv::Mat srcForLut = _srcForLut.getMat();
    411         lut_.create(tilesX_ * tilesY_, histSize, _src.type());
    412 
    413         cv::Ptr<cv::ParallelLoopBody> calcLutBody;
    414         if (_src.type() == CV_8UC1)
    415             calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<uchar, 256, 0> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale);
    416         else if (_src.type() == CV_16UC1)
    417             calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<ushort, 4096, 4> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale);
    418         else
    419             CV_Error( CV_StsBadArg, "Unsupported type" );
    420 
    421         cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), *calcLutBody);
    422 
    423         cv::Ptr<cv::ParallelLoopBody> interpolationBody;
    424         if (_src.type() == CV_8UC1)
    425             interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<uchar> >(src, dst, lut_, tileSize, tilesX_, tilesY_);
    426         else if (_src.type() == CV_16UC1)
    427             interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<ushort> >(src, dst, lut_, tileSize, tilesX_, tilesY_);
    428 
    429         cv::parallel_for_(cv::Range(0, src.rows), *interpolationBody);
    430     }
    431 
    432     void CLAHE_Impl::setClipLimit(double clipLimit)
    433     {
    434         clipLimit_ = clipLimit;
    435     }
    436 
    437     double CLAHE_Impl::getClipLimit() const
    438     {
    439         return clipLimit_;
    440     }
    441 
    442     void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
    443     {
    444         tilesX_ = tileGridSize.width;
    445         tilesY_ = tileGridSize.height;
    446     }
    447 
    448     cv::Size CLAHE_Impl::getTilesGridSize() const
    449     {
    450         return cv::Size(tilesX_, tilesY_);
    451     }
    452 
    453     void CLAHE_Impl::collectGarbage()
    454     {
    455         srcExt_.release();
    456         lut_.release();
    457 #ifdef HAVE_OPENCL
    458         usrcExt_.release();
    459         ulut_.release();
    460 #endif
    461     }
    462 }
    463 
    464 cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
    465 {
    466     return makePtr<CLAHE_Impl>(clipLimit, tileGridSize.width, tileGridSize.height);
    467 }
    468