1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2013, NVIDIA Corporation, all rights reserved. 14 // Copyright (C) 2014, Itseez Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the copyright holders or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #include "precomp.hpp" 44 #include "opencl_kernels_imgproc.hpp" 45 46 // ---------------------------------------------------------------------- 47 // CLAHE 48 49 #ifdef HAVE_OPENCL 50 51 namespace clahe 52 { 53 static bool calcLut(cv::InputArray _src, cv::OutputArray _dst, 54 const int tilesX, const int tilesY, const cv::Size tileSize, 55 const int clipLimit, const float lutScale) 56 { 57 cv::ocl::Kernel _k("calcLut", cv::ocl::imgproc::clahe_oclsrc); 58 59 bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU; 60 cv::String opts; 61 if(is_cpu) 62 opts = "-D CPU "; 63 else 64 opts = cv::format("-D WAVE_SIZE=%d", _k.preferedWorkGroupSizeMultiple()); 65 66 cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc, opts); 67 if(k.empty()) 68 return false; 69 70 cv::UMat src = _src.getUMat(); 71 _dst.create(tilesX * tilesY, 256, CV_8UC1); 72 cv::UMat dst = _dst.getUMat(); 73 74 int tile_size[2]; 75 tile_size[0] = tileSize.width; 76 tile_size[1] = tileSize.height; 77 78 size_t localThreads[3] = { 32, 8, 1 }; 79 size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 }; 80 81 int idx = 0; 82 idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src)); 83 idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst)); 84 idx = k.set(idx, tile_size); 85 idx = k.set(idx, tilesX); 86 idx = k.set(idx, clipLimit); 87 k.set(idx, lutScale); 88 89 return k.run(2, globalThreads, localThreads, false); 90 } 91 92 static bool transform(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _lut, 93 const int tilesX, const int tilesY, const cv::Size & tileSize) 94 { 95 96 cv::ocl::Kernel k("transform", cv::ocl::imgproc::clahe_oclsrc); 97 if(k.empty()) 98 return false; 99 100 int tile_size[2]; 101 tile_size[0] = tileSize.width; 102 tile_size[1] = tileSize.height; 103 104 cv::UMat src = _src.getUMat(); 105 _dst.create(src.size(), src.type()); 106 cv::UMat dst = _dst.getUMat(); 107 cv::UMat lut = _lut.getUMat(); 108 109 size_t localThreads[3] = { 32, 8, 1 }; 110 size_t globalThreads[3] = { src.cols, src.rows, 1 }; 111 112 int idx = 0; 113 idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src)); 114 idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst)); 115 idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(lut)); 116 idx = k.set(idx, src.cols); 117 idx = k.set(idx, src.rows); 118 idx = k.set(idx, tile_size); 119 idx = k.set(idx, tilesX); 120 k.set(idx, tilesY); 121 122 return k.run(2, globalThreads, localThreads, false); 123 } 124 } 125 126 #endif 127 128 namespace 129 { 130 template <class T, int histSize, int shift> 131 class CLAHE_CalcLut_Body : public cv::ParallelLoopBody 132 { 133 public: 134 CLAHE_CalcLut_Body(const cv::Mat& src, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& clipLimit, const float& lutScale) : 135 src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), clipLimit_(clipLimit), lutScale_(lutScale) 136 { 137 } 138 139 void operator ()(const cv::Range& range) const; 140 141 private: 142 cv::Mat src_; 143 mutable cv::Mat lut_; 144 145 cv::Size tileSize_; 146 int tilesX_; 147 int clipLimit_; 148 float lutScale_; 149 }; 150 151 template <class T, int histSize, int shift> 152 void CLAHE_CalcLut_Body<T,histSize,shift>::operator ()(const cv::Range& range) const 153 { 154 T* tileLut = lut_.ptr<T>(range.start); 155 const size_t lut_step = lut_.step / sizeof(T); 156 157 for (int k = range.start; k < range.end; ++k, tileLut += lut_step) 158 { 159 const int ty = k / tilesX_; 160 const int tx = k % tilesX_; 161 162 // retrieve tile submatrix 163 164 cv::Rect tileROI; 165 tileROI.x = tx * tileSize_.width; 166 tileROI.y = ty * tileSize_.height; 167 tileROI.width = tileSize_.width; 168 tileROI.height = tileSize_.height; 169 170 const cv::Mat tile = src_(tileROI); 171 172 // calc histogram 173 174 int tileHist[histSize] = {0, }; 175 176 int height = tileROI.height; 177 const size_t sstep = src_.step / sizeof(T); 178 for (const T* ptr = tile.ptr<T>(0); height--; ptr += sstep) 179 { 180 int x = 0; 181 for (; x <= tileROI.width - 4; x += 4) 182 { 183 int t0 = ptr[x], t1 = ptr[x+1]; 184 tileHist[t0 >> shift]++; tileHist[t1 >> shift]++; 185 t0 = ptr[x+2]; t1 = ptr[x+3]; 186 tileHist[t0 >> shift]++; tileHist[t1 >> shift]++; 187 } 188 189 for (; x < tileROI.width; ++x) 190 tileHist[ptr[x] >> shift]++; 191 } 192 193 // clip histogram 194 195 if (clipLimit_ > 0) 196 { 197 // how many pixels were clipped 198 int clipped = 0; 199 for (int i = 0; i < histSize; ++i) 200 { 201 if (tileHist[i] > clipLimit_) 202 { 203 clipped += tileHist[i] - clipLimit_; 204 tileHist[i] = clipLimit_; 205 } 206 } 207 208 // redistribute clipped pixels 209 int redistBatch = clipped / histSize; 210 int residual = clipped - redistBatch * histSize; 211 212 for (int i = 0; i < histSize; ++i) 213 tileHist[i] += redistBatch; 214 215 for (int i = 0; i < residual; ++i) 216 tileHist[i]++; 217 } 218 219 // calc Lut 220 221 int sum = 0; 222 for (int i = 0; i < histSize; ++i) 223 { 224 sum += tileHist[i]; 225 tileLut[i] = cv::saturate_cast<T>(sum * lutScale_); 226 } 227 } 228 } 229 230 template <class T> 231 class CLAHE_Interpolation_Body : public cv::ParallelLoopBody 232 { 233 public: 234 CLAHE_Interpolation_Body(const cv::Mat& src, const cv::Mat& dst, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& tilesY) : 235 src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY) 236 { 237 buf.allocate(src.cols << 2); 238 ind1_p = (int *)buf; 239 ind2_p = ind1_p + src.cols; 240 xa_p = (float *)(ind2_p + src.cols); 241 xa1_p = xa_p + src.cols; 242 243 int lut_step = static_cast<int>(lut_.step / sizeof(T)); 244 float inv_tw = 1.0f / tileSize_.width; 245 246 for (int x = 0; x < src.cols; ++x) 247 { 248 float txf = x * inv_tw - 0.5f; 249 250 int tx1 = cvFloor(txf); 251 int tx2 = tx1 + 1; 252 253 xa_p[x] = txf - tx1; 254 xa1_p[x] = 1.0f - xa_p[x]; 255 256 tx1 = std::max(tx1, 0); 257 tx2 = std::min(tx2, tilesX_ - 1); 258 259 ind1_p[x] = tx1 * lut_step; 260 ind2_p[x] = tx2 * lut_step; 261 } 262 } 263 264 void operator ()(const cv::Range& range) const; 265 266 private: 267 cv::Mat src_; 268 mutable cv::Mat dst_; 269 cv::Mat lut_; 270 271 cv::Size tileSize_; 272 int tilesX_; 273 int tilesY_; 274 275 cv::AutoBuffer<int> buf; 276 int * ind1_p, * ind2_p; 277 float * xa_p, * xa1_p; 278 }; 279 280 template <class T> 281 void CLAHE_Interpolation_Body<T>::operator ()(const cv::Range& range) const 282 { 283 float inv_th = 1.0f / tileSize_.height; 284 285 for (int y = range.start; y < range.end; ++y) 286 { 287 const T* srcRow = src_.ptr<T>(y); 288 T* dstRow = dst_.ptr<T>(y); 289 290 float tyf = y * inv_th - 0.5f; 291 292 int ty1 = cvFloor(tyf); 293 int ty2 = ty1 + 1; 294 295 float ya = tyf - ty1, ya1 = 1.0f - ya; 296 297 ty1 = std::max(ty1, 0); 298 ty2 = std::min(ty2, tilesY_ - 1); 299 300 const T* lutPlane1 = lut_.ptr<T>(ty1 * tilesX_); 301 const T* lutPlane2 = lut_.ptr<T>(ty2 * tilesX_); 302 303 for (int x = 0; x < src_.cols; ++x) 304 { 305 int srcVal = srcRow[x]; 306 307 int ind1 = ind1_p[x] + srcVal; 308 int ind2 = ind2_p[x] + srcVal; 309 310 float res = (lutPlane1[ind1] * xa1_p[x] + lutPlane1[ind2] * xa_p[x]) * ya1 + 311 (lutPlane2[ind1] * xa1_p[x] + lutPlane2[ind2] * xa_p[x]) * ya; 312 313 dstRow[x] = cv::saturate_cast<T>(res); 314 } 315 } 316 } 317 318 class CLAHE_Impl : public cv::CLAHE 319 { 320 public: 321 CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8); 322 323 void apply(cv::InputArray src, cv::OutputArray dst); 324 325 void setClipLimit(double clipLimit); 326 double getClipLimit() const; 327 328 void setTilesGridSize(cv::Size tileGridSize); 329 cv::Size getTilesGridSize() const; 330 331 void collectGarbage(); 332 333 private: 334 double clipLimit_; 335 int tilesX_; 336 int tilesY_; 337 338 cv::Mat srcExt_; 339 cv::Mat lut_; 340 341 #ifdef HAVE_OPENCL 342 cv::UMat usrcExt_; 343 cv::UMat ulut_; 344 #endif 345 }; 346 347 CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) : 348 clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY) 349 { 350 } 351 352 void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst) 353 { 354 CV_Assert( _src.type() == CV_8UC1 || _src.type() == CV_16UC1 ); 355 356 #ifdef HAVE_OPENCL 357 bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.dims()<=2 && _src.type() == CV_8UC1; 358 #endif 359 360 int histSize = _src.type() == CV_8UC1 ? 256 : 4096; 361 362 cv::Size tileSize; 363 cv::_InputArray _srcForLut; 364 365 if (_src.size().width % tilesX_ == 0 && _src.size().height % tilesY_ == 0) 366 { 367 tileSize = cv::Size(_src.size().width / tilesX_, _src.size().height / tilesY_); 368 _srcForLut = _src; 369 } 370 else 371 { 372 #ifdef HAVE_OPENCL 373 if(useOpenCL) 374 { 375 cv::copyMakeBorder(_src, usrcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101); 376 tileSize = cv::Size(usrcExt_.size().width / tilesX_, usrcExt_.size().height / tilesY_); 377 _srcForLut = usrcExt_; 378 } 379 else 380 #endif 381 { 382 cv::copyMakeBorder(_src, srcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101); 383 tileSize = cv::Size(srcExt_.size().width / tilesX_, srcExt_.size().height / tilesY_); 384 _srcForLut = srcExt_; 385 } 386 } 387 388 const int tileSizeTotal = tileSize.area(); 389 const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal; 390 391 int clipLimit = 0; 392 if (clipLimit_ > 0.0) 393 { 394 clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize); 395 clipLimit = std::max(clipLimit, 1); 396 } 397 398 #ifdef HAVE_OPENCL 399 if (useOpenCL && clahe::calcLut(_srcForLut, ulut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale) ) 400 if( clahe::transform(_src, _dst, ulut_, tilesX_, tilesY_, tileSize) ) 401 { 402 CV_IMPL_ADD(CV_IMPL_OCL); 403 return; 404 } 405 #endif 406 407 cv::Mat src = _src.getMat(); 408 _dst.create( src.size(), src.type() ); 409 cv::Mat dst = _dst.getMat(); 410 cv::Mat srcForLut = _srcForLut.getMat(); 411 lut_.create(tilesX_ * tilesY_, histSize, _src.type()); 412 413 cv::Ptr<cv::ParallelLoopBody> calcLutBody; 414 if (_src.type() == CV_8UC1) 415 calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<uchar, 256, 0> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale); 416 else if (_src.type() == CV_16UC1) 417 calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<ushort, 4096, 4> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale); 418 else 419 CV_Error( CV_StsBadArg, "Unsupported type" ); 420 421 cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), *calcLutBody); 422 423 cv::Ptr<cv::ParallelLoopBody> interpolationBody; 424 if (_src.type() == CV_8UC1) 425 interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<uchar> >(src, dst, lut_, tileSize, tilesX_, tilesY_); 426 else if (_src.type() == CV_16UC1) 427 interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<ushort> >(src, dst, lut_, tileSize, tilesX_, tilesY_); 428 429 cv::parallel_for_(cv::Range(0, src.rows), *interpolationBody); 430 } 431 432 void CLAHE_Impl::setClipLimit(double clipLimit) 433 { 434 clipLimit_ = clipLimit; 435 } 436 437 double CLAHE_Impl::getClipLimit() const 438 { 439 return clipLimit_; 440 } 441 442 void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize) 443 { 444 tilesX_ = tileGridSize.width; 445 tilesY_ = tileGridSize.height; 446 } 447 448 cv::Size CLAHE_Impl::getTilesGridSize() const 449 { 450 return cv::Size(tilesX_, tilesY_); 451 } 452 453 void CLAHE_Impl::collectGarbage() 454 { 455 srcExt_.release(); 456 lut_.release(); 457 #ifdef HAVE_OPENCL 458 usrcExt_.release(); 459 ulut_.release(); 460 #endif 461 } 462 } 463 464 cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize) 465 { 466 return makePtr<CLAHE_Impl>(clipLimit, tileGridSize.width, tileGridSize.height); 467 } 468