1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved. 15 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved. 16 // Third party copyrights are property of their respective owners. 17 // 18 // Redistribution and use in source and binary forms, with or without modification, 19 // are permitted provided that the following conditions are met: 20 // 21 // * Redistribution's of source code must retain the above copyright notice, 22 // this list of conditions and the following disclaimer. 23 // 24 // * Redistribution's in binary form must reproduce the above copyright notice, 25 // this list of conditions and the following disclaimer in the documentation 26 // and/or other materials provided with the distribution. 27 // 28 // * The name of the copyright holders may not be used to endorse or promote products 29 // derived from this software without specific prior written permission. 30 // 31 // This software is provided by the copyright holders and contributors "as is" and 32 // any express or implied warranties, including, but not limited to, the implied 33 // warranties of merchantability and fitness for a particular purpose are disclaimed. 
34 // In no event shall the Intel Corporation or contributors be liable for any direct, 35 // indirect, incidental, special, exemplary, or consequential damages 36 // (including, but not limited to, procurement of substitute goods or services; 37 // loss of use, data, or profits; or business interruption) however caused 38 // and on any theory of liability, whether in contract, strict liability, 39 // or tort (including negligence or otherwise) arising in any way out of 40 // the use of this software, even if advised of the possibility of such damage. 41 // 42 //M*/ 43 44 /********************************* COPYRIGHT NOTICE *******************************\ 45 The function for RGB to Lab conversion is based on the MATLAB script 46 RGB2Lab.m translated by Mark Ruzon from C code by Yossi Rubner, 23 September 1997. 47 See the page [http://vision.stanford.edu/~ruzon/software/rgblab.html] 48 \**********************************************************************************/ 49 50 /********************************* COPYRIGHT NOTICE *******************************\ 51 Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer 52 from MD-Mathematische Dienste GmbH. Below is the copyright notice: 53 54 IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 55 By downloading, copying, installing or using the software you agree 56 to this license. If you do not agree to this license, do not download, 57 install, copy or use the software. 58 59 Contributors License Agreement: 60 61 Copyright (c) 2002, 62 MD-Mathematische Dienste GmbH 63 Im Defdahl 5-10 64 44141 Dortmund 65 Germany 66 www.md-it.de 67 68 Redistribution and use in source and binary forms, 69 with or without modification, are permitted provided 70 that the following conditions are met: 71 72 Redistributions of source code must retain 73 the above copyright notice, this list of conditions and the following disclaimer. 
74 Redistributions in binary form must reproduce the above copyright notice, 75 this list of conditions and the following disclaimer in the documentation 76 and/or other materials provided with the distribution. 77 The name of Contributor may not be used to endorse or promote products 78 derived from this software without specific prior written permission. 79 80 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 81 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 82 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 83 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE 84 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 85 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 86 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 87 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 88 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 89 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 90 THE POSSIBILITY OF SUCH DAMAGE. 
\**********************************************************************************/

#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include <limits>

// Fixed-point "descale": add half of 2^n for rounding, then shift right by n bits.
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// Saturation limits passed to the IPP channel-swap wrappers below.
#define MAX_IPP8u 255
#define MAX_IPP16u 65535
#define MAX_IPP32f 1.0
// Initialize the IPP dispatcher once at load time; the status value itself is unused.
static IppStatus sts = ippInit();
#endif

namespace cv
{

// computes cubic spline coefficients for a function: (xi=i, yi=f[i]), i=0..n
//
// The knots are unit-spaced (x_i = i).  On return, tab holds 4 coefficients per
// interval i: tab[i*4+0..3] = (a, b, c, d) such that the spline on [i, i+1] is
// a + b*t + c*t^2 + d*t^3 with t = x - i (see splineInterpolate below).
// During the first pass tab is reused as scratch storage for the tridiagonal
// elimination factors.
template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
{
    _Tp cn = 0;
    int i;
    tab[0] = tab[1] = (_Tp)0;

    // Forward elimination of the tridiagonal system for the second-derivative
    // coefficients (natural boundary conditions: c_0 = c_n = 0).
    for(i = 1; i < n-1; i++)
    {
        _Tp t = 3*(f[i+1] - 2*f[i] + f[i-1]);
        _Tp l = 1/(4 - tab[(i-1)*4]);
        tab[i*4] = l; tab[i*4+1] = (t - tab[(i-1)*4+1])*l;
    }

    // Back substitution; converts the scratch values into the final
    // per-interval polynomial coefficients (a, b, c, d).
    for(i = n-1; i >= 0; i--)
    {
        _Tp c = tab[i*4+1] - tab[i*4]*cn;
        _Tp b = f[i+1] - f[i] - (cn + c*2)*(_Tp)0.3333333333333333;
        _Tp d = (cn - c)*(_Tp)0.3333333333333333;
        tab[i*4] = f[i]; tab[i*4+1] = b;
        tab[i*4+2] = c; tab[i*4+3] = d;
        cn = c;
    }
}

// interpolates value of a function at x, 0 <= x <= n using a cubic spline.
// Evaluates the spline built by splineBuild at x via Horner's rule; the
// interval index is clamped to [0, n-1] so slightly out-of-range x is safe.
template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab, int n)
{
    // don't touch this function without urgent need - some versions of gcc fail to inline it correctly
    int ix = std::min(std::max(int(x), 0), n-1);
    x -= ix;
    tab += ix*4;
    return ((tab[3]*x + tab[2])*x + tab[1])*x + tab[0];
}


// Per-depth channel traits: the saturation maximum and its midpoint
// (used e.g. as the opaque alpha value and as the chroma bias).
template<typename _Tp> struct ColorChannel
{
    typedef float worktype_f;
    static _Tp max() { return std::numeric_limits<_Tp>::max(); }
    static _Tp half() { return (_Tp)(max()/2 + 1); }
};

// Float images are treated as normalized to [0, 1].
template<> struct ColorChannel<float>
{
    typedef float worktype_f;
    static float max() { return 1.f; }
    static float half() { return 0.5f; }
};

/*template<> struct ColorChannel<double>
{
    typedef double worktype_f;
    static double max() { return 1.; }
    static double half() { return 0.5; }
};*/


///////////////////////////// Top-level template function ////////////////////////////////

// parallel_for_ body that applies a per-row color-conversion functor Cvt
// (any of the structs defined later in this file) to a range of rows.
template <typename Cvt>
class CvtColorLoop_Invoker : public ParallelLoopBody
{
    typedef typename Cvt::channel_type _Tp;
public:

    CvtColorLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt) :
        ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt)
    {
    }

    virtual void operator()(const Range& range) const
    {
        const uchar* yS = src.ptr<uchar>(range.start);
        uchar* yD = dst.ptr<uchar>(range.start);

        // Walk rows by raw step so the functor only ever sees one row at a time.
        for( int i = range.start; i < range.end; ++i, yS += src.step, yD += dst.step )
            cvt((const _Tp*)yS, (_Tp*)yD, src.cols);
    }

private:
    const Mat& src;
    Mat& dst;
    const Cvt& cvt;

    const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&);
};

// Runs cvt over all rows in parallel; the last argument is the nstripes hint
// (roughly one stripe per 64K pixels).
template <typename Cvt>
void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt)
{
    parallel_for_(Range(0, src.rows), CvtColorLoop_Invoker<Cvt>(src, dst, cvt), src.total()/(double)(1<<16) );
}

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)

// Common signatures of the IPP primitives used below:
//   reorder    - channel swap with an explicit destination-order array
//   general    - plain src/dst conversion
//   color2gray - conversion to gray with caller-supplied channel weights
typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *);
typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize);
typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *);

// parallel_for_ body delegating a row range to an IPP-backed functor.
// Any failed tile clears *ok so the caller can fall back to the generic path.
template <typename Cvt>
class CvtColorIPPLoop_Invoker :
        public ParallelLoopBody
{
public:

    CvtColorIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt, bool *_ok) :
        ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt), ok(_ok)
    {
        *ok = true;
    }

    virtual void operator()(const Range& range) const
    {
        const void *yS = src.ptr<uchar>(range.start);
        void *yD = dst.ptr<uchar>(range.start);
        if( !cvt(yS, (int)src.step[0], yD, (int)dst.step[0], src.cols, range.end - range.start) )
            *ok = false;
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
        }
    }

private:
    const Mat& src;
    Mat& dst;
    const Cvt& cvt;
    bool *ok;

    const CvtColorIPPLoop_Invoker& operator= (const CvtColorIPPLoop_Invoker&);
};

// Returns true iff every stripe converted successfully via IPP.
template <typename Cvt>
bool CvtColorIPPLoop(const Mat& src, Mat& dst, const Cvt& cvt)
{
    bool ok;
    parallel_for_(Range(0, src.rows), CvtColorIPPLoop_Invoker<Cvt>(src, dst, cvt, &ok), src.total()/(double)(1<<16) );
    return ok;
}

// Same as CvtColorIPPLoop, but tolerates in-place calls (src.data == dst.data)
// by first copying src to a temporary.
template <typename Cvt>
bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
{
    Mat temp;
    Mat &source = src;
    if( src.data == dst.data )
    {
        src.copyTo(temp);
        // NOTE(review): `source` is a reference to src, so this assignment goes
        // through Mat::operator= and re-points src's header at the copy as well.
        source = temp;
    }
    bool ok;
    parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok),
                  source.total()/(double)(1<<16) );
    return ok;
}

// Adapter: fixes the alpha argument of ippiSwapChannels_8u_C3C4R at the channel
// maximum so the call matches the common ippiReorderFunc signature.
static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
{
    return
ippiSwapChannels_8u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u);
}

// 16u variant of the adapter above (alpha fixed at 65535).
static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
{
    return ippiSwapChannels_16u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u);
}

// 32f variant of the adapter above (alpha fixed at 1.0).
static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
{
    return ippiSwapChannels_32f_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f);
}

// The tables below are indexed by Mat depth (entries at 0, 2, 5 correspond to
// the 8u, 16u and 32f IPP variants; null entries mark unsupported depths).
static ippiReorderFunc ippiSwapChannelsC3C4RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C3C4Rf, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3C4Rf, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C3C4Rf, 0, 0
};

static ippiGeneralFunc ippiCopyAC4C3RTab[] =
{
    (ippiGeneralFunc)ippiCopy_8u_AC4C3R, 0, (ippiGeneralFunc)ippiCopy_16u_AC4C3R, 0,
    0, (ippiGeneralFunc)ippiCopy_32f_AC4C3R, 0, 0
};

static ippiReorderFunc ippiSwapChannelsC4C3RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C4C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4C3R, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C4C3R, 0, 0
};

static ippiReorderFunc ippiSwapChannelsC3RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3R, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
};

// In-place 4-channel swap only exists from IPP 8.0.1 on.
#if IPP_VERSION_X100 >= 801
static ippiReorderFunc ippiSwapChannelsC4RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C4R, 0, 0
};
#endif

static ippiColor2GrayFunc ippiColor2GrayC3Tab[] =
{
    (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0,
    0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0
};

static ippiColor2GrayFunc ippiColor2GrayC4Tab[] =
{
    (ippiColor2GrayFunc)ippiColorToGray_8u_AC4C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_AC4C1R, 0,
    0, (ippiColor2GrayFunc)ippiColorToGray_32f_AC4C1R, 0, 0
};

static ippiGeneralFunc ippiRGB2GrayC3Tab[] =
{
    (ippiGeneralFunc)ippiRGBToGray_8u_C3C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_C3C1R, 0,
    0, (ippiGeneralFunc)ippiRGBToGray_32f_C3C1R, 0, 0
};

static ippiGeneralFunc ippiRGB2GrayC4Tab[] =
{
    (ippiGeneralFunc)ippiRGBToGray_8u_AC4C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_AC4C1R, 0,
    0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0
};

static ippiGeneralFunc ippiCopyP3C3RTab[] =
{
    (ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0,
    0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0
};

static ippiGeneralFunc ippiRGB2XYZTab[] =
{
    (ippiGeneralFunc)ippiRGBToXYZ_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToXYZ_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiRGBToXYZ_32f_C3R, 0, 0
};

static ippiGeneralFunc ippiXYZ2RGBTab[] =
{
    (ippiGeneralFunc)ippiXYZToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiXYZToRGB_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiXYZToRGB_32f_C3R, 0, 0
};

// No 32f HSV conversion is registered here (entry left null).
static ippiGeneralFunc ippiRGB2HSVTab[] =
{
    (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0,
    0, 0, 0, 0
};

static ippiGeneralFunc ippiHSV2RGBTab[] =
{
    (ippiGeneralFunc)ippiHSVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHSVToRGB_16u_C3R, 0,
    0, 0, 0, 0
};

static ippiGeneralFunc ippiRGB2HLSTab[] =
{
    (ippiGeneralFunc)ippiRGBToHLS_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHLS_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiRGBToHLS_32f_C3R, 0, 0
};

static ippiGeneralFunc ippiHLS2RGBTab[] =
{
    (ippiGeneralFunc)ippiHLSToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHLSToRGB_16u_C3R, 0,
0, (ippiGeneralFunc)ippiHLSToRGB_32f_C3R, 0, 0
};

// LUV tables are compiled out ("&& 0" disables this branch unconditionally).
#if !defined(HAVE_IPP_ICV_ONLY) && 0
static ippiGeneralFunc ippiRGBToLUVTab[] =
{
    (ippiGeneralFunc)ippiRGBToLUV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToLUV_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiRGBToLUV_32f_C3R, 0, 0
};

static ippiGeneralFunc ippiLUVToRGBTab[] =
{
    (ippiGeneralFunc)ippiLUVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiLUVToRGB_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiLUVToRGB_32f_C3R, 0, 0
};
#endif

// Functors below adapt single IPP primitives (or pairs of them) to the
// (src, srcStep, dst, dstStep, cols, rows) -> bool interface expected by
// CvtColorIPPLoop; a null function pointer makes the functor report failure.
struct IPPGeneralFunctor
{
    IPPGeneralFunctor(ippiGeneralFunc _func) : func(_func){}
    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0 : false;
    }
private:
    ippiGeneralFunc func;
};

// Channel reorder with a fixed destination order; order[3] = 3 keeps alpha last.
struct IPPReorderFunctor
{
    IPPReorderFunctor(ippiReorderFunc _func, int _order0, int _order1, int _order2) : func(_func)
    {
        order[0] = _order0;
        order[1] = _order1;
        order[2] = _order2;
        order[3] = 3;
    }
    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0 : false;
    }
private:
    ippiReorderFunc func;
    int order[4];
};

// Weighted color->gray; coefficients are listed B, G, R (0.114, 0.587, 0.299),
// matching OpenCV's default BGR channel order.
struct IPPColor2GrayFunctor
{
    IPPColor2GrayFunctor(ippiColor2GrayFunc _func) :
        func(_func)
    {
        coeffs[0] = 0.114f;
        coeffs[1] = 0.587f;
        coeffs[2] = 0.299f;
    }
    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0 : false;
    }
private:
    ippiColor2GrayFunc func;
    Ipp32f coeffs[3];
};

// Gray -> BGR by feeding the same plane to all three inputs of a planar-to-
// interleaved copy.
struct IPPGray2BGRFunctor
{
    IPPGray2BGRFunctor(ippiGeneralFunc _func) :
        func(_func)
    {
    }

    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        if (func == 0)
            return false;

        const void* srcarray[3] = { src, src, src };
        return func(srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
    }
private:
    ippiGeneralFunc func;
};

// Gray -> BGRA: planar copy into a temporary 3-channel image, then a
// 3->4 channel swap that appends the alpha channel.
struct IPPGray2BGRAFunctor
{
    IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) :
        func1(_func1), func2(_func2), depth(_depth)
    {
    }

    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        if (func1 == 0 || func2 == 0)
            return false;

        const void* srcarray[3] = { src, src, src };
        Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
        if(func1(srcarray, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
            return false;
        int order[4] = {0, 1, 2, 3};
        return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
    }
private:
    ippiGeneralFunc func1;
    ippiReorderFunc func2;
    int depth;
};

// Two-stage conversion: channel reorder into a temporary, then a general
// conversion (e.g. swap to RGB before an RGB-only primitive).
struct IPPReorderGeneralFunctor
{
    IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) :
        func1(_func1), func2(_func2), depth(_depth)
    {
        order[0] = _order0;
        order[1] = _order1;
        order[2] = _order2;
        order[3] = 3;
    }
    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        if (func1 == 0 || func2 == 0)
            return false;

        Mat temp;
        temp.create(rows, cols, CV_MAKETYPE(depth, 3));
        if(func1(src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows), order) < 0)
            return false;
        return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows)) >= 0;
    }
private:
    ippiReorderFunc func1;
    ippiGeneralFunc func2;
    int order[4];
    int depth;
};

// Mirror of the above: general conversion first, then channel reorder.
struct IPPGeneralReorderFunctor
{
    IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) :
        func1(_func1), func2(_func2), depth(_depth)
    {
        order[0] = _order0;
        order[1] = _order1;
        order[2] = _order2;
        order[3] = 3;
    }
    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        if (func1 == 0 || func2 == 0)
            return false;

        Mat temp;
        temp.create(rows, cols, CV_MAKETYPE(depth, 3));
        if(func1(src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
            return false;
        return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
    }
private:
    ippiGeneralFunc func1;
    ippiReorderFunc func2;
    int order[4];
    int depth;
};

#endif

////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////

// Generic BGR<->RGB / 3<->4 channel shuffle.  blueIdx (0 or 2) selects which
// destination slot receives the blue channel; bidx^2 is then the red slot.
// The fourth channel is copied when present in both, or set to max() (opaque
// alpha) when added.
template<typename _Tp> struct RGB2RGB
{
    typedef _Tp channel_type;

    RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) {}
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int scn = srccn, dcn = dstcn, bidx = blueIdx;
        if( dcn == 3 )
        {
            // 3 or 4 channels -> 3 channels (alpha dropped if present).
            n *= 3;
            for( int i = 0; i < n; i += 3, src += scn )
            {
                _Tp t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
                dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
            }
        }
        else if( scn == 3 )
        {
            // 3 channels -> 4 channels: append an opaque alpha.
            n *= 3;
            _Tp alpha = ColorChannel<_Tp>::max();
            for( int i = 0; i < n; i += 3, dst += 4 )
            {
                _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2];
                dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] =
alpha;
            }
        }
        else
        {
            // 4 channels -> 4 channels: swap R and B, keep G and alpha.
            n *= 4;
            for( int i = 0; i < n; i += 4 )
            {
                _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
                dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
            }
        }
    }

    int srccn, dstcn, blueIdx;
};

#if CV_NEON

// NEON specialization of RGB2RGB for 8-bit images: same semantics as the
// generic template, vectorized 16 (then 8) pixels at a time with de-interleaving
// loads/stores; the scalar tail loops handle the remaining pixels.
template<> struct RGB2RGB<uchar>
{
    typedef uchar channel_type;

    RGB2RGB(int _srccn, int _dstcn, int _blueIdx) :
        srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // Pre-broadcast opaque alpha in 128-bit and 64-bit widths.
        v_alpha = vdupq_n_u8(ColorChannel<uchar>::max());
        v_alpha2 = vget_low_u8(v_alpha);
    }

    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, dcn = dstcn, bidx = blueIdx, i = 0;
        if (dcn == 3)
        {
            n *= 3;
            if (scn == 3)
            {
                // 3 -> 3: 16 pixels (48 bytes) per iteration, then 8, then scalar.
                for ( ; i <= n - 48; i += 48, src += 48 )
                {
                    uint8x16x3_t v_src = vld3q_u8(src), v_dst;
                    v_dst.val[0] = v_src.val[bidx];
                    v_dst.val[1] = v_src.val[1];
                    v_dst.val[2] = v_src.val[bidx ^ 2];
                    vst3q_u8(dst + i, v_dst);
                }
                for ( ; i <= n - 24; i += 24, src += 24 )
                {
                    uint8x8x3_t v_src = vld3_u8(src), v_dst;
                    v_dst.val[0] = v_src.val[bidx];
                    v_dst.val[1] = v_src.val[1];
                    v_dst.val[2] = v_src.val[bidx ^ 2];
                    vst3_u8(dst + i, v_dst);
                }
                for ( ; i < n; i += 3, src += 3 )
                {
                    uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
                    dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
                }
            }
            else
            {
                // 4 -> 3: load 4-channel pixels (64/32 bytes), drop alpha on store.
                for ( ; i <= n - 48; i += 48, src += 64 )
                {
                    uint8x16x4_t v_src = vld4q_u8(src);
                    uint8x16x3_t v_dst;
                    v_dst.val[0] = v_src.val[bidx];
                    v_dst.val[1] = v_src.val[1];
                    v_dst.val[2] = v_src.val[bidx ^ 2];
                    vst3q_u8(dst + i, v_dst);
                }
                for ( ; i <= n - 24; i += 24, src += 32 )
                {
                    uint8x8x4_t v_src = vld4_u8(src);
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_src.val[bidx];
                    v_dst.val[1] = v_src.val[1];
                    v_dst.val[2] = v_src.val[bidx ^ 2];
                    vst3_u8(dst + i, v_dst);
                }
                for ( ; i < n; i += 3, src += 4 )
                {
                    uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
                    dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
                }
            }
        }
        else if (scn == 3)
        {
            // 3 -> 4: append the pre-broadcast opaque alpha lane.
            n *= 3;
            for ( ; i <= n - 48; i += 48, dst += 64 )
            {
                uint8x16x3_t v_src = vld3q_u8(src + i);
                uint8x16x4_t v_dst;
                v_dst.val[bidx] = v_src.val[0];
                v_dst.val[1] = v_src.val[1];
                v_dst.val[bidx ^ 2] = v_src.val[2];
                v_dst.val[3] = v_alpha;
                vst4q_u8(dst, v_dst);
            }
            for ( ; i <= n - 24; i += 24, dst += 32 )
            {
                uint8x8x3_t v_src = vld3_u8(src + i);
                uint8x8x4_t v_dst;
                v_dst.val[bidx] = v_src.val[0];
                v_dst.val[1] = v_src.val[1];
                v_dst.val[bidx ^ 2] = v_src.val[2];
                v_dst.val[3] = v_alpha2;
                vst4_u8(dst, v_dst);
            }
            uchar alpha = ColorChannel<uchar>::max();
            for (; i < n; i += 3, dst += 4 )
            {
                uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2];
                dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
            }
        }
        else
        {
            // 4 -> 4: swap channels 0 and 2, pass G and alpha through.
            n *= 4;
            for ( ; i <= n - 64; i += 64 )
            {
                uint8x16x4_t v_src = vld4q_u8(src + i), v_dst;
                v_dst.val[0] = v_src.val[2];
                v_dst.val[1] = v_src.val[1];
                v_dst.val[2] = v_src.val[0];
                v_dst.val[3] = v_src.val[3];
                vst4q_u8(dst + i, v_dst);
            }
            for ( ; i <= n - 32; i += 32 )
            {
                uint8x8x4_t v_src = vld4_u8(src + i), v_dst;
                v_dst.val[0] = v_src.val[2];
                v_dst.val[1] = v_src.val[1];
                v_dst.val[2] = v_src.val[0];
                v_dst.val[3] = v_src.val[3];
                vst4_u8(dst + i, v_dst);
            }
            for ( ; i < n; i += 4)
            {
                uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
                dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3;
            }
        }
    }

    int srccn, dstcn, blueIdx;

    uint8x16_t v_alpha;
    uint8x8_t v_alpha2;
};

#endif

/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////

// Unpacks 16-bit RGB555/RGB565 pixels into 3- or 4-channel 8-bit pixels.
// greenBits selects the packing (6 -> 565, otherwise 555); for 555 input the
// top bit is expanded into alpha (255/0) when dcn == 4.
struct RGB5x52RGB
{
    typedef uchar channel_type;

    RGB5x52RGB(int _dstcn, int
_blueIdx, int _greenBits)
        : dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits)
    {
#if CV_NEON
        // Bit masks / constants broadcast once for the vector paths below.
        v_n3 = vdupq_n_u16(~3);
        v_n7 = vdupq_n_u16(~7);
        v_255 = vdupq_n_u8(255);
        v_0 = vdupq_n_u8(0);
        v_mask = vdupq_n_u16(0x8000);
#endif
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        if( greenBits == 6 )
        {
#if CV_NEON
            // RGB565: 16 pixels per iteration; each 5/6-bit field is shifted
            // into the high bits of an 8-bit channel (low bits left zero).
            for ( ; i <= n - 16; i += 16, dst += dcn * 16)
            {
                uint16x8_t v_src0 = vld1q_u16((const ushort *)src + i), v_src1 = vld1q_u16((const ushort *)src + i + 8);
                uint8x16_t v_b = vcombine_u8(vmovn_u16(vshlq_n_u16(v_src0, 3)), vmovn_u16(vshlq_n_u16(v_src1, 3)));
                uint8x16_t v_g = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 3), v_n3)),
                                             vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 3), v_n3)));
                uint8x16_t v_r = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 8), v_n7)),
                                             vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 8), v_n7)));
                if (dcn == 3)
                {
                    uint8x16x3_t v_dst;
                    v_dst.val[bidx] = v_b;
                    v_dst.val[1] = v_g;
                    v_dst.val[bidx^2] = v_r;
                    vst3q_u8(dst, v_dst);
                }
                else
                {
                    uint8x16x4_t v_dst;
                    v_dst.val[bidx] = v_b;
                    v_dst.val[1] = v_g;
                    v_dst.val[bidx^2] = v_r;
                    v_dst.val[3] = v_255;    // 565 has no alpha bit: always opaque
                    vst4q_u8(dst, v_dst);
                }
            }
#endif
            for( ; i < n; i++, dst += dcn )
            {
                unsigned t = ((const ushort*)src)[i];
                dst[bidx] = (uchar)(t << 3);
                dst[1] = (uchar)((t >> 3) & ~3);
                dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);
                if( dcn == 4 )
                    dst[3] = 255;
            }
        }
        else
        {
#if CV_NEON
            // RGB555: all three fields are 5 bits wide; bit 15 carries alpha.
            for ( ; i <= n - 16; i += 16, dst += dcn * 16)
            {
                uint16x8_t v_src0 = vld1q_u16((const ushort *)src + i), v_src1 = vld1q_u16((const ushort *)src + i + 8);
                uint8x16_t v_b = vcombine_u8(vmovn_u16(vshlq_n_u16(v_src0, 3)), vmovn_u16(vshlq_n_u16(v_src1, 3)));
                uint8x16_t v_g = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 2), v_n7)),
                                             vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 2), v_n7)));
                uint8x16_t v_r = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 7), v_n7)),
                                             vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 7), v_n7)));
                if (dcn == 3)
                {
                    uint8x16x3_t v_dst;
                    v_dst.val[bidx] = v_b;
                    v_dst.val[1] = v_g;
                    v_dst.val[bidx^2] = v_r;
                    vst3q_u8(dst, v_dst);
                }
                else
                {
                    uint8x16x4_t v_dst;
                    v_dst.val[bidx] = v_b;
                    v_dst.val[1] = v_g;
                    v_dst.val[bidx^2] = v_r;
                    // Select 255 where bit 15 was set, else 0.
                    v_dst.val[3] = vbslq_u8(vcombine_u8(vqmovn_u16(vandq_u16(v_src0, v_mask)),
                                                        vqmovn_u16(vandq_u16(v_src1, v_mask))), v_255, v_0);
                    vst4q_u8(dst, v_dst);
                }
            }
#endif
            for( ; i < n; i++, dst += dcn )
            {
                unsigned t = ((const ushort*)src)[i];
                dst[bidx] = (uchar)(t << 3);
                dst[1] = (uchar)((t >> 2) & ~7);
                dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);
                if( dcn == 4 )
                    dst[3] = t & 0x8000 ? 255 : 0;
            }
        }
    }

    int dstcn, blueIdx, greenBits;
#if CV_NEON
    uint16x8_t v_n3, v_n7, v_mask;
    uint8x16_t v_255, v_0;
#endif
};


// Packs 3- or 4-channel 8-bit pixels into 16-bit RGB555/RGB565.
// For 4-channel 555 output, any nonzero source alpha sets bit 15.
struct RGB2RGB5x5
{
    typedef uchar channel_type;

    RGB2RGB5x5(int _srccn, int _blueIdx, int _greenBits)
        : srccn(_srccn), blueIdx(_blueIdx), greenBits(_greenBits)
    {
#if CV_NEON
        v_n3 = vdup_n_u8(~3);
        v_n7 = vdup_n_u8(~7);
        v_mask = vdupq_n_u16(0x8000);
        v_0 = vdupq_n_u16(0);
        v_full = vdupq_n_u16(0xffff);
#endif
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        if (greenBits == 6)
        {
            if (scn == 3)
            {
#if CV_NEON
                // 3ch -> 565: 8 pixels per iteration; keep the top 5/6/5 bits
                // of B/G/R and OR them into place.
                for ( ; i <= n - 8; i += 8, src += 24 )
                {
                    uint8x8x3_t v_src = vld3_u8(src);
                    uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
                    v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3));
                    v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8));
                    vst1q_u16((ushort *)dst + i, v_dst);
                }
#endif
                for ( ; i < n; i++, src += 3 )
                    ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
            }
            else
            {
                // 4ch -> 565: alpha is discarded (565 has no alpha bit).
#if CV_NEON
                for ( ; i <= n - 8; i += 8, src += 32 )
                {
                    uint8x8x4_t v_src = vld4_u8(src);
                    uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
                    v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3));
                    v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8));
                    vst1q_u16((ushort *)dst + i, v_dst);
                }
#endif
                for ( ; i < n; i++, src += 4 )
                    ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8));
            }
        }
        else if (scn == 3)
        {
            // 3ch -> 555.
#if CV_NEON
            for ( ; i <= n - 8; i += 8, src += 24 )
            {
                uint8x8x3_t v_src = vld3_u8(src);
                uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
                v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2));
                v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7));
                vst1q_u16((ushort *)dst + i, v_dst);
            }
#endif
            for ( ; i < n; i++, src += 3 )
                ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7));
        }
        else
        {
            // 4ch -> 555: set bit 15 where alpha is nonzero.
#if CV_NEON
            for ( ; i <= n - 8; i += 8, src += 32 )
            {
                uint8x8x4_t v_src = vld4_u8(src);
                uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3));
                v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2));
                v_dst = vorrq_u16(v_dst, vorrq_u16(vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7),
                                                   vbslq_u16(veorq_u16(vceqq_u16(vmovl_u8(v_src.val[3]), v_0), v_full), v_mask, v_0)));
                vst1q_u16((ushort *)dst + i, v_dst);
            }
#endif
            for ( ; i < n; i++, src += 4 )
                ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|
                                             ((src[bidx^2]&~7) << 7)|(src[3] ?
0x8000 : 0));
        }
    }

    int srccn, blueIdx, greenBits;
#if CV_NEON
    uint8x8_t v_n3, v_n7;
    uint16x8_t v_mask, v_0, v_full;
#endif
};

///////////////////////////////// Color to/from Grayscale ////////////////////////////////

// Replicates a single gray channel into 3 or 4 channels; the fourth channel,
// when present, is set to the opaque alpha value for the depth.
template<typename _Tp>
struct Gray2RGB
{
    typedef _Tp channel_type;

    Gray2RGB(int _dstcn) : dstcn(_dstcn) {}
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        if( dstcn == 3 )
            for( int i = 0; i < n; i++, dst += 3 )
            {
                dst[0] = dst[1] = dst[2] = src[i];
            }
        else
        {
            _Tp alpha = ColorChannel<_Tp>::max();
            for( int i = 0; i < n; i++, dst += 4 )
            {
                dst[0] = dst[1] = dst[2] = src[i];
                dst[3] = alpha;
            }
        }
    }

    int dstcn;
};


// Replicates 8-bit gray into packed 16-bit RGB555/RGB565 (same value in all
// three color fields, truncated to the field width).
struct Gray2RGB5x5
{
    typedef uchar channel_type;

    Gray2RGB5x5(int _greenBits) : greenBits(_greenBits)
    {
#if CV_NEON
        v_n7 = vdup_n_u8(~7);
        v_n3 = vdup_n_u8(~3);
#elif CV_SSE2
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
        v_n7 = _mm_set1_epi16(~7);
        v_n3 = _mm_set1_epi16(~3);
        v_zero = _mm_setzero_si128();
#endif
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i = 0;
        if( greenBits == 6 )
        {
            // Gray -> 565.
#if CV_NEON
            for ( ; i <= n - 8; i += 8 )
            {
                uint8x8_t v_src = vld1_u8(src + i);
                uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src, 3));
                v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n3)), 3));
                v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n7)), 8));
                vst1q_u16((ushort *)dst + i, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // 16 gray pixels per iteration: widen to 16-bit, then build
                // the 5-6-5 fields for low and high halves separately.
                for ( ; i <= n - 16; i += 16 )
                {
                    __m128i v_src = _mm_loadu_si128((__m128i const *)(src + i));

                    __m128i v_src_p = _mm_unpacklo_epi8(v_src, v_zero);
                    __m128i v_dst = _mm_or_si128(_mm_srli_epi16(v_src_p, 3),
                        _mm_or_si128(_mm_slli_epi16(_mm_and_si128(v_src_p, v_n3), 3),
                            _mm_slli_epi16(_mm_and_si128(v_src_p, v_n7), 8)));
                    _mm_storeu_si128((__m128i *)((ushort *)dst + i), v_dst);

                    v_src_p = _mm_unpackhi_epi8(v_src, v_zero);
                    v_dst = _mm_or_si128(_mm_srli_epi16(v_src_p, 3),
                        _mm_or_si128(_mm_slli_epi16(_mm_and_si128(v_src_p, v_n3), 3),
                            _mm_slli_epi16(_mm_and_si128(v_src_p, v_n7), 8)));
                    _mm_storeu_si128((__m128i *)((ushort *)dst + i + 8), v_dst);
                }
            }
#endif
            for ( ; i < n; i++ )
            {
                int t = src[i];
                ((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));
            }
        }
        else
        {
            // Gray -> 555: one 5-bit value replicated into all three fields.
#if CV_NEON
            for ( ; i <= n - 8; i += 8 )
            {
                uint16x8_t v_src = vmovl_u8(vshr_n_u8(vld1_u8(src + i), 3));
                uint16x8_t v_dst = vorrq_u16(vorrq_u16(v_src, vshlq_n_u16(v_src, 5)), vshlq_n_u16(v_src, 10));
                vst1q_u16((ushort *)dst + i, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // NOTE(review): each iteration processes 16 pixels but i only
                // advances by 8, so pixels 8..15 are recomputed next iteration;
                // the stored values are identical, so the output is correct,
                // just redundantly written.
                for ( ; i <= n - 16; i += 8 )
                {
                    __m128i v_src = _mm_loadu_si128((__m128i const *)(src + i));

                    __m128i v_src_p = _mm_srli_epi16(_mm_unpacklo_epi8(v_src, v_zero), 3);
                    // NOTE(review): _mm_slli_epi32 here is presumably meant to be
                    // _mm_slli_epi16 (as in the high half below); it is harmless
                    // because the 5-bit values cannot carry across 16-bit lanes.
                    __m128i v_dst = _mm_or_si128(v_src_p,
                        _mm_or_si128(_mm_slli_epi32(v_src_p, 5),
                            _mm_slli_epi16(v_src_p, 10)));
                    _mm_storeu_si128((__m128i *)((ushort *)dst + i), v_dst);

                    v_src_p = _mm_srli_epi16(_mm_unpackhi_epi8(v_src, v_zero), 3);
                    v_dst = _mm_or_si128(v_src_p,
                        _mm_or_si128(_mm_slli_epi16(v_src_p, 5),
                            _mm_slli_epi16(v_src_p, 10)));
                    _mm_storeu_si128((__m128i *)((ushort *)dst + i + 8), v_dst);
                }
            }
#endif
            for( ; i < n; i++ )
            {
                int t = src[i] >> 3;
                ((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10));
            }
        }
    }
    int greenBits;

#if CV_NEON
    uint8x8_t v_n7, v_n3;
#elif CV_SSE2
    __m128i v_n7, v_n3, v_zero;
    bool haveSIMD;
#endif
};


#undef R2Y
#undef G2Y
#undef B2Y

// Fixed-point scale factors shared by the conversions below:
// yuv_shift / xyz_shift are the fractional bit counts of the integer
// luma and XYZ coefficients.
enum
{
    yuv_shift = 14,
    xyz_shift = 12,
    R2Y = 4899,
    G2Y = 9617,
    B2Y = 1868,
    BLOCK_SIZE = 256
};


// Packed 16-bit RGB (565 / 555) -> gray using fixed-point BT.601-style
// weights (R2Y/G2Y/B2Y, scaled by 2^yuv_shift).  Each field is expanded to
// its 8-bit equivalent (low bits zero) before the weighted sum.
struct RGB5x52Gray
{
    typedef uchar channel_type;

    RGB5x52Gray(int _greenBits) : greenBits(_greenBits)
    {
#if CV_NEON
        v_b2y = vdup_n_u16(B2Y);
        v_g2y = vdup_n_u16(G2Y);
        v_r2y = vdup_n_u16(R2Y);
        v_delta = vdupq_n_u32(1 << (yuv_shift - 1));  // rounding term
        v_f8 = vdupq_n_u16(0xf8);                     // 5-bit field mask (expanded)
        v_fc = vdupq_n_u16(0xfc);                     // 6-bit field mask (expanded)
#elif CV_SSE2
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
        v_b2y = _mm_set1_epi16(B2Y);
        v_g2y = _mm_set1_epi16(G2Y);
        v_r2y = _mm_set1_epi16(R2Y);
        v_delta = _mm_set1_epi32(1 << (yuv_shift - 1));
        v_f8 = _mm_set1_epi16(0xf8);
        v_fc = _mm_set1_epi16(0xfc);
#endif
    }

    // src: n packed 16-bit pixels (read through ushort*); dst: n gray bytes.
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i = 0;
        if( greenBits == 6 )
        {
#if CV_NEON
            for ( ; i <= n - 8; i += 8)
            {
                uint16x8_t v_src = vld1q_u16((ushort *)src + i);
                // expand B, G, R fields to 8-bit range
                uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8),
                           v_t1 = vandq_u16(vshrq_n_u16(v_src, 3), v_fc),
                           v_t2 = vandq_u16(vshrq_n_u16(v_src, 8), v_f8);

                uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y),
                    vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y);
                uint32x4_t v_dst1 = vmlal_u16(vmlal_u16(vmull_u16(vget_high_u16(v_t0), v_b2y),
                    vget_high_u16(v_t1), v_g2y), vget_high_u16(v_t2), v_r2y);
                v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_delta), yuv_shift);
                v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_delta), yuv_shift);

                vst1_u8(dst + i, vmovn_u16(vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1))));
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                __m128i v_zero = _mm_setzero_si128();

                for ( ; i <= n - 8; i += 8)
                {
                    __m128i v_src = _mm_loadu_si128((__m128i const *)((ushort *)src + i));
                    // expand B, G, R fields to 8-bit range
                    __m128i v_t0 = _mm_and_si128(_mm_slli_epi16(v_src, 3), v_f8),
                            v_t1 = _mm_and_si128(_mm_srli_epi16(v_src, 3), v_fc),
                            v_t2 = _mm_and_si128(_mm_srli_epi16(v_src, 8), v_f8);

                    // 16x16 -> 32-bit products via mullo/mulhi + unpack
                    // (inputs <= 0xf8 and coeffs < 2^15, so signed mulhi is safe)
                    __m128i v_mullo_b = _mm_mullo_epi16(v_t0, v_b2y);
                    __m128i v_mullo_g = _mm_mullo_epi16(v_t1, v_g2y);
                    __m128i v_mullo_r = _mm_mullo_epi16(v_t2, v_r2y);
                    __m128i v_mulhi_b = _mm_mulhi_epi16(v_t0, v_b2y);
                    __m128i v_mulhi_g = _mm_mulhi_epi16(v_t1, v_g2y);
                    __m128i v_mulhi_r = _mm_mulhi_epi16(v_t2, v_r2y);

                    __m128i v_dst0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_b, v_mulhi_b),
                                                   _mm_unpacklo_epi16(v_mullo_g, v_mulhi_g));
                    v_dst0 = _mm_add_epi32(_mm_add_epi32(v_dst0, v_delta),
                                           _mm_unpacklo_epi16(v_mullo_r, v_mulhi_r));

                    __m128i v_dst1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_b, v_mulhi_b),
                                                   _mm_unpackhi_epi16(v_mullo_g, v_mulhi_g));
                    v_dst1 = _mm_add_epi32(_mm_add_epi32(v_dst1, v_delta),
                                           _mm_unpackhi_epi16(v_mullo_r, v_mulhi_r));

                    v_dst0 = _mm_srli_epi32(v_dst0, yuv_shift);
                    v_dst1 = _mm_srli_epi32(v_dst1, yuv_shift);

                    __m128i v_dst = _mm_packs_epi32(v_dst0, v_dst1);
                    _mm_storel_epi64((__m128i *)(dst + i), _mm_packus_epi16(v_dst, v_zero));
                }
            }
#endif
            for ( ; i < n; i++)
            {
                int t = ((ushort*)src)[i];
                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
                                           ((t >> 3) & 0xfc)*G2Y +
                                           ((t >> 8) & 0xf8)*R2Y, yuv_shift);
            }
        }
        else
        {
#if CV_NEON
            for ( ; i <= n - 8; i += 8)
            {
                uint16x8_t v_src = vld1q_u16((ushort *)src + i);
                // 5-5-5 layout: all three fields expand with the 0xf8 mask
                uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8),
                           v_t1 = vandq_u16(vshrq_n_u16(v_src, 2), v_f8),
                           v_t2 = vandq_u16(vshrq_n_u16(v_src, 7), v_f8);

                uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y),
                    vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y);
                uint32x4_t v_dst1 = vmlal_u16(vmlal_u16(vmull_u16(vget_high_u16(v_t0), v_b2y),
                    vget_high_u16(v_t1), v_g2y), vget_high_u16(v_t2), v_r2y);
                v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_delta), yuv_shift);
                v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_delta), yuv_shift);

                vst1_u8(dst + i, vmovn_u16(vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1))));
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                __m128i v_zero = _mm_setzero_si128();

                for ( ; i <= n - 8; i += 8)
                {
                    __m128i v_src = _mm_loadu_si128((__m128i const *)((ushort *)src + i));
                    __m128i v_t0 = _mm_and_si128(_mm_slli_epi16(v_src, 3), v_f8),
                            v_t1 = _mm_and_si128(_mm_srli_epi16(v_src, 2), v_f8),
                            v_t2 = _mm_and_si128(_mm_srli_epi16(v_src, 7), v_f8);

                    __m128i v_mullo_b = _mm_mullo_epi16(v_t0, v_b2y);
                    __m128i v_mullo_g = _mm_mullo_epi16(v_t1, v_g2y);
                    __m128i v_mullo_r = _mm_mullo_epi16(v_t2, v_r2y);
                    __m128i v_mulhi_b = _mm_mulhi_epi16(v_t0, v_b2y);
                    __m128i v_mulhi_g = _mm_mulhi_epi16(v_t1, v_g2y);
                    __m128i v_mulhi_r = _mm_mulhi_epi16(v_t2, v_r2y);

                    __m128i v_dst0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_b, v_mulhi_b),
                                                   _mm_unpacklo_epi16(v_mullo_g, v_mulhi_g));
                    v_dst0 = _mm_add_epi32(_mm_add_epi32(v_dst0, v_delta),
                                           _mm_unpacklo_epi16(v_mullo_r, v_mulhi_r));

                    __m128i v_dst1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_b, v_mulhi_b),
                                                   _mm_unpackhi_epi16(v_mullo_g, v_mulhi_g));
                    v_dst1 = _mm_add_epi32(_mm_add_epi32(v_dst1, v_delta),
                                           _mm_unpackhi_epi16(v_mullo_r, v_mulhi_r));

                    v_dst0 = _mm_srli_epi32(v_dst0, yuv_shift);
                    v_dst1 = _mm_srli_epi32(v_dst1, yuv_shift);

                    __m128i v_dst = _mm_packs_epi32(v_dst0, v_dst1);
                    _mm_storel_epi64((__m128i *)(dst + i), _mm_packus_epi16(v_dst, v_zero));
                }
            }
#endif
            for ( ; i < n; i++)
            {
                int t = ((ushort*)src)[i];
                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
                                           ((t >> 2) & 0xf8)*G2Y +
                                           ((t >> 7) & 0xf8)*R2Y, yuv_shift);
            }
        }
    }
    int greenBits;

#if CV_NEON
    uint16x4_t v_b2y, v_g2y, v_r2y;
    uint32x4_t v_delta;
    uint16x8_t v_f8, v_fc;
#elif CV_SSE2
    bool haveSIMD;
    __m128i v_b2y, v_g2y, v_r2y;
    __m128i v_delta;
    __m128i v_f8, v_fc;
#endif
};


// Generic RGB -> gray: dst = src[0]*cb + src[1]*cg + src[2]*cr.
// blueIdx == 0 swaps the first/last coefficients so the caller can pass
// either BGR or RGB data with the same coefficient table.
template<typename _Tp> struct RGB2Gray
{
    typedef _Tp channel_type;

    RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
        memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
        if(blueIdx == 0)
            std::swap(coeffs[0], coeffs[2]);
    }

    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int scn = srccn;
        float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
        for(int i = 0; i < n; i++, src += scn)
            dst[i] = saturate_cast<_Tp>(src[0]*cb + src[1]*cg + src[2]*cr);
    }
    int srccn;
    float coeffs[3];
};

// 8-bit specialization: precomputed per-channel lookup tables.  tab[v],
// tab[v+256], tab[v+512] hold v*coeff for each channel; the rounding term
// (1 << (yuv_shift-1)) is folded into the third table's initial value.
template<> struct RGB2Gray<uchar>
{
    typedef uchar channel_type;

    RGB2Gray(int _srccn, int blueIdx, const int* coeffs) : srccn(_srccn)
    {
        const int coeffs0[] = { R2Y, G2Y, B2Y };
        if(!coeffs) coeffs = coeffs0;

        int b = 0, g = 0, r = (1 << (yuv_shift-1));
        int db = coeffs[blueIdx^2], dg = coeffs[1], dr = coeffs[blueIdx];

        for( int i = 0; i < 256; i++, b += db, g += dg, r += dr )
        {
            tab[i] = b;
            tab[i+256] = g;
            tab[i+512] = r;
        }
    }
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int scn = srccn;
        const int* _tab = tab;
        for(int i = 0; i < n; i++, src += scn)
            dst[i] = (uchar)((_tab[src[0]] + _tab[src[1]+256] + _tab[src[2]+512]) >> yuv_shift);
    }
    int srccn;
    int tab[256*3];
};

#if CV_NEON

// 16-bit NEON specialization (fixed-point weights, rounded shift).
template <>
struct RGB2Gray<ushort>
{
    typedef ushort channel_type;

    RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) :
        srccn(_srccn)
    {
        static const int coeffs0[] = { R2Y, G2Y, B2Y };
        memcpy(coeffs, _coeffs
? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
        if( blueIdx == 0 )
            std::swap(coeffs[0], coeffs[2]);

        // broadcast the three weights and the rounding term for the SIMD loops
        v_cb = vdup_n_u16(coeffs[0]);
        v_cg = vdup_n_u16(coeffs[1]);
        v_cr = vdup_n_u16(coeffs[2]);
        v_delta = vdupq_n_u32(1 << (yuv_shift - 1));
    }

    // Weighted sum per pixel; 8-at-a-time, then 4-at-a-time, then scalar tail.
    void operator()(const ushort* src, ushort* dst, int n) const
    {
        int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2], i = 0;

        for ( ; i <= n - 8; i += 8, src += scn * 8)
        {
            uint16x8_t v_b, v_r, v_g;
            if (scn == 3)
            {
                uint16x8x3_t v_src = vld3q_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }
            else
            {
                // 4-channel input: the alpha plane (val[3]) is simply ignored
                uint16x8x4_t v_src = vld4q_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }

            uint32x4_t v_dst0_ = vmlal_u16(vmlal_u16(
                                           vmull_u16(vget_low_u16(v_b), v_cb),
                                                     vget_low_u16(v_g), v_cg),
                                                     vget_low_u16(v_r), v_cr);
            uint32x4_t v_dst1_ = vmlal_u16(vmlal_u16(
                                           vmull_u16(vget_high_u16(v_b), v_cb),
                                                     vget_high_u16(v_g), v_cg),
                                                     vget_high_u16(v_r), v_cr);

            uint16x4_t v_dst0 = vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst0_, v_delta), yuv_shift));
            uint16x4_t v_dst1 = vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst1_, v_delta), yuv_shift));

            vst1q_u16(dst + i, vcombine_u16(v_dst0, v_dst1));
        }

        for ( ; i <= n - 4; i += 4, src += scn * 4)
        {
            uint16x4_t v_b, v_r, v_g;
            if (scn == 3)
            {
                uint16x4x3_t v_src = vld3_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }
            else
            {
                uint16x4x4_t v_src = vld4_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }

            uint32x4_t v_dst = vmlal_u16(vmlal_u16(
                                         vmull_u16(v_b, v_cb),
                                                   v_g, v_cg),
                                                   v_r, v_cr);

            vst1_u16(dst + i, vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst, v_delta), yuv_shift)));
        }

        for( ; i < n; i++, src += scn)
            dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
    }

    int srccn, coeffs[3];
    uint16x4_t v_cb, v_cg, v_cr;
    uint32x4_t v_delta;
};

// float NEON specialization: straight multiply-accumulate, no rounding term.
template <>
struct RGB2Gray<float>
{
    typedef float channel_type;

    RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
        memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
        if(blueIdx == 0)
            std::swap(coeffs[0], coeffs[2]);

        v_cb = vdupq_n_f32(coeffs[0]);
        v_cg = vdupq_n_f32(coeffs[1]);
        v_cr = vdupq_n_f32(coeffs[2]);
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, i = 0;
        float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];

        if (scn == 3)
        {
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                float32x4x3_t v_src = vld3q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));

                v_src = vld3q_f32(src + scn * 4);
                vst1q_f32(dst + i + 4, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }

            for ( ; i <= n - 4; i += 4, src += scn * 4)
            {
                float32x4x3_t v_src = vld3q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }
        }
        else
        {
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                float32x4x4_t v_src = vld4q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));

                v_src = vld4q_f32(src + scn * 4);
                vst1q_f32(dst + i + 4, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }

            for ( ; i <= n - 4; i += 4, src += scn * 4)
            {
float32x4x4_t v_src = vld4q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }
        }

        for ( ; i < n; i++, src += scn)
            dst[i] = src[0]*cb + src[1]*cg + src[2]*cr;
    }

    int srccn;
    float coeffs[3];
    float32x4_t v_cb, v_cg, v_cr;
};

#elif CV_SSE2

#if CV_SSE4_1

// 16-bit SSE4.1 specialization (needs _mm_packus_epi32 / _mm_mullo_epi32-era ISA).
template <>
struct RGB2Gray<ushort>
{
    typedef ushort channel_type;

    RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) :
        srccn(_srccn)
    {
        static const int coeffs0[] = { R2Y, G2Y, B2Y };
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
        if( blueIdx == 0 )
            std::swap(coeffs[0], coeffs[2]);

        v_cb = _mm_set1_epi16((short)coeffs[0]);
        v_cg = _mm_set1_epi16((short)coeffs[1]);
        v_cr = _mm_set1_epi16((short)coeffs[2]);
        v_delta = _mm_set1_epi32(1 << (yuv_shift - 1));

        haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
    }

    // 16s x 8
    // NOTE: parameters are named b/g/r but simply mean channel 0/1/2 of the
    // interleaved input; the coefficient swap in the ctor handles channel order.
    void process(__m128i v_b, __m128i v_g, __m128i v_r,
                 __m128i & v_gray) const
    {
        // widen 16x16 products to 32 bit via mullo/mulhi(epu16) + unpack
        __m128i v_mullo_r = _mm_mullo_epi16(v_r, v_cr);
        __m128i v_mullo_g = _mm_mullo_epi16(v_g, v_cg);
        __m128i v_mullo_b = _mm_mullo_epi16(v_b, v_cb);
        __m128i v_mulhi_r = _mm_mulhi_epu16(v_r, v_cr);
        __m128i v_mulhi_g = _mm_mulhi_epu16(v_g, v_cg);
        __m128i v_mulhi_b = _mm_mulhi_epu16(v_b, v_cb);

        __m128i v_gray0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_r, v_mulhi_r),
                                        _mm_unpacklo_epi16(v_mullo_g, v_mulhi_g));
        v_gray0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_b, v_mulhi_b), v_gray0);
        v_gray0 = _mm_srli_epi32(_mm_add_epi32(v_gray0, v_delta), yuv_shift);

        __m128i v_gray1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_r, v_mulhi_r),
                                        _mm_unpackhi_epi16(v_mullo_g, v_mulhi_g));
        v_gray1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_b, v_mulhi_b), v_gray1);
        v_gray1 = _mm_srli_epi32(_mm_add_epi32(v_gray1, v_delta), yuv_shift);

        v_gray = _mm_packus_epi32(v_gray0, v_gray1);   // SSE4.1: unsigned saturate to u16
    }

    void operator()(const ushort* src, ushort* dst, int n) const
    {
        int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2], i = 0;

        if (scn == 3 && haveSIMD)
        {
            for ( ; i <= n - 16; i += 16, src += scn * 16)
            {
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 8));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 24));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 40));

                _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128i v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128i v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_si128((__m128i *)(dst + i), v_gray0);
                _mm_storeu_si128((__m128i *)(dst + i + 8), v_gray1);
            }
        }
        else if (scn == 4 && haveSIMD)
        {
            for ( ; i <= n - 16; i += 16, src += scn * 16)
            {
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 8));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 24));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 40));
                __m128i v_a0 = _mm_loadu_si128((__m128i const *)(src + 48));
                __m128i v_a1 = _mm_loadu_si128((__m128i const *)(src + 56));

                _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);

                __m128i v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128i v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_si128((__m128i *)(dst + i), v_gray0);
                _mm_storeu_si128((__m128i *)(dst + i + 8), v_gray1);
            }
        }

        for( ; i < n; i++, src += scn)
            dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
    }

    int srccn, coeffs[3];
    __m128i v_cb, v_cg, v_cr;
    __m128i v_delta;
    bool haveSIMD;
};

#endif // CV_SSE4_1

// float SSE2 specialization.
template <>
struct RGB2Gray<float>
{
    typedef float channel_type;

    RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
        memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
        if(blueIdx == 0)
            std::swap(coeffs[0], coeffs[2]);

        v_cb = _mm_set1_ps(coeffs[0]);
        v_cg = _mm_set1_ps(coeffs[1]);
        v_cr = _mm_set1_ps(coeffs[2]);

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // channel0*cb + channel1*cg + channel2*cr for 4 pixels at once
    void process(__m128 v_b, __m128 v_g, __m128 v_r,
                 __m128 & v_gray) const
    {
        v_gray = _mm_mul_ps(v_r, v_cr);
        v_gray = _mm_add_ps(v_gray, _mm_mul_ps(v_g, v_cg));
        v_gray = _mm_add_ps(v_gray, _mm_mul_ps(v_b, v_cb));
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, i = 0;
        float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];

        if (scn == 3 && haveSIMD)
        {
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);

                _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128 v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128 v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_ps(dst + i, v_gray0);
_mm_storeu_ps(dst + i + 4, v_gray1);
            }
        }
        else if (scn == 4 && haveSIMD)
        {
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);
                __m128 v_a0 = _mm_loadu_ps(src + 24);
                __m128 v_a1 = _mm_loadu_ps(src + 28);

                _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);

                __m128 v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128 v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_ps(dst + i, v_gray0);
                _mm_storeu_ps(dst + i + 4, v_gray1);
            }
        }

        for ( ; i < n; i++, src += scn)
            dst[i] = src[0]*cb + src[1]*cg + src[2]*cr;
    }

    int srccn;
    float coeffs[3];
    __m128 v_cb, v_cg, v_cr;
    bool haveSIMD;
};

#else

// scalar fallback when neither NEON nor SSE2 is available
template<> struct RGB2Gray<ushort>
{
    typedef ushort channel_type;

    RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) : srccn(_srccn)
    {
        static const int coeffs0[] = { R2Y, G2Y, B2Y };
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
        if( blueIdx == 0 )
            std::swap(coeffs[0], coeffs[2]);
    }

    void operator()(const ushort* src, ushort* dst, int n) const
    {
        int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
        for(int i = 0; i < n; i++, src += scn)
            dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
    }
    int srccn;
    int coeffs[3];
};

#endif

///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////

// RGB -> YCrCb (float weights): Y = dot(src, C0..C2),
// Cr = (R - Y)*C3 + half, Cb = (B - Y)*C4 + half, where bidx selects which
// source channel is blue (bidx) vs red (bidx^2).
template<typename _Tp> struct RGB2YCrCb_f
{
    typedef _Tp channel_type;

    RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : srccn(_srccn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
    }

    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int scn = srccn, bidx = blueIdx;
        const _Tp delta = ColorChannel<_Tp>::half();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        n *= 3;
        for(int i = 0; i < n; i += 3, src += scn)
        {
            _Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
            _Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta);
            _Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta);
            dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
        }
    }
    int srccn, blueIdx;
    float coeffs[5];
};

#if CV_NEON

// float NEON specialization of RGB2YCrCb_f.
template <>
struct RGB2YCrCb_f<float>
{
    typedef float channel_type;

    RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) :
        srccn(_srccn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
        memcpy(coeffs, _coeffs ?
_coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if(blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = vdupq_n_f32(coeffs[0]);
        v_c1 = vdupq_n_f32(coeffs[1]);
        v_c2 = vdupq_n_f32(coeffs[2]);
        v_c3 = vdupq_n_f32(coeffs[3]);
        v_c4 = vdupq_n_f32(coeffs[4]);
        v_delta = vdupq_n_f32(ColorChannel<float>::half());
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        n *= 3;   // n counts output values (3 per pixel) from here on

        if (scn == 3)
            for ( ; i <= n - 12; i += 12, src += 12)
            {
                float32x4x3_t v_src = vld3q_f32(src), v_dst;
                v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
                v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
                v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);

                vst3q_f32(dst + i, v_dst);
            }
        else
            for ( ; i <= n - 12; i += 12, src += 16)
            {
                float32x4x4_t v_src = vld4q_f32(src);
                float32x4x3_t v_dst;
                v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
                v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
                v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);

                vst3q_f32(dst + i, v_dst);
            }

        for ( ; i < n; i += 3, src += scn)
        {
            float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
            float Cr = (src[bidx^2] - Y)*C3 + delta;
            float Cb = (src[bidx] - Y)*C4 + delta;
            dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
        }
    }
    int srccn, blueIdx;
    float coeffs[5];
    float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
};

#elif CV_SSE2

// float SSE2 specialization of RGB2YCrCb_f.
template <>
struct RGB2YCrCb_f<float>
{
    typedef float channel_type;

    RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) :
        srccn(_srccn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = _mm_set1_ps(coeffs[0]);
        v_c1 = _mm_set1_ps(coeffs[1]);
        v_c2 = _mm_set1_ps(coeffs[2]);
        v_c3 = _mm_set1_ps(coeffs[3]);
        v_c4 = _mm_set1_ps(coeffs[4]);
        v_delta = _mm_set1_ps(ColorChannel<float>::half());

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Parameter names mean channel 0/1/2 of the interleaved input; the
    // blueIdx selects which of channel 0/2 feeds Cr (red) and Cb (blue),
    // mirroring the scalar src[bidx^2]/src[bidx] indexing.
    void process(__m128 v_r, __m128 v_g, __m128 v_b,
                 __m128 & v_y, __m128 & v_cr, __m128 & v_cb) const
    {
        v_y = _mm_mul_ps(v_r, v_c0);
        v_y = _mm_add_ps(v_y, _mm_mul_ps(v_g, v_c1));
        v_y = _mm_add_ps(v_y, _mm_mul_ps(v_b, v_c2));

        v_cr = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(blueIdx == 0 ? v_b : v_r, v_y), v_c3), v_delta);
        v_cb = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(blueIdx == 2 ? v_b : v_r, v_y), v_c4), v_delta);
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        n *= 3;

        if (haveSIMD)
        {
            for ( ; i <= n - 24; i += 24, src += 8 * scn)
            {
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);

                if (scn == 4)
                {
                    __m128 v_a0 = _mm_loadu_ps(src + 24);
                    __m128 v_a1 = _mm_loadu_ps(src + 28);
                    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1,
                                        v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128 v_y0, v_cr0, v_cb0;
                process(v_r0, v_g0, v_b0,
                        v_y0, v_cr0, v_cb0);

                __m128 v_y1, v_cr1, v_cb1;
                process(v_r1, v_g1, v_b1,
                        v_y1, v_cr1, v_cb1);

                _mm_interleave_ps(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);

                _mm_storeu_ps(dst + i, v_y0);
                _mm_storeu_ps(dst + i + 4, v_y1);
                _mm_storeu_ps(dst + i + 8, v_cr0);
                _mm_storeu_ps(dst + i + 12, v_cr1);
                _mm_storeu_ps(dst + i + 16, v_cb0);
                _mm_storeu_ps(dst + i + 20, v_cb1);
            }
        }

        for ( ; i < n; i += 3, src += scn)
        {
            float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
            float Cr = (src[bidx^2] - Y)*C3 + delta;
            float Cb = (src[bidx] - Y)*C4 + delta;
            dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
        }
    }
    int srccn, blueIdx;
    float coeffs[5];
    __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
    bool haveSIMD;
};

#endif

// Integer (fixed-point) RGB -> YCrCb; coefficients are scaled by 2^yuv_shift
// and results are descaled with rounding.
template<typename _Tp> struct RGB2YCrCb_i
{
    typedef _Tp channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int*
_coeffs)
        : srccn(_srccn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
    }
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int scn = srccn, bidx = blueIdx;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        // chroma offset (half range), pre-scaled to the fixed-point domain
        int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift);
        n *= 3;
        for(int i = 0; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<_Tp>(Y);
            dst[i+1] = saturate_cast<_Tp>(Cr);
            dst[i+2] = saturate_cast<_Tp>(Cb);
        }
    }
    int srccn, blueIdx;
    int coeffs[5];
};

#if CV_NEON

// 8-bit NEON specialization of RGB2YCrCb_i.
template <>
struct RGB2YCrCb_i<uchar>
{
    typedef uchar channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
        : srccn(_srccn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = vdup_n_s16(coeffs[0]);
        v_c1 = vdup_n_s16(coeffs[1]);
        v_c2 = vdup_n_s16(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_c4 = vdupq_n_s32(coeffs[4]);
        v_delta = vdupq_n_s32(ColorChannel<uchar>::half()*(1 << yuv_shift));  // chroma offset
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));                         // rounding term
    }

    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
        n *= 3;

        // 8 pixels (24 output values) per iteration
        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint8x8x3_t v_dst;
            int16x8x3_t v_src16;

            if (scn == 3)
            {
                uint8x8x3_t v_src = vld3_u8(src);
                v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
                v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
                v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
            }
            else
            {
                uint8x8x4_t v_src = vld4_u8(src);
                v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
                v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
                v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
            }

            // low halves first
            int16x4x3_t v_src0;
            v_src0.val[0] = vget_low_s16(v_src16.val[0]);
            v_src0.val[1] = vget_low_s16(v_src16.val[1]);
            v_src0.val[2] = vget_low_s16(v_src16.val[2]);

            int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta2), yuv_shift);
            int32x4_t v_Cr0 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx^2]), v_Y0), v_c3);
            v_Cr0 = vshrq_n_s32(vaddq_s32(v_Cr0, v_delta2), yuv_shift);
            int32x4_t v_Cb0 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx]), v_Y0), v_c4);
            v_Cb0 = vshrq_n_s32(vaddq_s32(v_Cb0, v_delta2), yuv_shift);

            // then high halves
            v_src0.val[0] = vget_high_s16(v_src16.val[0]);
            v_src0.val[1] = vget_high_s16(v_src16.val[1]);
            v_src0.val[2] = vget_high_s16(v_src16.val[2]);

            int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta2), yuv_shift);
            int32x4_t v_Cr1 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx^2]), v_Y1), v_c3);
            v_Cr1 = vshrq_n_s32(vaddq_s32(v_Cr1, v_delta2), yuv_shift);
            int32x4_t v_Cb1 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx]), v_Y1), v_c4);
            v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);

            v_dst.val[0] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
            v_dst.val[1] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cr0), vqmovn_s32(v_Cr1)));
            v_dst.val[2] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cb0), vqmovn_s32(v_Cb1)));

            vst3_u8(dst + i, v_dst);
        }

        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<uchar>(Y);
            dst[i+1] = saturate_cast<uchar>(Cr);
            dst[i+2] = saturate_cast<uchar>(Cb);
        }
    }
    int srccn, blueIdx, coeffs[5];
    int16x4_t v_c0, v_c1, v_c2;
    int32x4_t v_c3, v_c4, v_delta, v_delta2;
};

// 16-bit NEON specialization of RGB2YCrCb_i.
template <>
struct RGB2YCrCb_i<ushort>
{
    typedef ushort channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
        : srccn(_srccn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ?
_coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_c4 = vdupq_n_s32(coeffs[4]);
        v_delta = vdupq_n_s32(ColorChannel<ushort>::half()*(1 << yuv_shift));  // chroma offset
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));                          // rounding term
    }

    void operator()(const ushort * src, ushort * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
        n *= 3;

        // 8 pixels (24 output values) per iteration; products are done in
        // 32-bit lanes, so each register pair holds low/high halves
        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint16x8x3_t v_src, v_dst;
            int32x4x3_t v_src0;

            if (scn == 3)
                v_src = vld3q_u16(src);
            else
            {
                // 4-channel input: drop the alpha plane
                uint16x8x4_t v_src_ = vld4q_u16(src);
                v_src.val[0] = v_src_.val[0];
                v_src.val[1] = v_src_.val[1];
                v_src.val[2] = v_src_.val[2];
            }

            v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0])));
            v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1])));
            v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));

            int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta2), yuv_shift);
            int32x4_t v_Cr0 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y0), v_c3);
            v_Cr0 = vshrq_n_s32(vaddq_s32(v_Cr0, v_delta2), yuv_shift);
            int32x4_t v_Cb0 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y0), v_c4);
            v_Cb0 = vshrq_n_s32(vaddq_s32(v_Cb0, v_delta2), yuv_shift);

            v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0])));
            v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1])));
            v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));

            int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta2), yuv_shift);
            int32x4_t v_Cr1 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y1), v_c3);
            v_Cr1 = vshrq_n_s32(vaddq_s32(v_Cr1, v_delta2), yuv_shift);
            int32x4_t v_Cb1 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y1), v_c4);
            v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);

            v_dst.val[0] = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
            v_dst.val[1] = vcombine_u16(vqmovun_s32(v_Cr0), vqmovun_s32(v_Cr1));
            v_dst.val[2] = vcombine_u16(vqmovun_s32(v_Cb0), vqmovun_s32(v_Cb1));

            vst3q_u16(dst + i, v_dst);
        }

        // 4-pixel (12 output values) cleanup loop
        for ( ; i <= n - 12; i += 12, src += scn * 4)
        {
            uint16x4x3_t v_dst;
            int32x4x3_t v_src0;

            if (scn == 3)
            {
                uint16x4x3_t v_src = vld3_u16(src);
                v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0]));
                v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1]));
                v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
            }
            else
            {
                uint16x4x4_t v_src = vld4_u16(src);
                v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0]));
                v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1]));
                v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
            }

            int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta2), yuv_shift);
            int32x4_t v_Cr = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y), v_c3);
            v_Cr = vshrq_n_s32(vaddq_s32(v_Cr, v_delta2), yuv_shift);
            int32x4_t v_Cb = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y), v_c4);
            v_Cb = vshrq_n_s32(vaddq_s32(v_Cb, v_delta2), yuv_shift);

            v_dst.val[0] =
vqmovun_s32(v_Y); 2126 v_dst.val[1] = vqmovun_s32(v_Cr); 2127 v_dst.val[2] = vqmovun_s32(v_Cb); 2128 2129 vst3_u16(dst + i, v_dst); 2130 } 2131 2132 for ( ; i < n; i += 3, src += scn) 2133 { 2134 int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift); 2135 int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift); 2136 int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift); 2137 dst[i] = saturate_cast<ushort>(Y); 2138 dst[i+1] = saturate_cast<ushort>(Cr); 2139 dst[i+2] = saturate_cast<ushort>(Cb); 2140 } 2141 } 2142 int srccn, blueIdx, coeffs[5]; 2143 int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta, v_delta2; 2144 }; 2145 2146 #elif CV_SSE4_1 2147 2148 template <> 2149 struct RGB2YCrCb_i<uchar> 2150 { 2151 typedef uchar channel_type; 2152 2153 RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs) 2154 : srccn(_srccn), blueIdx(_blueIdx) 2155 { 2156 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241}; 2157 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0])); 2158 if (blueIdx==0) 2159 std::swap(coeffs[0], coeffs[2]); 2160 2161 v_c0 = _mm_set1_epi32(coeffs[0]); 2162 v_c1 = _mm_set1_epi32(coeffs[1]); 2163 v_c2 = _mm_set1_epi32(coeffs[2]); 2164 v_c3 = _mm_set1_epi32(coeffs[3]); 2165 v_c4 = _mm_set1_epi32(coeffs[4]); 2166 v_delta2 = _mm_set1_epi32(1 << (yuv_shift - 1)); 2167 v_delta = _mm_set1_epi32(ColorChannel<uchar>::half()*(1 << yuv_shift)); 2168 v_delta = _mm_add_epi32(v_delta, v_delta2); 2169 v_zero = _mm_setzero_si128(); 2170 2171 haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1); 2172 } 2173 2174 // 16u x 8 2175 void process(__m128i v_r, __m128i v_g, __m128i v_b, 2176 __m128i & v_y, __m128i & v_cr, __m128i & v_cb) const 2177 { 2178 __m128i v_r_p = _mm_unpacklo_epi16(v_r, v_zero); 2179 __m128i v_g_p = _mm_unpacklo_epi16(v_g, v_zero); 2180 __m128i v_b_p = _mm_unpacklo_epi16(v_b, v_zero); 2181 2182 __m128i v_y0 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0), 2183 _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1), 2184 
                                                   _mm_mullo_epi32(v_b_p, v_c2)));
        // Y is non-negative, so a logical shift suffices here.
        v_y0 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y0), yuv_shift);

        // Cr uses the red channel, Cb the blue one; the ternaries pick the right slot
        // because v_r/v_b are channels 0/2 of the source layout (coeffs already swapped for BGR).
        __m128i v_cr0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y0), v_c3);
        __m128i v_cb0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y0), v_c4);
        // v_delta already contains midpoint + rounding; arithmetic shift handles negative products.
        v_cr0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr0), yuv_shift);
        v_cb0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb0), yuv_shift);

        // Same computation for the high 4 lanes.
        v_r_p = _mm_unpackhi_epi16(v_r, v_zero);
        v_g_p = _mm_unpackhi_epi16(v_g, v_zero);
        v_b_p = _mm_unpackhi_epi16(v_b, v_zero);

        __m128i v_y1 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                                     _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                                   _mm_mullo_epi32(v_b_p, v_c2)));
        v_y1 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y1), yuv_shift);

        __m128i v_cr1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y1), v_c3);
        __m128i v_cb1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y1), v_c4);
        v_cr1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr1), yuv_shift);
        v_cb1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb1), yuv_shift);

        // Signed 32->16 pack; values fit in 0..255 so the caller's packus to 8-bit is safe.
        v_y = _mm_packs_epi32(v_y0, v_y1);
        v_cr = _mm_packs_epi32(v_cr0, v_cr1);
        v_cb = _mm_packs_epi32(v_cb0, v_cb1);
    }

    // Convert n pixels; SIMD path handles 32 pixels per iteration, scalar tail the rest.
    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
        n *= 3; // n now counts output elements

        if (haveSIMD)
        {
            for ( ; i <= n - 96; i += 96, src += scn * 32)
            {
                // Load 96 (or 128 with alpha) interleaved bytes and deinterleave into planes.
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 48));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 64));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 80));

                if (scn == 4)
                {
                    __m128i v_a0 = _mm_loadu_si128((__m128i const *)(src + 96));
                    __m128i v_a1 = _mm_loadu_si128((__m128i const *)(src + 112));
                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1,
                                          v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                // Process the 32 pixels in four batches of 8 (low/high byte halves).
                __m128i v_y0 = v_zero, v_cr0 = v_zero, v_cb0 = v_zero;
                process(_mm_unpacklo_epi8(v_r0, v_zero),
                        _mm_unpacklo_epi8(v_g0, v_zero),
                        _mm_unpacklo_epi8(v_b0, v_zero),
                        v_y0, v_cr0, v_cb0);

                __m128i v_y1 = v_zero, v_cr1 = v_zero, v_cb1 = v_zero;
                process(_mm_unpackhi_epi8(v_r0, v_zero),
                        _mm_unpackhi_epi8(v_g0, v_zero),
                        _mm_unpackhi_epi8(v_b0, v_zero),
                        v_y1, v_cr1, v_cb1);

                __m128i v_y_0 = _mm_packus_epi16(v_y0, v_y1);
                __m128i v_cr_0 = _mm_packus_epi16(v_cr0, v_cr1);
                __m128i v_cb_0 = _mm_packus_epi16(v_cb0, v_cb1);

                process(_mm_unpacklo_epi8(v_r1, v_zero),
                        _mm_unpacklo_epi8(v_g1, v_zero),
                        _mm_unpacklo_epi8(v_b1, v_zero),
                        v_y0, v_cr0, v_cb0);

                process(_mm_unpackhi_epi8(v_r1, v_zero),
                        _mm_unpackhi_epi8(v_g1, v_zero),
                        _mm_unpackhi_epi8(v_b1, v_zero),
                        v_y1, v_cr1, v_cb1);

                __m128i v_y_1 = _mm_packus_epi16(v_y0, v_y1);
                __m128i v_cr_1 = _mm_packus_epi16(v_cr0, v_cr1);
                __m128i v_cb_1 = _mm_packus_epi16(v_cb0, v_cb1);

                // Re-interleave planes into packed YCrCb and store 96 bytes.
                _mm_interleave_epi8(v_y_0, v_y_1, v_cr_0, v_cr_1, v_cb_0, v_cb_1);

                _mm_storeu_si128((__m128i *)(dst + i), v_y_0);
                _mm_storeu_si128((__m128i *)(dst + i + 16), v_y_1);
                _mm_storeu_si128((__m128i *)(dst + i + 32), v_cr_0);
                _mm_storeu_si128((__m128i *)(dst + i + 48), v_cr_1);
                _mm_storeu_si128((__m128i *)(dst + i + 64), v_cb_0);
                _mm_storeu_si128((__m128i *)(dst + i + 80), v_cb_1);
            }
        }

        // Scalar tail: same fixed-point formulas via CV_DESCALE.
        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<uchar>(Y);
            dst[i+1] = saturate_cast<uchar>(Cr);
            dst[i+2] = saturate_cast<uchar>(Cb);
        }
    }

    int srccn, blueIdx, coeffs[5];
    __m128i v_c0, v_c1, v_c2;
    __m128i v_c3, v_c4, v_delta, v_delta2;
    __m128i v_zero;
    bool haveSIMD;
};

// SSE4.1 specialization: RGB/BGR(A) (16-bit) -> YCrCb, fixed-point arithmetic.
template <>
struct RGB2YCrCb_i<ushort>
{
    typedef ushort channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
        : srccn(_srccn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = _mm_set1_epi32(coeffs[0]);
        v_c1 = _mm_set1_epi32(coeffs[1]);
        v_c2 = _mm_set1_epi32(coeffs[2]);
        v_c3 = _mm_set1_epi32(coeffs[3]);
        v_c4 = _mm_set1_epi32(coeffs[4]);
        v_delta2 = _mm_set1_epi32(1 << (yuv_shift - 1));
        v_delta = _mm_set1_epi32(ColorChannel<ushort>::half()*(1 << yuv_shift));
        // Fold rounding into the chroma offset (single add per descale below).
        v_delta = _mm_add_epi32(v_delta, v_delta2);
        v_zero = _mm_setzero_si128();

        haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
    }

    // 16u x 8: converts 8 pixels held as unsigned 16-bit lanes.
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 __m128i & v_y, __m128i & v_cr, __m128i & v_cb) const
    {
        __m128i v_r_p = _mm_unpacklo_epi16(v_r, v_zero);
        __m128i v_g_p = _mm_unpacklo_epi16(v_g, v_zero);
        __m128i v_b_p = _mm_unpacklo_epi16(v_b, v_zero);

        __m128i v_y0 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                                     _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                                   _mm_mullo_epi32(v_b_p, v_c2)));
        v_y0 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y0), yuv_shift);

        __m128i v_cr0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y0), v_c3);
        __m128i v_cb0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y0), v_c4);
        v_cr0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr0), yuv_shift);
        v_cb0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb0), yuv_shift);

        v_r_p = _mm_unpackhi_epi16(v_r, v_zero);
        v_g_p = _mm_unpackhi_epi16(v_g, v_zero);
        v_b_p = _mm_unpackhi_epi16(v_b, v_zero);

        __m128i v_y1 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                                     _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                                   _mm_mullo_epi32(v_b_p, v_c2)));
        v_y1 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y1), yuv_shift);

        __m128i v_cr1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y1), v_c3);
        __m128i v_cb1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y1), v_c4);
        v_cr1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr1), yuv_shift);
        v_cb1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb1), yuv_shift);

        // Unsigned-saturating 32->16 pack (SSE4.1) preserves the full ushort range.
        v_y = _mm_packus_epi32(v_y0, v_y1);
        v_cr = _mm_packus_epi32(v_cr0, v_cr1);
        v_cb = _mm_packus_epi32(v_cb0, v_cb1);
    }

    // Convert n pixels; SIMD path handles 16 pixels per iteration, scalar tail the rest.
    void operator()(const ushort * src, ushort * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
        n *= 3;

        if (haveSIMD)
        {
            for ( ; i <= n - 48; i += 48, src += scn * 16)
            {
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 8));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 24));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 40));

                if (scn == 4)
                {
                    // 4-channel input: deinterleave with alpha, then drop it.
                    __m128i v_a0 = _mm_loadu_si128((__m128i const *)(src + 48));
                    __m128i v_a1 = _mm_loadu_si128((__m128i const *)(src + 56));

                    _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1,
                                           v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128i v_y0 = v_zero, v_cr0 = v_zero, v_cb0 = v_zero;
                process(v_r0, v_g0, v_b0,
                        v_y0, v_cr0, v_cb0);

                __m128i v_y1 = v_zero, v_cr1 = v_zero, v_cb1 = v_zero;
                process(v_r1, v_g1, v_b1,
                        v_y1, v_cr1, v_cb1);

                _mm_interleave_epi16(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);

                _mm_storeu_si128((__m128i *)(dst + i), v_y0);
                _mm_storeu_si128((__m128i *)(dst + i + 8), v_y1);
                _mm_storeu_si128((__m128i *)(dst + i + 16), v_cr0);
                _mm_storeu_si128((__m128i *)(dst + i + 24), v_cr1);
                _mm_storeu_si128((__m128i *)(dst + i + 32), v_cb0);
                _mm_storeu_si128((__m128i *)(dst + i + 40), v_cb1);
            }
        }

        // Scalar tail.
        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<ushort>(Y);
            dst[i+1] = saturate_cast<ushort>(Cr);
            dst[i+2] = saturate_cast<ushort>(Cb);
        }
    }

    int srccn, blueIdx, coeffs[5];
    __m128i v_c0, v_c1, v_c2;
    __m128i v_c3, v_c4, v_delta, v_delta2;
    __m128i v_zero;
    bool haveSIMD;
};

#endif // CV_SSE4_1

// Generic scalar functor: YCrCb -> RGB/BGR(A), floating-point arithmetic.
template<typename _Tp> struct YCrCb2RGB_f
{
    typedef _Tp channel_type;

    // _dstcn: 3 or 4 output channels; _blueIdx: position of blue in the output;
    // _coeffs: optional 4 inverse-transform coefficients (default BT.601-style set).
    YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
        memcpy(coeffs, _coeffs ?
               _coeffs : coeffs0, 4*sizeof(coeffs[0]));
    }
    // Convert n pixels of packed YCrCb into dcn-channel output; alpha set to max when dcn == 4.
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx;
        const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3; // n now counts input elements, 3 per pixel
        for(int i = 0; i < n; i += 3, dst += dcn)
        {
            _Tp Y = src[i];
            _Tp Cr = src[i+1];
            _Tp Cb = src[i+2];

            // Inverse transform: B/G/R from Y and mean-shifted chroma.
            _Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3);
            _Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1);
            _Tp r = saturate_cast<_Tp>(Y + (Cr - delta)*C0);

            // bidx routes blue/red to the proper output slots (RGB vs BGR).
            dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[4];
};

#if CV_NEON

// NEON specialization: YCrCb -> RGB/BGR(A), 32-bit float.
template <>
struct YCrCb2RGB_f<float>
{
    typedef float channel_type;

    YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        // Broadcast coefficients/constants once.
        v_c0 = vdupq_n_f32(coeffs[0]);
        v_c1 = vdupq_n_f32(coeffs[1]);
        v_c2 = vdupq_n_f32(coeffs[2]);
        v_c3 = vdupq_n_f32(coeffs[3]);
        v_delta = vdupq_n_f32(ColorChannel<float>::half());
        v_alpha = vdupq_n_f32(ColorChannel<float>::max());
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;

        // 4 pixels per iteration; separate loops for 3- and 4-channel output.
        if (dcn == 3)
            for ( ; i <= n - 12; i += 12, dst += 12)
            {
                float32x4x3_t v_src = vld3q_f32(src + i), v_dst;
                float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];

                // b = Y + (Cb-delta)*c3; g = Y + (Cb-delta)*c2 + (Cr-delta)*c1; r = Y + (Cr-delta)*c0
                v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
                v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
                v_dst.val[bidx^2] = vmlaq_f32(v_Y, vsubq_f32(v_Cr, v_delta), v_c0);

                vst3q_f32(dst, v_dst);
            }
        else
            for ( ; i <= n - 12; i += 12, dst += 16)
            {
                float32x4x3_t v_src = vld3q_f32(src + i);
                float32x4x4_t v_dst;
                float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];

                v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
                v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
                v_dst.val[bidx^2] = vmlaq_f32(v_Y, vsubq_f32(v_Cr, v_delta), v_c0);
                v_dst.val[3] = v_alpha; // opaque alpha

                vst4q_f32(dst, v_dst);
            }

        // Scalar tail.
        for ( ; i < n; i += 3, dst += dcn)
        {
            float Y = src[i], Cr = src[i+1], Cb = src[i+2];

            float b = Y + (Cb - delta)*C3;
            float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
            float r = Y + (Cr - delta)*C0;

            dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[4];
    float32x4_t v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
};

#elif CV_SSE2

// SSE2 specialization: YCrCb -> RGB/BGR(A), 32-bit float.
template <>
struct YCrCb2RGB_f<float>
{
    typedef float channel_type;

    YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        v_c0 = _mm_set1_ps(coeffs[0]);
        v_c1 = _mm_set1_ps(coeffs[1]);
        v_c2 = _mm_set1_ps(coeffs[2]);
        v_c3 = _mm_set1_ps(coeffs[3]);
        v_delta = _mm_set1_ps(ColorChannel<float>::half());
        v_alpha = _mm_set1_ps(ColorChannel<float>::max());

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Converts 4 pixels: inverse transform, then swaps B/R for BGR output order.
    void process(__m128 v_y, __m128 v_cr, __m128 v_cb,
                 __m128 & v_r, __m128 & v_g, __m128 & v_b) const
    {
        // Mean-shift the chroma channels.
        v_cb = _mm_sub_ps(v_cb, v_delta);
        v_cr = _mm_sub_ps(v_cr, v_delta);

        v_b = _mm_mul_ps(v_cb, v_c3);
        v_g = _mm_add_ps(_mm_mul_ps(v_cb, v_c2), _mm_mul_ps(v_cr, v_c1));
        v_r = _mm_mul_ps(v_cr, v_c0);

        v_b = _mm_add_ps(v_b, v_y);
        v_g = _mm_add_ps(v_g, v_y);
        v_r = _mm_add_ps(v_r, v_y);

        // blueIdx == 0 means channel 0 of the output is blue.
        if (blueIdx == 0)
            std::swap(v_b, v_r);
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;

        if (haveSIMD)
        {
            // 8 pixels per iteration.
            for ( ; i <= n - 24; i += 24, dst += 8 * dcn)
            {
                __m128 v_y0 = _mm_loadu_ps(src + i);
                __m128 v_y1 = _mm_loadu_ps(src + i + 4);
                __m128 v_cr0 = _mm_loadu_ps(src + i + 8);
                __m128 v_cr1 = _mm_loadu_ps(src + i + 12);
                __m128 v_cb0 = _mm_loadu_ps(src + i + 16);
                __m128 v_cb1 = _mm_loadu_ps(src + i + 20);

                _mm_deinterleave_ps(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);

                __m128 v_r0, v_g0, v_b0;
                process(v_y0, v_cr0, v_cb0,
                        v_r0, v_g0, v_b0);

                __m128 v_r1, v_g1, v_b1;
                process(v_y1, v_cr1, v_cb1,
                        v_r1, v_g1, v_b1);

                __m128 v_a0 = v_alpha, v_a1 = v_alpha;

                if (dcn == 3)
                    _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);
                else
                    _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1,
                                      v_b0, v_b1, v_a0, v_a1);

                _mm_storeu_ps(dst, v_r0);
                _mm_storeu_ps(dst + 4, v_r1);
                _mm_storeu_ps(dst + 8, v_g0);
                _mm_storeu_ps(dst + 12, v_g1);
                _mm_storeu_ps(dst + 16, v_b0);
                _mm_storeu_ps(dst + 20, v_b1);

                if (dcn == 4)
                {
                    _mm_storeu_ps(dst + 24, v_a0);
                    _mm_storeu_ps(dst + 28, v_a1);
                }
            }
        }

        // Scalar tail.
        for ( ; i < n; i += 3, dst += dcn)
        {
            float Y = src[i], Cr = src[i+1], Cb = src[i+2];

            float b = Y + (Cb - delta)*C3;
            float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
            float r = Y + (Cr - delta)*C0;

            dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[4];

    __m128 v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
    bool haveSIMD;
};

#endif

// Generic scalar functor: YCrCb -> RGB/BGR(A), fixed-point arithmetic.
template<typename _Tp> struct YCrCb2RGB_i
{
    typedef _Tp channel_type;

    // _coeffs: optional 4 inverse-transform coefficients scaled by 2^yuv_shift.
    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ?
               _coeffs : coeffs0, 4*sizeof(coeffs[0]));
    }

    // Convert n pixels of packed YCrCb into dcn-channel output; alpha set to max when dcn == 4.
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx;
        const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3; // n now counts input elements, 3 per pixel
        for(int i = 0; i < n; i += 3, dst += dcn)
        {
            _Tp Y = src[i];
            _Tp Cr = src[i+1];
            _Tp Cb = src[i+2];

            // Fixed-point inverse transform with rounding descale.
            int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
            int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
            int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);

            dst[bidx] = saturate_cast<_Tp>(b);
            dst[1] = saturate_cast<_Tp>(g);
            dst[bidx^2] = saturate_cast<_Tp>(r);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[4];
};

#if CV_NEON

// NEON specialization: YCrCb -> RGB/BGR(A), 8-bit, fixed-point arithmetic.
template <>
struct YCrCb2RGB_i<uchar>
{
    typedef uchar channel_type;

    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_delta = vdup_n_s16(ColorChannel<uchar>::half()); // chroma midpoint (16-bit, for widening subtract)
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));      // rounding constant
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;

        // 8 pixels per iteration.
        for ( ; i <= n - 24; i += 24, dst += dcn * 8)
        {
            uint8x8x3_t v_src = vld3_u8(src + i);
            int16x8x3_t v_src16;
            v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
            v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
            v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));

            // Low 4 pixels.
            int16x4_t v_Y = vget_low_s16(v_src16.val[0]),
                      v_Cr = vget_low_s16(v_src16.val[1]),
                      v_Cb = vget_low_s16(v_src16.val[2]);

            // channel = Y + descale(coef * (chroma - delta)); vaddw_s16 widens Y while adding.
            int32x4_t v_b0 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
            v_b0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g0 = vmlaq_s32(vmulq_s32(vsubl_s16(v_Cr, v_delta), v_c1), vsubl_s16(v_Cb, v_delta), v_c2);
            v_g0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_g0, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r0 = vmulq_s32(v_c0, vsubl_s16(v_Cr, v_delta));
            v_r0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);

            // High 4 pixels.
            v_Y = vget_high_s16(v_src16.val[0]);
            v_Cr = vget_high_s16(v_src16.val[1]);
            v_Cb = vget_high_s16(v_src16.val[2]);

            int32x4_t v_b1 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
            v_b1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g1 = vmlaq_s32(vmulq_s32(vsubl_s16(v_Cr, v_delta), v_c1), vsubl_s16(v_Cb, v_delta), v_c2);
            v_g1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_g1, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r1 = vmulq_s32(v_c0, vsubl_s16(v_Cr, v_delta));
            v_r1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r1, v_delta2), yuv_shift), v_Y);

            // Narrow 32 -> 16 without saturation, then 16 -> 8 with unsigned saturation.
            uint8x8_t v_b = vqmovun_s16(vcombine_s16(vmovn_s32(v_b0), vmovn_s32(v_b1)));
            uint8x8_t v_g = vqmovun_s16(vcombine_s16(vmovn_s32(v_g0), vmovn_s32(v_g1)));
            uint8x8_t v_r = vqmovun_s16(vcombine_s16(vmovn_s32(v_r0), vmovn_s32(v_r1)));

            if (dcn == 3)
            {
                uint8x8x3_t v_dst;
                v_dst.val[bidx] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[bidx^2] = v_r;
                vst3_u8(dst, v_dst);
            }
            else
            {
                uint8x8x4_t v_dst;
                v_dst.val[bidx] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[bidx^2] = v_r;
                v_dst.val[3] = v_alpha; // opaque alpha
                vst4_u8(dst, v_dst);
            }
        }

        // Scalar tail.
        for ( ; i < n; i += 3, dst += dcn)
        {
            uchar Y = src[i];
            uchar Cr = src[i+1];
            uchar Cb = src[i+2];

            int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
            int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
            int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);

            dst[bidx] = saturate_cast<uchar>(b);
            dst[1] = saturate_cast<uchar>(g);
            dst[bidx^2] = saturate_cast<uchar>(r);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[4];

    int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2;
    int16x4_t v_delta;
    uint8x8_t v_alpha;
};

// NEON specialization: YCrCb -> RGB/BGR(A), 16-bit, fixed-point arithmetic.
template <>
struct YCrCb2RGB_i<ushort>
{
    typedef ushort channel_type;

    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_delta = vdupq_n_s32(ColorChannel<ushort>::half());
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
        v_alpha = vdupq_n_u16(ColorChannel<ushort>::max());
        v_alpha2 = vget_low_u16(v_alpha); // 64-bit alpha for the 4-pixel loop
    }

    void operator()(const ushort* src, ushort* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const ushort delta = ColorChannel<ushort>::half(), alpha = ColorChannel<ushort>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;

        // Main loop: 8 pixels per iteration.
        for ( ; i <= n - 24; i += 24, dst += dcn * 8)
        {
            uint16x8x3_t v_src = vld3q_u16(src + i);

            // Low 4 pixels widened to 32-bit.
            int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
                      v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))),
                      v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));

            int32x4_t v_b0 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
            v_b0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g0 = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
            v_g0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g0, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r0 = vmulq_s32(v_c0, vsubq_s32(v_Cr, v_delta));
            v_r0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);

            // High 4 pixels. NOTE: the trailing commas are the comma operator —
            // these three assignments form a single statement (harmless quirk, kept as-is).
            v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))),
            v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1]))),
            v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));

            int32x4_t v_b1 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
            v_b1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g1 = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
            v_g1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g1, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r1 = vmulq_s32(v_c0, vsubq_s32(v_Cr, v_delta));
            v_r1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r1, v_delta2), yuv_shift), v_Y);

            // Saturating narrow back to u16.
            uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_b0), vqmovun_s32(v_b1));
            uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_g0), vqmovun_s32(v_g1));
            uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_r0), vqmovun_s32(v_r1));

            if (dcn == 3)
            {
                uint16x8x3_t v_dst;
                v_dst.val[bidx] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[bidx^2] = v_r;
                vst3q_u16(dst, v_dst);
            }
            else
            {
                uint16x8x4_t v_dst;
                v_dst.val[bidx] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[bidx^2] = v_r;
                v_dst.val[3] = v_alpha;
                vst4q_u16(dst, v_dst);
            }
        }

        // Secondary loop: 4 pixels per iteration.
        for ( ; i <= n - 12; i += 12, dst += dcn * 4)
        {
            uint16x4x3_t v_src = vld3_u16(src + i);

            int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
                      v_Cr = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])),
                      v_Cb = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));

            int32x4_t v_b = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
            v_b = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
            v_g = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r = vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c0);
            v_r = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r, v_delta2), yuv_shift), v_Y);

            uint16x4_t v_bd = vqmovun_s32(v_b);
            uint16x4_t v_gd = vqmovun_s32(v_g);
            uint16x4_t v_rd = vqmovun_s32(v_r);

            if (dcn == 3)
            {
                uint16x4x3_t v_dst;
                v_dst.val[bidx] = v_bd;
                v_dst.val[1] = v_gd;
                v_dst.val[bidx^2] = v_rd;
                vst3_u16(dst, v_dst);
            }
            else
            {
                uint16x4x4_t v_dst;
                v_dst.val[bidx] = v_bd;
                v_dst.val[1] = v_gd;
                v_dst.val[bidx^2] = v_rd;
                v_dst.val[3] = v_alpha2;
                vst4_u16(dst, v_dst);
            }
        }

        // Scalar tail.
        for ( ; i < n; i += 3, dst += dcn)
        {
            ushort Y = src[i];
            ushort Cr = src[i+1];
            ushort Cb = src[i+2];

            int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
            int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
            int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);

            dst[bidx] = saturate_cast<ushort>(b);
            dst[1] = saturate_cast<ushort>(g);
            dst[bidx^2] = saturate_cast<ushort>(r);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[4];

    int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2, v_delta;
    uint16x8_t v_alpha;
    uint16x4_t v_alpha2;
};

#elif CV_SSE2

// SSE2 specialization: YCrCb -> RGB/BGR(A), 8-bit, fixed-point arithmetic.
template <>
struct YCrCb2RGB_i<uchar>
{
    typedef uchar channel_type;

    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ?
_coeffs : coeffs0, 4*sizeof(coeffs[0])); 2948 2949 v_c0 = _mm_set1_epi16((short)coeffs[0]); 2950 v_c1 = _mm_set1_epi16((short)coeffs[1]); 2951 v_c2 = _mm_set1_epi16((short)coeffs[2]); 2952 v_c3 = _mm_set1_epi16((short)coeffs[3]); 2953 v_delta = _mm_set1_epi16(ColorChannel<uchar>::half()); 2954 v_delta2 = _mm_set1_epi32(1 << (yuv_shift - 1)); 2955 v_zero = _mm_setzero_si128(); 2956 2957 uchar alpha = ColorChannel<uchar>::max(); 2958 v_alpha = _mm_set1_epi8(*(char *)&alpha); 2959 2960 useSSE = coeffs[0] <= std::numeric_limits<short>::max(); 2961 haveSIMD = checkHardwareSupport(CV_CPU_SSE2); 2962 } 2963 2964 // 16s x 8 2965 void process(__m128i v_y, __m128i v_cr, __m128i v_cb, 2966 __m128i & v_r, __m128i & v_g, __m128i & v_b) const 2967 { 2968 v_cr = _mm_sub_epi16(v_cr, v_delta); 2969 v_cb = _mm_sub_epi16(v_cb, v_delta); 2970 2971 __m128i v_y_p = _mm_unpacklo_epi16(v_y, v_zero); 2972 2973 __m128i v_mullo_3 = _mm_mullo_epi16(v_cb, v_c3); 2974 __m128i v_mullo_2 = _mm_mullo_epi16(v_cb, v_c2); 2975 __m128i v_mullo_1 = _mm_mullo_epi16(v_cr, v_c1); 2976 __m128i v_mullo_0 = _mm_mullo_epi16(v_cr, v_c0); 2977 2978 __m128i v_mulhi_3 = _mm_mulhi_epi16(v_cb, v_c3); 2979 __m128i v_mulhi_2 = _mm_mulhi_epi16(v_cb, v_c2); 2980 __m128i v_mulhi_1 = _mm_mulhi_epi16(v_cr, v_c1); 2981 __m128i v_mulhi_0 = _mm_mulhi_epi16(v_cr, v_c0); 2982 2983 __m128i v_b0 = _mm_srai_epi32(_mm_add_epi32(_mm_unpacklo_epi16(v_mullo_3, v_mulhi_3), v_delta2), yuv_shift); 2984 __m128i v_g0 = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(_mm_unpacklo_epi16(v_mullo_2, v_mulhi_2), 2985 _mm_unpacklo_epi16(v_mullo_1, v_mulhi_1)), v_delta2), 2986 yuv_shift); 2987 __m128i v_r0 = _mm_srai_epi32(_mm_add_epi32(_mm_unpacklo_epi16(v_mullo_0, v_mulhi_0), v_delta2), yuv_shift); 2988 2989 v_r0 = _mm_add_epi32(v_r0, v_y_p); 2990 v_g0 = _mm_add_epi32(v_g0, v_y_p); 2991 v_b0 = _mm_add_epi32(v_b0, v_y_p); 2992 2993 v_y_p = _mm_unpackhi_epi16(v_y, v_zero); 2994 2995 __m128i v_b1 = 
_mm_srai_epi32(_mm_add_epi32(_mm_unpackhi_epi16(v_mullo_3, v_mulhi_3), v_delta2), yuv_shift); 2996 __m128i v_g1 = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(_mm_unpackhi_epi16(v_mullo_2, v_mulhi_2), 2997 _mm_unpackhi_epi16(v_mullo_1, v_mulhi_1)), v_delta2), 2998 yuv_shift); 2999 __m128i v_r1 = _mm_srai_epi32(_mm_add_epi32(_mm_unpackhi_epi16(v_mullo_0, v_mulhi_0), v_delta2), yuv_shift); 3000 3001 v_r1 = _mm_add_epi32(v_r1, v_y_p); 3002 v_g1 = _mm_add_epi32(v_g1, v_y_p); 3003 v_b1 = _mm_add_epi32(v_b1, v_y_p); 3004 3005 v_r = _mm_packs_epi32(v_r0, v_r1); 3006 v_g = _mm_packs_epi32(v_g0, v_g1); 3007 v_b = _mm_packs_epi32(v_b0, v_b1); 3008 } 3009 3010 void operator()(const uchar* src, uchar* dst, int n) const 3011 { 3012 int dcn = dstcn, bidx = blueIdx, i = 0; 3013 const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max(); 3014 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3]; 3015 n *= 3; 3016 3017 if (haveSIMD && useSSE) 3018 { 3019 for ( ; i <= n - 96; i += 96, dst += dcn * 32) 3020 { 3021 __m128i v_y0 = _mm_loadu_si128((__m128i const *)(src + i)); 3022 __m128i v_y1 = _mm_loadu_si128((__m128i const *)(src + i + 16)); 3023 __m128i v_cr0 = _mm_loadu_si128((__m128i const *)(src + i + 32)); 3024 __m128i v_cr1 = _mm_loadu_si128((__m128i const *)(src + i + 48)); 3025 __m128i v_cb0 = _mm_loadu_si128((__m128i const *)(src + i + 64)); 3026 __m128i v_cb1 = _mm_loadu_si128((__m128i const *)(src + i + 80)); 3027 3028 _mm_deinterleave_epi8(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1); 3029 3030 __m128i v_r_0 = v_zero, v_g_0 = v_zero, v_b_0 = v_zero; 3031 process(_mm_unpacklo_epi8(v_y0, v_zero), 3032 _mm_unpacklo_epi8(v_cr0, v_zero), 3033 _mm_unpacklo_epi8(v_cb0, v_zero), 3034 v_r_0, v_g_0, v_b_0); 3035 3036 __m128i v_r_1 = v_zero, v_g_1 = v_zero, v_b_1 = v_zero; 3037 process(_mm_unpackhi_epi8(v_y0, v_zero), 3038 _mm_unpackhi_epi8(v_cr0, v_zero), 3039 _mm_unpackhi_epi8(v_cb0, v_zero), 3040 v_r_1, v_g_1, v_b_1); 3041 3042 __m128i v_r0 = 
_mm_packus_epi16(v_r_0, v_r_1); 3043 __m128i v_g0 = _mm_packus_epi16(v_g_0, v_g_1); 3044 __m128i v_b0 = _mm_packus_epi16(v_b_0, v_b_1); 3045 3046 process(_mm_unpacklo_epi8(v_y1, v_zero), 3047 _mm_unpacklo_epi8(v_cr1, v_zero), 3048 _mm_unpacklo_epi8(v_cb1, v_zero), 3049 v_r_0, v_g_0, v_b_0); 3050 3051 process(_mm_unpackhi_epi8(v_y1, v_zero), 3052 _mm_unpackhi_epi8(v_cr1, v_zero), 3053 _mm_unpackhi_epi8(v_cb1, v_zero), 3054 v_r_1, v_g_1, v_b_1); 3055 3056 __m128i v_r1 = _mm_packus_epi16(v_r_0, v_r_1); 3057 __m128i v_g1 = _mm_packus_epi16(v_g_0, v_g_1); 3058 __m128i v_b1 = _mm_packus_epi16(v_b_0, v_b_1); 3059 3060 if (bidx == 0) 3061 { 3062 std::swap(v_r0, v_b0); 3063 std::swap(v_r1, v_b1); 3064 } 3065 3066 __m128i v_a0 = v_alpha, v_a1 = v_alpha; 3067 3068 if (dcn == 3) 3069 _mm_interleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1); 3070 else 3071 _mm_interleave_epi8(v_r0, v_r1, v_g0, v_g1, 3072 v_b0, v_b1, v_a0, v_a1); 3073 3074 _mm_storeu_si128((__m128i *)(dst), v_r0); 3075 _mm_storeu_si128((__m128i *)(dst + 16), v_r1); 3076 _mm_storeu_si128((__m128i *)(dst + 32), v_g0); 3077 _mm_storeu_si128((__m128i *)(dst + 48), v_g1); 3078 _mm_storeu_si128((__m128i *)(dst + 64), v_b0); 3079 _mm_storeu_si128((__m128i *)(dst + 80), v_b1); 3080 3081 if (dcn == 4) 3082 { 3083 _mm_storeu_si128((__m128i *)(dst + 96), v_a0); 3084 _mm_storeu_si128((__m128i *)(dst + 112), v_a1); 3085 } 3086 } 3087 } 3088 3089 for ( ; i < n; i += 3, dst += dcn) 3090 { 3091 uchar Y = src[i]; 3092 uchar Cr = src[i+1]; 3093 uchar Cb = src[i+2]; 3094 3095 int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift); 3096 int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift); 3097 int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift); 3098 3099 dst[bidx] = saturate_cast<uchar>(b); 3100 dst[1] = saturate_cast<uchar>(g); 3101 dst[bidx^2] = saturate_cast<uchar>(r); 3102 if( dcn == 4 ) 3103 dst[3] = alpha; 3104 } 3105 } 3106 int dstcn, blueIdx; 3107 int coeffs[4]; 3108 bool useSSE, haveSIMD; 3109 3110 __m128i 
    v_c0, v_c1, v_c2, v_c3, v_delta2;
    __m128i v_delta, v_alpha, v_zero;
};

#endif // CV_SSE2

////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////

// sRGB -> CIE XYZ conversion matrix for the D65 white point (row-major 3x3).
static const float sRGB2XYZ_D65[] =
{
    0.412453f, 0.357580f, 0.180423f,
    0.212671f, 0.715160f, 0.072169f,
    0.019334f, 0.119193f, 0.950227f
};

// Inverse matrix: CIE XYZ -> sRGB for the D65 white point (row-major 3x3).
static const float XYZ2sRGB_D65[] =
{
    3.240479f, -1.53715f, -0.498535f,
    -0.969256f, 1.875991f, 0.041556f,
    0.055648f, -0.204043f, 1.057311f
};

// Generic scalar RGB -> XYZ functor for floating-point channel types.
// _coeffs, when non-null, overrides the default sRGB/D65 matrix.
// blueIdx == 0 marks BGR input, handled by swapping the R and B columns.
template<typename _Tp> struct RGB2XYZ_f
{
    typedef _Tp channel_type;

    RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
        if(blueIdx == 0)
        {
            // BGR input: swap columns 0 and 2 of every matrix row.
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }
    }
    // src: n pixels with srccn channels (4th channel ignored);
    // dst: n packed XYZ triplets.
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int scn = srccn;
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];

        n *= 3;
        for(int i = 0; i < n; i += 3, src += scn)
        {
            _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
            _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5);
            _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8);
            dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
        }
    }
    int srccn;
    float coeffs[9];
};

#if CV_NEON

// NEON specialization of RGB -> XYZ for float images (4 pixels per step).
template <>
struct RGB2XYZ_f<float>
{
    typedef float channel_type;

    RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        memcpy(coeffs, _coeffs ?
            _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
        if(blueIdx == 0)
        {
            // BGR input: swap columns 0 and 2 of every matrix row.
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }

        // Broadcast the nine matrix coefficients into NEON registers once.
        v_c0 = vdupq_n_f32(coeffs[0]);
        v_c1 = vdupq_n_f32(coeffs[1]);
        v_c2 = vdupq_n_f32(coeffs[2]);
        v_c3 = vdupq_n_f32(coeffs[3]);
        v_c4 = vdupq_n_f32(coeffs[4]);
        v_c5 = vdupq_n_f32(coeffs[5]);
        v_c6 = vdupq_n_f32(coeffs[6]);
        v_c7 = vdupq_n_f32(coeffs[7]);
        v_c8 = vdupq_n_f32(coeffs[8]);
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int scn = srccn, i = 0;
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];

        n *= 3;

        if (scn == 3)
            // 3-channel input: 4 pixels = 12 floats per iteration.
            for ( ; i <= n - 12; i += 12, src += 12)
            {
                float32x4x3_t v_src = vld3q_f32(src), v_dst;
                v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
                v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5);
                v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8);
                vst3q_f32(dst + i, v_dst);
            }
        else
            // 4-channel input: 4 pixels = 16 floats read, alpha discarded.
            for ( ; i <= n - 12; i += 12, src += 16)
            {
                float32x4x4_t v_src = vld4q_f32(src);
                float32x4x3_t v_dst;
                v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
                v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5);
                v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8);
                vst3q_f32(dst + i, v_dst);
            }

        // Scalar tail.
        for ( ; i < n; i += 3, src += scn)
        {
            float X = saturate_cast<float>(src[0]*C0 + src[1]*C1 + src[2]*C2);
            float Y = saturate_cast<float>(src[0]*C3 + src[1]*C4 + src[2]*C5);
            float Z = saturate_cast<float>(src[0]*C6 + src[1]*C7 + src[2]*C8);
            dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
        }
    }

    int srccn;
    float coeffs[9];
    float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
};

#elif CV_SSE2

// SSE2 specialization of RGB -> XYZ for float images (8 pixels per step).
template <>
struct RGB2XYZ_f<float>
{
    typedef float channel_type;

    RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
        if(blueIdx == 0)
        {
            // BGR input: swap columns 0 and 2 of every matrix row.
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }

        v_c0 = _mm_set1_ps(coeffs[0]);
        v_c1 = _mm_set1_ps(coeffs[1]);
        v_c2 = _mm_set1_ps(coeffs[2]);
        v_c3 = _mm_set1_ps(coeffs[3]);
        v_c4 = _mm_set1_ps(coeffs[4]);
        v_c5 = _mm_set1_ps(coeffs[5]);
        v_c6 = _mm_set1_ps(coeffs[6]);
        v_c7 = _mm_set1_ps(coeffs[7]);
        v_c8 = _mm_set1_ps(coeffs[8]);

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Apply the 3x3 matrix to 4 deinterleaved pixels.
    void process(__m128 v_r, __m128 v_g, __m128 v_b,
                 __m128 & v_x, __m128 & v_y, __m128 & v_z) const
    {
        v_x = _mm_mul_ps(v_r, v_c0);
        v_x = _mm_add_ps(v_x, _mm_mul_ps(v_g, v_c1));
        v_x = _mm_add_ps(v_x, _mm_mul_ps(v_b, v_c2));

        v_y = _mm_mul_ps(v_r, v_c3);
        v_y = _mm_add_ps(v_y, _mm_mul_ps(v_g, v_c4));
        v_y = _mm_add_ps(v_y, _mm_mul_ps(v_b, v_c5));

        v_z = _mm_mul_ps(v_r, v_c6);
        v_z = _mm_add_ps(v_z, _mm_mul_ps(v_g, v_c7));
        v_z = _mm_add_ps(v_z, _mm_mul_ps(v_b, v_c8));
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int scn = srccn, i = 0;
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];

        n *= 3;

        if (haveSIMD)
        {
            // 8 pixels (24 output floats) per iteration.
            for ( ; i <= n - 24; i += 24, src += 8 * scn)
            {
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);

                if (scn == 4)
                {
                    // 4-channel input: deinterleave including alpha, then drop it.
                    __m128 v_a0 = _mm_loadu_ps(src + 24);
                    __m128 v_a1 = _mm_loadu_ps(src + 28);

                    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1,
                                        v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128 v_x0, v_y0, v_z0;
                process(v_r0, v_g0, v_b0,
                        v_x0, v_y0, v_z0);

                __m128 v_x1, v_y1, v_z1;
                process(v_r1, v_g1, v_b1,
                        v_x1, v_y1, v_z1);

                _mm_interleave_ps(v_x0, v_x1, v_y0, v_y1, v_z0, v_z1);

                _mm_storeu_ps(dst + i, v_x0);
                _mm_storeu_ps(dst + i + 4, v_x1);
                _mm_storeu_ps(dst + i + 8, v_y0);
                _mm_storeu_ps(dst + i + 12, v_y1);
                _mm_storeu_ps(dst + i + 16, v_z0);
                _mm_storeu_ps(dst + i + 20, v_z1);
            }
        }

        // Scalar tail.
        for ( ; i < n; i += 3, src += scn)
        {
            float X = saturate_cast<float>(src[0]*C0 + src[1]*C1 + src[2]*C2);
            float Y = saturate_cast<float>(src[0]*C3 + src[1]*C4 + src[2]*C5);
            float Z = saturate_cast<float>(src[0]*C6 + src[1]*C7 + src[2]*C8);
            dst[i] = X; dst[i+1] = Y; dst[i+2] = Z;
        }
    }

    int srccn;
    float coeffs[9];
    __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    bool haveSIMD;
};


#endif

// Generic fixed-point RGB -> XYZ functor for integer channel types.
// Coefficients are the conversion matrix pre-scaled by 2^xyz_shift;
// float _coeffs, when given, are rounded into that fixed-point form.
template<typename _Tp> struct RGB2XYZ_i
{
    typedef _Tp channel_type;

    RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        // Default sRGB/D65 matrix already scaled by 2^xyz_shift.
        static const int coeffs0[] =
        {
            1689, 1465, 739,
            871, 2929, 296,
            79, 488, 3892
        };
        for( int i = 0; i < 9; i++ )
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
        if(blueIdx == 0)
        {
            // BGR input: swap columns 0 and 2 of every matrix row.
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }
    }
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int scn = srccn;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;

        for(int i = 0; i < n; i += 3, src += scn)
        {
            int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
            int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
            int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
            dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y);
            dst[i+2] = saturate_cast<_Tp>(Z);
        }
    }
    int srccn;
    int coeffs[9];
};

#if CV_NEON

// NEON specialization of fixed-point RGB -> XYZ for 8-bit images.
template <>
struct RGB2XYZ_i<uchar>
{
    typedef uchar channel_type;

    RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        // Default sRGB/D65 matrix already scaled by 2^xyz_shift.
        static const int coeffs0[] =
        {
            1689, 1465, 739,
            871, 2929, 296,
            79, 488, 3892
        };
        for( int i = 0; i < 9; i++ )
            coeffs[i] = _coeffs ?
                cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
        if(blueIdx == 0)
        {
            // BGR input: swap columns 0 and 2 of every matrix row.
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }

        v_c0 = vdup_n_u16(coeffs[0]);
        v_c1 = vdup_n_u16(coeffs[1]);
        v_c2 = vdup_n_u16(coeffs[2]);
        v_c3 = vdup_n_u16(coeffs[3]);
        v_c4 = vdup_n_u16(coeffs[4]);
        v_c5 = vdup_n_u16(coeffs[5]);
        v_c6 = vdup_n_u16(coeffs[6]);
        v_c7 = vdup_n_u16(coeffs[7]);
        v_c8 = vdup_n_u16(coeffs[8]);
        // Rounding bias added before the right shift by xyz_shift.
        v_delta = vdupq_n_u32(1 << (xyz_shift - 1));
    }
    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;

        // 8 pixels (24 output bytes) per iteration.
        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint8x8x3_t v_dst;
            uint16x8x3_t v_src16;

            if (scn == 3)
            {
                uint8x8x3_t v_src = vld3_u8(src);
                v_src16.val[0] = vmovl_u8(v_src.val[0]);
                v_src16.val[1] = vmovl_u8(v_src.val[1]);
                v_src16.val[2] = vmovl_u8(v_src.val[2]);
            }
            else
            {
                // 4-channel input: alpha discarded.
                uint8x8x4_t v_src = vld4_u8(src);
                v_src16.val[0] = vmovl_u8(v_src.val[0]);
                v_src16.val[1] = vmovl_u8(v_src.val[1]);
                v_src16.val[2] = vmovl_u8(v_src.val[2]);
            }

            // Low 4 pixels.
            uint16x4_t v_s0 = vget_low_u16(v_src16.val[0]),
                       v_s1 = vget_low_u16(v_src16.val[1]),
                       v_s2 = vget_low_u16(v_src16.val[2]);

            uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift);

            // High 4 pixels.
            v_s0 = vget_high_u16(v_src16.val[0]),
            v_s1 = vget_high_u16(v_src16.val[1]),
            v_s2 = vget_high_u16(v_src16.val[2]);

            uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift);

            // Narrow u32 -> u16 -> u8 (saturating on the final step).
            v_dst.val[0] = vqmovn_u16(vcombine_u16(vmovn_u32(v_X0), vmovn_u32(v_X1)));
            v_dst.val[1] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Y0), vmovn_u32(v_Y1)));
            v_dst.val[2] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Z0), vmovn_u32(v_Z1)));

            vst3_u8(dst + i, v_dst);
        }

        // Scalar tail.
        for ( ; i < n; i += 3, src += scn)
        {
            int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
            int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
            int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
            dst[i] = saturate_cast<uchar>(X);
            dst[i+1] = saturate_cast<uchar>(Y);
            dst[i+2] = saturate_cast<uchar>(Z);
        }
    }

    int srccn, coeffs[9];
    uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    uint32x4_t v_delta;
};

// NEON specialization of fixed-point RGB -> XYZ for 16-bit images.
template <>
struct RGB2XYZ_i<ushort>
{
    typedef ushort channel_type;

    RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        // Default sRGB/D65 matrix already scaled by 2^xyz_shift.
        static const int coeffs0[] =
        {
            1689, 1465, 739,
            871, 2929, 296,
            79, 488, 3892
        };
        for( int i = 0; i < 9; i++ )
            coeffs[i] = _coeffs ?
                cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
        if(blueIdx == 0)
        {
            // BGR input: swap columns 0 and 2 of every matrix row.
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }

        v_c0 = vdup_n_u16(coeffs[0]);
        v_c1 = vdup_n_u16(coeffs[1]);
        v_c2 = vdup_n_u16(coeffs[2]);
        v_c3 = vdup_n_u16(coeffs[3]);
        v_c4 = vdup_n_u16(coeffs[4]);
        v_c5 = vdup_n_u16(coeffs[5]);
        v_c6 = vdup_n_u16(coeffs[6]);
        v_c7 = vdup_n_u16(coeffs[7]);
        v_c8 = vdup_n_u16(coeffs[8]);
        // Rounding bias added before the right shift by xyz_shift.
        v_delta = vdupq_n_u32(1 << (xyz_shift - 1));
    }

    void operator()(const ushort * src, ushort * dst, int n) const
    {
        int scn = srccn, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;

        // Main loop: 8 pixels per iteration.
        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint16x8x3_t v_src, v_dst;

            if (scn == 3)
                v_src = vld3q_u16(src);
            else
            {
                // 4-channel input: alpha discarded.
                uint16x8x4_t v_src4 = vld4q_u16(src);
                v_src.val[0] = v_src4.val[0];
                v_src.val[1] = v_src4.val[1];
                v_src.val[2] = v_src4.val[2];
            }

            // Low 4 pixels.
            uint16x4_t v_s0 = vget_low_u16(v_src.val[0]),
                       v_s1 = vget_low_u16(v_src.val[1]),
                       v_s2 = vget_low_u16(v_src.val[2]);

            uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift);

            // High 4 pixels.
            v_s0 = vget_high_u16(v_src.val[0]),
            v_s1 = vget_high_u16(v_src.val[1]),
            v_s2 = vget_high_u16(v_src.val[2]);

            uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift);

            // Saturating narrow u32 -> u16.
            v_dst.val[0] = vcombine_u16(vqmovn_u32(v_X0), vqmovn_u32(v_X1));
            v_dst.val[1] = vcombine_u16(vqmovn_u32(v_Y0), vqmovn_u32(v_Y1));
            v_dst.val[2] = vcombine_u16(vqmovn_u32(v_Z0), vqmovn_u32(v_Z1));

            vst3q_u16(dst + i, v_dst);
        }

        // Secondary loop: 4 pixels per iteration.
        for ( ; i <= n - 12; i += 12, src += scn * 4)
        {
            uint16x4x3_t v_dst;
            uint16x4_t v_s0, v_s1, v_s2;

            if (scn == 3)
            {
                uint16x4x3_t v_src = vld3_u16(src);
                v_s0 = v_src.val[0];
                v_s1 = v_src.val[1];
                v_s2 = v_src.val[2];
            }
            else
            {
                uint16x4x4_t v_src = vld4_u16(src);
                v_s0 = v_src.val[0];
                v_s1 = v_src.val[1];
                v_s2 = v_src.val[2];
            }

            uint32x4_t v_X = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);

            v_dst.val[0] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_X, v_delta), xyz_shift));
            v_dst.val[1] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Y, v_delta), xyz_shift));
            v_dst.val[2] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Z, v_delta), xyz_shift));

            vst3_u16(dst + i, v_dst);
        }

        // Scalar tail.
        for ( ; i < n; i += 3, src += scn)
        {
            int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
            int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
            int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
            dst[i] = saturate_cast<ushort>(X);
            dst[i+1] = saturate_cast<ushort>(Y);
            dst[i+2] =
                saturate_cast<ushort>(Z);
        }
    }

    int srccn, coeffs[9];
    uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    uint32x4_t v_delta;
};

#endif

// Generic scalar XYZ -> BGR/RGB functor for floating-point channel types.
// _coeffs, when non-null, overrides the default XYZ->sRGB/D65 matrix.
// blueIdx == 0 marks BGR output, handled by swapping matrix rows 0 and 2.
template<typename _Tp> struct XYZ2RGB_f
{
    typedef _Tp channel_type;

    XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));
        if(blueIdx == 0)
        {
            // Swap the matrix rows that produce the first and third channels.
            std::swap(coeffs[0], coeffs[6]);
            std::swap(coeffs[1], coeffs[7]);
            std::swap(coeffs[2], coeffs[8]);
        }
    }

    // src: n packed XYZ triplets; dst: n pixels with dstcn channels
    // (4th channel filled with the max value when dstcn == 4).
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int dcn = dstcn;
        _Tp alpha = ColorChannel<_Tp>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;
        for(int i = 0; i < n; i += 3, dst += dcn)
        {
            _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2);
            _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5);
            _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8);
            dst[0] = B; dst[1] = G; dst[2] = R;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[9];
};

#if CV_SSE2

// SSE2 specialization of XYZ -> BGR/RGB for float images (8 pixels per step).
template <>
struct XYZ2RGB_f<float>
{
    typedef float channel_type;

    XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        memcpy(coeffs, _coeffs ?
            _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));
        if(blueIdx == 0)
        {
            // Swap the matrix rows that produce the first and third channels.
            std::swap(coeffs[0], coeffs[6]);
            std::swap(coeffs[1], coeffs[7]);
            std::swap(coeffs[2], coeffs[8]);
        }

        v_c0 = _mm_set1_ps(coeffs[0]);
        v_c1 = _mm_set1_ps(coeffs[1]);
        v_c2 = _mm_set1_ps(coeffs[2]);
        v_c3 = _mm_set1_ps(coeffs[3]);
        v_c4 = _mm_set1_ps(coeffs[4]);
        v_c5 = _mm_set1_ps(coeffs[5]);
        v_c6 = _mm_set1_ps(coeffs[6]);
        v_c7 = _mm_set1_ps(coeffs[7]);
        v_c8 = _mm_set1_ps(coeffs[8]);

        v_alpha = _mm_set1_ps(ColorChannel<float>::max());

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Apply the 3x3 matrix to 4 deinterleaved XYZ pixels.
    void process(__m128 v_x, __m128 v_y, __m128 v_z,
                 __m128 & v_r, __m128 & v_g, __m128 & v_b) const
    {
        v_b = _mm_mul_ps(v_x, v_c0);
        v_b = _mm_add_ps(v_b, _mm_mul_ps(v_y, v_c1));
        v_b = _mm_add_ps(v_b, _mm_mul_ps(v_z, v_c2));

        v_g = _mm_mul_ps(v_x, v_c3);
        v_g = _mm_add_ps(v_g, _mm_mul_ps(v_y, v_c4));
        v_g = _mm_add_ps(v_g, _mm_mul_ps(v_z, v_c5));

        v_r = _mm_mul_ps(v_x, v_c6);
        v_r = _mm_add_ps(v_r, _mm_mul_ps(v_y, v_c7));
        v_r = _mm_add_ps(v_r, _mm_mul_ps(v_z, v_c8));
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int dcn = dstcn;
        float alpha = ColorChannel<float>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;
        int i = 0;

        if (haveSIMD)
        {
            // 8 pixels (24 input floats) per iteration.
            for ( ; i <= n - 24; i += 24, dst += 8 * dcn)
            {
                __m128 v_x0 = _mm_loadu_ps(src + i);
                __m128 v_x1 = _mm_loadu_ps(src + i + 4);
                __m128 v_y0 = _mm_loadu_ps(src + i + 8);
                __m128 v_y1 = _mm_loadu_ps(src + i + 12);
                __m128 v_z0 = _mm_loadu_ps(src + i + 16);
                __m128 v_z1 = _mm_loadu_ps(src + i + 20);

                _mm_deinterleave_ps(v_x0, v_x1, v_y0, v_y1, v_z0, v_z1);

                __m128 v_r0, v_g0, v_b0;
                process(v_x0, v_y0, v_z0,
                        v_r0, v_g0, v_b0);

                __m128 v_r1, v_g1, v_b1;
                process(v_x1, v_y1, v_z1,
                        v_r1, v_g1, v_b1);

                __m128 v_a0 = v_alpha, v_a1 = v_alpha;

                if (dcn == 4)
                    _mm_interleave_ps(v_b0, v_b1, v_g0, v_g1,
                                      v_r0, v_r1, v_a0, v_a1);
                else
                    _mm_interleave_ps(v_b0, v_b1, v_g0, v_g1, v_r0, v_r1);

                _mm_storeu_ps(dst, v_b0);
                _mm_storeu_ps(dst + 4, v_b1);
                _mm_storeu_ps(dst + 8, v_g0);
                _mm_storeu_ps(dst + 12, v_g1);
                _mm_storeu_ps(dst + 16, v_r0);
                _mm_storeu_ps(dst + 20, v_r1);

                if (dcn == 4)
                {
                    _mm_storeu_ps(dst + 24, v_a0);
                    _mm_storeu_ps(dst + 28, v_a1);
                }
            }

        }

        // Scalar tail.
        for( ; i < n; i += 3, dst += dcn)
        {
            float B = src[i]*C0 + src[i+1]*C1 + src[i+2]*C2;
            float G = src[i]*C3 + src[i+1]*C4 + src[i+2]*C5;
            float R = src[i]*C6 + src[i+1]*C7 + src[i+2]*C8;
            dst[0] = B; dst[1] = G; dst[2] = R;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[9];

    __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    __m128 v_alpha;
    bool haveSIMD;
};

#endif // CV_SSE2


// Generic fixed-point XYZ -> BGR/RGB functor for integer channel types.
// Default coefficients are the XYZ->sRGB matrix pre-scaled by 2^xyz_shift.
template<typename _Tp> struct XYZ2RGB_i
{
    typedef _Tp channel_type;

    XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] =
        {
            13273, -6296, -2042,
            -3970, 7684, 170,
            228, -836, 4331
        };
        // NOTE(review): unlike RGB2XYZ_i, _coeffs here is an int*, yet it is
        // still multiplied by 2^xyz_shift -- callers apparently pass unscaled
        // values; confirm against the call site.
        for(int i = 0; i < 9; i++)
            coeffs[i] = _coeffs ?
                cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];

        if(blueIdx == 0)
        {
            // Swap the matrix rows that produce the first and third channels.
            std::swap(coeffs[0], coeffs[6]);
            std::swap(coeffs[1], coeffs[7]);
            std::swap(coeffs[2], coeffs[8]);
        }
    }
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int dcn = dstcn;
        _Tp alpha = ColorChannel<_Tp>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;
        for(int i = 0; i < n; i += 3, dst += dcn)
        {
            int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
            int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
            int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
            dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G);
            dst[2] = saturate_cast<_Tp>(R);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[9];
};

#if CV_NEON

// NEON specialization of fixed-point XYZ -> BGR/RGB for 8-bit images.
template <>
struct XYZ2RGB_i<uchar>
{
    typedef uchar channel_type;

    XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] =
        {
            13273, -6296, -2042,
            -3970, 7684, 170,
            228, -836, 4331
        };
        // NOTE(review): int* _coeffs scaled by 2^xyz_shift, as in the generic
        // version above -- confirm callers pass unscaled values.
        for(int i = 0; i < 9; i++)
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];

        if(blueIdx == 0)
        {
            // Swap the matrix rows that produce the first and third channels.
            std::swap(coeffs[0], coeffs[6]);
            std::swap(coeffs[1], coeffs[7]);
            std::swap(coeffs[2], coeffs[8]);
        }

        v_c0 = vdup_n_s16(coeffs[0]);
        v_c1 = vdup_n_s16(coeffs[1]);
        v_c2 = vdup_n_s16(coeffs[2]);
        v_c3 = vdup_n_s16(coeffs[3]);
        v_c4 = vdup_n_s16(coeffs[4]);
        v_c5 = vdup_n_s16(coeffs[5]);
        v_c6 = vdup_n_s16(coeffs[6]);
        v_c7 = vdup_n_s16(coeffs[7]);
        v_c8 = vdup_n_s16(coeffs[8]);
        // Rounding bias added before the arithmetic right shift by xyz_shift.
        v_delta = vdupq_n_s32(1 << (xyz_shift - 1));
        v_alpha = vmovn_u16(vdupq_n_u16(ColorChannel<uchar>::max()));
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int dcn = dstcn, i = 0;
        uchar alpha = ColorChannel<uchar>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;

        // 8 pixels (24 input bytes) per iteration; signed 16-bit lanes are
        // used because the matrix has negative coefficients.
        for ( ; i <= n - 24; i += 24, dst += dcn * 8)
        {
            uint8x8x3_t v_src = vld3_u8(src + i);
            int16x8x3_t v_src16;
            v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
            v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
            v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));

            // Low 4 pixels.
            int16x4_t v_s0 = vget_low_s16(v_src16.val[0]),
                      v_s1 = vget_low_s16(v_src16.val[1]),
                      v_s2 = vget_low_s16(v_src16.val[2]);

            int32x4_t v_X0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift);

            // High 4 pixels.
            v_s0 = vget_high_s16(v_src16.val[0]),
            v_s1 = vget_high_s16(v_src16.val[1]),
            v_s2 = vget_high_s16(v_src16.val[2]);

            int32x4_t v_X1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift);

            // Saturating narrow s32 -> s16 -> u8.
            uint8x8_t v_b = vqmovun_s16(vcombine_s16(vqmovn_s32(v_X0), vqmovn_s32(v_X1)));
            uint8x8_t v_g = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
            uint8x8_t v_r = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Z0), vqmovn_s32(v_Z1)));

            if (dcn == 3)
            {
                uint8x8x3_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                vst3_u8(dst, v_dst);
            }
            else
            {
                uint8x8x4_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                v_dst.val[3] = v_alpha;
                vst4_u8(dst, v_dst);
            }
        }

        // Scalar tail.
        for ( ; i < n; i += 3, dst += dcn)
        {
            int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
            int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
            int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
            dst[0] = saturate_cast<uchar>(B); dst[1] = saturate_cast<uchar>(G);
            dst[2] = saturate_cast<uchar>(R);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[9];

    int16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    uint8x8_t v_alpha;
    int32x4_t v_delta;
};

// NEON specialization of fixed-point XYZ -> BGR/RGB for 16-bit images.
template <>
struct XYZ2RGB_i<ushort>
{
    typedef ushort channel_type;

    XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[]
= 3973 { 3974 13273, -6296, -2042, 3975 -3970, 7684, 170, 3976 228, -836, 4331 3977 }; 3978 for(int i = 0; i < 9; i++) 3979 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; 3980 3981 if(blueIdx == 0) 3982 { 3983 std::swap(coeffs[0], coeffs[6]); 3984 std::swap(coeffs[1], coeffs[7]); 3985 std::swap(coeffs[2], coeffs[8]); 3986 } 3987 3988 v_c0 = vdupq_n_s32(coeffs[0]); 3989 v_c1 = vdupq_n_s32(coeffs[1]); 3990 v_c2 = vdupq_n_s32(coeffs[2]); 3991 v_c3 = vdupq_n_s32(coeffs[3]); 3992 v_c4 = vdupq_n_s32(coeffs[4]); 3993 v_c5 = vdupq_n_s32(coeffs[5]); 3994 v_c6 = vdupq_n_s32(coeffs[6]); 3995 v_c7 = vdupq_n_s32(coeffs[7]); 3996 v_c8 = vdupq_n_s32(coeffs[8]); 3997 v_delta = vdupq_n_s32(1 << (xyz_shift - 1)); 3998 v_alpha = vdupq_n_u16(ColorChannel<ushort>::max()); 3999 v_alpha2 = vget_low_u16(v_alpha); 4000 } 4001 4002 void operator()(const ushort* src, ushort* dst, int n) const 4003 { 4004 int dcn = dstcn, i = 0; 4005 ushort alpha = ColorChannel<ushort>::max(); 4006 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 4007 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 4008 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 4009 n *= 3; 4010 4011 for ( ; i <= n - 24; i += 24, dst += dcn * 8) 4012 { 4013 uint16x8x3_t v_src = vld3q_u16(src + i); 4014 int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))), 4015 v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))), 4016 v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2]))); 4017 4018 int32x4_t v_X0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 4019 int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 4020 int32x4_t v_Z0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 4021 v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift); 4022 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift); 4023 v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift); 4024 4025 v_s0 = 
vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))); 4026 v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1]))); 4027 v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2]))); 4028 4029 int32x4_t v_X1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 4030 int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 4031 int32x4_t v_Z1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 4032 v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift); 4033 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift); 4034 v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift); 4035 4036 uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_X0), vqmovun_s32(v_X1)); 4037 uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1)); 4038 uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_Z0), vqmovun_s32(v_Z1)); 4039 4040 if (dcn == 3) 4041 { 4042 uint16x8x3_t v_dst; 4043 v_dst.val[0] = v_b; 4044 v_dst.val[1] = v_g; 4045 v_dst.val[2] = v_r; 4046 vst3q_u16(dst, v_dst); 4047 } 4048 else 4049 { 4050 uint16x8x4_t v_dst; 4051 v_dst.val[0] = v_b; 4052 v_dst.val[1] = v_g; 4053 v_dst.val[2] = v_r; 4054 v_dst.val[3] = v_alpha; 4055 vst4q_u16(dst, v_dst); 4056 } 4057 } 4058 4059 for ( ; i <= n - 12; i += 12, dst += dcn * 4) 4060 { 4061 uint16x4x3_t v_src = vld3_u16(src + i); 4062 int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])), 4063 v_s1 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])), 4064 v_s2 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2])); 4065 4066 int32x4_t v_X = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 4067 int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 4068 int32x4_t v_Z = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 4069 v_X = vshrq_n_s32(vaddq_s32(v_X, v_delta), xyz_shift); 4070 v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta), xyz_shift); 4071 v_Z = vshrq_n_s32(vaddq_s32(v_Z, v_delta), 
xyz_shift); 4072 4073 uint16x4_t v_b = vqmovun_s32(v_X); 4074 uint16x4_t v_g = vqmovun_s32(v_Y); 4075 uint16x4_t v_r = vqmovun_s32(v_Z); 4076 4077 if (dcn == 3) 4078 { 4079 uint16x4x3_t v_dst; 4080 v_dst.val[0] = v_b; 4081 v_dst.val[1] = v_g; 4082 v_dst.val[2] = v_r; 4083 vst3_u16(dst, v_dst); 4084 } 4085 else 4086 { 4087 uint16x4x4_t v_dst; 4088 v_dst.val[0] = v_b; 4089 v_dst.val[1] = v_g; 4090 v_dst.val[2] = v_r; 4091 v_dst.val[3] = v_alpha2; 4092 vst4_u16(dst, v_dst); 4093 } 4094 } 4095 4096 for ( ; i < n; i += 3, dst += dcn) 4097 { 4098 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift); 4099 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift); 4100 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift); 4101 dst[0] = saturate_cast<ushort>(B); dst[1] = saturate_cast<ushort>(G); 4102 dst[2] = saturate_cast<ushort>(R); 4103 if( dcn == 4 ) 4104 dst[3] = alpha; 4105 } 4106 } 4107 int dstcn, blueIdx; 4108 int coeffs[9]; 4109 4110 int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8, v_delta; 4111 uint16x4_t v_alpha2; 4112 uint16x8_t v_alpha; 4113 }; 4114 4115 #endif 4116 4117 ////////////////////////////////////// RGB <-> HSV /////////////////////////////////////// 4118 4119 4120 struct RGB2HSV_b 4121 { 4122 typedef uchar channel_type; 4123 4124 RGB2HSV_b(int _srccn, int _blueIdx, int _hrange) 4125 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) 4126 { 4127 CV_Assert( hrange == 180 || hrange == 256 ); 4128 } 4129 4130 void operator()(const uchar* src, uchar* dst, int n) const 4131 { 4132 int i, bidx = blueIdx, scn = srccn; 4133 const int hsv_shift = 12; 4134 4135 static int sdiv_table[256]; 4136 static int hdiv_table180[256]; 4137 static int hdiv_table256[256]; 4138 static volatile bool initialized = false; 4139 4140 int hr = hrange; 4141 const int* hdiv_table = hr == 180 ? 
hdiv_table180 : hdiv_table256; 4142 n *= 3; 4143 4144 if( !initialized ) 4145 { 4146 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0; 4147 for( i = 1; i < 256; i++ ) 4148 { 4149 sdiv_table[i] = saturate_cast<int>((255 << hsv_shift)/(1.*i)); 4150 hdiv_table180[i] = saturate_cast<int>((180 << hsv_shift)/(6.*i)); 4151 hdiv_table256[i] = saturate_cast<int>((256 << hsv_shift)/(6.*i)); 4152 } 4153 initialized = true; 4154 } 4155 4156 for( i = 0; i < n; i += 3, src += scn ) 4157 { 4158 int b = src[bidx], g = src[1], r = src[bidx^2]; 4159 int h, s, v = b; 4160 int vmin = b, diff; 4161 int vr, vg; 4162 4163 CV_CALC_MAX_8U( v, g ); 4164 CV_CALC_MAX_8U( v, r ); 4165 CV_CALC_MIN_8U( vmin, g ); 4166 CV_CALC_MIN_8U( vmin, r ); 4167 4168 diff = v - vmin; 4169 vr = v == r ? -1 : 0; 4170 vg = v == g ? -1 : 0; 4171 4172 s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift; 4173 h = (vr & (g - b)) + 4174 (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff)))); 4175 h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift; 4176 h += h < 0 ? 
hr : 0; 4177 4178 dst[i] = saturate_cast<uchar>(h); 4179 dst[i+1] = (uchar)s; 4180 dst[i+2] = (uchar)v; 4181 } 4182 } 4183 4184 int srccn, blueIdx, hrange; 4185 }; 4186 4187 4188 struct RGB2HSV_f 4189 { 4190 typedef float channel_type; 4191 4192 RGB2HSV_f(int _srccn, int _blueIdx, float _hrange) 4193 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {} 4194 4195 void operator()(const float* src, float* dst, int n) const 4196 { 4197 int i, bidx = blueIdx, scn = srccn; 4198 float hscale = hrange*(1.f/360.f); 4199 n *= 3; 4200 4201 for( i = 0; i < n; i += 3, src += scn ) 4202 { 4203 float b = src[bidx], g = src[1], r = src[bidx^2]; 4204 float h, s, v; 4205 4206 float vmin, diff; 4207 4208 v = vmin = r; 4209 if( v < g ) v = g; 4210 if( v < b ) v = b; 4211 if( vmin > g ) vmin = g; 4212 if( vmin > b ) vmin = b; 4213 4214 diff = v - vmin; 4215 s = diff/(float)(fabs(v) + FLT_EPSILON); 4216 diff = (float)(60./(diff + FLT_EPSILON)); 4217 if( v == r ) 4218 h = (g - b)*diff; 4219 else if( v == g ) 4220 h = (b - r)*diff + 120.f; 4221 else 4222 h = (r - g)*diff + 240.f; 4223 4224 if( h < 0 ) h += 360.f; 4225 4226 dst[i] = h*hscale; 4227 dst[i+1] = s; 4228 dst[i+2] = v; 4229 } 4230 } 4231 4232 int srccn, blueIdx; 4233 float hrange; 4234 }; 4235 4236 4237 struct HSV2RGB_f 4238 { 4239 typedef float channel_type; 4240 4241 HSV2RGB_f(int _dstcn, int _blueIdx, float _hrange) 4242 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {} 4243 4244 void operator()(const float* src, float* dst, int n) const 4245 { 4246 int i, bidx = blueIdx, dcn = dstcn; 4247 float _hscale = hscale; 4248 float alpha = ColorChannel<float>::max(); 4249 n *= 3; 4250 4251 for( i = 0; i < n; i += 3, dst += dcn ) 4252 { 4253 float h = src[i], s = src[i+1], v = src[i+2]; 4254 float b, g, r; 4255 4256 if( s == 0 ) 4257 b = g = r = v; 4258 else 4259 { 4260 static const int sector_data[][3]= 4261 {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}}; 4262 float tab[4]; 4263 int sector; 4264 h *= _hscale; 4265 
if( h < 0 )
                    do h += 6; while( h < 0 );
                else if( h >= 6 )
                    do h -= 6; while( h >= 6 );
                sector = cvFloor(h);
                h -= sector;
                // Guard against rounding pushing the sector out of range.
                if( (unsigned)sector >= 6u )
                {
                    sector = 0;
                    h = 0.f;
                }

                tab[0] = v;
                tab[1] = v*(1.f - s);
                tab[2] = v*(1.f - s*h);
                tab[3] = v*(1.f - s*(1.f - h));

                b = tab[sector_data[sector][0]];
                g = tab[sector_data[sector][1]];
                r = tab[sector_data[sector][2]];
            }

            dst[bidx] = b;
            dst[1] = g;
            dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }

    int dstcn, blueIdx;
    float hscale;
};


// 8-bit HSV -> RGB: unpacks pixel blocks to float, delegates to the float
// converter (cvt), then packs the result back to uchar. SIMD paths exist
// for NEON and SSE2.
struct HSV2RGB_b
{
    typedef uchar channel_type;

    HSV2RGB_b(int _dstcn, int _blueIdx, int _hrange)
        : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
    {
#if CV_NEON
        v_scale_inv = vdupq_n_f32(1.f/255.f);
        v_scale = vdupq_n_f32(255.f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(1.f/255.f);
        v_scale = _mm_set1_ps(255.0f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // 16s x 8
    // Widen 8 packed 16-bit lanes per channel to float, prescale the second
    // and third channels to [0,1] (the first keeps its integer hue range),
    // then store interleaved triplets into buf.
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 float * buf) const
    {
        __m128 v_r0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_r, v_zero));
        __m128 v_g0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_g, v_zero));
        __m128 v_b0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_b, v_zero));

        __m128 v_r1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_r, v_zero));
        __m128 v_g1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_g, v_zero));
        __m128 v_b1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_b, v_zero));

        v_g0 = _mm_mul_ps(v_g0, v_scale_inv);
        v_b0 = _mm_mul_ps(v_b0, v_scale_inv);

        v_g1 = _mm_mul_ps(v_g1, v_scale_inv);
        v_b1 = _mm_mul_ps(v_b1, v_scale_inv);

        _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

        _mm_store_ps(buf, v_r0);
        _mm_store_ps(buf + 4, v_r1);
        _mm_store_ps(buf + 8, v_g0);
        _mm_store_ps(buf + 12, v_g1);
        _mm_store_ps(buf + 16, v_b0);
        _mm_store_ps(buf + 20, v_b1);
    }
#endif

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, dcn = dstcn;
        uchar alpha = ColorChannel<uchar>::max();
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        // Process in blocks so the float scratch buffer stays bounded.
        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

#if CV_NEON
            // Unpack 8 interleaved pixels to float; S and V scaled to [0,1].
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                uint8x8x3_t v_src = vld3_u8(src + j);
                uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
                           v_t1 = vmovl_u8(v_src.val[1]),
                           v_t2 = vmovl_u8(v_src.val[2]);

                float32x4x3_t v_dst;
                v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0)));
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0)));
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // 32 pixels (96 interleaved bytes) per iteration.
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
                    __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
                    __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
                    __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
                    __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
                    __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));

                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                    process(_mm_unpacklo_epi8(v_r0, v_zero),
                            _mm_unpacklo_epi8(v_g0, v_zero),
                            _mm_unpacklo_epi8(v_b0, v_zero),
                            buf + j);

                    process(_mm_unpackhi_epi8(v_r0, v_zero),
                            _mm_unpackhi_epi8(v_g0, v_zero),
                            _mm_unpackhi_epi8(v_b0, v_zero),
                            buf + j + 24);

                    process(_mm_unpacklo_epi8(v_r1, v_zero),
                            _mm_unpacklo_epi8(v_g1, v_zero),
                            _mm_unpacklo_epi8(v_b1, v_zero),
                            buf + j + 48);

                    process(_mm_unpackhi_epi8(v_r1, v_zero),
                            _mm_unpackhi_epi8(v_g1, v_zero),
                            _mm_unpackhi_epi8(v_b1, v_zero),
                            buf + j + 72);
                }
            }
#endif

            // Scalar unpack for the remainder of the block.
            for( ; j < dn*3; j += 3 )
            {
                buf[j] = src[j];
                buf[j+1] = src[j+1]*(1.f/255.f);
                buf[j+2] = src[j+2]*(1.f/255.f);
            }
            cvt(buf, buf, dn);  // float HSV -> float RGB, in place

            j = 0;
#if CV_NEON
            // Pack the float RGB block back to uchar, 8 pixels per iteration.
            for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
                uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));

                if (dcn == 4)
                {
                    uint8x8x4_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    v_dst.val[3] = v_alpha;
                    vst4_u8(dst, v_dst);
                }
                else
                {
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    vst3_u8(dst, v_dst);
                }
            }
#elif CV_SSE2
            if (dcn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
                {
__m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
                    __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
                    __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
                    __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);

                    // Round-convert to int32, pack to int16, then to uchar
                    // with unsigned saturation.
                    __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
                                                     _mm_cvtps_epi32(v_src1));
                    __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
                                                     _mm_cvtps_epi32(v_src3));

                    _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
                }

                // Step back to a multiple of 3 so the scalar loop below
                // resumes on a pixel boundary.
                int jr = j % 3;
                if (jr)
                    dst -= jr, j -= jr;
            }
#endif

            // Scalar pack for the remainder.
            for( ; j < dn*3; j += 3, dst += dcn )
            {
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
                if( dcn == 4 )
                    dst[3] = alpha;
            }
        }
    }

    int dstcn;
    HSV2RGB_f cvt;  // float converter applied to each unpacked block
#if CV_NEON
    float32x4_t v_scale, v_scale_inv;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale_inv, v_scale;
    __m128i v_zero;
    bool haveSIMD;
#endif
};


///////////////////////////////////// RGB <-> HLS ////////////////////////////////////////

// Float RGB -> HLS; hue computed in degrees, then scaled to the caller's
// hrange via hscale.
struct RGB2HLS_f
{
    typedef float channel_type;

    RGB2HLS_f(int _srccn, int _blueIdx, float _hrange)
        : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {}

    void operator()(const float* src, float* dst, int n) const
    {
        int i, bidx = blueIdx, scn = srccn;
        float hscale = hrange*(1.f/360.f);
        n *= 3;

        for( i = 0; i < n; i += 3, src += scn )
        {
            float b = src[bidx], g = src[1], r = src[bidx^2];
            float h = 0.f, s = 0.f, l;
            float vmin, vmax, diff;

            vmax = vmin = r;
            if( vmax < g ) vmax = g;
            if( vmax < b ) vmax = b;
            if( vmin > g ) vmin = g;
            if( vmin > b ) vmin = b;

            diff = vmax - vmin;
            l = (vmax + vmin)*0.5f;

            if( diff > FLT_EPSILON )
            {
                // Saturation is defined differently above/below mid lightness.
                s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
                diff = 60.f/diff;

                if( vmax == r )
                    h = (g - b)*diff;
                else if( vmax == g )
                    h = (b - r)*diff + 120.f;
                else
                    h = (r - g)*diff + 240.f;

                if( h < 0.f ) h += 360.f;
            }

            dst[i] = h*hscale;
            dst[i+1] = l;
            dst[i+2] = s;
        }
    }

    int srccn, blueIdx;
    float hrange;
};


// 8-bit RGB -> HLS: unpacks blocks to float in [0,1], delegates to
// RGB2HLS_f, then packs H (already in integer range) and L,S (scaled by
// 255) back to uchar.
struct RGB2HLS_b
{
    typedef uchar channel_type;

    RGB2HLS_b(int _srccn, int _blueIdx, int _hrange)
        : srccn(_srccn), cvt(3, _blueIdx, (float)_hrange)
    {
#if CV_NEON
        v_scale_inv = vdupq_n_f32(1.f/255.f);
        v_scale = vdupq_n_f32(255.f);
        // NOTE(review): v_alpha is initialized but never used below — the
        // HLS output in this path is always 3-channel.
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(1.f/255.f);
        v_scale = _mm_set1_ps(255.f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // Load 8 float HLS triplets from buf, deinterleave into planes, scale
    // L and S by 255, and pack each plane to signed 16-bit.
    void process(const float * buf,
                 __m128i & v_h, __m128i & v_l, __m128i & v_s) const
    {
        __m128 v_h0f = _mm_load_ps(buf);
        __m128 v_h1f = _mm_load_ps(buf + 4);
        __m128 v_l0f = _mm_load_ps(buf + 8);
        __m128 v_l1f = _mm_load_ps(buf + 12);
        __m128 v_s0f = _mm_load_ps(buf + 16);
        __m128 v_s1f = _mm_load_ps(buf + 20);

        _mm_deinterleave_ps(v_h0f, v_h1f, v_l0f, v_l1f, v_s0f, v_s1f);

        v_l0f = _mm_mul_ps(v_l0f, v_scale);
        v_l1f = _mm_mul_ps(v_l1f, v_scale);
        v_s0f = _mm_mul_ps(v_s0f, v_scale);
        v_s1f = _mm_mul_ps(v_s1f, v_scale);

        v_h = _mm_packs_epi32(_mm_cvtps_epi32(v_h0f), _mm_cvtps_epi32(v_h1f));
        v_l = _mm_packs_epi32(_mm_cvtps_epi32(v_l0f), _mm_cvtps_epi32(v_l1f));
        v_s = _mm_packs_epi32(_mm_cvtps_epi32(v_s0f), _mm_cvtps_epi32(v_s1f));
    }
#endif

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, scn = srccn;
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

#if CV_NEON
            // Unpack 8 pixels (3- or 4-channel source) to float in [0,1].
            for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn)
            {
                uint16x8_t v_t0, v_t1, v_t2;

                if (scn == 3)
                {
                    uint8x8x3_t v_src = vld3_u8(src);
                    v_t0 = vmovl_u8(v_src.val[0]);
                    v_t1 = vmovl_u8(v_src.val[1]);
                    v_t2 = vmovl_u8(v_src.val[2]);
                }
                else
                {
                    uint8x8x4_t v_src = vld4_u8(src);   // 4th channel dropped
                    v_t0 = vmovl_u8(v_src.val[0]);
                    v_t1 = vmovl_u8(v_src.val[1]);
                    v_t2 = vmovl_u8(v_src.val[2]);
                }

                float32x4x3_t v_dst;
                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            if (scn == 3 && haveSIMD)
            {
                // 16 bytes of interleaved RGB per iteration.
                for ( ; j <= (dn * 3 - 16); j += 16, src += 16)
                {
                    __m128i v_src = _mm_loadu_si128((__m128i const *)src);

                    __m128i v_src_p = _mm_unpacklo_epi8(v_src, v_zero);
                    _mm_store_ps(buf + j, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv));
                    _mm_store_ps(buf + j + 4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv));

                    v_src_p = _mm_unpackhi_epi8(v_src, v_zero);
                    _mm_store_ps(buf + j + 8, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv));
                    _mm_store_ps(buf + j + 12, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv));
                }

                // Realign j (and src) to a pixel boundary for the scalar loop.
                int jr = j % 3;
                if (jr)
                    src -= jr, j -= jr;
            }
#endif
            for( ; j < dn*3; j += 3, src += scn )
            {
                buf[j] = src[0]*(1.f/255.f);
                buf[j+1] = src[1]*(1.f/255.f);
                buf[j+2] = src[2]*(1.f/255.f);
            }
            cvt(buf, buf, dn);  // float RGB -> float HLS, in place

            j = 0;
#if CV_NEON
            // Pack: H unscaled, L and S scaled by 255.
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);

                uint8x8x3_t v_dst;
                v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(v_src0.val[0])),
                                                       vqmovn_u32(cv_vrndq_u32_f32(v_src1.val[0]))));
                v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                       vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                       vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));
                vst3_u8(dst + j, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_h_0, v_l_0, v_s_0;
                    process(buf + j,
                            v_h_0, v_l_0, v_s_0);

                    __m128i v_h_1, v_l_1, v_s_1;
                    process(buf + j + 24,
                            v_h_1, v_l_1, v_s_1);

                    __m128i v_h0 = _mm_packus_epi16(v_h_0, v_h_1);
                    __m128i v_l0 = _mm_packus_epi16(v_l_0, v_l_1);
                    __m128i v_s0 = _mm_packus_epi16(v_s_0, v_s_1);

                    process(buf + j + 48,
                            v_h_0, v_l_0, v_s_0);

                    process(buf + j + 72,
                            v_h_1, v_l_1, v_s_1);

                    __m128i v_h1 = _mm_packus_epi16(v_h_0, v_h_1);
                    __m128i v_l1 = _mm_packus_epi16(v_l_0, v_l_1);
                    __m128i v_s1 = _mm_packus_epi16(v_s_0, v_s_1);

                    _mm_interleave_epi8(v_h0, v_h1, v_l0, v_l1, v_s0, v_s1);

                    _mm_storeu_si128((__m128i *)(dst + j), v_h0);
                    _mm_storeu_si128((__m128i *)(dst + j + 16), v_h1);
                    _mm_storeu_si128((__m128i *)(dst + j + 32), v_l0);
_mm_storeu_si128((__m128i *)(dst + j + 48), v_l1);
                    _mm_storeu_si128((__m128i *)(dst + j + 64), v_s0);
                    _mm_storeu_si128((__m128i *)(dst + j + 80), v_s1);
                }
            }
#endif
            // Scalar pack: H already in range, L and S scaled to [0,255].
            for( ; j < dn*3; j += 3 )
            {
                dst[j] = saturate_cast<uchar>(buf[j]);
                dst[j+1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[j+2] = saturate_cast<uchar>(buf[j+2]*255.f);
            }
        }
    }

    int srccn;
    RGB2HLS_f cvt;  // float converter applied to each unpacked block
#if CV_NEON
    float32x4_t v_scale, v_scale_inv;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv;
    __m128i v_zero;
    bool haveSIMD;
#endif
};


// Float HLS -> RGB. hscale maps the caller's hue range onto six sectors.
struct HLS2RGB_f
{
    typedef float channel_type;

    HLS2RGB_f(int _dstcn, int _blueIdx, float _hrange)
        : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}

    void operator()(const float* src, float* dst, int n) const
    {
        int i, bidx = blueIdx, dcn = dstcn;
        float _hscale = hscale;
        float alpha = ColorChannel<float>::max();
        n *= 3;

        for( i = 0; i < n; i += 3, dst += dcn )
        {
            float h = src[i], l = src[i+1], s = src[i+2];
            float b, g, r;

            if( s == 0 )
                b = g = r = l;  // zero saturation: achromatic (gray) pixel
            else
            {
                // Per-sector permutation of tab[] into (b, g, r).
                static const int sector_data[][3]=
                    {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
                float tab[4];
                int sector;

                float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
                float p1 = 2*l - p2;

                h *= _hscale;
                // Wrap hue into [0, 6).
                if( h < 0 )
                    do h += 6; while( h < 0 );
                else if( h >= 6 )
                    do h -= 6; while( h >= 6 );

                assert( 0 <= h && h < 6 );
                sector = cvFloor(h);
                h -= sector;

                tab[0] = p2;
                tab[1] = p1;
                tab[2] = p1 + (p2 - p1)*(1-h);
                tab[3] = p1 + (p2 - p1)*h;

                b = tab[sector_data[sector][0]];
                g = tab[sector_data[sector][1]];
                r = tab[sector_data[sector][2]];
            }

            dst[bidx] = b;
            dst[1] = g;
            dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }

    int dstcn, blueIdx;
    float hscale;
};


// 8-bit HLS -> RGB: unpacks blocks to float, delegates to HLS2RGB_f,
// then packs the result back to uchar. SIMD paths for NEON and SSE2.
struct HLS2RGB_b
{
    typedef uchar channel_type;

    HLS2RGB_b(int _dstcn, int _blueIdx, int _hrange)
        : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
    {
#if CV_NEON
        v_scale_inv = vdupq_n_f32(1.f/255.f);
        v_scale = vdupq_n_f32(255.f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(1.f/255.f);
        v_scale = _mm_set1_ps(255.f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // 16s x 8
    // Widen 8 packed 16-bit lanes per channel to float, prescale the second
    // and third channels to [0,1] (the first keeps its integer hue range),
    // then store interleaved triplets into buf.
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 float * buf) const
    {
        __m128 v_r0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_r, v_zero));
        __m128 v_g0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_g, v_zero));
        __m128 v_b0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_b, v_zero));

        __m128 v_r1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_r, v_zero));
        __m128 v_g1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_g, v_zero));
        __m128 v_b1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_b, v_zero));

        v_g0 = _mm_mul_ps(v_g0, v_scale_inv);
        v_b0 = _mm_mul_ps(v_b0, v_scale_inv);

        v_g1 = _mm_mul_ps(v_g1, v_scale_inv);
        v_b1 = _mm_mul_ps(v_b1, v_scale_inv);

        _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

        _mm_store_ps(buf, v_r0);
        _mm_store_ps(buf + 4, v_r1);
        _mm_store_ps(buf + 8, v_g0);
        _mm_store_ps(buf + 12, v_g1);
        _mm_store_ps(buf + 16, v_b0);
        _mm_store_ps(buf + 20, v_b1);
    }
#endif

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, dcn = dstcn;
        uchar alpha = ColorChannel<uchar>::max();
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

#if CV_NEON
            // Unpack 8 interleaved pixels to float; L and S scaled to [0,1].
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                uint8x8x3_t v_src = vld3_u8(src + j);
                uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
                           v_t1 = vmovl_u8(v_src.val[1]),
                           v_t2 = vmovl_u8(v_src.val[2]);

                float32x4x3_t v_dst;
                v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0)));
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0)));
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // 32 pixels (96 interleaved bytes) per iteration.
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
                    __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
                    __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
                    __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
                    __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
                    __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));

                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                    process(_mm_unpacklo_epi8(v_r0, v_zero),
                            _mm_unpacklo_epi8(v_g0, v_zero),
                            _mm_unpacklo_epi8(v_b0, v_zero),
                            buf + j);

                    process(_mm_unpackhi_epi8(v_r0, v_zero),
                            _mm_unpackhi_epi8(v_g0, v_zero),
                            _mm_unpackhi_epi8(v_b0, v_zero),
                            buf + j + 24);

                    process(_mm_unpacklo_epi8(v_r1, v_zero),
                            _mm_unpacklo_epi8(v_g1, v_zero),
                            _mm_unpacklo_epi8(v_b1, v_zero),
                            buf + j + 48);

                    process(_mm_unpackhi_epi8(v_r1, v_zero),
                            _mm_unpackhi_epi8(v_g1, v_zero),
                            _mm_unpackhi_epi8(v_b1, v_zero),
                            buf + j + 72);
                }
            }
#endif
            // Scalar unpack for the remainder of the block.
            for( ; j < dn*3; j += 3 )
            {
                buf[j] = src[j];
                buf[j+1] = src[j+1]*(1.f/255.f);
                buf[j+2] = src[j+2]*(1.f/255.f);
            }
            cvt(buf, buf, dn);  // float HLS -> float RGB, in place

            j = 0;
#if CV_NEON
            // Pack the float RGB block back to uchar, 8 pixels per iteration.
            for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
                uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));

                if (dcn == 4)
                {
                    uint8x8x4_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    v_dst.val[3] = v_alpha;
                    vst4_u8(dst, v_dst);
                }
                else
                {
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    vst3_u8(dst, v_dst);
                }
            }
#elif CV_SSE2
            if (dcn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
                {
__m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
                    __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
                    __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
                    __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);

                    // Round-convert to int32, pack to int16, then to uchar
                    // with unsigned saturation.
                    __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
                                                     _mm_cvtps_epi32(v_src1));
                    __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
                                                     _mm_cvtps_epi32(v_src3));

                    _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
                }

                // Step back to a pixel boundary for the scalar loop below.
                int jr = j % 3;
                if (jr)
                    dst -= jr, j -= jr;
            }
#endif

            // Scalar pack for the remainder.
            for( ; j < dn*3; j += 3, dst += dcn )
            {
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
                if( dcn == 4 )
                    dst[3] = alpha;
            }
        }
    }

    int dstcn;
    HLS2RGB_f cvt;  // float converter applied to each unpacked block
#if CV_NEON
    float32x4_t v_scale, v_scale_inv;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv;
    __m128i v_zero;
    bool haveSIMD;
#endif
};


///////////////////////////////////// RGB <-> L*a*b* /////////////////////////////////////

// Reference white point used when the caller supplies none (D65).
static const float D65[] = { 0.950456f, 1.f, 1.088754f };

enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 };
static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4];
static const float LabCbrtTabScale = LAB_CBRT_TAB_SIZE/1.5f;

static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4];
static const float GammaTabScale = (float)GAMMA_TAB_SIZE;

static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];
#undef lab_shift
#define lab_shift xyz_shift
#define gamma_shift 3
#define lab_shift2 (lab_shift + gamma_shift)
#define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift))
static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];

// Build the lookup tables shared by the Lab converters: the cube-root
// function table (spline and fixed-point variants) and the sRGB
// forward/inverse gamma tables (float splines and fixed-point byte tables).
// NOTE(review): the plain 'static bool' guard is not synchronized — not
// thread-safe on a first concurrent call; confirm callers initialize before
// going multithreaded.
static void initLabTabs()
{
    static bool initialized = false;
    if(!initialized)
    {
        float f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1], scale = 1.f/LabCbrtTabScale;
        int i;
        for(i = 0; i <= LAB_CBRT_TAB_SIZE; i++)
        {
            float x = i*scale;
            // Linear segment below the 0.008856 threshold, cube root above.
            f[i] = x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x);
        }
        splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab);

        scale = 1.f/GammaTabScale;
        for(i = 0; i <= GAMMA_TAB_SIZE; i++)
        {
            float x = i*scale;
            // Forward and inverse sRGB transfer curves.
            g[i] = x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4);
            ig[i] = x <= 0.0031308 ? x*12.92f : (float)(1.055*std::pow((double)x, 1./2.4) - 0.055);
        }
        splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab);
        splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab);

        // Fixed-point byte-input tables (gamma_shift extra fraction bits).
        for(i = 0; i < 256; i++)
        {
            float x = i*(1.f/255.f);
            sRGBGammaTab_b[i] = saturate_cast<ushort>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4)));
            linearGammaTab_b[i] = (ushort)(i*(1 << gamma_shift));
        }

        for(i = 0; i < LAB_CBRT_TAB_SIZE_B; i++)
        {
            float x = i*(1.f/(255.f*(1 << gamma_shift)));
            LabCbrtTab_b[i] = saturate_cast<ushort>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x)));
        }
        initialized = true;
    }
}

// 8-bit RGB -> CIE L*a*b* using the fixed-point tables built by initLabTabs().
struct RGB2Lab_b
{
    typedef uchar channel_type;

    // _coeffs: optional RGB->XYZ matrix; _whitept: optional white point;
    // srgb selects gamma-corrected vs. linear input.
    RGB2Lab_b(int _srccn, int blueIdx, const float* _coeffs,
              const float* _whitept, bool _srgb)
        : srccn(_srccn), srgb(_srgb)
    {
        // NOTE(review): volatile loop bound — presumably to inhibit compiler
        // loop transformations here; confirm the original intent.
        static volatile int _3 = 3;
        initLabTabs();

        if (!_coeffs)
            _coeffs = sRGB2XYZ_D65;
        if (!_whitept)
            _whitept = D65;

        // Fold white-point normalization into the fixed-point matrix rows.
        float scale[] =
        {
            (1 << lab_shift)/_whitept[0],
            (float)(1 << lab_shift),
            (1 << lab_shift)/_whitept[2]
        };

        for( int i = 0; i < _3; i++ )
        {
            coeffs[i*3+(blueIdx^2)] = cvRound(_coeffs[i*3]*scale[i]);
            coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
            coeffs[i*3+blueIdx] = cvRound(_coeffs[i*3+2]*scale[i]);

            // NOTE(review): the first operand checks coeffs[i] rather than
            // coeffs[i*3] — looks like an index typo; verify upstream.
            CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
                       coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
        }
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        const int Lscale = (116*255+50)/100;
        const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100);
        const ushort* tab = srgb ? sRGBGammaTab_b : linearGammaTab_b;
        int i, scn = srccn;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3;

        for( i = 0; i < n; i += 3, src += scn )
        {
            // Gamma-expand each channel, transform to white-normalized XYZ,
            // then apply the fixed-point cube-root table.
            int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]];
            int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)];
            int fY = LabCbrtTab_b[CV_DESCALE(R*C3 + G*C4 + B*C5, lab_shift)];
            int fZ = LabCbrtTab_b[CV_DESCALE(R*C6 + G*C7 + B*C8, lab_shift)];

            int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
            int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 );
            int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 );

            dst[i] = saturate_cast<uchar>(L);
            dst[i+1] = saturate_cast<uchar>(a);
            dst[i+2] = saturate_cast<uchar>(b);
        }
    }

    int srccn;
    int coeffs[9];
    bool srgb;
};


// NOTE(review): macro argument is unparenthesized and the expansion ends
// with ';', so clip(x) at a call site yields a double semicolon — fragile;
// an inline function would be safer.
#define clip(value) \
    value < 0.0f ? 0.0f : value > 1.0f ?
1.0f : value; 5140 5141 struct RGB2Lab_f 5142 { 5143 typedef float channel_type; 5144 5145 RGB2Lab_f(int _srccn, int blueIdx, const float* _coeffs, 5146 const float* _whitept, bool _srgb) 5147 : srccn(_srccn), srgb(_srgb) 5148 { 5149 volatile int _3 = 3; 5150 initLabTabs(); 5151 5152 if (!_coeffs) 5153 _coeffs = sRGB2XYZ_D65; 5154 if (!_whitept) 5155 _whitept = D65; 5156 5157 float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] }; 5158 5159 for( int i = 0; i < _3; i++ ) 5160 { 5161 int j = i * 3; 5162 coeffs[j + (blueIdx ^ 2)] = _coeffs[j] * scale[i]; 5163 coeffs[j + 1] = _coeffs[j + 1] * scale[i]; 5164 coeffs[j + blueIdx] = _coeffs[j + 2] * scale[i]; 5165 5166 CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 && 5167 coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*LabCbrtTabScale ); 5168 } 5169 } 5170 5171 void operator()(const float* src, float* dst, int n) const 5172 { 5173 int i, scn = srccn; 5174 float gscale = GammaTabScale; 5175 const float* gammaTab = srgb ? sRGBGammaTab : 0; 5176 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 5177 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 5178 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 5179 n *= 3; 5180 5181 static const float _1_3 = 1.0f / 3.0f; 5182 static const float _a = 16.0f / 116.0f; 5183 for (i = 0; i < n; i += 3, src += scn ) 5184 { 5185 float R = clip(src[0]); 5186 float G = clip(src[1]); 5187 float B = clip(src[2]); 5188 5189 if (gammaTab) 5190 { 5191 R = splineInterpolate(R * gscale, gammaTab, GAMMA_TAB_SIZE); 5192 G = splineInterpolate(G * gscale, gammaTab, GAMMA_TAB_SIZE); 5193 B = splineInterpolate(B * gscale, gammaTab, GAMMA_TAB_SIZE); 5194 } 5195 float X = R*C0 + G*C1 + B*C2; 5196 float Y = R*C3 + G*C4 + B*C5; 5197 float Z = R*C6 + G*C7 + B*C8; 5198 5199 float FX = X > 0.008856f ? std::pow(X, _1_3) : (7.787f * X + _a); 5200 float FY = Y > 0.008856f ? std::pow(Y, _1_3) : (7.787f * Y + _a); 5201 float FZ = Z > 0.008856f ? 
std::pow(Z, _1_3) : (7.787f * Z + _a); 5202 5203 float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y); 5204 float a = 500.f * (FX - FY); 5205 float b = 200.f * (FY - FZ); 5206 5207 dst[i] = L; 5208 dst[i + 1] = a; 5209 dst[i + 2] = b; 5210 } 5211 } 5212 5213 int srccn; 5214 float coeffs[9]; 5215 bool srgb; 5216 }; 5217 5218 struct Lab2RGB_f 5219 { 5220 typedef float channel_type; 5221 5222 Lab2RGB_f( int _dstcn, int blueIdx, const float* _coeffs, 5223 const float* _whitept, bool _srgb ) 5224 : dstcn(_dstcn), srgb(_srgb) 5225 { 5226 initLabTabs(); 5227 5228 if(!_coeffs) 5229 _coeffs = XYZ2sRGB_D65; 5230 if(!_whitept) 5231 _whitept = D65; 5232 5233 for( int i = 0; i < 3; i++ ) 5234 { 5235 coeffs[i+(blueIdx^2)*3] = _coeffs[i]*_whitept[i]; 5236 coeffs[i+3] = _coeffs[i+3]*_whitept[i]; 5237 coeffs[i+blueIdx*3] = _coeffs[i+6]*_whitept[i]; 5238 } 5239 } 5240 5241 void operator()(const float* src, float* dst, int n) const 5242 { 5243 int i, dcn = dstcn; 5244 const float* gammaTab = srgb ? 
                                     sRGBInvGammaTab : 0;
        float gscale = GammaTabScale;
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        float alpha = ColorChannel<float>::max();
        n *= 3;

        // Thresholds of the inverse Lab transfer curve: L below lThresh lies
        // on the linear 903.3*Y segment; f-values below fThresh likewise.
        static const float lThresh = 0.008856f * 903.3f;
        static const float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
        for (i = 0; i < n; i += 3, dst += dcn)
        {
            float li = src[i];
            float ai = src[i + 1];
            float bi = src[i + 2];

            float y, fy;
            if (li <= lThresh)
            {
                y = li / 903.3f;
                fy = 7.787f * y + 16.0f / 116.0f;
            }
            else
            {
                fy = (li + 16.0f) / 116.0f;
                y = fy * fy * fy;
            }

            // recover f(X) and f(Z) from a and b, then invert f()
            float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f };

            for (int j = 0; j < 2; j++)
                if (fxz[j] <= fThresh)
                    fxz[j] = (fxz[j] - 16.0f / 116.0f) / 7.787f;
                else
                    fxz[j] = fxz[j] * fxz[j] * fxz[j];

            float x = fxz[0], z = fxz[1];
            // XYZ -> linear RGB, clamped to [0,1] before gamma encoding
            float ro = C0 * x + C1 * y + C2 * z;
            float go = C3 * x + C4 * y + C5 * z;
            float bo = C6 * x + C7 * y + C8 * z;
            ro = clip(ro);
            go = clip(go);
            bo = clip(bo);

            if (gammaTab)
            {
                ro = splineInterpolate(ro * gscale, gammaTab, GAMMA_TAB_SIZE);
                go = splineInterpolate(go * gscale, gammaTab, GAMMA_TAB_SIZE);
                bo = splineInterpolate(bo * gscale, gammaTab, GAMMA_TAB_SIZE);
            }

            dst[0] = ro, dst[1] = go, dst[2] = bo;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }

    int dstcn;
    float coeffs[9];
    bool srgb;
};

#undef clip

// 8-bit Lab -> RGB: widens a BLOCK_SIZE batch of pixels to float
// (L rescaled by 100/255 back to [0,100], a and b recentred by -128),
// delegates the conversion to the float converter `cvt`, then rescales
// the [0,1] result by 255 with saturation.
struct Lab2RGB_b
{
    typedef uchar channel_type;

    Lab2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
               const float* _whitept, bool _srgb )
    : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
    {
#if CV_NEON
        v_scale_inv = vdupq_n_f32(100.f/255.f);
        v_scale = vdupq_n_f32(255.f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
        v_128 = vdupq_n_f32(128.0f);
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(100.f/255.f);
        v_scale = _mm_set1_ps(255.f);
        v_128 = _mm_set1_ps(128.0f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // 16s x 8: widen 8 Lab pixels (16-bit lanes) to float, apply the same
    // per-channel prescaling as the scalar tail loop (L*100/255, a-128, b-128),
    // re-interleave, and store 24 floats to buf.
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 float * buf) const
    {
        __m128 v_r0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_r, v_zero));
        __m128 v_g0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_g, v_zero));
        __m128 v_b0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_b, v_zero));

        __m128 v_r1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_r, v_zero));
        __m128 v_g1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_g, v_zero));
        __m128 v_b1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_b, v_zero));

        v_r0 = _mm_mul_ps(v_r0, v_scale_inv);
        v_r1 = _mm_mul_ps(v_r1, v_scale_inv);

        v_g0 = _mm_sub_ps(v_g0, v_128);
        v_g1 = _mm_sub_ps(v_g1, v_128);
        v_b0 = _mm_sub_ps(v_b0, v_128);
        v_b1 = _mm_sub_ps(v_b1, v_128);

        _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

        _mm_store_ps(buf, v_r0);
        _mm_store_ps(buf + 4, v_r1);
        _mm_store_ps(buf + 8, v_g0);
        _mm_store_ps(buf + 12, v_g1);
        _mm_store_ps(buf + 16, v_b0);
        _mm_store_ps(buf + 20, v_b1);
    }
#endif

    // Convert n pixels in BLOCK_SIZE batches through the stack buffer buf.
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, dcn = dstcn;
        uchar alpha = ColorChannel<uchar>::max();
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

#if CV_NEON
            // widen + prescale 8 pixels at a time
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                uint8x8x3_t v_src = vld3_u8(src + j);
                uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
                           v_t1 = vmovl_u8(v_src.val[1]),
                           v_t2 = vmovl_u8(v_src.val[2]);

                float32x4x3_t v_dst;
                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_128);
                v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_128);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_128);
                v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_128);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // 32 pixels (96 bytes) per iteration: deinterleave bytes,
                // then process() each group of 8
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
                    __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
                    __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
                    __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
                    __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
                    __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));

                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                    process(_mm_unpacklo_epi8(v_r0, v_zero),
                            _mm_unpacklo_epi8(v_g0, v_zero),
                            _mm_unpacklo_epi8(v_b0, v_zero),
                            buf + j);

                    process(_mm_unpackhi_epi8(v_r0, v_zero),
                            _mm_unpackhi_epi8(v_g0, v_zero),
                            _mm_unpackhi_epi8(v_b0, v_zero),
                            buf + j + 24);

                    process(_mm_unpacklo_epi8(v_r1, v_zero),
                            _mm_unpacklo_epi8(v_g1, v_zero),
                            _mm_unpacklo_epi8(v_b1, v_zero),
                            buf + j + 48);

                    process(_mm_unpackhi_epi8(v_r1, v_zero),
                            _mm_unpackhi_epi8(v_g1, v_zero),
                            _mm_unpackhi_epi8(v_b1, v_zero),
                            buf + j + 72);
                }
            }
#endif

            // scalar tail: same prescaling as the SIMD paths above
            for( ; j < dn*3; j += 3 )
            {
                buf[j] = src[j]*(100.f/255.f);
                buf[j+1] = (float)(src[j+1] - 128);
                buf[j+2] = (float)(src[j+2] - 128);
            }
            cvt(buf, buf, dn);
            j = 0;

#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
                uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));

                if (dcn == 4)
                {
                    uint8x8x4_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    v_dst.val[3] = v_alpha;
                    vst4_u8(dst, v_dst);
                }
                else
                {
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    vst3_u8(dst, v_dst);
                }
            }
#elif CV_SSE2
            if (dcn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
                {
                    __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
                    __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
                    __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
                    __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);

                    __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
                                                     _mm_cvtps_epi32(v_src1));
                    __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
                                                     _mm_cvtps_epi32(v_src3));

                    _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
                }

                // the 16-wide store may stop mid-pixel; back up to a multiple
                // of 3 so the scalar loop rewrites the partial pixel
                int jr = j % 3;
                if (jr)
                    dst -= jr, j -= jr;
            }
#endif

            for( ; j < dn*3; j += 3, dst += dcn )
            {
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
                if( dcn == 4 )
                    dst[3] = alpha;
            }
        }
    }

    int dstcn;
    Lab2RGB_f cvt;

#if CV_NEON
    float32x4_t v_scale, v_scale_inv, v_128;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv, v_128;
    __m128i v_zero;
    bool haveSIMD;
#endif
};


///////////////////////////////////// RGB <-> L*u*v* /////////////////////////////////////

struct RGB2Luv_f
{
    typedef float channel_type;

    RGB2Luv_f( int _srccn, int blueIdx, const float* _coeffs,
               const float* whitept, bool _srgb )
    : srccn(_srccn), srgb(_srgb)
    {
        volatile int i;
        initLabTabs();

        if(!_coeffs) _coeffs = sRGB2XYZ_D65;
        if(!whitept) whitept = D65;

        for( i = 0; i < 3; i++ )
        {
            coeffs[i*3] = _coeffs[i*3];
            coeffs[i*3+1] = _coeffs[i*3+1];
            coeffs[i*3+2] = _coeffs[i*3+2];
            if( blueIdx == 0 )
                std::swap(coeffs[i*3], coeffs[i*3+2]);
            CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
                       coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 1.5f );
        }

        // u'n, v'n chromaticity of the white point
        float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
        un = 4*whitept[0]*d;
        vn = 9*whitept[1]*d;

        CV_Assert(whitept[1] == 1.f);
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int i, scn = srccn;
        float gscale = GammaTabScale;
        const float* gammaTab = srgb ?
sRGBGammaTab : 0; 5556 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 5557 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 5558 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 5559 float _un = 13*un, _vn = 13*vn; 5560 n *= 3; 5561 5562 for( i = 0; i < n; i += 3, src += scn ) 5563 { 5564 float R = src[0], G = src[1], B = src[2]; 5565 if( gammaTab ) 5566 { 5567 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE); 5568 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE); 5569 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE); 5570 } 5571 5572 float X = R*C0 + G*C1 + B*C2; 5573 float Y = R*C3 + G*C4 + B*C5; 5574 float Z = R*C6 + G*C7 + B*C8; 5575 5576 float L = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE); 5577 L = 116.f*L - 16.f; 5578 5579 float d = (4*13) / std::max(X + 15 * Y + 3 * Z, FLT_EPSILON); 5580 float u = L*(X*d - _un); 5581 float v = L*((9*0.25f)*Y*d - _vn); 5582 5583 dst[i] = L; dst[i+1] = u; dst[i+2] = v; 5584 } 5585 } 5586 5587 int srccn; 5588 float coeffs[9], un, vn; 5589 bool srgb; 5590 }; 5591 5592 5593 struct Luv2RGB_f 5594 { 5595 typedef float channel_type; 5596 5597 Luv2RGB_f( int _dstcn, int blueIdx, const float* _coeffs, 5598 const float* whitept, bool _srgb ) 5599 : dstcn(_dstcn), srgb(_srgb) 5600 { 5601 initLabTabs(); 5602 5603 if(!_coeffs) _coeffs = XYZ2sRGB_D65; 5604 if(!whitept) whitept = D65; 5605 5606 for( int i = 0; i < 3; i++ ) 5607 { 5608 coeffs[i+(blueIdx^2)*3] = _coeffs[i]; 5609 coeffs[i+3] = _coeffs[i+3]; 5610 coeffs[i+blueIdx*3] = _coeffs[i+6]; 5611 } 5612 5613 float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3); 5614 un = 4*whitept[0]*d; 5615 vn = 9*whitept[1]*d; 5616 5617 CV_Assert(whitept[1] == 1.f); 5618 } 5619 5620 void operator()(const float* src, float* dst, int n) const 5621 { 5622 int i, dcn = dstcn; 5623 const float* gammaTab = srgb ? 
sRGBInvGammaTab : 0; 5624 float gscale = GammaTabScale; 5625 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 5626 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 5627 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 5628 float alpha = ColorChannel<float>::max(); 5629 float _un = un, _vn = vn; 5630 n *= 3; 5631 5632 for( i = 0; i < n; i += 3, dst += dcn ) 5633 { 5634 float L = src[i], u = src[i+1], v = src[i+2], d, X, Y, Z; 5635 Y = (L + 16.f) * (1.f/116.f); 5636 Y = Y*Y*Y; 5637 d = (1.f/13.f)/L; 5638 u = u*d + _un; 5639 v = v*d + _vn; 5640 float iv = 1.f/v; 5641 X = 2.25f * u * Y * iv ; 5642 Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv; 5643 5644 float R = X*C0 + Y*C1 + Z*C2; 5645 float G = X*C3 + Y*C4 + Z*C5; 5646 float B = X*C6 + Y*C7 + Z*C8; 5647 5648 R = std::min(std::max(R, 0.f), 1.f); 5649 G = std::min(std::max(G, 0.f), 1.f); 5650 B = std::min(std::max(B, 0.f), 1.f); 5651 5652 if( gammaTab ) 5653 { 5654 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE); 5655 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE); 5656 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE); 5657 } 5658 5659 dst[0] = R; dst[1] = G; dst[2] = B; 5660 if( dcn == 4 ) 5661 dst[3] = alpha; 5662 } 5663 } 5664 5665 int dstcn; 5666 float coeffs[9], un, vn; 5667 bool srgb; 5668 }; 5669 5670 5671 struct RGB2Luv_b 5672 { 5673 typedef uchar channel_type; 5674 5675 RGB2Luv_b( int _srccn, int blueIdx, const float* _coeffs, 5676 const float* _whitept, bool _srgb ) 5677 : srccn(_srccn), cvt(3, blueIdx, _coeffs, _whitept, _srgb) 5678 { 5679 #if CV_NEON 5680 v_scale_inv = vdupq_n_f32(1.f/255.f); 5681 v_scale = vdupq_n_f32(2.55f); 5682 v_coeff1 = vdupq_n_f32(0.72033898305084743f); 5683 v_coeff2 = vdupq_n_f32(96.525423728813564f); 5684 v_coeff3 = vdupq_n_f32(0.9732824427480916f); 5685 v_coeff4 = vdupq_n_f32(136.259541984732824f); 5686 v_alpha = vdup_n_u8(ColorChannel<uchar>::max()); 5687 #elif CV_SSE2 5688 v_zero = _mm_setzero_si128(); 5689 v_scale_inv = _mm_set1_ps(1.f/255.f); 
        v_scale = _mm_set1_ps(2.55f);
        v_coeff1 = _mm_set1_ps(0.72033898305084743f);
        v_coeff2 = _mm_set1_ps(96.525423728813564f);
        v_coeff3 = _mm_set1_ps(0.9732824427480916f);
        v_coeff4 = _mm_set1_ps(136.259541984732824f);
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // Deinterleave 8 L,u,v float triples from buf, pack each channel into
    // 8-bit range (L*2.55; u,v affine-mapped by the coeff1..coeff4 constants,
    // same as the scalar tail loop), and return them as 16-bit lanes.
    void process(const float * buf,
                 __m128i & v_l, __m128i & v_u, __m128i & v_v) const
    {
        __m128 v_l0f = _mm_load_ps(buf);
        __m128 v_l1f = _mm_load_ps(buf + 4);
        __m128 v_u0f = _mm_load_ps(buf + 8);
        __m128 v_u1f = _mm_load_ps(buf + 12);
        __m128 v_v0f = _mm_load_ps(buf + 16);
        __m128 v_v1f = _mm_load_ps(buf + 20);

        _mm_deinterleave_ps(v_l0f, v_l1f, v_u0f, v_u1f, v_v0f, v_v1f);

        v_l0f = _mm_mul_ps(v_l0f, v_scale);
        v_l1f = _mm_mul_ps(v_l1f, v_scale);
        v_u0f = _mm_add_ps(_mm_mul_ps(v_u0f, v_coeff1), v_coeff2);
        v_u1f = _mm_add_ps(_mm_mul_ps(v_u1f, v_coeff1), v_coeff2);
        v_v0f = _mm_add_ps(_mm_mul_ps(v_v0f, v_coeff3), v_coeff4);
        v_v1f = _mm_add_ps(_mm_mul_ps(v_v1f, v_coeff3), v_coeff4);

        v_l = _mm_packs_epi32(_mm_cvtps_epi32(v_l0f), _mm_cvtps_epi32(v_l1f));
        v_u = _mm_packs_epi32(_mm_cvtps_epi32(v_u0f), _mm_cvtps_epi32(v_u1f));
        v_v = _mm_packs_epi32(_mm_cvtps_epi32(v_v0f), _mm_cvtps_epi32(v_v1f));
    }
#endif

    // Convert n pixels in BLOCK_SIZE batches: widen uchar RGB to [0,1] float,
    // run the float converter `cvt`, then requantize L,u,v to 8 bits.
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, scn = srccn;
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

#if CV_NEON
            // 8 pixels per iteration; the 4-channel load simply drops alpha
            for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn)
            {
                uint16x8_t v_t0, v_t1, v_t2;

                if (scn == 3)
                {
                    uint8x8x3_t v_src = vld3_u8(src);
                    v_t0 = vmovl_u8(v_src.val[0]);
                    v_t1 = vmovl_u8(v_src.val[1]);
                    v_t2 = vmovl_u8(v_src.val[2]);
                }
                else
                {
                    uint8x8x4_t v_src = vld4_u8(src);
                    v_t0 = vmovl_u8(v_src.val[0]);
                    v_t1 = vmovl_u8(v_src.val[1]);
                    v_t2 = vmovl_u8(v_src.val[2]);
                }

                float32x4x3_t v_dst;
                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            // SSE fast path only for packed 3-channel input
            if (scn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, src += 16)
                {
                    __m128i v_src = _mm_loadu_si128((__m128i const *)src);

                    __m128i v_src_p = _mm_unpacklo_epi8(v_src, v_zero);
                    _mm_store_ps(buf + j, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv));
                    _mm_store_ps(buf + j + 4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv));

                    v_src_p = _mm_unpackhi_epi8(v_src, v_zero);
                    _mm_store_ps(buf + j + 8, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv));
                    _mm_store_ps(buf + j + 12, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv));
                }

                // back up to a multiple of 3 so the scalar loop rewrites
                // any partially-processed pixel
                int jr = j % 3;
                if (jr)
                    src -= jr, j -= jr;
            }
#endif
            for( ; j < dn*3; j += 3, src += scn )
            {
                buf[j] = src[0]*(1.f/255.f);
                buf[j+1] = (float)(src[1]*(1.f/255.f));
                buf[j+2] = (float)(src[2]*(1.f/255.f));
            }
            cvt(buf, buf, dn);

            j = 0;
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);

                uint8x8x3_t v_dst;
                v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                       vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[1], v_coeff1), v_coeff2))),
                                                       vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[1], v_coeff1), v_coeff2)))));
                v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[2], v_coeff3), v_coeff4))),
                                                       vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[2], v_coeff3), v_coeff4)))));

                vst3_u8(dst + j, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // 32 pixels per iteration: four process() calls, then byte
                // packing and re-interleaving before the stores
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_l_0, v_u_0, v_v_0;
                    process(buf + j,
                            v_l_0, v_u_0, v_v_0);

                    __m128i v_l_1, v_u_1, v_v_1;
                    process(buf + j + 24,
                            v_l_1, v_u_1, v_v_1);

                    __m128i v_l0 = _mm_packus_epi16(v_l_0, v_l_1);
                    __m128i v_u0 = _mm_packus_epi16(v_u_0, v_u_1);
                    __m128i v_v0 = _mm_packus_epi16(v_v_0, v_v_1);

                    process(buf + j + 48,
                            v_l_0, v_u_0, v_v_0);

                    process(buf + j + 72,
                            v_l_1, v_u_1, v_v_1);

                    __m128i v_l1 = _mm_packus_epi16(v_l_0, v_l_1);
                    __m128i v_u1 = _mm_packus_epi16(v_u_0, v_u_1);
                    __m128i v_v1 = _mm_packus_epi16(v_v_0, v_v_1);

                    _mm_interleave_epi8(v_l0, v_l1, v_u0, v_u1, v_v0, v_v1);

                    _mm_storeu_si128((__m128i *)(dst + j), v_l0);
                    _mm_storeu_si128((__m128i *)(dst + j + 16), v_l1);
                    _mm_storeu_si128((__m128i *)(dst + j + 32), v_u0);
                    _mm_storeu_si128((__m128i *)(dst + j + 48), v_u1);
                    _mm_storeu_si128((__m128i *)(dst + j + 64), v_v0);
                    _mm_storeu_si128((__m128i *)(dst + j + 80), v_v1);
                }
            }
#endif

            // scalar tail: L scaled to [0,255]; u,v affine-packed into 8 bits
            // (presumably mapping the representable u/v ranges onto 0..255 —
            // the constants match the SIMD coeff1..coeff4 values above)
            for( ; j < dn*3; j += 3 )
            {
                dst[j] = saturate_cast<uchar>(buf[j]*2.55f);
                dst[j+1] = saturate_cast<uchar>(buf[j+1]*0.72033898305084743f + 96.525423728813564f);
                dst[j+2] = saturate_cast<uchar>(buf[j+2]*0.9732824427480916f + 136.259541984732824f);
            }
        }
    }

    int srccn;
    RGB2Luv_f cvt;

#if CV_NEON
    float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_coeff3, v_coeff4;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv, v_coeff1, v_coeff2, v_coeff3, v_coeff4;
    __m128i v_zero;
    bool haveSIMD;
#endif
};


// 8-bit L*u*v* -> RGB: widens a BLOCK_SIZE batch to float, undoing the 8-bit
// packing (L*100/255; u*1.3882-134, v*1.0275-140 — the inverse of RGB2Luv_b's
// affine packing), runs the float converter, then rescales by 255.
struct Luv2RGB_b
{
    typedef uchar channel_type;

    Luv2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
               const float* _whitept, bool _srgb )
    : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
    {
#if CV_NEON
        v_scale_inv = vdupq_n_f32(100.f/255.f);
        v_coeff1 = vdupq_n_f32(1.388235294117647f);
        v_coeff2 = vdupq_n_f32(1.027450980392157f);
        v_134 = vdupq_n_f32(134.f);
        v_140 = vdupq_n_f32(140.f);
        v_scale = vdupq_n_f32(255.f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(100.f/255.f);
        v_coeff1 = _mm_set1_ps(1.388235294117647f);
        v_coeff2 = _mm_set1_ps(1.027450980392157f);
        v_134 = _mm_set1_ps(134.f);
        v_140 = _mm_set1_ps(140.f);
        v_scale = _mm_set1_ps(255.f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // 16s x 8: widen 8 L,u,v pixels (16-bit lanes) to float, apply the same
    // unpacking as the scalar tail loop, re-interleave, and store 24 floats.
    void process(__m128i v_l, __m128i v_u, __m128i v_v,
                 float * buf) const
    {
        __m128 v_l0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_l, v_zero));
        __m128 v_u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_u, v_zero));
        __m128 v_v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_v, v_zero));

        __m128 v_l1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_l, v_zero));
        __m128 v_u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_u, v_zero));
        __m128 v_v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_v, v_zero));

        v_l0 = _mm_mul_ps(v_l0, v_scale_inv);
        v_l1 = _mm_mul_ps(v_l1, v_scale_inv);

        v_u0 = _mm_sub_ps(_mm_mul_ps(v_u0, v_coeff1), v_134);
        v_u1 = _mm_sub_ps(_mm_mul_ps(v_u1, v_coeff1), v_134);
        v_v0 = _mm_sub_ps(_mm_mul_ps(v_v0, v_coeff2), v_140);
        v_v1 = _mm_sub_ps(_mm_mul_ps(v_v1, v_coeff2), v_140);

        _mm_interleave_ps(v_l0, v_l1, v_u0, v_u1, v_v0, v_v1);

        _mm_store_ps(buf, v_l0);
        _mm_store_ps(buf + 4, v_l1);
        _mm_store_ps(buf + 8, v_u0);
        _mm_store_ps(buf + 12, v_u1);
        _mm_store_ps(buf + 16, v_v0);
        _mm_store_ps(buf + 20, v_v1);
    }
#endif

    // Convert n pixels in BLOCK_SIZE batches through the stack buffer buf.
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, dcn = dstcn;
        uchar alpha = ColorChannel<uchar>::max();
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                uint8x8x3_t v_src = vld3_u8(src + j);
                uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
                           v_t1 = vmovl_u8(v_src.val[1]),
                           v_t2 = vmovl_u8(v_src.val[2]);

                float32x4x3_t v_dst;
                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_coeff1), v_134);
                v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_coeff2), v_140);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_coeff1), v_134);
                v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_coeff2), v_140);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // 32 pixels (96 bytes) per iteration
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
                    __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
                    __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
                    __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
                    __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
                    __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));

                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                    process(_mm_unpacklo_epi8(v_r0, v_zero),
                            _mm_unpacklo_epi8(v_g0, v_zero),
                            _mm_unpacklo_epi8(v_b0, v_zero),
                            buf + j);

                    process(_mm_unpackhi_epi8(v_r0, v_zero),
                            _mm_unpackhi_epi8(v_g0, v_zero),
                            _mm_unpackhi_epi8(v_b0, v_zero),
                            buf + j + 24);

                    process(_mm_unpacklo_epi8(v_r1, v_zero),
                            _mm_unpacklo_epi8(v_g1, v_zero),
                            _mm_unpacklo_epi8(v_b1, v_zero),
                            buf + j + 48);

                    process(_mm_unpackhi_epi8(v_r1, v_zero),
                            _mm_unpackhi_epi8(v_g1, v_zero),
                            _mm_unpackhi_epi8(v_b1, v_zero),
                            buf + j + 72);
                }
            }
#endif
            // scalar tail: same unpacking as the SIMD paths above
            for( ; j < dn*3; j += 3 )
            {
                buf[j] = src[j]*(100.f/255.f);
                buf[j+1] = (float)(src[j+1]*1.388235294117647f - 134.f);
                buf[j+2] = (float)(src[j+2]*1.027450980392157f - 140.f);
            }
            cvt(buf, buf, dn);

            j = 0;
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
                uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));

                if (dcn == 4)
                {
                    uint8x8x4_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    v_dst.val[3] = v_alpha;
                    vst4_u8(dst, v_dst);
                }
                else
                {
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    vst3_u8(dst, v_dst);
                }
            }
#elif CV_SSE2
            if (dcn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
                {
                    __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
                    __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
                    __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
                    __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);

                    __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
                                                     _mm_cvtps_epi32(v_src1));
                    __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
                                                     _mm_cvtps_epi32(v_src3));

                    _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
                }

                // back up to a multiple of 3 so the scalar loop rewrites
                // any partially-stored pixel
                int jr = j % 3;
                if (jr)
                    dst -= jr, j -= jr;
            }
#endif

            for( ; j < dn*3; j += 3, dst += dcn )
            {
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
                if( dcn == 4 )
                    dst[3] = alpha;
            }
        }
    }

    int dstcn;
    Luv2RGB_f cvt;

#if CV_NEON
    float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_134, v_140;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv, v_coeff1, v_coeff2, v_134, v_140;
    __m128i v_zero;
    bool haveSIMD;
#endif
};


///////////////////////////////////// YUV420 -> RGB /////////////////////////////////////

// Fixed-point BT.601 YUV->RGB coefficients, scaled by 2^ITUR_BT_601_SHIFT.
const int ITUR_BT_601_CY = 1220542;
const int ITUR_BT_601_CUB = 2116026;
const int ITUR_BT_601_CUG = -409993;
const int ITUR_BT_601_CVG = -852492;
const int ITUR_BT_601_CVR = 1673527;
const
int ITUR_BT_601_SHIFT = 20;  // Q20 fixed-point scale shared by all ITUR_BT_601_* coefficients

// Coefficients for RGB to YUV420p conversion
// (forward BT.601 matrix scaled by 2^ITUR_BT_601_SHIFT; the +16 luma offset and
// +128 chroma offset are added at the point of use as shifted16 / shifted128)
const int ITUR_BT_601_CRY =  269484;
const int ITUR_BT_601_CGY =  528482;
const int ITUR_BT_601_CBY =  102760;
const int ITUR_BT_601_CRU = -155188;
const int ITUR_BT_601_CGU = -305135;
const int ITUR_BT_601_CBU =  460324;
const int ITUR_BT_601_CGV = -385875;
const int ITUR_BT_601_CBV =  -74448;

// Converts a horizontal stripe of a semi-planar YUV 4:2:0 image (single
// interleaved UV plane, i.e. NV12/NV21 family) to packed 3-channel 8-bit output.
// Template parameters:
//   bIdx - byte offset of the blue channel in an output pixel (0 = BGR, 2 = RGB)
//   uIdx - position of U inside each interleaved chroma byte pair
//          (0: U,V order; 1: V,U order) — see the uv[] indexing below
// One Range unit corresponds to TWO output rows, because in 4:2:0 a single
// chroma row is shared by two luma rows.
template<int bIdx, int uIdx>
struct YUV420sp2RGB888Invoker : ParallelLoopBody
{
    Mat* dst;                  // destination image (CV_8UC3)
    const uchar* my1, *muv;    // base pointers: luma plane, interleaved chroma plane
    int width, stride;         // output width; source row stride in bytes

    YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
        : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        // each range step covers a pair of image rows
        int rangeBegin = range.start * 2;
        int rangeEnd = range.end * 2;

        //R = 1.164(Y - 16) + 1.596(V - 128)
        //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
        //B = 1.164(Y - 16) + 2.018(U - 128)

        //R = (1220542(Y - 16) + 1673527(V - 128) + (1 << 19)) >> 20
        //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
        //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20

        // chroma plane holds one row per two luma rows, hence stride / 2
        const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;

#ifdef HAVE_TEGRA_OPTIMIZATION
        if(tegra::useTegra() && tegra::cvtYUV4202RGB(bIdx, uIdx, 3, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
            return;
#endif

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            // 2x2 pixel blocks: one (U,V) pair feeds four Y samples
            for (int i = 0; i < width; i += 2, row1 += 6, row2 += 6)
            {
                // uIdx flips which byte of the pair is U and which is V
                int u = int(uv[i + 0 + uIdx]) - 128;
                int v = int(uv[i + 1 - uIdx]) - 128;

                // per-block chroma terms; (1 << (SHIFT-1)) pre-adds the rounding constant
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                // clip Y below the studio-range floor of 16 before scaling
                int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);

                int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
                row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);

                int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);

                int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
                row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[4] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
            }
        }
    }
};

// Same conversion as YUV420sp2RGB888Invoker but producing 4-channel output
// with the alpha byte forced to 0xff.
template<int bIdx, int uIdx>
struct YUV420sp2RGBA8888Invoker : ParallelLoopBody
{
    Mat* dst;                  // destination image (CV_8UC4)
    const uchar* my1, *muv;    // base pointers: luma plane, interleaved chroma plane
    int width, stride;

    YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
        : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        // each range step covers a pair of image rows
        int rangeBegin = range.start * 2;
        int rangeEnd = range.end * 2;

        //R = 1.164(Y - 16) + 1.596(V - 128)
        //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
        //B = 1.164(Y - 16) + 2.018(U - 128)

        //R = (1220542(Y - 16) + 1673527(V - 128) + (1 << 19)) >> 20
        //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
        //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20

        const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;

#ifdef HAVE_TEGRA_OPTIMIZATION
        if(tegra::useTegra() && tegra::cvtYUV4202RGB(bIdx, uIdx, 4, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
            return;
#endif

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            // 2x2 pixel blocks, 4 bytes per output pixel
            for (int i = 0; i < width; i += 2, row1 += 8, row2 += 8)
            {
                int u = int(uv[i + 0 + uIdx]) - 128;
                int v = int(uv[i + 1 - uIdx]) - 128;

                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
                row1[3] = uchar(0xff);

                int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
                row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
                row1[7] = uchar(0xff);

                int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
                row2[3] = uchar(0xff);

                int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
                row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[5] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
                row2[7] = uchar(0xff);
            }
        }
    }
};

// Converts fully planar YUV 4:2:0 (separate U and V planes, i.e. YV12/IYUV
// family) to packed 3-channel output. Chroma rows are half width, so two of
// them are packed into each full-stride row of the chroma planes; uvsteps[]
// alternates between "advance to the second half of this stride" and "jump to
// the next stride", and ustepIdx/vstepIdx select the starting phase per plane.
template<int bIdx>
struct YUV420p2RGB888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* my1, *mu, *mv;   // luma, U plane, V plane base pointers
    int width, stride;
    int ustepIdx, vstepIdx;       // initial phase into uvsteps[] for U and V

    YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
        : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}

    void operator()(const Range& range) const
    {
        const int rangeBegin = range.start * 2;
        const int rangeEnd = range.end * 2;

        // alternate half-stride / remainder-of-stride advances (see struct comment)
        int uvsteps[2] = {width/2, stride - width/2};
        int usIdx = ustepIdx, vsIdx = vstepIdx;

        const uchar* y1 = my1 + rangeBegin * stride;
        const uchar* u1 = mu + (range.start / 2) * stride;
        const uchar* v1 = mv + (range.start / 2) * stride;

        // odd range start lands on the second chroma half-row of the stride
        if(range.start % 2 == 1)
        {
            u1 += uvsteps[(usIdx++) & 1];
            v1 += uvsteps[(vsIdx++) & 1];
        }

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            // i indexes chroma samples; each covers a 2x2 luma block
            for (int i = 0; i < width / 2; i += 1, row1 += 6, row2 += 6)
            {
                int u = int(u1[i]) - 128;
                int v = int(v1[i]) - 128;

                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);

                int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);

                int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);

                int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[4] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
            }
        }
    }
};

// Same conversion as YUV420p2RGB888Invoker but producing 4-channel output
// with the alpha byte forced to 0xff.
template<int bIdx>
struct YUV420p2RGBA8888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* my1, *mu, *mv;
    int width, stride;
    int ustepIdx, vstepIdx;

    YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
        : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}

    void operator()(const Range& range) const
    {
        int rangeBegin = range.start * 2;
        int rangeEnd = range.end * 2;

        int uvsteps[2] = {width/2, stride - width/2};
        int usIdx = ustepIdx, vsIdx = vstepIdx;

        const uchar* y1 = my1 + rangeBegin * stride;
        const uchar* u1 = mu + (range.start / 2) * stride;
        const uchar* v1 = mv + (range.start / 2) * stride;

        if(range.start % 2 == 1)
        {
            u1 += uvsteps[(usIdx++) & 1];
            v1 += uvsteps[(vsIdx++) & 1];
        }

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            for (int i = 0; i < width / 2; i += 1, row1 += 8, row2 += 8)
            {
                int u = int(u1[i]) - 128;
                int v = int(v1[i]) - 128;

                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
                row1[3] = uchar(0xff);

                int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
                row1[7] = uchar(0xff);

                int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
                row2[3] = uchar(0xff);

                int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[5] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
                row2[7] = uchar(0xff);
            }
        }
    }
};

// Images below this pixel count are converted on the calling thread only.
#define MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION (320*240)

// Dispatch helper: runs the semi-planar YUV420 -> RGB invoker, in parallel
// for large enough images, sequentially otherwise. Range covers row pairs.
template<int bIdx, int uIdx>
inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
{
    YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
        parallel_for_(Range(0, _dst.rows/2), converter);
    else
        converter(Range(0, _dst.rows/2));
}

// Dispatch helper for the semi-planar YUV420 -> RGBA conversion.
template<int bIdx, int uIdx>
inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
{
    YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
        parallel_for_(Range(0, _dst.rows/2), converter);
    else
        converter(Range(0, _dst.rows/2));
}

// Dispatch helper for the planar YUV420 -> RGB conversion.
template<int bIdx>
inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
{
    YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
        parallel_for_(Range(0, _dst.rows/2), converter);
    else
        converter(Range(0, _dst.rows/2));
}

// Dispatch helper for the planar YUV420 -> RGBA conversion.
template<int bIdx>
inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
{
    YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
        parallel_for_(Range(0, _dst.rows/2), converter);
    else
        converter(Range(0, _dst.rows/2));
}

///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////

// Converts packed 8-bit RGB/BGR (3 or 4 channels; a 4th channel is ignored)
// to planar YUV 4:2:0. Range unit i handles source rows 2*i and 2*i+1:
// it writes two full Y rows plus one half-width U row and one half-width V
// row; two half-width chroma rows are packed into each full destination row
// (see the (i % 2) * (w/2) offsets below). uIdx == 2 swaps the U and V plane
// pointers, producing YV12 instead of IYUV ordering.
// Template parameter bIdx is the byte offset of blue in a source pixel
// (0 = BGR input, 2 = RGB input).
template<int bIdx>
struct RGB888toYUV420pInvoker: public ParallelLoopBody
{
    RGB888toYUV420pInvoker( const Mat& src, Mat* dst, const int uIdx )
        : src_(src),
          dst_(dst),
          uIdx_(uIdx) { }

    void operator()(const Range& rowRange) const
    {
        const int w = src_.cols;
        const int h = src_.rows;

        const int cn = src_.channels();
        for( int i = rowRange.start; i < rowRange.end; i++ )
        {
            const uchar* row0 = src_.ptr<uchar>(2 * i);
            const uchar* row1 = src_.ptr<uchar>(2 * i + 1);

            // Y plane occupies the first h rows of dst; U/V half-rows follow,
            // two half-rows per destination row
            uchar* y = dst_->ptr<uchar>(2*i);
            uchar* u = dst_->ptr<uchar>(h + i/2) + (i % 2) * (w/2);
            uchar* v = dst_->ptr<uchar>(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2);
            if( uIdx_ == 2 ) std::swap(u, v);

            // j walks source bytes two pixels at a time; k is the chroma index
            for( int j = 0, k = 0; j < w * cn; j += 2 * cn, k++ )
            {
                int r00 = row0[2-bIdx + j]; int g00 = row0[1 + j]; int b00 = row0[bIdx + j];
                int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j];
                int r10 = row1[2-bIdx + j]; int g10 = row1[1 + j]; int b10 = row1[bIdx + j];
                int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j];

                // +16 luma offset and rounding constant, pre-scaled to Q20
                const int shifted16 = (16 << ITUR_BT_601_SHIFT);
                const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1));
                int y00 = ITUR_BT_601_CRY * r00 + ITUR_BT_601_CGY * g00 + ITUR_BT_601_CBY * b00 + halfShift + shifted16;
                int y01 = ITUR_BT_601_CRY * r01 + ITUR_BT_601_CGY * g01 + ITUR_BT_601_CBY * b01 + halfShift + shifted16;
                int y10 = ITUR_BT_601_CRY * r10 + ITUR_BT_601_CGY * g10 + ITUR_BT_601_CBY * b10 + halfShift + shifted16;
                int y11 = ITUR_BT_601_CRY * r11 + ITUR_BT_601_CGY * g11 + ITUR_BT_601_CBY * b11 + halfShift + shifted16;

                y[2*k + 0] = saturate_cast<uchar>(y00 >> ITUR_BT_601_SHIFT);
                y[2*k + 1] = saturate_cast<uchar>(y01 >> ITUR_BT_601_SHIFT);
                y[2*k + dst_->step + 0] = saturate_cast<uchar>(y10 >> ITUR_BT_601_SHIFT);
                y[2*k + dst_->step + 1] = saturate_cast<uchar>(y11 >> ITUR_BT_601_SHIFT);

                // chroma is subsampled: only the top-left pixel of each 2x2
                // block contributes to U and V here
                const int shifted128 = (128 << ITUR_BT_601_SHIFT);
                int u00 = ITUR_BT_601_CRU * r00 + ITUR_BT_601_CGU * g00 + ITUR_BT_601_CBU * b00 + halfShift + shifted128;
                // NOTE: ITUR_BT_601_CBU is reused as the R coefficient of V —
                // in BT.601 the Cr/R coefficient numerically equals the Cb/B
                // coefficient (0.439), so no separate CRV constant is defined.
                int v00 = ITUR_BT_601_CBU * r00 + ITUR_BT_601_CGV * g00 + ITUR_BT_601_CBV * b00 + halfShift + shifted128;

                u[k] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT);
                v[k] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT);
            }
        }
    }

    // true when the image is large enough to be worth parallelizing
    static bool isFit( const Mat& src )
    {
        return (src.total() >= 320*240);
    }

private:
    RGB888toYUV420pInvoker& operator=(const RGB888toYUV420pInvoker&);

    const Mat& src_;
    Mat* const dst_;
    const int uIdx_;
};

// Dispatch helper for RGB/BGR -> planar YUV420 (YV12/IYUV selected by uIdx).
template<int bIdx, int uIdx>
static void cvtRGBtoYUV420p(const Mat& src, Mat& dst)
{
    RGB888toYUV420pInvoker<bIdx> colorConverter(src, &dst, uIdx);
    if( RGB888toYUV420pInvoker<bIdx>::isFit(src) )
        parallel_for_(Range(0, src.rows/2), colorConverter);
    else
        colorConverter(Range(0, src.rows/2));
}

///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////

// Converts packed YUV 4:2:2 (UYVY / YUY2 / YVYU, selected via the template
// parameters) to packed 3-channel 8-bit output, one image row per Range unit.
// Template parameters:
//   bIdx - byte offset of blue in an output pixel (0 = BGR, 2 = RGB)
//   yIdx - offset of the first Y byte within a 4-byte macropixel (1 for UYVY, 0 otherwise)
//   uIdx - distinguishes U-first (YUY2) from V-first (YVYU) chroma order
// uidx/vidx below are the derived byte offsets of U and V in the macropixel.
template<int bIdx, int uIdx, int yIdx>
struct YUV422toRGB888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* src;
    int width, stride;

    YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
        : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        int rangeBegin = range.start;
        int rangeEnd = range.end;

        const int uidx = 1 - yIdx + uIdx * 2;
        const int vidx = (2 + uidx) % 4;
        const uchar* yuv_src = src + rangeBegin * stride;

        for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
        {
            uchar* row = dst->ptr<uchar>(j);

            // 4 source bytes (one macropixel: 2 Y + shared U,V) -> 2 output pixels
            for (int i = 0; i < 2 * width; i += 4, row += 6)
            {
                int u = int(yuv_src[i + uidx]) - 128;
                int v = int(yuv_src[i + vidx]) - 128;

                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
                row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);

                int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
                row[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
            }
        }
    }
};

// Same conversion as YUV422toRGB888Invoker but producing 4-channel output
// with the alpha byte forced to 0xff.
template<int bIdx, int uIdx, int yIdx>
struct YUV422toRGBA8888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* src;
    int width, stride;

    YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
        : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        int rangeBegin = range.start;
        int rangeEnd = range.end;

        const int uidx = 1 - yIdx + uIdx * 2;
        const int vidx = (2 + uidx) % 4;
        const uchar* yuv_src = src + rangeBegin * stride;

        for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
        {
            uchar* row = dst->ptr<uchar>(j);

            for (int i = 0; i < 2 * width; i += 4, row += 8)
            {
                int u = int(yuv_src[i + uidx]) - 128;
                int v = int(yuv_src[i + vidx]) - 128;

                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
                row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
                row[3] = uchar(0xff);

                int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
                row[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
                row[7] = uchar(0xff);
            }
        }
    }
};

// Images below this pixel count are converted on the calling thread only.
#define MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION (320*240)

// Dispatch helper for packed YUV422 -> RGB; one Range unit per image row.
template<int bIdx, int uIdx, int yIdx>
inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
{
    YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
        parallel_for_(Range(0, _dst.rows), converter);
    else
        converter(Range(0, _dst.rows));
}

// Dispatch helper for packed YUV422 -> RGBA.
template<int bIdx, int uIdx, int yIdx>
inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
{
    YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
    if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
        parallel_for_(Range(0, _dst.rows), converter);
    else
        converter(Range(0, _dst.rows));
}

/////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////

// Multiplies each color channel by alpha, with round-half-up division by the
// channel maximum: out = (c * a + max/2) / max. The alpha channel is copied.
template<typename _Tp>
struct RGBA2mRGBA
{
    typedef _Tp channel_type;

    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        _Tp max_val  = ColorChannel<_Tp>::max();
        _Tp half_val = ColorChannel<_Tp>::half();
        for( int i = 0; i < n; i++ )
        {
            _Tp v0 = *src++;
            _Tp v1 = *src++;
            _Tp v2 = *src++;
            _Tp v3 = *src++;

            *dst++ = (v0 * v3 + half_val) / max_val;
            *dst++ = (v1 * v3 + half_val) / max_val;
            *dst++ = (v2 * v3 + half_val) / max_val;
            *dst++ = v3;
        }
    }
};


// Inverse of RGBA2mRGBA: out = (c * max + a/2) / a with rounding; channels of
// fully transparent pixels (alpha == 0) are mapped to 0 to avoid division by zero.
template<typename _Tp>
struct mRGBA2RGBA
{
    typedef _Tp channel_type;

    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        _Tp max_val = ColorChannel<_Tp>::max();
        for( int i = 0; i < n; i++ )
        {
            _Tp v0 = *src++;
            _Tp v1 = *src++;
            _Tp v2 = *src++;
            _Tp v3 = *src++;
            _Tp v3_half = v3 / 2;

            *dst++ = (v3==0)? 0 : (v0 * max_val + v3_half) / v3;
            *dst++ = (v3==0)? 0 : (v1 * max_val + v3_half) / v3;
            *dst++ = (v3==0)? 0 : (v2 * max_val + v3_half) / v3;
            *dst++ = v3;
        }
    }
};

#ifdef HAVE_OPENCL

static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
{
    bool ok = false;
    UMat src = _src.getUMat(), dst;
    Size sz = src.size(), dstSz = sz;
    int scn = src.channels(), depth = src.depth(), bidx, uidx, yidx;
    int dims = 2, stripeSize = 1;
    ocl::Kernel k;

    if (depth != CV_8U && depth != CV_16U && depth != CV_32F)
        return false;

    ocl::Device dev = ocl::Device::getDefault();
    int pxPerWIy = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;
    int pxPerWIx = 1;

    size_t globalsize[] = { src.cols, (src.rows + pxPerWIy - 1) / pxPerWIy };
    cv::String opts = format("-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d ",
                             depth, scn, pxPerWIy);

    switch (code)
    {
    case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR:
    case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA:
    {
        CV_Assert(scn == 3 || scn == 4);
        dcn = code == COLOR_BGR2BGRA || code == COLOR_RGB2BGRA || code == COLOR_BGRA2RGBA ?
4 : 3; 6705 bool reverse = !(code == COLOR_BGR2BGRA || code == COLOR_BGRA2BGR); 6706 k.create("RGB", ocl::imgproc::cvtcolor_oclsrc, 6707 opts + format("-D dcn=%d -D bidx=0 -D %s", dcn, 6708 reverse ? "REVERSE" : "ORDER")); 6709 break; 6710 } 6711 case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB: 6712 case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA: 6713 { 6714 dcn = code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA || code == COLOR_BGR5652RGBA || code == COLOR_BGR5552RGBA ? 4 : 3; 6715 CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U); 6716 bidx = code == COLOR_BGR5652BGR || code == COLOR_BGR5552BGR || 6717 code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA ? 0 : 2; 6718 int greenbits = code == COLOR_BGR5652BGR || code == COLOR_BGR5652RGB || 6719 code == COLOR_BGR5652BGRA || code == COLOR_BGR5652RGBA ? 6 : 5; 6720 k.create("RGB5x52RGB", ocl::imgproc::cvtcolor_oclsrc, 6721 opts + format("-D dcn=%d -D bidx=%d -D greenbits=%d", dcn, bidx, greenbits)); 6722 break; 6723 } 6724 case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555: 6725 case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555: 6726 { 6727 CV_Assert((scn == 3 || scn == 4) && depth == CV_8U ); 6728 bidx = code == COLOR_BGR2BGR565 || code == COLOR_BGR2BGR555 || 6729 code == COLOR_BGRA2BGR565 || code == COLOR_BGRA2BGR555 ? 0 : 2; 6730 int greenbits = code == COLOR_BGR2BGR565 || code == COLOR_RGB2BGR565 || 6731 code == COLOR_BGRA2BGR565 || code == COLOR_RGBA2BGR565 ? 6 : 5; 6732 dcn = 2; 6733 k.create("RGB2RGB5x5", ocl::imgproc::cvtcolor_oclsrc, 6734 opts + format("-D dcn=2 -D bidx=%d -D greenbits=%d", bidx, greenbits)); 6735 break; 6736 } 6737 case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY: 6738 { 6739 CV_Assert(scn == 2 && depth == CV_8U); 6740 dcn = 1; 6741 int greenbits = code == COLOR_BGR5652GRAY ? 
6 : 5; 6742 k.create("BGR5x52Gray", ocl::imgproc::cvtcolor_oclsrc, 6743 opts + format("-D dcn=1 -D bidx=0 -D greenbits=%d", greenbits)); 6744 break; 6745 } 6746 case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555: 6747 { 6748 CV_Assert(scn == 1 && depth == CV_8U); 6749 dcn = 2; 6750 int greenbits = code == COLOR_GRAY2BGR565 ? 6 : 5; 6751 k.create("Gray2BGR5x5", ocl::imgproc::cvtcolor_oclsrc, 6752 opts + format("-D dcn=2 -D bidx=0 -D greenbits=%d", greenbits)); 6753 break; 6754 } 6755 case COLOR_BGR2GRAY: case COLOR_BGRA2GRAY: 6756 case COLOR_RGB2GRAY: case COLOR_RGBA2GRAY: 6757 { 6758 CV_Assert(scn == 3 || scn == 4); 6759 bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2; 6760 dcn = 1; 6761 k.create("RGB2Gray", ocl::imgproc::cvtcolor_oclsrc, 6762 opts + format("-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d", 6763 bidx, stripeSize)); 6764 globalsize[0] = (src.cols + stripeSize-1)/stripeSize; 6765 break; 6766 } 6767 case COLOR_GRAY2BGR: 6768 case COLOR_GRAY2BGRA: 6769 { 6770 CV_Assert(scn == 1); 6771 dcn = code == COLOR_GRAY2BGRA ? 4 : 3; 6772 k.create("Gray2RGB", ocl::imgproc::cvtcolor_oclsrc, 6773 opts + format("-D bidx=0 -D dcn=%d", dcn)); 6774 break; 6775 } 6776 case COLOR_BGR2YUV: 6777 case COLOR_RGB2YUV: 6778 { 6779 CV_Assert(scn == 3 || scn == 4); 6780 bidx = code == COLOR_RGB2YUV ? 0 : 2; 6781 dcn = 3; 6782 k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc, 6783 opts + format("-D dcn=3 -D bidx=%d", bidx)); 6784 break; 6785 } 6786 case COLOR_YUV2BGR: 6787 case COLOR_YUV2RGB: 6788 { 6789 if(dcn < 0) dcn = 3; 6790 CV_Assert(dcn == 3 || dcn == 4); 6791 bidx = code == COLOR_YUV2RGB ? 
0 : 2; 6792 k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc, 6793 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx)); 6794 break; 6795 } 6796 case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGR_NV12: case COLOR_YUV2RGB_NV21: case COLOR_YUV2BGR_NV21: 6797 case COLOR_YUV2RGBA_NV12: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV21: case COLOR_YUV2BGRA_NV21: 6798 { 6799 CV_Assert( scn == 1 ); 6800 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); 6801 dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 || 6802 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2RGBA_NV21 ? 4 : 3; 6803 bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 || 6804 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 0 : 2; 6805 uidx = code == COLOR_YUV2RGBA_NV21 || code == COLOR_YUV2RGB_NV21 || 6806 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 1 : 0; 6807 6808 dstSz = Size(sz.width, sz.height * 2 / 3); 6809 globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy; 6810 k.create("YUV2RGB_NVx", ocl::imgproc::cvtcolor_oclsrc, 6811 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx)); 6812 break; 6813 } 6814 case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12: 6815 case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV: 6816 { 6817 CV_Assert( scn == 1 ); 6818 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); 6819 dcn = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2RGBA_YV12 || 6820 code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2RGBA_IYUV ? 4 : 3; 6821 bidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || 6822 code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2BGR_IYUV ? 0 : 2; 6823 uidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || 6824 code == COLOR_YUV2RGBA_YV12 || code == COLOR_YUV2RGB_YV12 ? 
1 : 0; 6825 6826 dstSz = Size(sz.width, sz.height * 2 / 3); 6827 globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy; 6828 k.create("YUV2RGB_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc, 6829 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d%s", dcn, bidx, uidx, 6830 src.isContinuous() ? " -D SRC_CONT" : "")); 6831 break; 6832 } 6833 case COLOR_YUV2GRAY_420: 6834 { 6835 if (dcn <= 0) dcn = 1; 6836 6837 CV_Assert( dcn == 1 ); 6838 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); 6839 6840 dstSz = Size(sz.width, sz.height * 2 / 3); 6841 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 6842 dst = _dst.getUMat(); 6843 6844 src.rowRange(0, dstSz.height).copyTo(dst); 6845 return true; 6846 } 6847 case COLOR_RGB2YUV_YV12: case COLOR_BGR2YUV_YV12: case COLOR_RGBA2YUV_YV12: case COLOR_BGRA2YUV_YV12: 6848 case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV: 6849 { 6850 if (dcn <= 0) dcn = 1; 6851 bidx = code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 || 6852 code == COLOR_BGRA2YUV_IYUV || code == COLOR_BGR2YUV_IYUV ? 0 : 2; 6853 uidx = code == COLOR_RGBA2YUV_YV12 || code == COLOR_RGB2YUV_YV12 || 6854 code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ? 
1 : 0; 6855 6856 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U ); 6857 CV_Assert( dcn == 1 ); 6858 CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 ); 6859 6860 dstSz = Size(sz.width, sz.height / 2 * 3); 6861 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 6862 dst = _dst.getUMat(); 6863 6864 if (dev.isIntel() && src.cols % 4 == 0 && src.step % 4 == 0 && src.offset % 4 == 0 && 6865 dst.step % 4 == 0 && dst.offset % 4 == 0) 6866 { 6867 pxPerWIx = 2; 6868 } 6869 globalsize[0] = dstSz.width / (2 * pxPerWIx); globalsize[1] = (dstSz.height/3 + pxPerWIy - 1) / pxPerWIy; 6870 6871 k.create("RGB2YUV_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc, 6872 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D PIX_PER_WI_X=%d", dcn, bidx, uidx, pxPerWIx)); 6873 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); 6874 return k.run(2, globalsize, NULL, false); 6875 } 6876 case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY: 6877 case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2: case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU: 6878 case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU: 6879 { 6880 if (dcn <= 0) 6881 dcn = (code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2RGBA_YUY2 || 6882 code==COLOR_YUV2BGRA_YUY2 || code==COLOR_YUV2RGBA_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 4 : 3; 6883 6884 bidx = (code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2BGRA_YUY2 || 6885 code==COLOR_YUV2BGR_YUY2 || code==COLOR_YUV2BGRA_YVYU || code==COLOR_YUV2BGR_YVYU) ? 0 : 2; 6886 yidx = (code==COLOR_YUV2RGB_UYVY || code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY) ? 1 : 0; 6887 uidx = (code==COLOR_YUV2RGB_YVYU || code==COLOR_YUV2RGBA_YVYU || 6888 code==COLOR_YUV2BGR_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 
2 : 0; 6889 uidx = 1 - yidx + uidx; 6890 6891 CV_Assert( dcn == 3 || dcn == 4 ); 6892 CV_Assert( scn == 2 && depth == CV_8U ); 6893 6894 k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc, 6895 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx, 6896 src.offset % 4 == 0 && src.step % 4 == 0 ? " -D USE_OPTIMIZED_LOAD" : "")); 6897 break; 6898 } 6899 case COLOR_BGR2YCrCb: 6900 case COLOR_RGB2YCrCb: 6901 { 6902 CV_Assert(scn == 3 || scn == 4); 6903 bidx = code == COLOR_BGR2YCrCb ? 0 : 2; 6904 dcn = 3; 6905 k.create("RGB2YCrCb", ocl::imgproc::cvtcolor_oclsrc, 6906 opts + format("-D dcn=3 -D bidx=%d", bidx)); 6907 break; 6908 } 6909 case COLOR_YCrCb2BGR: 6910 case COLOR_YCrCb2RGB: 6911 { 6912 if( dcn <= 0 ) 6913 dcn = 3; 6914 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4)); 6915 bidx = code == COLOR_YCrCb2BGR ? 0 : 2; 6916 k.create("YCrCb2RGB", ocl::imgproc::cvtcolor_oclsrc, 6917 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx)); 6918 break; 6919 } 6920 case COLOR_BGR2XYZ: case COLOR_RGB2XYZ: 6921 { 6922 CV_Assert(scn == 3 || scn == 4); 6923 bidx = code == COLOR_BGR2XYZ ? 
0 : 2; 6924 6925 UMat c; 6926 if (depth == CV_32F) 6927 { 6928 float coeffs[] = 6929 { 6930 0.412453f, 0.357580f, 0.180423f, 6931 0.212671f, 0.715160f, 0.072169f, 6932 0.019334f, 0.119193f, 0.950227f 6933 }; 6934 if (bidx == 0) 6935 { 6936 std::swap(coeffs[0], coeffs[2]); 6937 std::swap(coeffs[3], coeffs[5]); 6938 std::swap(coeffs[6], coeffs[8]); 6939 } 6940 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c); 6941 } 6942 else 6943 { 6944 int coeffs[] = 6945 { 6946 1689, 1465, 739, 6947 871, 2929, 296, 6948 79, 488, 3892 6949 }; 6950 if (bidx == 0) 6951 { 6952 std::swap(coeffs[0], coeffs[2]); 6953 std::swap(coeffs[3], coeffs[5]); 6954 std::swap(coeffs[6], coeffs[8]); 6955 } 6956 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c); 6957 } 6958 6959 _dst.create(dstSz, CV_MAKETYPE(depth, 3)); 6960 dst = _dst.getUMat(); 6961 6962 k.create("RGB2XYZ", ocl::imgproc::cvtcolor_oclsrc, 6963 opts + format("-D dcn=3 -D bidx=%d", bidx)); 6964 if (k.empty()) 6965 return false; 6966 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c)); 6967 return k.run(2, globalsize, 0, false); 6968 } 6969 case COLOR_XYZ2BGR: case COLOR_XYZ2RGB: 6970 { 6971 if (dcn <= 0) 6972 dcn = 3; 6973 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4)); 6974 bidx = code == COLOR_XYZ2BGR ? 
0 : 2; 6975 6976 UMat c; 6977 if (depth == CV_32F) 6978 { 6979 float coeffs[] = 6980 { 6981 3.240479f, -1.53715f, -0.498535f, 6982 -0.969256f, 1.875991f, 0.041556f, 6983 0.055648f, -0.204043f, 1.057311f 6984 }; 6985 if (bidx == 0) 6986 { 6987 std::swap(coeffs[0], coeffs[6]); 6988 std::swap(coeffs[1], coeffs[7]); 6989 std::swap(coeffs[2], coeffs[8]); 6990 } 6991 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c); 6992 } 6993 else 6994 { 6995 int coeffs[] = 6996 { 6997 13273, -6296, -2042, 6998 -3970, 7684, 170, 6999 228, -836, 4331 7000 }; 7001 if (bidx == 0) 7002 { 7003 std::swap(coeffs[0], coeffs[6]); 7004 std::swap(coeffs[1], coeffs[7]); 7005 std::swap(coeffs[2], coeffs[8]); 7006 } 7007 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c); 7008 } 7009 7010 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 7011 dst = _dst.getUMat(); 7012 7013 k.create("XYZ2RGB", ocl::imgproc::cvtcolor_oclsrc, 7014 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx)); 7015 if (k.empty()) 7016 return false; 7017 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c)); 7018 return k.run(2, globalsize, 0, false); 7019 } 7020 case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL: 7021 case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL: 7022 { 7023 CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F)); 7024 bidx = code == COLOR_BGR2HSV || code == COLOR_BGR2HLS || 7025 code == COLOR_BGR2HSV_FULL || code == COLOR_BGR2HLS_FULL ? 0 : 2; 7026 int hrange = depth == CV_32F ? 360 : code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || 7027 code == COLOR_BGR2HLS || code == COLOR_RGB2HLS ? 180 : 256; 7028 bool is_hsv = code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL; 7029 String kernelName = String("RGB2") + (is_hsv ? 
"HSV" : "HLS"); 7030 dcn = 3; 7031 7032 if (is_hsv && depth == CV_8U) 7033 { 7034 static UMat sdiv_data; 7035 static UMat hdiv_data180; 7036 static UMat hdiv_data256; 7037 static int sdiv_table[256]; 7038 static int hdiv_table180[256]; 7039 static int hdiv_table256[256]; 7040 static volatile bool initialized180 = false, initialized256 = false; 7041 volatile bool & initialized = hrange == 180 ? initialized180 : initialized256; 7042 7043 if (!initialized) 7044 { 7045 int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12; 7046 UMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256; 7047 7048 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0; 7049 7050 int v = 255 << hsv_shift; 7051 if (!initialized180 && !initialized256) 7052 { 7053 for(int i = 1; i < 256; i++ ) 7054 sdiv_table[i] = saturate_cast<int>(v/(1.*i)); 7055 Mat(1, 256, CV_32SC1, sdiv_table).copyTo(sdiv_data); 7056 } 7057 7058 v = hrange << hsv_shift; 7059 for (int i = 1; i < 256; i++ ) 7060 hdiv_table[i] = saturate_cast<int>(v/(6.*i)); 7061 7062 Mat(1, 256, CV_32SC1, hdiv_table).copyTo(hdiv_data); 7063 initialized = true; 7064 } 7065 7066 _dst.create(dstSz, CV_8UC3); 7067 dst = _dst.getUMat(); 7068 7069 k.create("RGB2HSV", ocl::imgproc::cvtcolor_oclsrc, 7070 opts + format("-D hrange=%d -D bidx=%d -D dcn=3", 7071 hrange, bidx)); 7072 if (k.empty()) 7073 return false; 7074 7075 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), 7076 ocl::KernelArg::PtrReadOnly(sdiv_data), hrange == 256 ? 
ocl::KernelArg::PtrReadOnly(hdiv_data256) : 7077 ocl::KernelArg::PtrReadOnly(hdiv_data180)); 7078 7079 return k.run(2, globalsize, NULL, false); 7080 } 7081 else 7082 k.create(kernelName.c_str(), ocl::imgproc::cvtcolor_oclsrc, 7083 opts + format("-D hscale=%ff -D bidx=%d -D dcn=3", 7084 hrange*(1.f/360.f), bidx)); 7085 break; 7086 } 7087 case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL: 7088 case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL: 7089 { 7090 if (dcn <= 0) 7091 dcn = 3; 7092 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F)); 7093 bidx = code == COLOR_HSV2BGR || code == COLOR_HLS2BGR || 7094 code == COLOR_HSV2BGR_FULL || code == COLOR_HLS2BGR_FULL ? 0 : 2; 7095 int hrange = depth == CV_32F ? 360 : code == COLOR_HSV2BGR || code == COLOR_HSV2RGB || 7096 code == COLOR_HLS2BGR || code == COLOR_HLS2RGB ? 180 : 255; 7097 bool is_hsv = code == COLOR_HSV2BGR || code == COLOR_HSV2RGB || 7098 code == COLOR_HSV2BGR_FULL || code == COLOR_HSV2RGB_FULL; 7099 7100 String kernelName = String(is_hsv ? "HSV" : "HLS") + "2RGB"; 7101 k.create(kernelName.c_str(), ocl::imgproc::cvtcolor_oclsrc, 7102 opts + format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff", 7103 dcn, bidx, hrange, 6.f/hrange)); 7104 break; 7105 } 7106 case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA: 7107 { 7108 CV_Assert(scn == 4 && depth == CV_8U); 7109 dcn = 4; 7110 7111 k.create(code == COLOR_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA", ocl::imgproc::cvtcolor_oclsrc, 7112 opts + "-D dcn=4 -D bidx=3"); 7113 break; 7114 } 7115 case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab: 7116 case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv: 7117 { 7118 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) ); 7119 7120 bidx = code == CV_BGR2Lab || code == CV_LBGR2Lab || code == CV_BGR2Luv || code == CV_LBGR2Luv ? 
0 : 2; 7121 bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab || code == CV_RGB2Luv || code == CV_BGR2Luv; 7122 bool lab = code == CV_BGR2Lab || code == CV_RGB2Lab || code == CV_LBGR2Lab || code == CV_LRGB2Lab; 7123 float un, vn; 7124 dcn = 3; 7125 7126 k.create(format("BGR2%s", lab ? "Lab" : "Luv").c_str(), 7127 ocl::imgproc::cvtcolor_oclsrc, 7128 opts + format("-D dcn=%d -D bidx=%d%s", 7129 dcn, bidx, srgb ? " -D SRGB" : "")); 7130 if (k.empty()) 7131 return false; 7132 7133 initLabTabs(); 7134 7135 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 7136 dst = _dst.getUMat(); 7137 7138 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), 7139 dstarg = ocl::KernelArg::WriteOnly(dst); 7140 7141 if (depth == CV_8U && lab) 7142 { 7143 static UMat usRGBGammaTab, ulinearGammaTab, uLabCbrtTab, ucoeffs; 7144 7145 if (srgb && usRGBGammaTab.empty()) 7146 Mat(1, 256, CV_16UC1, sRGBGammaTab_b).copyTo(usRGBGammaTab); 7147 else if (ulinearGammaTab.empty()) 7148 Mat(1, 256, CV_16UC1, linearGammaTab_b).copyTo(ulinearGammaTab); 7149 if (uLabCbrtTab.empty()) 7150 Mat(1, LAB_CBRT_TAB_SIZE_B, CV_16UC1, LabCbrtTab_b).copyTo(uLabCbrtTab); 7151 7152 { 7153 int coeffs[9]; 7154 const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65; 7155 const float scale[] = 7156 { 7157 (1 << lab_shift)/_whitept[0], 7158 (float)(1 << lab_shift), 7159 (1 << lab_shift)/_whitept[2] 7160 }; 7161 7162 for (int i = 0; i < 3; i++ ) 7163 { 7164 coeffs[i*3+(bidx^2)] = cvRound(_coeffs[i*3]*scale[i]); 7165 coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]); 7166 coeffs[i*3+bidx] = cvRound(_coeffs[i*3+2]*scale[i]); 7167 7168 CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 && 7169 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) ); 7170 } 7171 Mat(1, 9, CV_32SC1, coeffs).copyTo(ucoeffs); 7172 } 7173 7174 const int Lscale = (116*255+50)/100; 7175 const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100); 7176 7177 k.args(srcarg, dstarg, 7178 
ocl::KernelArg::PtrReadOnly(srgb ? usRGBGammaTab : ulinearGammaTab), 7179 ocl::KernelArg::PtrReadOnly(uLabCbrtTab), ocl::KernelArg::PtrReadOnly(ucoeffs), 7180 Lscale, Lshift); 7181 } 7182 else 7183 { 7184 static UMat usRGBGammaTab, ucoeffs, uLabCbrtTab; 7185 7186 if (srgb && usRGBGammaTab.empty()) 7187 Mat(1, GAMMA_TAB_SIZE * 4, CV_32FC1, sRGBGammaTab).copyTo(usRGBGammaTab); 7188 if (!lab && uLabCbrtTab.empty()) 7189 Mat(1, LAB_CBRT_TAB_SIZE * 4, CV_32FC1, LabCbrtTab).copyTo(uLabCbrtTab); 7190 7191 { 7192 float coeffs[9]; 7193 const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65; 7194 float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] }; 7195 7196 for (int i = 0; i < 3; i++) 7197 { 7198 int j = i * 3; 7199 coeffs[j + (bidx ^ 2)] = _coeffs[j] * (lab ? scale[i] : 1); 7200 coeffs[j + 1] = _coeffs[j + 1] * (lab ? scale[i] : 1); 7201 coeffs[j + bidx] = _coeffs[j + 2] * (lab ? scale[i] : 1); 7202 7203 CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 && 7204 coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*(lab ? 
LabCbrtTabScale : 1) ); 7205 } 7206 7207 float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3); 7208 un = 13*4*_whitept[0]*d; 7209 vn = 13*9*_whitept[1]*d; 7210 7211 Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs); 7212 } 7213 7214 float _1_3 = 1.0f / 3.0f, _a = 16.0f / 116.0f; 7215 ocl::KernelArg ucoeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs); 7216 7217 if (lab) 7218 { 7219 if (srgb) 7220 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab), 7221 ucoeffsarg, _1_3, _a); 7222 else 7223 k.args(srcarg, dstarg, ucoeffsarg, _1_3, _a); 7224 } 7225 else 7226 { 7227 ocl::KernelArg LabCbrtTabarg = ocl::KernelArg::PtrReadOnly(uLabCbrtTab); 7228 if (srgb) 7229 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab), 7230 LabCbrtTabarg, ucoeffsarg, un, vn); 7231 else 7232 k.args(srcarg, dstarg, LabCbrtTabarg, ucoeffsarg, un, vn); 7233 } 7234 } 7235 7236 return k.run(dims, globalsize, NULL, false); 7237 } 7238 case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB: 7239 case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB: 7240 { 7241 if( dcn <= 0 ) 7242 dcn = 3; 7243 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) ); 7244 7245 bidx = code == CV_Lab2BGR || code == CV_Lab2LBGR || code == CV_Luv2BGR || code == CV_Luv2LBGR ? 0 : 2; 7246 bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB || code == CV_Luv2BGR || code == CV_Luv2RGB; 7247 bool lab = code == CV_Lab2BGR || code == CV_Lab2RGB || code == CV_Lab2LBGR || code == CV_Lab2LRGB; 7248 float un, vn; 7249 7250 k.create(format("%s2BGR", lab ? "Lab" : "Luv").c_str(), 7251 ocl::imgproc::cvtcolor_oclsrc, 7252 opts + format("-D dcn=%d -D bidx=%d%s", 7253 dcn, bidx, srgb ? 
" -D SRGB" : "")); 7254 if (k.empty()) 7255 return false; 7256 7257 initLabTabs(); 7258 static UMat ucoeffs, usRGBInvGammaTab; 7259 7260 if (srgb && usRGBInvGammaTab.empty()) 7261 Mat(1, GAMMA_TAB_SIZE*4, CV_32FC1, sRGBInvGammaTab).copyTo(usRGBInvGammaTab); 7262 7263 { 7264 float coeffs[9]; 7265 const float * const _coeffs = XYZ2sRGB_D65, * const _whitept = D65; 7266 7267 for( int i = 0; i < 3; i++ ) 7268 { 7269 coeffs[i+(bidx^2)*3] = _coeffs[i] * (lab ? _whitept[i] : 1); 7270 coeffs[i+3] = _coeffs[i+3] * (lab ? _whitept[i] : 1); 7271 coeffs[i+bidx*3] = _coeffs[i+6] * (lab ? _whitept[i] : 1); 7272 } 7273 7274 float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3); 7275 un = 4*_whitept[0]*d; 7276 vn = 9*_whitept[1]*d; 7277 7278 Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs); 7279 } 7280 7281 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 7282 dst = _dst.getUMat(); 7283 7284 float lThresh = 0.008856f * 903.3f; 7285 float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f; 7286 7287 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), 7288 dstarg = ocl::KernelArg::WriteOnly(dst), 7289 coeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs); 7290 7291 if (lab) 7292 { 7293 if (srgb) 7294 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBInvGammaTab), 7295 coeffsarg, lThresh, fThresh); 7296 else 7297 k.args(srcarg, dstarg, coeffsarg, lThresh, fThresh); 7298 } 7299 else 7300 { 7301 if (srgb) 7302 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBInvGammaTab), 7303 coeffsarg, un, vn); 7304 else 7305 k.args(srcarg, dstarg, coeffsarg, un, vn); 7306 } 7307 7308 return k.run(dims, globalsize, NULL, false); 7309 } 7310 default: 7311 break; 7312 } 7313 7314 if( !k.empty() ) 7315 { 7316 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 7317 dst = _dst.getUMat(); 7318 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); 7319 ok = k.run(dims, globalsize, NULL, false); 7320 } 7321 return ok; 7322 } 7323 7324 #endif 7325 7326 }//namespace cv 7327 7328 
////////////////////////////////////////////////////////////////////////////////////////// 7329 // The main function // 7330 ////////////////////////////////////////////////////////////////////////////////////////// 7331 7332 void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) 7333 { 7334 int stype = _src.type(); 7335 int scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype), bidx; 7336 7337 CV_OCL_RUN( _src.dims() <= 2 && _dst.isUMat() && !(depth == CV_8U && (code == CV_Luv2BGR || code == CV_Luv2RGB)), 7338 ocl_cvtColor(_src, _dst, code, dcn) ) 7339 7340 Mat src = _src.getMat(), dst; 7341 Size sz = src.size(); 7342 7343 CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32F ); 7344 7345 switch( code ) 7346 { 7347 case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR: 7348 case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA: 7349 CV_Assert( scn == 3 || scn == 4 ); 7350 dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3; 7351 bidx = code == CV_BGR2BGRA || code == CV_BGRA2BGR ? 
0 : 2; 7352 7353 _dst.create( sz, CV_MAKETYPE(depth, dcn)); 7354 dst = _dst.getMat(); 7355 7356 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 7357 CV_IPP_CHECK() 7358 { 7359 if( code == CV_BGR2BGRA) 7360 { 7361 if ( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) ) 7362 { 7363 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7364 return; 7365 } 7366 setIppErrorStatus(); 7367 } 7368 else if( code == CV_BGRA2BGR ) 7369 { 7370 if ( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiCopyAC4C3RTab[depth])) ) 7371 { 7372 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7373 return; 7374 } 7375 setIppErrorStatus(); 7376 } 7377 else if( code == CV_BGR2RGBA ) 7378 { 7379 if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 2, 1, 0)) ) 7380 { 7381 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7382 return; 7383 } 7384 setIppErrorStatus(); 7385 } 7386 else if( code == CV_RGBA2BGR ) 7387 { 7388 if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC4C3RTab[depth], 2, 1, 0)) ) 7389 { 7390 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7391 return; 7392 } 7393 setIppErrorStatus(); 7394 } 7395 else if( code == CV_RGB2BGR ) 7396 { 7397 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) ) 7398 { 7399 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7400 return; 7401 } 7402 setIppErrorStatus(); 7403 } 7404 #if IPP_VERSION_X100 >= 801 7405 else if( code == CV_RGBA2BGRA ) 7406 { 7407 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) ) 7408 { 7409 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7410 return; 7411 } 7412 setIppErrorStatus(); 7413 } 7414 #endif 7415 } 7416 #endif 7417 7418 if( depth == CV_8U ) 7419 { 7420 #ifdef HAVE_TEGRA_OPTIMIZATION 7421 if(tegra::useTegra() && tegra::cvtBGR2RGB(src, dst, bidx)) 7422 break; 7423 #endif 7424 CvtColorLoop(src, dst, RGB2RGB<uchar>(scn, dcn, bidx)); 7425 } 7426 else if( depth == CV_16U ) 7427 CvtColorLoop(src, dst, RGB2RGB<ushort>(scn, dcn, 
bidx)); 7428 else 7429 CvtColorLoop(src, dst, RGB2RGB<float>(scn, dcn, bidx)); 7430 break; 7431 7432 case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555: 7433 case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555: 7434 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U ); 7435 _dst.create(sz, CV_8UC2); 7436 dst = _dst.getMat(); 7437 7438 #if defined(HAVE_IPP) && 0 // breaks OCL accuracy tests 7439 CV_IPP_CHECK() 7440 { 7441 CV_SUPPRESS_DEPRECATED_START 7442 7443 if (code == CV_BGR2BGR565 && scn == 3) 7444 { 7445 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R))) 7446 { 7447 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7448 return; 7449 } 7450 setIppErrorStatus(); 7451 } 7452 else if (code == CV_BGRA2BGR565 && scn == 4) 7453 { 7454 if (CvtColorIPPLoopCopy(src, dst, 7455 IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 7456 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 0, 1, 2, depth))) 7457 { 7458 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7459 return; 7460 } 7461 setIppErrorStatus(); 7462 } 7463 else if (code == CV_RGB2BGR565 && scn == 3) 7464 { 7465 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], 7466 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 2, 1, 0, depth)) ) 7467 { 7468 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7469 return; 7470 } 7471 setIppErrorStatus(); 7472 } 7473 else if (code == CV_RGBA2BGR565 && scn == 4) 7474 { 7475 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 7476 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 2, 1, 0, depth)) ) 7477 { 7478 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7479 return; 7480 } 7481 setIppErrorStatus(); 7482 } 7483 CV_SUPPRESS_DEPRECATED_END 7484 } 7485 #endif 7486 7487 #ifdef HAVE_TEGRA_OPTIMIZATION 7488 if(code == CV_BGR2BGR565 || code == CV_BGRA2BGR565 || code == CV_RGB2BGR565 || code == CV_RGBA2BGR565) 7489 if(tegra::useTegra() && tegra::cvtRGB2RGB565(src, 
dst, code == CV_RGB2BGR565 || code == CV_RGBA2BGR565 ? 0 : 2)) 7490 break; 7491 #endif 7492 7493 CvtColorLoop(src, dst, RGB2RGB5x5(scn, 7494 code == CV_BGR2BGR565 || code == CV_BGR2BGR555 || 7495 code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2, 7496 code == CV_BGR2BGR565 || code == CV_RGB2BGR565 || 7497 code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5 // green bits 7498 )); 7499 break; 7500 7501 case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB: 7502 case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA: 7503 if(dcn <= 0) dcn = (code==CV_BGR5652BGRA || code==CV_BGR5552BGRA || code==CV_BGR5652RGBA || code==CV_BGR5552RGBA) ? 4 : 3; 7504 CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U ); 7505 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 7506 dst = _dst.getMat(); 7507 7508 #ifdef HAVE_IPP 7509 CV_IPP_CHECK() 7510 { 7511 CV_SUPPRESS_DEPRECATED_START 7512 if (code == CV_BGR5652BGR && dcn == 3) 7513 { 7514 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R))) 7515 { 7516 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7517 return; 7518 } 7519 setIppErrorStatus(); 7520 } 7521 else if (code == CV_BGR5652RGB && dcn == 3) 7522 { 7523 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R, 7524 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth))) 7525 { 7526 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7527 return; 7528 } 7529 setIppErrorStatus(); 7530 } 7531 else if (code == CV_BGR5652BGRA && dcn == 4) 7532 { 7533 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R, 7534 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth))) 7535 { 7536 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7537 return; 7538 } 7539 setIppErrorStatus(); 7540 } 7541 else if (code == CV_BGR5652RGBA && dcn == 4) 7542 { 7543 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R, 7544 
ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth))) 7545 { 7546 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7547 return; 7548 } 7549 setIppErrorStatus(); 7550 } 7551 CV_SUPPRESS_DEPRECATED_END 7552 } 7553 #endif 7554 7555 CvtColorLoop(src, dst, RGB5x52RGB(dcn, 7556 code == CV_BGR5652BGR || code == CV_BGR5552BGR || 7557 code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2, // blue idx 7558 code == CV_BGR5652BGR || code == CV_BGR5652RGB || 7559 code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5 // green bits 7560 )); 7561 break; 7562 7563 case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY: 7564 CV_Assert( scn == 3 || scn == 4 ); 7565 _dst.create(sz, CV_MAKETYPE(depth, 1)); 7566 dst = _dst.getMat(); 7567 7568 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 7569 CV_IPP_CHECK() 7570 { 7571 if( code == CV_BGR2GRAY && depth == CV_32F ) 7572 { 7573 if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) ) 7574 { 7575 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7576 return; 7577 } 7578 setIppErrorStatus(); 7579 } 7580 else if( code == CV_RGB2GRAY && depth == CV_32F ) 7581 { 7582 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) ) 7583 { 7584 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7585 return; 7586 } 7587 setIppErrorStatus(); 7588 } 7589 else if( code == CV_BGRA2GRAY && depth == CV_32F ) 7590 { 7591 if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC4Tab[depth])) ) 7592 { 7593 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7594 return; 7595 } 7596 setIppErrorStatus(); 7597 } 7598 else if( code == CV_RGBA2GRAY && depth == CV_32F ) 7599 { 7600 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC4Tab[depth])) ) 7601 { 7602 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7603 return; 7604 } 7605 setIppErrorStatus(); 7606 } 7607 } 7608 #endif 7609 7610 bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 
0 : 2; 7611 7612 if( depth == CV_8U ) 7613 { 7614 #ifdef HAVE_TEGRA_OPTIMIZATION 7615 if(tegra::useTegra() && tegra::cvtRGB2Gray(src, dst, bidx)) 7616 break; 7617 #endif 7618 CvtColorLoop(src, dst, RGB2Gray<uchar>(scn, bidx, 0)); 7619 } 7620 else if( depth == CV_16U ) 7621 CvtColorLoop(src, dst, RGB2Gray<ushort>(scn, bidx, 0)); 7622 else 7623 CvtColorLoop(src, dst, RGB2Gray<float>(scn, bidx, 0)); 7624 break; 7625 7626 case CV_BGR5652GRAY: case CV_BGR5552GRAY: 7627 CV_Assert( scn == 2 && depth == CV_8U ); 7628 _dst.create(sz, CV_8UC1); 7629 dst = _dst.getMat(); 7630 7631 CvtColorLoop(src, dst, RGB5x52Gray(code == CV_BGR5652GRAY ? 6 : 5)); 7632 break; 7633 7634 case CV_GRAY2BGR: case CV_GRAY2BGRA: 7635 if( dcn <= 0 ) dcn = (code==CV_GRAY2BGRA) ? 4 : 3; 7636 CV_Assert( scn == 1 && (dcn == 3 || dcn == 4)); 7637 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 7638 dst = _dst.getMat(); 7639 7640 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 7641 CV_IPP_CHECK() 7642 { 7643 if( code == CV_GRAY2BGR ) 7644 { 7645 if( CvtColorIPPLoop(src, dst, IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) ) 7646 { 7647 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7648 return; 7649 } 7650 setIppErrorStatus(); 7651 } 7652 else if( code == CV_GRAY2BGRA ) 7653 { 7654 if( CvtColorIPPLoop(src, dst, IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) ) 7655 { 7656 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7657 return; 7658 } 7659 setIppErrorStatus(); 7660 } 7661 } 7662 #endif 7663 7664 7665 if( depth == CV_8U ) 7666 { 7667 #ifdef HAVE_TEGRA_OPTIMIZATION 7668 if(tegra::useTegra() && tegra::cvtGray2RGB(src, dst)) 7669 break; 7670 #endif 7671 CvtColorLoop(src, dst, Gray2RGB<uchar>(dcn)); 7672 } 7673 else if( depth == CV_16U ) 7674 CvtColorLoop(src, dst, Gray2RGB<ushort>(dcn)); 7675 else 7676 CvtColorLoop(src, dst, Gray2RGB<float>(dcn)); 7677 break; 7678 7679 case CV_GRAY2BGR565: case CV_GRAY2BGR555: 7680 CV_Assert( scn == 1 && depth == CV_8U ); 7681 _dst.create(sz, CV_8UC2); 7682 dst 
= _dst.getMat(); 7683 7684 CvtColorLoop(src, dst, Gray2RGB5x5(code == CV_GRAY2BGR565 ? 6 : 5)); 7685 break; 7686 7687 case CV_BGR2YCrCb: case CV_RGB2YCrCb: 7688 case CV_BGR2YUV: case CV_RGB2YUV: 7689 { 7690 CV_Assert( scn == 3 || scn == 4 ); 7691 bidx = code == CV_BGR2YCrCb || code == CV_BGR2YUV ? 0 : 2; 7692 static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f }; 7693 static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 }; 7694 const float* coeffs_f = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_f; 7695 const int* coeffs_i = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_i; 7696 7697 _dst.create(sz, CV_MAKETYPE(depth, 3)); 7698 dst = _dst.getMat(); 7699 7700 #if defined HAVE_IPP && 0 7701 CV_IPP_CHECK() 7702 { 7703 if (code == CV_RGB2YUV && scn == 3 && depth == CV_8U) 7704 { 7705 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiRGBToYUV_8u_C3R))) 7706 { 7707 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7708 return; 7709 } 7710 setIppErrorStatus(); 7711 } 7712 else if (code == CV_BGR2YUV && scn == 3 && depth == CV_8U) 7713 { 7714 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], 7715 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth))) 7716 { 7717 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7718 return; 7719 } 7720 setIppErrorStatus(); 7721 } 7722 else if (code == CV_RGB2YUV && scn == 4 && depth == CV_8U) 7723 { 7724 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 7725 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 0, 1, 2, depth))) 7726 { 7727 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7728 return; 7729 } 7730 setIppErrorStatus(); 7731 } 7732 else if (code == CV_BGR2YUV && scn == 4 && depth == CV_8U) 7733 { 7734 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 7735 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth))) 7736 { 7737 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7738 return; 7739 } 7740 setIppErrorStatus(); 7741 } 
7742 } 7743 #endif 7744 7745 if( depth == CV_8U ) 7746 { 7747 #ifdef HAVE_TEGRA_OPTIMIZATION 7748 if((code == CV_RGB2YCrCb || code == CV_BGR2YCrCb) && tegra::useTegra() && tegra::cvtRGB2YCrCb(src, dst, bidx)) 7749 break; 7750 #endif 7751 CvtColorLoop(src, dst, RGB2YCrCb_i<uchar>(scn, bidx, coeffs_i)); 7752 } 7753 else if( depth == CV_16U ) 7754 CvtColorLoop(src, dst, RGB2YCrCb_i<ushort>(scn, bidx, coeffs_i)); 7755 else 7756 CvtColorLoop(src, dst, RGB2YCrCb_f<float>(scn, bidx, coeffs_f)); 7757 } 7758 break; 7759 7760 case CV_YCrCb2BGR: case CV_YCrCb2RGB: 7761 case CV_YUV2BGR: case CV_YUV2RGB: 7762 { 7763 if( dcn <= 0 ) dcn = 3; 7764 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) ); 7765 bidx = code == CV_YCrCb2BGR || code == CV_YUV2BGR ? 0 : 2; 7766 static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f }; 7767 static const int yuv_i[] = { 33292, -6472, -9519, 18678 }; 7768 const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_f; 7769 const int* coeffs_i = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 
0 : yuv_i; 7770 7771 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 7772 dst = _dst.getMat(); 7773 7774 #if defined HAVE_IPP && 0 7775 CV_IPP_CHECK() 7776 { 7777 if (code == CV_YUV2RGB && dcn == 3 && depth == CV_8U) 7778 { 7779 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R))) 7780 { 7781 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7782 return; 7783 } 7784 setIppErrorStatus(); 7785 } 7786 else if (code == CV_YUV2BGR && dcn == 3 && depth == CV_8U) 7787 { 7788 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, 7789 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth))) 7790 { 7791 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7792 return; 7793 } 7794 setIppErrorStatus(); 7795 } 7796 else if (code == CV_YUV2RGB && dcn == 4 && depth == CV_8U) 7797 { 7798 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, 7799 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth))) 7800 { 7801 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7802 return; 7803 } 7804 setIppErrorStatus(); 7805 } 7806 else if (code == CV_YUV2BGR && dcn == 4 && depth == CV_8U) 7807 { 7808 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, 7809 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth))) 7810 { 7811 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7812 return; 7813 } 7814 setIppErrorStatus(); 7815 } 7816 } 7817 #endif 7818 7819 if( depth == CV_8U ) 7820 CvtColorLoop(src, dst, YCrCb2RGB_i<uchar>(dcn, bidx, coeffs_i)); 7821 else if( depth == CV_16U ) 7822 CvtColorLoop(src, dst, YCrCb2RGB_i<ushort>(dcn, bidx, coeffs_i)); 7823 else 7824 CvtColorLoop(src, dst, YCrCb2RGB_f<float>(dcn, bidx, coeffs_f)); 7825 } 7826 break; 7827 7828 case CV_BGR2XYZ: case CV_RGB2XYZ: 7829 CV_Assert( scn == 3 || scn == 4 ); 7830 bidx = code == CV_BGR2XYZ ? 
0 : 2; 7831 7832 _dst.create(sz, CV_MAKETYPE(depth, 3)); 7833 dst = _dst.getMat(); 7834 7835 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 7836 CV_IPP_CHECK() 7837 { 7838 if( code == CV_BGR2XYZ && scn == 3 && depth != CV_32F ) 7839 { 7840 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) ) 7841 { 7842 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7843 return; 7844 } 7845 setIppErrorStatus(); 7846 } 7847 else if( code == CV_BGR2XYZ && scn == 4 && depth != CV_32F ) 7848 { 7849 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) ) 7850 { 7851 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7852 return; 7853 } 7854 setIppErrorStatus(); 7855 } 7856 else if( code == CV_RGB2XYZ && scn == 3 && depth != CV_32F ) 7857 { 7858 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2XYZTab[depth])) ) 7859 { 7860 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7861 return; 7862 } 7863 setIppErrorStatus(); 7864 } 7865 else if( code == CV_RGB2XYZ && scn == 4 && depth != CV_32F ) 7866 { 7867 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 0, 1, 2, depth)) ) 7868 { 7869 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7870 return; 7871 } 7872 setIppErrorStatus(); 7873 } 7874 } 7875 #endif 7876 7877 if( depth == CV_8U ) 7878 CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0)); 7879 else if( depth == CV_16U ) 7880 CvtColorLoop(src, dst, RGB2XYZ_i<ushort>(scn, bidx, 0)); 7881 else 7882 CvtColorLoop(src, dst, RGB2XYZ_f<float>(scn, bidx, 0)); 7883 break; 7884 7885 case CV_XYZ2BGR: case CV_XYZ2RGB: 7886 if( dcn <= 0 ) dcn = 3; 7887 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) ); 7888 bidx = code == CV_XYZ2BGR ? 
0 : 2; 7889 7890 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 7891 dst = _dst.getMat(); 7892 7893 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 7894 CV_IPP_CHECK() 7895 { 7896 if( code == CV_XYZ2BGR && dcn == 3 && depth != CV_32F ) 7897 { 7898 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) 7899 { 7900 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7901 return; 7902 } 7903 setIppErrorStatus(); 7904 } 7905 else if( code == CV_XYZ2BGR && dcn == 4 && depth != CV_32F ) 7906 { 7907 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) 7908 { 7909 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7910 return; 7911 } 7912 setIppErrorStatus(); 7913 } 7914 if( code == CV_XYZ2RGB && dcn == 3 && depth != CV_32F ) 7915 { 7916 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiXYZ2RGBTab[depth])) ) 7917 { 7918 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7919 return; 7920 } 7921 setIppErrorStatus(); 7922 } 7923 else if( code == CV_XYZ2RGB && dcn == 4 && depth != CV_32F ) 7924 { 7925 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) 7926 { 7927 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7928 return; 7929 } 7930 setIppErrorStatus(); 7931 } 7932 } 7933 #endif 7934 7935 if( depth == CV_8U ) 7936 CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0)); 7937 else if( depth == CV_16U ) 7938 CvtColorLoop(src, dst, XYZ2RGB_i<ushort>(dcn, bidx, 0)); 7939 else 7940 CvtColorLoop(src, dst, XYZ2RGB_f<float>(dcn, bidx, 0)); 7941 break; 7942 7943 case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL: 7944 case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL: 7945 { 7946 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) ); 7947 bidx = code == CV_BGR2HSV || code == CV_BGR2HLS || 7948 code == CV_BGR2HSV_FULL || code == 
CV_BGR2HLS_FULL ? 0 : 2; 7949 int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV || 7950 code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256; 7951 7952 _dst.create(sz, CV_MAKETYPE(depth, 3)); 7953 dst = _dst.getMat(); 7954 7955 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 7956 CV_IPP_CHECK() 7957 { 7958 if( depth == CV_8U || depth == CV_16U ) 7959 { 7960 #if 0 // breaks OCL accuracy tests 7961 if( code == CV_BGR2HSV_FULL && scn == 3 ) 7962 { 7963 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) ) 7964 { 7965 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7966 return; 7967 } 7968 setIppErrorStatus(); 7969 } 7970 else if( code == CV_BGR2HSV_FULL && scn == 4 ) 7971 { 7972 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) ) 7973 { 7974 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7975 return; 7976 } 7977 setIppErrorStatus(); 7978 } 7979 else if( code == CV_RGB2HSV_FULL && scn == 4 ) 7980 { 7981 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 0, 1, 2, depth)) ) 7982 { 7983 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7984 return; 7985 } 7986 setIppErrorStatus(); 7987 } else 7988 #endif 7989 if( code == CV_RGB2HSV_FULL && scn == 3 && depth == CV_16U ) 7990 { 7991 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HSVTab[depth])) ) 7992 { 7993 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 7994 return; 7995 } 7996 setIppErrorStatus(); 7997 } 7998 else if( code == CV_BGR2HLS_FULL && scn == 3 ) 7999 { 8000 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) 8001 { 8002 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8003 return; 8004 } 8005 setIppErrorStatus(); 8006 } 8007 else if( code == CV_BGR2HLS_FULL && scn == 4 ) 8008 { 8009 if( CvtColorIPPLoop(src, dst, 
IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) 8010 { 8011 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8012 return; 8013 } 8014 setIppErrorStatus(); 8015 } 8016 else if( code == CV_RGB2HLS_FULL && scn == 3 ) 8017 { 8018 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HLSTab[depth])) ) 8019 { 8020 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8021 return; 8022 } 8023 setIppErrorStatus(); 8024 } 8025 else if( code == CV_RGB2HLS_FULL && scn == 4 ) 8026 { 8027 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) ) 8028 { 8029 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8030 return; 8031 } 8032 setIppErrorStatus(); 8033 } 8034 } 8035 } 8036 #endif 8037 8038 if( code == CV_BGR2HSV || code == CV_RGB2HSV || 8039 code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL ) 8040 { 8041 #ifdef HAVE_TEGRA_OPTIMIZATION 8042 if(tegra::useTegra() && tegra::cvtRGB2HSV(src, dst, bidx, hrange)) 8043 break; 8044 #endif 8045 if( depth == CV_8U ) 8046 CvtColorLoop(src, dst, RGB2HSV_b(scn, bidx, hrange)); 8047 else 8048 CvtColorLoop(src, dst, RGB2HSV_f(scn, bidx, (float)hrange)); 8049 } 8050 else 8051 { 8052 if( depth == CV_8U ) 8053 CvtColorLoop(src, dst, RGB2HLS_b(scn, bidx, hrange)); 8054 else 8055 CvtColorLoop(src, dst, RGB2HLS_f(scn, bidx, (float)hrange)); 8056 } 8057 } 8058 break; 8059 8060 case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL: 8061 case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL: 8062 { 8063 if( dcn <= 0 ) dcn = 3; 8064 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) ); 8065 bidx = code == CV_HSV2BGR || code == CV_HLS2BGR || 8066 code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2; 8067 int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB || 8068 code == CV_HLS2BGR || code == CV_HLS2RGB ? 
180 : 255; 8069 8070 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 8071 dst = _dst.getMat(); 8072 8073 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 8074 CV_IPP_CHECK() 8075 { 8076 if( depth == CV_8U || depth == CV_16U ) 8077 { 8078 if( code == CV_HSV2BGR_FULL && dcn == 3 ) 8079 { 8080 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) 8081 { 8082 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8083 return; 8084 } 8085 setIppErrorStatus(); 8086 } 8087 else if( code == CV_HSV2BGR_FULL && dcn == 4 ) 8088 { 8089 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) 8090 { 8091 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8092 return; 8093 } 8094 setIppErrorStatus(); 8095 } 8096 else if( code == CV_HSV2RGB_FULL && dcn == 3 ) 8097 { 8098 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHSV2RGBTab[depth])) ) 8099 { 8100 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8101 return; 8102 } 8103 setIppErrorStatus(); 8104 } 8105 else if( code == CV_HSV2RGB_FULL && dcn == 4 ) 8106 { 8107 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) 8108 { 8109 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8110 return; 8111 } 8112 setIppErrorStatus(); 8113 } 8114 else if( code == CV_HLS2BGR_FULL && dcn == 3 ) 8115 { 8116 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) 8117 { 8118 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8119 return; 8120 } 8121 setIppErrorStatus(); 8122 } 8123 else if( code == CV_HLS2BGR_FULL && dcn == 4 ) 8124 { 8125 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) 8126 { 8127 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8128 return; 8129 } 8130 setIppErrorStatus(); 8131 } 8132 else if( code == CV_HLS2RGB_FULL && dcn == 
3 ) 8133 { 8134 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHLS2RGBTab[depth])) ) 8135 { 8136 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8137 return; 8138 } 8139 setIppErrorStatus(); 8140 } 8141 else if( code == CV_HLS2RGB_FULL && dcn == 4 ) 8142 { 8143 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) 8144 { 8145 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8146 return; 8147 } 8148 setIppErrorStatus(); 8149 } 8150 } 8151 } 8152 #endif 8153 8154 if( code == CV_HSV2BGR || code == CV_HSV2RGB || 8155 code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL ) 8156 { 8157 if( depth == CV_8U ) 8158 CvtColorLoop(src, dst, HSV2RGB_b(dcn, bidx, hrange)); 8159 else 8160 CvtColorLoop(src, dst, HSV2RGB_f(dcn, bidx, (float)hrange)); 8161 } 8162 else 8163 { 8164 if( depth == CV_8U ) 8165 CvtColorLoop(src, dst, HLS2RGB_b(dcn, bidx, hrange)); 8166 else 8167 CvtColorLoop(src, dst, HLS2RGB_f(dcn, bidx, (float)hrange)); 8168 } 8169 } 8170 break; 8171 8172 case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab: 8173 case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv: 8174 { 8175 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) ); 8176 bidx = code == CV_BGR2Lab || code == CV_BGR2Luv || 8177 code == CV_LBGR2Lab || code == CV_LBGR2Luv ? 
0 : 2; 8178 bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab || 8179 code == CV_BGR2Luv || code == CV_RGB2Luv; 8180 8181 _dst.create(sz, CV_MAKETYPE(depth, 3)); 8182 dst = _dst.getMat(); 8183 8184 #if defined HAVE_IPP && 0 8185 CV_IPP_CHECK() 8186 { 8187 if (code == CV_LBGR2Lab && scn == 3 && depth == CV_8U) 8188 { 8189 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGRToLab_8u_C3R))) 8190 { 8191 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8192 return; 8193 } 8194 setIppErrorStatus(); 8195 } 8196 else if (code == CV_LBGR2Lab && scn == 4 && depth == CV_8U) 8197 { 8198 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 8199 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 0, 1, 2, depth))) 8200 { 8201 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8202 return; 8203 } 8204 setIppErrorStatus(); 8205 } 8206 else 8207 if (code == CV_LRGB2Lab && scn == 3 && depth == CV_8U) // slower than OpenCV 8208 { 8209 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], 8210 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 2, 1, 0, depth))) 8211 { 8212 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8213 return; 8214 } 8215 setIppErrorStatus(); 8216 } 8217 else if (code == CV_LRGB2Lab && scn == 4 && depth == CV_8U) // slower than OpenCV 8218 { 8219 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 8220 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 2, 1, 0, depth))) 8221 { 8222 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8223 return; 8224 } 8225 setIppErrorStatus(); 8226 } 8227 else if (code == CV_LRGB2Luv && scn == 3) 8228 { 8229 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGBToLUVTab[depth]))) 8230 { 8231 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8232 return; 8233 } 8234 setIppErrorStatus(); 8235 } 8236 else if (code == CV_LRGB2Luv && scn == 4) 8237 { 8238 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 8239 ippiRGBToLUVTab[depth], 0, 1, 2, depth))) 8240 { 8241 
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8242 return; 8243 } 8244 setIppErrorStatus(); 8245 } 8246 else if (code == CV_LBGR2Luv && scn == 3) 8247 { 8248 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], 8249 ippiRGBToLUVTab[depth], 2, 1, 0, depth))) 8250 { 8251 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8252 return; 8253 } 8254 setIppErrorStatus(); 8255 } 8256 else if (code == CV_LBGR2Luv && scn == 4) 8257 { 8258 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 8259 ippiRGBToLUVTab[depth], 2, 1, 0, depth))) 8260 { 8261 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8262 return; 8263 } 8264 setIppErrorStatus(); 8265 } 8266 } 8267 #endif 8268 8269 if( code == CV_BGR2Lab || code == CV_RGB2Lab || 8270 code == CV_LBGR2Lab || code == CV_LRGB2Lab ) 8271 { 8272 if( depth == CV_8U ) 8273 CvtColorLoop(src, dst, RGB2Lab_b(scn, bidx, 0, 0, srgb)); 8274 else 8275 CvtColorLoop(src, dst, RGB2Lab_f(scn, bidx, 0, 0, srgb)); 8276 } 8277 else 8278 { 8279 if( depth == CV_8U ) 8280 CvtColorLoop(src, dst, RGB2Luv_b(scn, bidx, 0, 0, srgb)); 8281 else 8282 CvtColorLoop(src, dst, RGB2Luv_f(scn, bidx, 0, 0, srgb)); 8283 } 8284 } 8285 break; 8286 8287 case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB: 8288 case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB: 8289 { 8290 if( dcn <= 0 ) dcn = 3; 8291 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) ); 8292 bidx = code == CV_Lab2BGR || code == CV_Luv2BGR || 8293 code == CV_Lab2LBGR || code == CV_Luv2LBGR ? 
0 : 2; 8294 bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB || 8295 code == CV_Luv2BGR || code == CV_Luv2RGB; 8296 8297 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 8298 dst = _dst.getMat(); 8299 8300 #if defined HAVE_IPP && 0 8301 CV_IPP_CHECK() 8302 { 8303 if( code == CV_Lab2LBGR && dcn == 3 && depth == CV_8U) 8304 { 8305 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R)) ) 8306 { 8307 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8308 return; 8309 } 8310 setIppErrorStatus(); 8311 } 8312 else if( code == CV_Lab2LBGR && dcn == 4 && depth == CV_8U ) 8313 { 8314 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R, 8315 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) 8316 { 8317 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8318 return; 8319 } 8320 setIppErrorStatus(); 8321 } 8322 if( code == CV_Lab2LRGB && dcn == 3 && depth == CV_8U ) 8323 { 8324 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R, 8325 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) 8326 { 8327 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8328 return; 8329 } 8330 setIppErrorStatus(); 8331 } 8332 else if( code == CV_Lab2LRGB && dcn == 4 && depth == CV_8U ) 8333 { 8334 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R, 8335 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) 8336 { 8337 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8338 return; 8339 } 8340 setIppErrorStatus(); 8341 } 8342 if( code == CV_Luv2LRGB && dcn == 3 ) 8343 { 8344 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiLUVToRGBTab[depth])) ) 8345 return; 8346 } 8347 else if( code == CV_Luv2LRGB && dcn == 4 ) 8348 { 8349 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth], 8350 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) 8351 { 8352 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8353 return; 8354 } 8355 } 8356 if( code == CV_Luv2LBGR && dcn == 3 ) 8357 { 8358 if( 
CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth], 8359 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) 8360 { 8361 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8362 return; 8363 } 8364 } 8365 else if( code == CV_Luv2LBGR && dcn == 4 ) 8366 { 8367 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth], 8368 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) 8369 { 8370 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 8371 return; 8372 } 8373 } 8374 } 8375 #endif 8376 8377 if( code == CV_Lab2BGR || code == CV_Lab2RGB || 8378 code == CV_Lab2LBGR || code == CV_Lab2LRGB ) 8379 { 8380 if( depth == CV_8U ) 8381 CvtColorLoop(src, dst, Lab2RGB_b(dcn, bidx, 0, 0, srgb)); 8382 else 8383 CvtColorLoop(src, dst, Lab2RGB_f(dcn, bidx, 0, 0, srgb)); 8384 } 8385 else 8386 { 8387 if( depth == CV_8U ) 8388 CvtColorLoop(src, dst, Luv2RGB_b(dcn, bidx, 0, 0, srgb)); 8389 else 8390 CvtColorLoop(src, dst, Luv2RGB_f(dcn, bidx, 0, 0, srgb)); 8391 } 8392 } 8393 break; 8394 8395 case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY: 8396 case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR: 8397 case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG: 8398 case CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA: 8399 demosaicing(src, _dst, code, dcn); 8400 break; 8401 8402 case CV_YUV2BGR_NV21: case CV_YUV2RGB_NV21: case CV_YUV2BGR_NV12: case CV_YUV2RGB_NV12: 8403 case CV_YUV2BGRA_NV21: case CV_YUV2RGBA_NV21: case CV_YUV2BGRA_NV12: case CV_YUV2RGBA_NV12: 8404 { 8405 // http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples 8406 // http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples 8407 8408 if (dcn <= 0) dcn = 
(code==CV_YUV420sp2BGRA || code==CV_YUV420sp2RGBA || code==CV_YUV2BGRA_NV12 || code==CV_YUV2RGBA_NV12) ? 4 : 3; 8409 const int bIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2BGR_NV12 || code==CV_YUV2BGRA_NV12) ? 0 : 2; 8410 const int uIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2RGB_NV21 || code==CV_YUV2RGBA_NV21) ? 1 : 0; 8411 8412 CV_Assert( dcn == 3 || dcn == 4 ); 8413 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); 8414 8415 Size dstSz(sz.width, sz.height * 2 / 3); 8416 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 8417 dst = _dst.getMat(); 8418 8419 int srcstep = (int)src.step; 8420 const uchar* y = src.ptr(); 8421 const uchar* uv = y + srcstep * dstSz.height; 8422 8423 switch(dcn*100 + bIdx * 10 + uIdx) 8424 { 8425 case 300: cvtYUV420sp2RGB<0, 0> (dst, srcstep, y, uv); break; 8426 case 301: cvtYUV420sp2RGB<0, 1> (dst, srcstep, y, uv); break; 8427 case 320: cvtYUV420sp2RGB<2, 0> (dst, srcstep, y, uv); break; 8428 case 321: cvtYUV420sp2RGB<2, 1> (dst, srcstep, y, uv); break; 8429 case 400: cvtYUV420sp2RGBA<0, 0>(dst, srcstep, y, uv); break; 8430 case 401: cvtYUV420sp2RGBA<0, 1>(dst, srcstep, y, uv); break; 8431 case 420: cvtYUV420sp2RGBA<2, 0>(dst, srcstep, y, uv); break; 8432 case 421: cvtYUV420sp2RGBA<2, 1>(dst, srcstep, y, uv); break; 8433 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break; 8434 }; 8435 } 8436 break; 8437 case CV_YUV2BGR_YV12: case CV_YUV2RGB_YV12: case CV_YUV2BGRA_YV12: case CV_YUV2RGBA_YV12: 8438 case CV_YUV2BGR_IYUV: case CV_YUV2RGB_IYUV: case CV_YUV2BGRA_IYUV: case CV_YUV2RGBA_IYUV: 8439 { 8440 //http://www.fourcc.org/yuv.php#YV12 == yuv420p -> It comprises an NxM Y plane followed by (N/2)x(M/2) V and U planes. 
8441 //http://www.fourcc.org/yuv.php#IYUV == I420 -> It comprises an NxN Y plane followed by (N/2)x(N/2) U and V planes 8442 8443 if (dcn <= 0) dcn = (code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12 || code==CV_YUV2RGBA_IYUV || code==CV_YUV2BGRA_IYUV) ? 4 : 3; 8444 const int bIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2BGR_IYUV || code==CV_YUV2BGRA_IYUV) ? 0 : 2; 8445 const int uIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2RGB_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12) ? 1 : 0; 8446 8447 CV_Assert( dcn == 3 || dcn == 4 ); 8448 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); 8449 8450 Size dstSz(sz.width, sz.height * 2 / 3); 8451 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 8452 dst = _dst.getMat(); 8453 8454 int srcstep = (int)src.step; 8455 const uchar* y = src.ptr(); 8456 const uchar* u = y + srcstep * dstSz.height; 8457 const uchar* v = y + srcstep * (dstSz.height + dstSz.height/4) + (dstSz.width/2) * ((dstSz.height % 4)/2); 8458 8459 int ustepIdx = 0; 8460 int vstepIdx = dstSz.height % 4 == 2 ? 
1 : 0; 8461 8462 if(uIdx == 1) { std::swap(u ,v), std::swap(ustepIdx, vstepIdx); } 8463 8464 switch(dcn*10 + bIdx) 8465 { 8466 case 30: cvtYUV420p2RGB<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break; 8467 case 32: cvtYUV420p2RGB<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break; 8468 case 40: cvtYUV420p2RGBA<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break; 8469 case 42: cvtYUV420p2RGBA<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break; 8470 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break; 8471 }; 8472 } 8473 break; 8474 case CV_YUV2GRAY_420: 8475 { 8476 if (dcn <= 0) dcn = 1; 8477 8478 CV_Assert( dcn == 1 ); 8479 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); 8480 8481 Size dstSz(sz.width, sz.height * 2 / 3); 8482 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 8483 dst = _dst.getMat(); 8484 #if defined HAVE_IPP 8485 CV_IPP_CHECK() 8486 { 8487 if (ippStsNoErr == ippiCopy_8u_C1R(src.data, (int)src.step, dst.data, (int)dst.step, 8488 ippiSize(dstSz.width, dstSz.height))) 8489 { 8490 CV_IMPL_ADD(CV_IMPL_IPP); 8491 return; 8492 } 8493 setIppErrorStatus(); 8494 } 8495 #endif 8496 src(Range(0, dstSz.height), Range::all()).copyTo(dst); 8497 } 8498 break; 8499 case CV_RGB2YUV_YV12: case CV_BGR2YUV_YV12: case CV_RGBA2YUV_YV12: case CV_BGRA2YUV_YV12: 8500 case CV_RGB2YUV_IYUV: case CV_BGR2YUV_IYUV: case CV_RGBA2YUV_IYUV: case CV_BGRA2YUV_IYUV: 8501 { 8502 if (dcn <= 0) dcn = 1; 8503 const int bIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_BGR2YUV_YV12 || code == CV_BGRA2YUV_YV12) ? 0 : 2; 8504 const int uIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_RGB2YUV_IYUV || code == CV_RGBA2YUV_IYUV) ? 
1 : 2; 8505 8506 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U ); 8507 CV_Assert( dcn == 1 ); 8508 CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 ); 8509 8510 Size dstSz(sz.width, sz.height / 2 * 3); 8511 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 8512 dst = _dst.getMat(); 8513 8514 switch(bIdx + uIdx*10) 8515 { 8516 case 10: cvtRGBtoYUV420p<0, 1>(src, dst); break; 8517 case 12: cvtRGBtoYUV420p<2, 1>(src, dst); break; 8518 case 20: cvtRGBtoYUV420p<0, 2>(src, dst); break; 8519 case 22: cvtRGBtoYUV420p<2, 2>(src, dst); break; 8520 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break; 8521 }; 8522 } 8523 break; 8524 case CV_YUV2RGB_UYVY: case CV_YUV2BGR_UYVY: case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY: 8525 case CV_YUV2RGB_YUY2: case CV_YUV2BGR_YUY2: case CV_YUV2RGB_YVYU: case CV_YUV2BGR_YVYU: 8526 case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2: case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU: 8527 { 8528 //http://www.fourcc.org/yuv.php#UYVY 8529 //http://www.fourcc.org/yuv.php#YUY2 8530 //http://www.fourcc.org/yuv.php#YVYU 8531 8532 if (dcn <= 0) dcn = (code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2RGBA_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 4 : 3; 8533 const int bIdx = (code==CV_YUV2BGR_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2BGR_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2BGR_YVYU || code==CV_YUV2BGRA_YVYU) ? 0 : 2; 8534 const int ycn = (code==CV_YUV2RGB_UYVY || code==CV_YUV2BGR_UYVY || code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY) ? 1 : 0; 8535 const int uIdx = (code==CV_YUV2RGB_YVYU || code==CV_YUV2BGR_YVYU || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 
1 : 0; 8536 8537 CV_Assert( dcn == 3 || dcn == 4 ); 8538 CV_Assert( scn == 2 && depth == CV_8U ); 8539 8540 _dst.create(sz, CV_8UC(dcn)); 8541 dst = _dst.getMat(); 8542 8543 switch(dcn*1000 + bIdx*100 + uIdx*10 + ycn) 8544 { 8545 case 3000: cvtYUV422toRGB<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break; 8546 case 3001: cvtYUV422toRGB<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break; 8547 case 3010: cvtYUV422toRGB<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break; 8548 case 3011: cvtYUV422toRGB<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break; 8549 case 3200: cvtYUV422toRGB<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break; 8550 case 3201: cvtYUV422toRGB<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break; 8551 case 3210: cvtYUV422toRGB<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break; 8552 case 3211: cvtYUV422toRGB<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break; 8553 case 4000: cvtYUV422toRGBA<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break; 8554 case 4001: cvtYUV422toRGBA<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break; 8555 case 4010: cvtYUV422toRGBA<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break; 8556 case 4011: cvtYUV422toRGBA<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break; 8557 case 4200: cvtYUV422toRGBA<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break; 8558 case 4201: cvtYUV422toRGBA<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break; 8559 case 4210: cvtYUV422toRGBA<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break; 8560 case 4211: cvtYUV422toRGBA<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break; 8561 default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break; 8562 }; 8563 } 8564 break; 8565 case CV_YUV2GRAY_UYVY: case CV_YUV2GRAY_YUY2: 8566 { 8567 if (dcn <= 0) dcn = 1; 8568 8569 CV_Assert( dcn == 1 ); 8570 CV_Assert( scn == 2 && depth == CV_8U ); 8571 8572 extractChannel(_src, _dst, code == CV_YUV2GRAY_UYVY ? 
                      1 : 0);   // UYVY packs Y at byte offset 1, YUY2 at offset 0 -- extract that channel as gray
        }
        break;
    case CV_RGBA2mRGBA:
        {
            // RGBA -> RGBA with premultiplied alpha; implemented for 8-bit images only.
            if (dcn <= 0) dcn = 4;
            CV_Assert( scn == 4 && dcn == 4 );

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( depth == CV_8U )
            {
#if defined(HAVE_IPP)
                CV_IPP_CHECK()
                {
                    // IPP fast path; on failure record the IPP error and fall
                    // through to the generic per-pixel loop below.
                    if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiAlphaPremul_8u_AC4R)))
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                        return;
                    }
                    setIppErrorStatus();
                }
#endif
                CvtColorLoop(src, dst, RGBA2mRGBA<uchar>());
            }
            else
            {
                CV_Error( CV_StsBadArg, "Unsupported image depth" );
            }
        }
        break;
    case CV_mRGBA2RGBA:
        {
            // Premultiplied-alpha RGBA -> plain RGBA; 8-bit only (no IPP path here).
            if (dcn <= 0) dcn = 4;
            CV_Assert( scn == 4 && dcn == 4 );

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( depth == CV_8U )
                CvtColorLoop(src, dst, mRGBA2RGBA<uchar>());
            else
            {
                CV_Error( CV_StsBadArg, "Unsupported image depth" );
            }
        }
        break;
    default:
        CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
    }
}

/* Legacy C API wrapper around cv::cvtColor().
   The destination array must be preallocated by the caller with the proper
   size and channel count and the same depth as the source; the conversion is
   written directly into dstarr's buffer, and the final assertion checks that
   cv::cvtColor() did not have to reallocate it (i.e. the caller-supplied
   geometry matched what the conversion produced). */
CV_IMPL void
cvCvtColor( const CvArr* srcarr, CvArr* dstarr, int code )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
    CV_Assert( src.depth() == dst.depth() );

    cv::cvtColor(src, dst, code, dst.channels());
    CV_Assert( dst.data == dst0.data );
}


/* End of file. */