/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

#include <limits>

#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))

namespace cv
{


//////////////////////////// Bayer Pattern -> RGB conversion /////////////////////////////

template<typename T>
class SIMDBayerStubInterpolator_
{
public:
    int bayer2Gray(const T*, int, T*, int, int, int, int) const
    {
        return 0;
    }

    int bayer2RGB(const T*, int, T*, int, int) const
    {
        return 0;
    }

    int bayer2RGBA(const T*, int, T*, int, int) const
    {
        return 0;
    }

    int bayer2RGB_EA(const T*, int, T*, int, int) const
    {
        return 0;
    }
};

#if CV_SSE2
class SIMDBayerInterpolator_8u
{
public:
    SIMDBayerInterpolator_8u()
    {
        use_simd = checkHardwareSupport(CV_CPU_SSE2);
    }

    int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
                   int width, int bcoeff, int gcoeff, int rcoeff) const
    {
        if( !use_simd )
            return 0;

        // note: _b2y/_r2y look crossed but match the scalar loop in
        // Bayer2Gray_Invoker: b0/b1 below hold the corner sums (weighted by
        // rcoeff there) while r0/r1 hold the centre samples (weighted by bcoeff)
        __m128i _b2y = _mm_set1_epi16((short)(rcoeff*2));
        __m128i _g2y = _mm_set1_epi16((short)(gcoeff*2));
        __m128i _r2y = _mm_set1_epi16((short)(bcoeff*2));
        const uchar* bayer_end = bayer + width;

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
        {
            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));

            __m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7),
                                       _mm_srli_epi16(_mm_slli_epi16(r2, 8), 7));
            __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
            b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1);

            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7));
            __m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7);
            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
            g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2);

            r0 = _mm_srli_epi16(r1, 8);
            r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2);
            r0 = _mm_slli_epi16(r0, 3);

            g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y));
            g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y));
            g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y));
            g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y));
            g0 = _mm_srli_epi16(g0, 2);
            g1 = _mm_srli_epi16(g1, 2);
            g0 = _mm_packus_epi16(g0, g0);
            g1 = _mm_packus_epi16(g1, g1);
            g0 = _mm_unpacklo_epi8(g0, g1);
            _mm_storeu_si128((__m128i*)dst, g0);
        }

        return (int)(bayer - (bayer_end - width));
    }
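    /*
      Editor's note, a hedged aside rather than part of the library: bayer2Gray
      above and the scalar loop in Bayer2Gray_Invoker both evaluate
      Y = 0.299*R + 0.587*G + 0.114*B in Q14 fixed point. The constants used
      below (B2Y = 1868, G2Y = 9617, R2Y = 4899) sum to exactly 16384 = 2^14,
      so a minimal scalar sketch of the same conversion (bgr2gray_q14 is an
      illustrative name, not an OpenCV function) is:

          static inline uchar bgr2gray_q14(uchar b, uchar g, uchar r)
          {
              // CV_DESCALE(x,14) == (x + (1 << 13)) >> 14, i.e. round(x / 2^14)
              return (uchar)CV_DESCALE(b*1868 + g*9617 + r*4899, 14);
          }

      For example, b == g == r == 255 gives 255*(1868 + 9617 + 4899)
      == 255*16384, which descales back to exactly 255.
    */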
    int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        if( !use_simd )
            return 0;
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
        */

        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
        __m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128();
        __m128i masklo = _mm_set1_epi16(0x00ff);
        const uchar* bayer_end = bayer + width;

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
        {
            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));

            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo));
            __m128i nextb1 = _mm_srli_si128(b1, 2);
            __m128i b0 = _mm_add_epi16(b1, nextb1);
            b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
            // b0 b2 ... b14 b1 b3 ... b15
            b0 = _mm_packus_epi16(b0, b1);

            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8));
            __m128i g1 = _mm_and_si128(r1, masklo);
            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
            g1 = _mm_srli_si128(g1, 2);
            g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2);
            // g0 g2 ... g14 g1 g3 ... g15
            g0 = _mm_packus_epi16(g0, g1);

            r0 = _mm_srli_epi16(r1, 8);
            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
            // r0 r2 ... r14 r1 r3 ... r15
            r0 = _mm_packus_epi16(r0, r1);

            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
            b0 = _mm_xor_si128(b0, b1);
            r0 = _mm_xor_si128(r0, b1);

            // b1 g1 b3 g3 b5 g5...
            b1 = _mm_unpackhi_epi8(b0, g0);
            // b0 g0 b2 g2 b4 g4 ....
            b0 = _mm_unpacklo_epi8(b0, g0);

            // r1 0 r3 0 r5 0 ...
            r1 = _mm_unpackhi_epi8(r0, z);
            // r0 0 r2 0 r4 0 ...
            r0 = _mm_unpacklo_epi8(r0, z);

            // 0 b0 g0 r0 0 b2 g2 r2 ...
            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
            // 0 b8 g8 r8 0 b10 g10 r10 ...
            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);

            // b1 g1 r1 0 b3 g3 r3 0 ...
            r0 = _mm_unpacklo_epi16(b1, r1);
            // b9 g9 r9 0 b11 g11 r11 0 ...
            r1 = _mm_unpackhi_epi16(b1, r1);

            // 0 b0 g0 r0 b1 g1 r1 0 ...
            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
            // 0 b4 g4 r4 b5 g5 r5 0 ...
            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);

            _mm_storel_epi64((__m128i*)(dst-1+0), b0);
            _mm_storel_epi64((__m128i*)(dst-1+6*1), _mm_srli_si128(b0, 8));
            _mm_storel_epi64((__m128i*)(dst-1+6*2), b1);
            _mm_storel_epi64((__m128i*)(dst-1+6*3), _mm_srli_si128(b1, 8));

            // 0 b8 g8 r8 b9 g9 r9 0 ...
            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
            // 0 b12 g12 r12 b13 g13 r13 0 ...
            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);

            _mm_storel_epi64((__m128i*)(dst-1+6*4), g0);
            _mm_storel_epi64((__m128i*)(dst-1+6*5), _mm_srli_si128(g0, 8));

            _mm_storel_epi64((__m128i*)(dst-1+6*6), g1);
        }

        return (int)(bayer - (bayer_end - width));
    }

    int bayer2RGBA(const uchar*, int, uchar*, int, int) const
    {
        return 0;
    }
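    /*
      Editor's note: bayer2RGB above picks BGR vs. RGB channel order without a
      branch. With mask set to all ones when blue < 0, the XOR triple

          b1 = (b0 ^ r0) & mask;  b0 ^= b1;  r0 ^= b1;

      swaps the packed blue and red vectors lane by lane, and is a no-op when
      mask is zero. A scalar sketch of the same conditional swap (cswap16 is an
      illustrative helper, not part of this file):

          static inline void cswap16(ushort& a, ushort& b, ushort mask) // mask: 0 or 0xffff
          {
              ushort t = (ushort)((a ^ b) & mask);
              a ^= t;
              b ^= t;
          }
    */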
    int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        if (!use_simd)
            return 0;

        const uchar* bayer_end = bayer + width;
        __m128i masklow = _mm_set1_epi16(0x00ff);
        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
        __m128i full = _mm_set1_epi16(-1), z = _mm_setzero_si128();
        __m128i mask = _mm_set1_epi16(blue > 0 ? -1 : 0);

        for ( ; bayer <= bayer_end - 18; bayer += 14, dst += 42)
        {
            /*
             B G B G | B G B G | B G B G | B G B G
             G R G R | G R G R | G R G R | G R G R
             B G B G | B G B G | B G B G | B G B G
            */

            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));

            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklow), _mm_and_si128(r2, masklow));
            __m128i nextb1 = _mm_srli_si128(b1, 2);
            __m128i b0 = _mm_add_epi16(b1, nextb1);
            b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
            // b0 b2 ... b14 b1 b3 ... b15
            b0 = _mm_packus_epi16(b0, b1);

            // vertical sum
            __m128i r0g = _mm_srli_epi16(r0, 8);
            __m128i r2g = _mm_srli_epi16(r2, 8);
            __m128i sumv = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(r0g, r2g), delta1), 1);
            // horizontal sum
            __m128i g1 = _mm_and_si128(masklow, r1);
            __m128i nextg1 = _mm_srli_si128(g1, 2);
            __m128i sumg = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(g1, nextg1), delta1), 1);

            // gradients
            __m128i gradv = _mm_adds_epi16(_mm_subs_epu16(r0g, r2g), _mm_subs_epu16(r2g, r0g));
            __m128i gradg = _mm_adds_epi16(_mm_subs_epu16(nextg1, g1), _mm_subs_epu16(g1, nextg1));
            __m128i gmask = _mm_cmpgt_epi16(gradg, gradv);

            __m128i g0 = _mm_add_epi16(_mm_and_si128(gmask, sumv), _mm_and_si128(sumg, _mm_xor_si128(gmask, full)));
            // g0 g2 ... g14 g1 g3 ...
            g0 = _mm_packus_epi16(g0, nextg1);
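            // Editor's note: the gmask/sumv/sumg lines above are a branchless
            // select. Per 16-bit lane they compute the same edge-aware rule as
            // the scalar loop in Bayer2RGB_EdgeAware_T_Invoker further down:
            //
            //     g = (|left - right| > |up - down|) ? (up + down + 1) / 2
            //                                        : (left + right + 1) / 2;
            //
            // gmask is all ones where the horizontal gradient dominates, so
            // (gmask & sumv) + (~gmask & sumg) selects exactly one of the two
            // rounded averages in every lane.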
            r0 = _mm_srli_epi16(r1, 8);
            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
            // r0 r2 ... r14 r1 r3 ... r15
            r0 = _mm_packus_epi16(r0, r1);

            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
            b0 = _mm_xor_si128(b0, b1);
            r0 = _mm_xor_si128(r0, b1);

            // b1 g1 b3 g3 b5 g5...
            b1 = _mm_unpackhi_epi8(b0, g0);
            // b0 g0 b2 g2 b4 g4 ....
            b0 = _mm_unpacklo_epi8(b0, g0);

            // r1 0 r3 0 r5 0 ...
            r1 = _mm_unpackhi_epi8(r0, z);
            // r0 0 r2 0 r4 0 ...
            r0 = _mm_unpacklo_epi8(r0, z);

            // 0 b0 g0 r0 0 b2 g2 r2 ...
            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
            // 0 b8 g8 r8 0 b10 g10 r10 ...
            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);

            // b1 g1 r1 0 b3 g3 r3 0 ...
            r0 = _mm_unpacklo_epi16(b1, r1);
            // b9 g9 r9 0 b11 g11 r11 0 ...
            r1 = _mm_unpackhi_epi16(b1, r1);

            // 0 b0 g0 r0 b1 g1 r1 0 ...
            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
            // 0 b4 g4 r4 b5 g5 r5 0 ...
            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);

            _mm_storel_epi64((__m128i*)(dst+0), b0);
            _mm_storel_epi64((__m128i*)(dst+6*1), _mm_srli_si128(b0, 8));
            _mm_storel_epi64((__m128i*)(dst+6*2), b1);
            _mm_storel_epi64((__m128i*)(dst+6*3), _mm_srli_si128(b1, 8));

            // 0 b8 g8 r8 b9 g9 r9 0 ...
            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
            // 0 b12 g12 r12 b13 g13 r13 0 ...
            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);

            _mm_storel_epi64((__m128i*)(dst+6*4), g0);
            _mm_storel_epi64((__m128i*)(dst+6*5), _mm_srli_si128(g0, 8));

            _mm_storel_epi64((__m128i*)(dst+6*6), g1);
        }

        return int(bayer - (bayer_end - width));
    }

    bool use_simd;
};
#elif CV_NEON
class SIMDBayerInterpolator_8u
{
public:
    SIMDBayerInterpolator_8u()
    {
    }

    int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
                   int width, int bcoeff, int gcoeff, int rcoeff) const
    {
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
        */

        uint16x8_t masklo = vdupq_n_u16(255);
        const uchar* bayer_end = bayer + width;

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
        {
            uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
            uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
            uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));

            uint16x8_t b1_ = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
            uint16x8_t b1 = vextq_u16(b1_, b1_, 1);
            uint16x8_t b0 = vaddq_u16(b1_, b1);
            // b0 = b0 b2 b4 ...
            // b1 = b1 b3 b5 ...

            uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
            uint16x8_t g1 = vandq_u16(r1, masklo);
            g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
            uint16x8_t rot = vextq_u16(g1, g1, 1);
            g1 = vshlq_n_u16(rot, 2);
            // g0 = g0 g2 g4 ...
            // g1 = g1 g3 g5 ...

            r0 = vshrq_n_u16(r1, 8);
            r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
            r0 = vshlq_n_u16(r0, 2);
            // r0 = r0 r2 r4 ...
            // r1 = r1 r3 r5 ...

            b0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b0), (short)(rcoeff*2)));
            b1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b1), (short)(rcoeff*4)));

            g0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g0), (short)(gcoeff*2)));
            g1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g1), (short)(gcoeff*2)));

            r0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r0), (short)(bcoeff*2)));
            r1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r1), (short)(bcoeff*4)));

            g0 = vaddq_u16(vaddq_u16(g0, b0), r0);
            g1 = vaddq_u16(vaddq_u16(g1, b1), r1);

            uint8x8x2_t p = vzip_u8(vrshrn_n_u16(g0, 2), vrshrn_n_u16(g1, 2));
            vst1_u8(dst, p.val[0]);
            vst1_u8(dst + 8, p.val[1]);
        }

        return (int)(bayer - (bayer_end - width));
    }
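    /*
      Editor's note: the grayscale weighting above leans on vqdmulhq_n_s16,
      which returns (2*a*w) >> 16 with saturation. Passing w = coeff*2 therefore
      yields (a*coeff) >> 14, i.e. the same Q14 scale as CV_DESCALE(..., 14) in
      the scalar path; the coeff*4 variants and the pre-shifts compensate for
      terms accumulated from fewer samples, keeping every partial product on a
      common scale before the final vrshrn_n_u16(..., 2) rounding shift.
    */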
    int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
        */
        uint16x8_t masklo = vdupq_n_u16(255);
        uint8x16x3_t pix;
        const uchar* bayer_end = bayer + width;

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
        {
            uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
            uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
            uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));

            uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
            uint16x8_t nextb1 = vextq_u16(b1, b1, 1);
            uint16x8_t b0 = vaddq_u16(b1, nextb1);
            // b0 b1 b2 ...
            uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1));
            pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]);

            uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
            uint16x8_t g1 = vandq_u16(r1, masklo);
            g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
            g1 = vextq_u16(g1, g1, 1);
            // g0 g1 g2 ...
            uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1));
            pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]);

            r0 = vshrq_n_u16(r1, 8);
            r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
            // r0 r1 r2 ...
            uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1));
            pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]);

            vst3q_u8(dst-1, pix);
        }

        return (int)(bayer - (bayer_end - width));
    }
    int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
        */
        uint16x8_t masklo = vdupq_n_u16(255);
        uint8x16x4_t pix;
        const uchar* bayer_end = bayer + width;
        pix.val[3] = vdupq_n_u8(255);

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
        {
            uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
            uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
            uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));

            uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
            uint16x8_t nextb1 = vextq_u16(b1, b1, 1);
            uint16x8_t b0 = vaddq_u16(b1, nextb1);
            // b0 b1 b2 ...
            uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1));
            pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]);

            uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
            uint16x8_t g1 = vandq_u16(r1, masklo);
            g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
            g1 = vextq_u16(g1, g1, 1);
            // g0 g1 g2 ...
            uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1));
            pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]);

            r0 = vshrq_n_u16(r1, 8);
            r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
            // r0 r1 r2 ...
            uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1));
            pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]);

            vst4q_u8(dst-1, pix);
        }

        return (int)(bayer - (bayer_end - width));
    }

    int bayer2RGB_EA(const uchar*, int, uchar*, int, int) const
    {
        return 0;
    }
};
#else
typedef SIMDBayerStubInterpolator_<uchar> SIMDBayerInterpolator_8u;
#endif


template<typename T, class SIMDInterpolator>
class Bayer2Gray_Invoker :
    public ParallelLoopBody
{
public:
    Bayer2Gray_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, bool _brow,
                       const Size& _size, int _bcoeff, int _rcoeff) :
        ParallelLoopBody(), srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green),
        Brow(_brow), size(_size), Bcoeff(_bcoeff), Rcoeff(_rcoeff)
    {
    }

    virtual void operator ()(const Range& range) const
    {
        SIMDInterpolator vecOp;
        const int G2Y = 9617;
        const int SHIFT = 14;

        const T* bayer0 = srcmat.ptr<T>();
        int bayer_step = (int)(srcmat.step/sizeof(T));
        T* dst0 = (T*)dstmat.data;
        int dst_step = (int)(dstmat.step/sizeof(T));
        int bcoeff = Bcoeff, rcoeff = Rcoeff;
        int start_with_green = Start_with_green;
        bool brow = Brow;

        dst0 += dst_step + 1;

        if (range.start % 2)
        {
            brow = !brow;
            std::swap(bcoeff, rcoeff);
            start_with_green = !start_with_green;
        }

        bayer0 += range.start * bayer_step;
        dst0 += range.start * dst_step;

        for(int i = range.start ; i < range.end; ++i, bayer0 += bayer_step, dst0 += dst_step )
        {
            unsigned t0, t1, t2;
            const T* bayer = bayer0;
            T* dst = dst0;
            const T* bayer_end = bayer + size.width;

            if( size.width <= 0 )
            {
                dst[-1] = dst[size.width] = 0;
                continue;
            }

            if( start_with_green )
            {
                t0 = (bayer[1] + bayer[bayer_step*2+1])*rcoeff;
                t1 = (bayer[bayer_step] + bayer[bayer_step+2])*bcoeff;
                t2 = bayer[bayer_step+1]*(2*G2Y);

                dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
                bayer++;
                dst++;
            }

            int delta = vecOp.bayer2Gray(bayer, bayer_step, dst, size.width, bcoeff, G2Y, rcoeff);
            bayer += delta;
            dst += delta;

            for( ; bayer <= bayer_end - 2; bayer += 2, dst += 2 )
            {
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
                t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
                t2 = bayer[bayer_step+1]*(4*bcoeff);
                dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);

                t0 = (bayer[2] + bayer[bayer_step*2+2])*rcoeff;
                t1 = (bayer[bayer_step+1] + bayer[bayer_step+3])*bcoeff;
                t2 = bayer[bayer_step+2]*(2*G2Y);
                dst[1] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
            }

            if( bayer < bayer_end )
            {
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
                t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
                t2 = bayer[bayer_step+1]*(4*bcoeff);
                dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);
                bayer++;
                dst++;
            }

            dst0[-1] = dst0[0];
            dst0[size.width] = dst0[size.width-1];

            brow = !brow;
            std::swap(bcoeff, rcoeff);
            start_with_green = !start_with_green;
        }
    }

private:
    Mat srcmat;
    Mat dstmat;
    int Start_with_green;
    bool Brow;
    Size size;
    int Bcoeff, Rcoeff;
};
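/*
  Editor's note: parallel_for_ may start a stripe on any row, so operator()
  above re-derives row parity from range.start: an odd first row flips brow
  and start_with_green and swaps the B/R coefficients, and the same three
  toggles run again at the bottom of each row iteration. A minimal sketch of
  how Bayer2Gray_ below drives the invoker over the interior rows:

      Range range(0, size.height);   // size has already been cropped by 2
      Bayer2Gray_Invoker<uchar, SIMDBayerInterpolator_8u>
          invoker(srcmat, dstmat, start_with_green, brow, size, bcoeff, rcoeff);
      parallel_for_(range, invoker);
*/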
template<typename T, typename SIMDInterpolator>
static void Bayer2Gray_( const Mat& srcmat, Mat& dstmat, int code )
{
    const int R2Y = 4899;
    const int B2Y = 1868;

    Size size = srcmat.size();
    int bcoeff = B2Y, rcoeff = R2Y;
    int start_with_green = code == CV_BayerGB2GRAY || code == CV_BayerGR2GRAY;
    bool brow = true;

    if( code != CV_BayerBG2GRAY && code != CV_BayerGB2GRAY )
    {
        brow = false;
        std::swap(bcoeff, rcoeff);
    }
    size.height -= 2;
    size.width -= 2;

    if (size.height > 0)
    {
        Range range(0, size.height);
        Bayer2Gray_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat,
            start_with_green, brow, size, bcoeff, rcoeff);
        parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16));
    }

    size = dstmat.size();
    T* dst0 = dstmat.ptr<T>();
    int dst_step = (int)(dstmat.step/sizeof(T));
    if( size.height > 2 )
        for( int i = 0; i < size.width; i++ )
        {
            dst0[i] = dst0[i + dst_step];
            dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
        }
    else
        for( int i = 0; i < size.width; i++ )
            dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
}

template <typename T>
struct Alpha
{
    static T value() { return std::numeric_limits<T>::max(); }
};

template <>
struct Alpha<float>
{
    static float value() { return 1.0f; }
};
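/*
  Editor's note: Alpha<T>::value() supplies the opaque alpha written to the
  fourth channel when dcn == 4: std::numeric_limits<T>::max() for the integer
  instantiations and 1.0f for float. For the depths this file actually
  dispatches on:

      uchar  a8  = Alpha<uchar>::value();   // 255
      ushort a16 = Alpha<ushort>::value();  // 65535
*/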
template <typename T, typename SIMDInterpolator>
class Bayer2RGB_Invoker :
    public ParallelLoopBody
{
public:
    Bayer2RGB_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, int _blue, const Size& _size) :
        ParallelLoopBody(),
        srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green), Blue(_blue), size(_size)
    {
    }

    virtual void operator() (const Range& range) const
    {
        SIMDInterpolator vecOp;
        T alpha = Alpha<T>::value();
        int dcn = dstmat.channels();
        int dcn2 = dcn << 1;

        int bayer_step = (int)(srcmat.step/sizeof(T));
        const T* bayer0 = srcmat.ptr<T>() + bayer_step * range.start;

        int dst_step = (int)(dstmat.step/sizeof(T));
        T* dst0 = reinterpret_cast<T*>(dstmat.data) + (range.start + 1) * dst_step + dcn + 1;

        int blue = Blue, start_with_green = Start_with_green;
        if (range.start % 2)
        {
            blue = -blue;
            start_with_green = !start_with_green;
        }

        for (int i = range.start; i < range.end; bayer0 += bayer_step, dst0 += dst_step, ++i )
        {
            int t0, t1;
            const T* bayer = bayer0;
            T* dst = dst0;
            const T* bayer_end = bayer + size.width;

            // degenerate case: the original image is no wider than 2 pixels
            // (size.width <= 0 after the 2-pixel crop)
            if( size.width <= 0 )
            {
                if (dcn == 3)
                {
                    dst[-4] = dst[-3] = dst[-2] = dst[size.width*dcn-1] =
                        dst[size.width*dcn] = dst[size.width*dcn+1] = 0;
                }
                else
                {
                    dst[-5] = dst[-4] = dst[-3] = dst[size.width*dcn-1] =
                        dst[size.width*dcn] = dst[size.width*dcn+1] = 0;
                    dst[-2] = dst[size.width*dcn+2] = alpha;
                }
                continue;
            }

            if( start_with_green )
            {
                t0 = (bayer[1] + bayer[bayer_step*2+1] + 1) >> 1;
                t1 = (bayer[bayer_step] + bayer[bayer_step+2] + 1) >> 1;

                dst[-blue] = (T)t0;
                dst[0] = bayer[bayer_step+1];
                dst[blue] = (T)t1;
                if (dcn == 4)
                    dst[2] = alpha; // alpha channel

                bayer++;
                dst += dcn;
            }

            // use the SIMD fast path where available (bayer2RGBA is a stub
            // except on NEON, so dcn == 4 usually falls through to scalar code)
            int delta = dcn == 4 ?
                vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue) :
                vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue);
            bayer += delta;
            dst += delta*dcn;

            if (dcn == 3) // Bayer to BGR
            {
                if( blue > 0 )
                {
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[-1] = (T)t0;
                        dst[0] = (T)t1;
                        dst[1] = bayer[bayer_step+1];

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[2] = (T)t0;
                        dst[3] = bayer[bayer_step+2];
                        dst[4] = (T)t1;
                    }
                }
                else
                {
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[1] = (T)t0;
                        dst[0] = (T)t1;
                        dst[-1] = bayer[bayer_step+1];

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[4] = (T)t0;
                        dst[3] = bayer[bayer_step+2];
                        dst[2] = (T)t1;
                    }
                }
            }
            else // Bayer to BGRA
            {
                // if current row does not contain Blue pixels
                if( blue > 0 )
                {
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[-1] = (T)t0;
                        dst[0] = (T)t1;
                        dst[1] = bayer[bayer_step+1];
                        dst[2] = alpha; // alpha channel

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[3] = (T)t0;
                        dst[4] = bayer[bayer_step+2];
                        dst[5] = (T)t1;
                        dst[6] = alpha; // alpha channel
                    }
                }
                else // if current row contains Blue pixels
                {
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[-1] = bayer[bayer_step+1];
                        dst[0] = (T)t1;
                        dst[1] = (T)t0;
                        dst[2] = alpha; // alpha channel

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[3] = (T)t1;
                        dst[4] = bayer[bayer_step+2];
                        dst[5] = (T)t0;
                        dst[6] = alpha; // alpha channel
                    }
                }
            }

            // handle one remaining pixel at the end of the row
            if( bayer < bayer_end )
            {
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                      bayer[bayer_step*2+2] + 2) >> 2;
                t1 = (bayer[1] + bayer[bayer_step] +
                      bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                dst[-blue] = (T)t0;
                dst[0] = (T)t1;
                dst[blue] = bayer[bayer_step+1];
                if (dcn == 4)
                    dst[2] = alpha; // alpha channel
                bayer++;
                dst += dcn;
            }

            // fill the first and last pixels of the row accordingly
            if (dcn == 3)
            {
                dst0[-4] = dst0[-1];
                dst0[-3] = dst0[0];
                dst0[-2] = dst0[1];
                dst0[size.width*dcn-1] = dst0[size.width*dcn-4];
                dst0[size.width*dcn] = dst0[size.width*dcn-3];
                dst0[size.width*dcn+1] = dst0[size.width*dcn-2];
            }
            else
            {
                dst0[-5] = dst0[-1];
                dst0[-4] = dst0[0];
                dst0[-3] = dst0[1];
                dst0[-2] = dst0[2]; // alpha channel
                dst0[size.width*dcn-1] = dst0[size.width*dcn-5];
                dst0[size.width*dcn] = dst0[size.width*dcn-4];
                dst0[size.width*dcn+1] = dst0[size.width*dcn-3];
                dst0[size.width*dcn+2] = dst0[size.width*dcn-2]; // alpha channel
            }

            blue = -blue;
            start_with_green = !start_with_green;
        }
    }

private:
    Mat srcmat;
    Mat dstmat;
    int Start_with_green, Blue;
    Size size;
};

template<typename T, class SIMDInterpolator>
static void Bayer2RGB_( const Mat& srcmat, Mat& dstmat, int code )
{
    int dst_step = (int)(dstmat.step/sizeof(T));
    Size size = srcmat.size();
    int blue = code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ? -1 : 1;
    int start_with_green = code == CV_BayerGB2BGR || code == CV_BayerGR2BGR;

    int dcn = dstmat.channels();
    size.height -= 2;
    size.width -= 2;

    if (size.height > 0)
    {
        Range range(0, size.height);
        Bayer2RGB_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat, start_with_green, blue, size);
        parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16));
    }

    // filling the first and the last rows
    size = dstmat.size();
    T* dst0 = dstmat.ptr<T>();
    if( size.height > 2 )
        for( int i = 0; i < size.width*dcn; i++ )
        {
            dst0[i] = dst0[i + dst_step];
            dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
        }
    else
        for( int i = 0; i < size.width*dcn; i++ )
            dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
}


/////////////////// Demosaicing using Variable Number of Gradients ///////////////////////

static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code )
{
    const uchar* bayer = srcmat.ptr();
    int bstep = (int)srcmat.step;
    uchar* dst = dstmat.ptr();
    int dststep = (int)dstmat.step;
    Size size = srcmat.size();

    int blueIdx = code == CV_BayerBG2BGR_VNG || code == CV_BayerGB2BGR_VNG ? 0 : 2;
    bool greenCell0 = code != CV_BayerBG2BGR_VNG && code != CV_BayerRG2BGR_VNG;

    // for too small images use the simple interpolation algorithm
    if( MIN(size.width, size.height) < 8 )
    {
        Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>( srcmat, dstmat, code );
        return;
    }

    const int brows = 3, bcn = 7;
    int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7;
    int i, bufstep = N7*bcn;
    cv::AutoBuffer<ushort> _buf(bufstep*brows);
    ushort* buf = (ushort*)_buf;

    bayer += bstep*2;

#if CV_SSE2
    bool haveSSE = cv::checkHardwareSupport(CV_CPU_SSE2);
    #define _mm_absdiff_epu16(a,b) _mm_adds_epu16(_mm_subs_epu16(a, b), _mm_subs_epu16(b, a))
#endif
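    /*
      Editor's note: buf caches brows (= 3) rows of intermediate data, bcn (= 7)
      ushort planes per row; brow[N*k + x] holds, for column x:

          k = 0 : north/south gradient        k = 1 : west/east gradient
          k = 2 : 2*|NE - SW|                 k = 3 : 2*|NW - SE|
          k = 4 : full NE/SW gradient         k = 5 : full NW/SE gradient
          k = 6 : (N + W + E + S) / 2, reused as a green estimate along the
                  diagonals of non-green cells

      _mm_absdiff_epu16 is the usual saturating trick: _mm_subs_epu16 clamps a
      negative difference to zero, so at most one of (a-b) and (b-a) is
      non-zero and their saturating sum equals |a - b|. E.g. a = 3, b = 10:
      subs gives 0 and 7, and 0 + 7 == |3 - 10|.
    */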
    for( int y = 2; y < size.height - 4; y++ )
    {
        uchar* dstrow = dst + dststep*y + 6;
        const uchar* srow;

        for( int dy = (y == 2 ? -1 : 1); dy <= 1; dy++ )
        {
            ushort* brow = buf + ((y + dy - 1)%brows)*bufstep + 1;
            srow = bayer + (y+dy)*bstep + 1;

            for( i = 0; i < bcn; i++ )
                brow[N*i-1] = brow[(N-2) + N*i] = 0;

            i = 1;

#if CV_SSE2
            if( haveSSE )
            {
                __m128i z = _mm_setzero_si128();
                for( ; i <= N-9; i += 8, srow += 8, brow += 8 )
                {
                    __m128i s1, s2, s3, s4, s6, s7, s8, s9;

                    s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1-bstep)),z);
                    s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-bstep)),z);
                    s3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1-bstep)),z);

                    s4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1)),z);
                    s6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1)),z);

                    s7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1+bstep)),z);
                    s8 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+bstep)),z);
                    s9 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1+bstep)),z);

                    __m128i b0, b1, b2, b3, b4, b5, b6;

                    b0 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s2,s8),1),
                                        _mm_adds_epu16(_mm_absdiff_epu16(s1, s7),
                                                       _mm_absdiff_epu16(s3, s9)));
                    b1 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s4,s6),1),
                                        _mm_adds_epu16(_mm_absdiff_epu16(s1, s3),
                                                       _mm_absdiff_epu16(s7, s9)));
                    b2 = _mm_slli_epi16(_mm_absdiff_epu16(s3,s7),1);
                    b3 = _mm_slli_epi16(_mm_absdiff_epu16(s1,s9),1);

                    _mm_storeu_si128((__m128i*)brow, b0);
                    _mm_storeu_si128((__m128i*)(brow + N), b1);
                    _mm_storeu_si128((__m128i*)(brow + N2), b2);
                    _mm_storeu_si128((__m128i*)(brow + N3), b3);

                    b4 = _mm_adds_epu16(b2,_mm_adds_epu16(_mm_absdiff_epu16(s2, s4),
                                                          _mm_absdiff_epu16(s6, s8)));
                    b5 = _mm_adds_epu16(b3,_mm_adds_epu16(_mm_absdiff_epu16(s2, s6),
                                                          _mm_absdiff_epu16(s4, s8)));
                    b6 = _mm_adds_epu16(_mm_adds_epu16(s2, s4), _mm_adds_epu16(s6, s8));
                    b6 = _mm_srli_epi16(b6, 1);

                    _mm_storeu_si128((__m128i*)(brow + N4), b4);
                    _mm_storeu_si128((__m128i*)(brow + N5), b5);
                    _mm_storeu_si128((__m128i*)(brow + N6), b6);
                }
            }
#endif

            for( ; i < N-1; i++, srow++, brow++ )
            {
                brow[0] = (ushort)(std::abs(srow[-1-bstep] - srow[-1+bstep]) +
                                   std::abs(srow[-bstep] - srow[+bstep])*2 +
                                   std::abs(srow[1-bstep] - srow[1+bstep]));
                brow[N] = (ushort)(std::abs(srow[-1-bstep] - srow[1-bstep]) +
                                   std::abs(srow[-1] - srow[1])*2 +
                                   std::abs(srow[-1+bstep] - srow[1+bstep]));
                brow[N2] = (ushort)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2);
                brow[N3] = (ushort)(std::abs(srow[-1-bstep] - srow[1+bstep])*2);
                brow[N4] = (ushort)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) +
                                    std::abs(srow[+bstep] - srow[1]));
                brow[N5] = (ushort)(brow[N3] + std::abs(srow[-bstep] - srow[1]) +
                                    std::abs(srow[+bstep] - srow[-1]));
                brow[N6] = (ushort)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1);
            }
        }

        const ushort* brow0 = buf + ((y - 2) % brows)*bufstep + 2;
        const ushort* brow1 = buf + ((y - 1) % brows)*bufstep + 2;
        const ushort* brow2 = buf + (y % brows)*bufstep + 2;
        static const float scale[] = { 0.f, 0.5f, 0.25f, 0.1666666666667f, 0.125f, 0.1f, 0.08333333333f, 0.0714286f, 0.0625f };
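        // Editor's note: scale[ng] == 1/(2*ng) for ng = 1..8 (scale[0] is a
        // placeholder; ng == 0 cannot occur, since T > minGrad guarantees the
        // direction attaining minGrad passes its threshold test). The factor
        // of 2 cancels the doubled weights used when the directional sums are
        // accumulated, so e.g. G = R + cvRound((Gs - Rs)*scale[ng]) averages
        // the ng admitted directions.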
        srow = bayer + y*bstep + 2;
        bool greenCell = greenCell0;

        i = 2;
#if CV_SSE2
        int limit = !haveSSE ? N-2 : greenCell ? std::min(3, N-2) : 2;
#else
        int limit = N - 2;
#endif

        do
        {
            for( ; i < limit; i++, srow++, brow0++, brow1++, brow2++, dstrow += 3 )
            {
                int gradN = brow0[0] + brow1[0];
                int gradS = brow1[0] + brow2[0];
                int gradW = brow1[N-1] + brow1[N];
                int gradE = brow1[N] + brow1[N+1];
                int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
                int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
                int R, G, B;

                if( !greenCell )
                {
                    int gradNE = brow0[N4+1] + brow1[N4];
                    int gradSW = brow1[N4] + brow2[N4-1];
                    int gradNW = brow0[N5-1] + brow1[N5];
                    int gradSE = brow1[N5] + brow2[N5+1];

                    minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);
                    maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);
                    int T = minGrad + MAX(maxGrad/2, 1);

                    int Rs = 0, Gs = 0, Bs = 0, ng = 0;
                    if( gradN < T )
                    {
                        Rs += srow[-bstep*2] + srow[0];
                        Gs += srow[-bstep]*2;
                        Bs += srow[-bstep-1] + srow[-bstep+1];
                        ng++;
                    }
                    if( gradS < T )
                    {
                        Rs += srow[bstep*2] + srow[0];
                        Gs += srow[bstep]*2;
                        Bs += srow[bstep-1] + srow[bstep+1];
                        ng++;
                    }
                    if( gradW < T )
                    {
                        Rs += srow[-2] + srow[0];
                        Gs += srow[-1]*2;
                        Bs += srow[-bstep-1] + srow[bstep-1];
                        ng++;
                    }
                    if( gradE < T )
                    {
                        Rs += srow[2] + srow[0];
                        Gs += srow[1]*2;
                        Bs += srow[-bstep+1] + srow[bstep+1];
                        ng++;
                    }
                    if( gradNE < T )
                    {
                        Rs += srow[-bstep*2+2] + srow[0];
                        Gs += brow0[N6+1];
                        Bs += srow[-bstep+1]*2;
                        ng++;
                    }
                    if( gradSW < T )
                    {
                        Rs += srow[bstep*2-2] + srow[0];
                        Gs += brow2[N6-1];
                        Bs += srow[bstep-1]*2;
                        ng++;
                    }
                    if( gradNW < T )
                    {
                        Rs += srow[-bstep*2-2] + srow[0];
                        Gs += brow0[N6-1];
                        Bs += srow[-bstep+1]*2;
                        ng++;
                    }
                    if( gradSE < T )
                    {
                        Rs += srow[bstep*2+2] + srow[0];
                        Gs += brow2[N6+1];
                        Bs += srow[-bstep+1]*2;
                        ng++;
                    }
                    R = srow[0];
                    G = R + cvRound((Gs - Rs)*scale[ng]);
                    B = R + cvRound((Bs - Rs)*scale[ng]);
                }
                else
                {
                    int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
                    int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
                    int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
                    int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];

                    minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);
                    maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);
                    int T = minGrad + MAX(maxGrad/2, 1);

                    int Rs = 0, Gs = 0, Bs = 0, ng = 0;
                    if( gradN < T )
                    {
                        Rs += srow[-bstep*2-1] + srow[-bstep*2+1];
                        Gs += srow[-bstep*2] + srow[0];
                        Bs += srow[-bstep]*2;
                        ng++;
                    }
                    if( gradS < T )
                    {
                        Rs += srow[bstep*2-1] + srow[bstep*2+1];
                        Gs += srow[bstep*2] + srow[0];
                        Bs += srow[bstep]*2;
                        ng++;
                    }
                    if( gradW < T )
                    {
                        Rs += srow[-1]*2;
                        Gs += srow[-2] + srow[0];
                        Bs += srow[-bstep-2]+srow[bstep-2];
                        ng++;
                    }
                    if( gradE < T )
                    {
                        Rs += srow[1]*2;
                        Gs += srow[2] + srow[0];
                        Bs += srow[-bstep+2]+srow[bstep+2];
                        ng++;
                    }
                    if( gradNE < T )
                    {
                        Rs += srow[-bstep*2+1] + srow[1];
                        Gs += srow[-bstep+1]*2;
                        Bs += srow[-bstep] + srow[-bstep+2];
                        ng++;
                    }
                    if( gradSW < T )
                    {
                        Rs += srow[bstep*2-1] + srow[-1];
                        Gs += srow[bstep-1]*2;
                        Bs += srow[bstep] + srow[bstep-2];
                        ng++;
                    }
                    if( gradNW < T )
                    {
                        Rs += srow[-bstep*2-1] + srow[-1];
                        Gs += srow[-bstep-1]*2;
                        Bs += srow[-bstep-2]+srow[-bstep];
                        ng++;
                    }
                    if( gradSE < T )
                    {
                        Rs += srow[bstep*2+1] + srow[1];
                        Gs += srow[bstep+1]*2;
                        Bs += srow[bstep+2]+srow[bstep];
                        ng++;
                    }
                    G = srow[0];
                    R = G + cvRound((Rs - Gs)*scale[ng]);
                    B = G + cvRound((Bs - Gs)*scale[ng]);
                }
                dstrow[blueIdx] = cv::saturate_cast<uchar>(B);
                dstrow[1] = cv::saturate_cast<uchar>(G);
                dstrow[blueIdx^2] = cv::saturate_cast<uchar>(R);
                greenCell = !greenCell;
            }

#if CV_SSE2
            if( !haveSSE )
                break;

            __m128i emask = _mm_set1_epi32(0x0000ffff),
                    omask = _mm_set1_epi32(0xffff0000),
                    z = _mm_setzero_si128(),
                    one = _mm_set1_epi16(1);
            __m128 _0_5 = _mm_set1_ps(0.5f);

            #define _mm_merge_epi16(a, b) _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) //(aA_aA_aA_aA) * (bB_bB_bB_bB) => (bA_bA_bA_bA)
            #define _mm_cvtloepi16_ps(a)  _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16))   //(1,2,3,4,5,6,7,8) => (1f,2f,3f,4f)
            #define _mm_cvthiepi16_ps(a)  _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16))   //(1,2,3,4,5,6,7,8) => (5f,6f,7f,8f)
            #define _mm_loadl_u8_s16(ptr, offset) _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)((ptr) + (offset))), z) //load 8 uchars to 8 shorts

            // process 8 pixels at once
            for( ; i <= N - 10; i += 8, srow += 8, brow0 += 8, brow1 += 8, brow2 += 8 )
            {
                //int gradN = brow0[0] + brow1[0];
                __m128i gradN = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow0), _mm_loadu_si128((__m128i*)brow1));

                //int gradS = brow1[0] + brow2[0];
                __m128i gradS = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow1), _mm_loadu_si128((__m128i*)brow2));

                //int gradW = brow1[N-1] + brow1[N];
                __m128i gradW = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N-1)), _mm_loadu_si128((__m128i*)(brow1+N)));

                //int gradE = brow1[N+1] + brow1[N];
                __m128i gradE = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N+1)), _mm_loadu_si128((__m128i*)(brow1+N)));

                //int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
                //int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
                __m128i minGrad = _mm_min_epi16(_mm_min_epi16(gradN, gradS), _mm_min_epi16(gradW, gradE));
                __m128i maxGrad = _mm_max_epi16(_mm_max_epi16(gradN, gradS), _mm_max_epi16(gradW, gradE));

                __m128i grad0, grad1;

                //int gradNE = brow0[N4+1] + brow1[N4];
                //int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N4+1)), _mm_loadu_si128((__m128i*)(brow1+N4)));
                grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N2)), _mm_loadu_si128((__m128i*)(brow0+N2+1))),
                                       _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2+1))));
                __m128i gradNE = _mm_merge_epi16(grad0, grad1);

                //int gradSW = brow1[N4] + brow2[N4-1];
                //int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N4-1)), _mm_loadu_si128((__m128i*)(brow1+N4)));
                grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N2)), _mm_loadu_si128((__m128i*)(brow2+N2-1))),
                                       _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2-1))));
                __m128i gradSW = _mm_merge_epi16(grad0, grad1);

                minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNE), gradSW);
                maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNE), gradSW);

                //int gradNW = brow0[N5-1] + brow1[N5];
                //int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N5-1)), _mm_loadu_si128((__m128i*)(brow1+N5)));
                grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N3)), _mm_loadu_si128((__m128i*)(brow0+N3-1))),
                                       _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3-1))));
                __m128i gradNW = _mm_merge_epi16(grad0, grad1);

                //int gradSE = brow1[N5] + brow2[N5+1];
                //int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];
                grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N5+1)), _mm_loadu_si128((__m128i*)(brow1+N5)));
                grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N3)), _mm_loadu_si128((__m128i*)(brow2+N3+1))),
                                       _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3+1))));
                __m128i gradSE = _mm_merge_epi16(grad0, grad1);

                minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNW), gradSE);
                maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNW), gradSE);

                //int T = minGrad + MAX(maxGrad/2, 1);
                __m128i T = _mm_adds_epi16(_mm_max_epi16(_mm_srli_epi16(maxGrad, 1), one), minGrad);

                __m128i RGs = z, GRs = z, Bs = z, ng = z;

                __m128i x0  = _mm_loadl_u8_s16(srow, +0          );
                __m128i x1  = _mm_loadl_u8_s16(srow, -1 - bstep  );
                __m128i x2  = _mm_loadl_u8_s16(srow, -1 - bstep*2);
                __m128i x3  = _mm_loadl_u8_s16(srow,    - bstep  );
                __m128i x4  = _mm_loadl_u8_s16(srow, +1 - bstep*2);
                __m128i x5  = _mm_loadl_u8_s16(srow, +1 - bstep  );
                __m128i x6  = _mm_loadl_u8_s16(srow, +2 - bstep  );
                __m128i x7  = _mm_loadl_u8_s16(srow, +1          );
                __m128i x8  = _mm_loadl_u8_s16(srow, +2 + bstep  );
                __m128i x9  = _mm_loadl_u8_s16(srow, +1 + bstep  );
                __m128i x10 = _mm_loadl_u8_s16(srow, +1 + bstep*2);
                __m128i x11 = _mm_loadl_u8_s16(srow,    + bstep  );
                __m128i x12 = _mm_loadl_u8_s16(srow, -1 + bstep*2);
                __m128i x13 = _mm_loadl_u8_s16(srow, -1 + bstep  );
                __m128i x14 = _mm_loadl_u8_s16(srow, -2 + bstep  );
                __m128i x15 = _mm_loadl_u8_s16(srow, -1          );
                __m128i x16 = _mm_loadl_u8_s16(srow, -2 - bstep  );

                __m128i t0, t1, mask;

                // gradN ***********************************************
                mask = _mm_cmpgt_epi16(T, gradN);                           // mask = T>gradN
                ng = _mm_sub_epi16(ng, mask);                               // ng += (T>gradN)

                t0 = _mm_slli_epi16(x3, 1);                                 // srow[-bstep]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2), x0);  // srow[-bstep*2] + srow[0]

                // RGs += (srow[-bstep*2] + srow[0]) * (T>gradN)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
                // GRs += {srow[-bstep]*2; (srow[-bstep*2-1] + srow[-bstep*2+1])} * (T>gradN)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x2,x4)), mask));
                // Bs += {(srow[-bstep-1]+srow[-bstep+1]); srow[-bstep]*2 } * (T>gradN)
                Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x5), t0), mask));
                // gradNE **********************************************
                mask = _mm_cmpgt_epi16(T, gradNE);                              // mask = T>gradNE
                ng = _mm_sub_epi16(ng, mask);                                   // ng += (T>gradNE)

                t0 = _mm_slli_epi16(x5, 1);                                     // srow[-bstep+1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2+2), x0);    // srow[-bstep*2+2] + srow[0]

                // RGs += {(srow[-bstep*2+2] + srow[0]); srow[-bstep+1]*2} * (T>gradNE)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
                // GRs += {brow0[N6+1]; (srow[-bstep*2+1] + srow[1])} * (T>gradNE)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6+1)), _mm_adds_epi16(x4,x7)), mask));
                // Bs += {srow[-bstep+1]*2; (srow[-bstep] + srow[-bstep+2])} * (T>gradNE)
                Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x3,x6)), mask));

                // gradE ***********************************************
                mask = _mm_cmpgt_epi16(T, gradE);                               // mask = T>gradE
                ng = _mm_sub_epi16(ng, mask);                                   // ng += (T>gradE)

                t0 = _mm_slli_epi16(x7, 1);                                     // srow[1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, 2), x0);             // srow[2] + srow[0]

                // RGs += (srow[2] + srow[0]) * (T>gradE)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
                // GRs += (srow[1]*2) * (T>gradE)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));
                // Bs += {(srow[-bstep+1]+srow[bstep+1]); (srow[-bstep+2]+srow[bstep+2])} * (T>gradE)
                Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x5,x9), _mm_adds_epi16(x6,x8)), mask));

                // gradSE **********************************************
                mask = _mm_cmpgt_epi16(T, gradSE);                              // mask = T>gradSE
                ng = _mm_sub_epi16(ng, mask);                                   // ng += (T>gradSE)

                t0 = _mm_slli_epi16(x9, 1);                                     // srow[bstep+1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2+2), x0);     // srow[bstep*2+2] + srow[0]

                // RGs += {(srow[bstep*2+2] + srow[0]); srow[bstep+1]*2} * (T>gradSE)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
                // GRs += {brow2[N6+1]; (srow[1]+srow[bstep*2+1])} * (T>gradSE)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6+1)), _mm_adds_epi16(x7,x10)), mask));
                // Bs += {srow[-bstep+1]*2; (srow[bstep+2]+srow[bstep])} * (T>gradSE)
                Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1), _mm_adds_epi16(x8,x11)), mask));

                // gradS ***********************************************
                mask = _mm_cmpgt_epi16(T, gradS);                               // mask = T>gradS
                ng = _mm_sub_epi16(ng, mask);                                   // ng += (T>gradS)

                t0 = _mm_slli_epi16(x11, 1);                                    // srow[bstep]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,bstep*2), x0);        // srow[bstep*2]+srow[0]

                // RGs += (srow[bstep*2]+srow[0]) * (T>gradS)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
                // GRs += {srow[bstep]*2; (srow[bstep*2+1]+srow[bstep*2-1])} * (T>gradS)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x10,x12)), mask));
                // Bs += {(srow[bstep+1]+srow[bstep-1]); srow[bstep]*2} * (T>gradS)
                Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x9,x13), t0), mask));

                // gradSW **********************************************
                mask = _mm_cmpgt_epi16(T, gradSW);                              // mask = T>gradSW
                ng = _mm_sub_epi16(ng, mask);                                   // ng += (T>gradSW)

                t0 = _mm_slli_epi16(x13, 1);                                    // srow[bstep-1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2-2), x0);     // srow[bstep*2-2]+srow[0]

                // RGs += {(srow[bstep*2-2]+srow[0]); srow[bstep-1]*2} * (T>gradSW)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
                // GRs += {brow2[N6-1]; (srow[bstep*2-1]+srow[-1])} * (T>gradSW)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6-1)), _mm_adds_epi16(x12,x15)), mask));
                // Bs += {srow[bstep-1]*2; (srow[bstep]+srow[bstep-2])} * (T>gradSW)
                Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x11,x14)), mask));

                // gradW ***********************************************
                mask = _mm_cmpgt_epi16(T, gradW);                               // mask = T>gradW
                ng = _mm_sub_epi16(ng, mask);                                   // ng += (T>gradW)

                t0 = _mm_slli_epi16(x15, 1);                                    // srow[-1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -2), x0);            // srow[-2]+srow[0]

                // RGs += (srow[-2]+srow[0]) * (T>gradW)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
                // GRs += (srow[-1]*2) * (T>gradW)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));
                // Bs += {(srow[-bstep-1]+srow[bstep-1]); (srow[bstep-2]+srow[-bstep-2])} * (T>gradW)
                Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x13), _mm_adds_epi16(x14,x16)), mask));

                // gradNW **********************************************
                mask = _mm_cmpgt_epi16(T, gradNW);                              // mask = T>gradNW
                ng = _mm_sub_epi16(ng, mask);                                   // ng += (T>gradNW)

                t0 = _mm_slli_epi16(x1, 1);                                     // srow[-bstep-1]*2
                t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,-bstep*2-2), x0);     // srow[-bstep*2-2]+srow[0]

                // RGs += {(srow[-bstep*2-2]+srow[0]); srow[-bstep-1]*2} * (T>gradNW)
                RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
                // GRs += {brow0[N6-1]; (srow[-bstep*2-1]+srow[-1])} * (T>gradNW)
                GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6-1)), _mm_adds_epi16(x2,x15)), mask));
                // Bs += {srow[-bstep+1]*2; (srow[-bstep]+srow[-bstep-2])} * (T>gradNW)
                // (x5 is srow[-bstep+1], matching the scalar non-green gradNW branch)
                Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1),_mm_adds_epi16(x3,x16)), mask));

                __m128 ngf0 = _mm_div_ps(_0_5, _mm_cvtloepi16_ps(ng));
                __m128 ngf1 = _mm_div_ps(_0_5, _mm_cvthiepi16_ps(ng));

                // now interpolate r, g & b
                t0 = _mm_subs_epi16(GRs, RGs);
                t1 = _mm_subs_epi16(Bs, RGs);

                t0 = _mm_add_epi16(x0, _mm_packs_epi32(
                    _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t0), ngf0)),
                    _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t0), ngf1))));

                t1 = _mm_add_epi16(x0, _mm_packs_epi32(
                    _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t1), ngf0)),
                    _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t1), ngf1))));

                x1 = _mm_merge_epi16(x0, t0);
                x2 = _mm_merge_epi16(t0, x0);

                uchar R[8], G[8], B[8];

                _mm_storel_epi64(blueIdx ? (__m128i*)B : (__m128i*)R, _mm_packus_epi16(x1, z));
                _mm_storel_epi64((__m128i*)G, _mm_packus_epi16(x2, z));
                _mm_storel_epi64(blueIdx ? (__m128i*)R : (__m128i*)B, _mm_packus_epi16(t1, z));
                for( int j = 0; j < 8; j++, dstrow += 3 )
                {
                    dstrow[0] = B[j]; dstrow[1] = G[j]; dstrow[2] = R[j];
                }
            }
#endif

            limit = N - 2;
        }
        while( i < N - 2 );

        for( i = 0; i < 6; i++ )
        {
            dst[dststep*y + 5 - i] = dst[dststep*y + 8 - i];
            dst[dststep*y + (N - 2)*3 + i] = dst[dststep*y + (N - 3)*3 + i];
        }

        greenCell0 = !greenCell0;
        blueIdx ^= 2;
    }

    for( i = 0; i < size.width*3; i++ )
    {
        dst[i] = dst[i + dststep] = dst[i + dststep*2];
        dst[i + dststep*(size.height-4)] =
        dst[i + dststep*(size.height-3)] =
        dst[i + dststep*(size.height-2)] =
        dst[i + dststep*(size.height-1)] = dst[i + dststep*(size.height-5)];
    }
}

//////////////////////////////// Edge-Aware Demosaicing //////////////////////////////////
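/*
  Editor's note: "edge aware" here applies to the green channel only. At each
  non-green site the interpolator compares the horizontal and vertical
  differences and averages along the direction with the smaller gradient; red
  and blue still use plain two- or four-neighbour averages. A scalar sketch of
  the green rule used by the invoker below (S points into the Bayer data,
  sstep is its row stride):

      int g = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]))
                  ? (S[sstep] + S[-sstep] + 1) >> 1  // horizontal gradient larger: average vertically
                  : (S[-1]    + S[1]     + 1) >> 1;  // otherwise average horizontally
*/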
template <typename T, typename SIMDInterpolator>
class Bayer2RGB_EdgeAware_T_Invoker :
    public cv::ParallelLoopBody
{
public:
    Bayer2RGB_EdgeAware_T_Invoker(const Mat& _src, Mat& _dst, const Size& _size,
        int _blue, int _start_with_green) :
        ParallelLoopBody(),
        src(_src), dst(_dst), size(_size), Blue(_blue), Start_with_green(_start_with_green)
    {
    }

    virtual void operator()(const Range& range) const
    {
        int dcn = dst.channels();
        int dcn2 = dcn<<1;
        int start_with_green = Start_with_green, blue = Blue;
        int sstep = int(src.step / src.elemSize1()), dstep = int(dst.step / dst.elemSize1());
        SIMDInterpolator vecOp;

        const T* S = src.ptr<T>(range.start + 1) + 1;
        T* D = reinterpret_cast<T*>(dst.data + (range.start + 1) * dst.step) + dcn;

        if (range.start % 2)
        {
            start_with_green ^= 1;
            blue ^= 1;
        }

        // to BGR
        for (int y = range.start; y < range.end; ++y)
        {
            int x = 1;
            if (start_with_green)
            {
                D[blue<<1] = (S[-sstep] + S[sstep]) >> 1;
                D[1] = S[0];
                D[2-(blue<<1)] = (S[-1] + S[1]) >> 1;
                D += dcn;
                ++S;
                ++x;
            }

            int delta = vecOp.bayer2RGB_EA(S - sstep - 1, sstep, D, size.width, blue);
            x += delta;
            S += delta;
            D += dcn * delta;

            if (blue)
                for (; x < size.width; x += 2, S += 2, D += dcn2)
                {
                    D[0] = S[0];
                    D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;
                    D[2] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1]) >> 2;

                    D[3] = (S[0] + S[2] + 1) >> 1;
                    D[4] = S[1];
                    D[5] = (S[-sstep+1] + S[sstep+1] + 1) >> 1;
                }
            else
                for (; x < size.width; x += 2, S += 2, D += dcn2)
                {
                    D[0] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2;
                    D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;
                    D[2] = S[0];

                    D[3] = (S[-sstep+1] + S[sstep+1] + 1) >> 1;
                    D[4] = S[1];
                    D[5] = (S[0] + S[2] + 1) >> 1;
                }

            if (x <= size.width)
            {
                D[blue<<1] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2;
                D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;
                D[2-(blue<<1)] = S[0];
                D += dcn;
                ++S;
            }

            for (int i = 0; i < dcn; ++i)
            {
                D[i] = D[-dcn + i];
                D[-dstep+dcn+i] = D[-dstep+(dcn<<1)+i];
            }

            start_with_green ^= 1;
            blue ^= 1;
            S += 2;
            D += dcn2;
        }
    }

private:
    Mat src;
    Mat dst;
    Size size;
    int Blue, Start_with_green;
};

template <typename T, typename SIMDInterpolator>
static void Bayer2RGB_EdgeAware_T(const Mat& src, Mat& dst, int code)
{
    Size size = src.size();

    // images this small cannot be interpolated: return all zeros
    if (size.width <= 2 || size.height <= 2)
    {
        dst = Scalar::all(0);
        return;
    }

    size.width -= 2;
    size.height -= 2;

    int start_with_green = code == CV_BayerGB2BGR_EA || code == CV_BayerGR2BGR_EA ? 1 : 0;
    int blue = code == CV_BayerGB2BGR_EA || code == CV_BayerBG2BGR_EA ? 1 : 0;

    if (size.height > 0)
    {
        Bayer2RGB_EdgeAware_T_Invoker<T, SIMDInterpolator> invoker(src, dst, size, blue, start_with_green);
        Range range(0, size.height);
        parallel_for_(range, invoker, dst.total()/static_cast<double>(1<<16));
    }
    size = dst.size();
    size.width *= dst.channels();
    size_t dstep = dst.step / dst.elemSize1();
    T* firstRow = dst.ptr<T>();
    T* lastRow = dst.ptr<T>() + (size.height-1) * dstep;

    if (size.height > 2)
    {
        for (int x = 0; x < size.width; ++x)
        {
            firstRow[x] = (firstRow+dstep)[x];
            lastRow[x] = (lastRow-dstep)[x];
        }
    }
    else
        for (int x = 0; x < size.width; ++x)
            firstRow[x] = lastRow[x] = 0;
}

} // end namespace cv

//////////////////////////////////////////////////////////////////////////////////////////
//                           The main Demosaicing function                               //
//////////////////////////////////////////////////////////////////////////////////////////
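// Editor's note, a minimal usage sketch (the file name is illustrative): the
// input must be a single-channel 8U or 16U Bayer mosaic, and dcn selects 3-
// or 4-channel output for the linear codes.
//
//     cv::Mat raw = cv::imread("frame.pgm", cv::IMREAD_GRAYSCALE);
//     cv::Mat bgr;
//     cv::demosaicing(raw, bgr, CV_BayerBG2BGR, 3);    // bilinear, 8U or 16U
//     cv::demosaicing(raw, bgr, CV_BayerBG2BGR_VNG);   // VNG, 8U only
//     cv::demosaicing(raw, bgr, CV_BayerBG2BGR_EA);    // edge-aware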
void cv::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn)
{
    Mat src = _src.getMat(), dst;
    Size sz = src.size();
    int scn = src.channels(), depth = src.depth();

    CV_Assert(depth == CV_8U || depth == CV_16U);
    CV_Assert(!src.empty());

    switch (code)
    {
    case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
        if (dcn <= 0)
            dcn = 1;
        CV_Assert( scn == 1 && dcn == 1 );

        _dst.create(sz, CV_MAKETYPE(depth, dcn));
        dst = _dst.getMat();

        if( depth == CV_8U )
            Bayer2Gray_<uchar, SIMDBayerInterpolator_8u>(src, dst, code);
        else if( depth == CV_16U )
            Bayer2Gray_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);
        else
            CV_Error(CV_StsUnsupportedFormat, "Bayer->Gray demosaicing only supports 8u and 16u types");
        break;

    case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
    case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG:
        {
            if (dcn <= 0)
                dcn = 3;
            CV_Assert( scn == 1 && (dcn == 3 || dcn == 4) );

            _dst.create(sz, CV_MAKE_TYPE(depth, dcn));
            Mat dst_ = _dst.getMat();

            if( code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ||
                code == CV_BayerRG2BGR || code == CV_BayerGR2BGR )
            {
                if( depth == CV_8U )
                    Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>(src, dst_, code);
                else if( depth == CV_16U )
                    Bayer2RGB_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst_, code);
                else
                    CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB demosaicing only supports 8u and 16u types");
            }
            else
            {
                CV_Assert( depth == CV_8U );
                Bayer2RGB_VNG_8u(src, dst_, code);
            }
        }
        break;

    case CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA:
        if (dcn <= 0)
            dcn = 3;

        CV_Assert(scn == 1 && dcn == 3);
        _dst.create(sz, CV_MAKETYPE(depth, dcn));
        dst = _dst.getMat();

        if (depth == CV_8U)
            Bayer2RGB_EdgeAware_T<uchar, SIMDBayerInterpolator_8u>(src, dst, code);
        else if (depth == CV_16U)
            Bayer2RGB_EdgeAware_T<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);
        else
            CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB Edge-Aware demosaicing currently supports only 8u and 16u types");

        break;

    default:
        CV_Error( CV_StsBadFlag, "Unknown / unsupported color conversion code" );
    }
}