Home | History | Annotate | Download | only in cuda
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #if !defined CUDA_DISABLER
     44 
     45 #include "opencv2/core/cuda/common.hpp"
     46 #include "opencv2/core/cuda/vec_traits.hpp"
     47 #include "opencv2/core/cuda/vec_math.hpp"
     48 #include "opencv2/core/cuda/limits.hpp"
     49 #include "opencv2/core/cuda/color.hpp"
     50 #include "opencv2/core/cuda/saturate_cast.hpp"
     51 
     52 namespace cv { namespace cuda { namespace device
     53 {
     54     template <typename T> struct Bayer2BGR;
     55 
     56     template <> struct Bayer2BGR<uchar>
     57     {
     58         uchar3 res0;
     59         uchar3 res1;
     60         uchar3 res2;
     61         uchar3 res3;
     62 
     63         __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
     64         {
     65             uchar4 patch[3][3];
     66             patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x];
     67             patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
     68             patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
     69 
     70             patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x];
     71             patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)];
     72             patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
     73 
     74             patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x];
     75             patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
     76             patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
     77 
     78             if ((s_y & 1) ^ start_with_green)
     79             {
     80                 const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1;
     81                 const int t1 = (patch[1][0].w + patch[1][1].y + 1) >> 1;
     82 
     83                 const int t2 = (patch[0][1].x + patch[0][1].z + patch[2][1].x + patch[2][1].z + 2) >> 2;
     84                 const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][1].z + patch[2][1].y + 2) >> 2;
     85 
     86                 const int t4 = (patch[0][1].z + patch[2][1].z + 1) >> 1;
     87                 const int t5 = (patch[1][1].y + patch[1][1].w + 1) >> 1;
     88 
     89                 const int t6 = (patch[0][1].z + patch[0][2].x + patch[2][1].z + patch[2][2].x + 2) >> 2;
     90                 const int t7 = (patch[0][1].w + patch[1][1].z + patch[1][2].x + patch[2][1].w + 2) >> 2;
     91 
     92                 if ((s_y & 1) ^ blue_last)
     93                 {
     94                     res0.x = t1;
     95                     res0.y = patch[1][1].x;
     96                     res0.z = t0;
     97 
     98                     res1.x = patch[1][1].y;
     99                     res1.y = t3;
    100                     res1.z = t2;
    101 
    102                     res2.x = t5;
    103                     res2.y = patch[1][1].z;
    104                     res2.z = t4;
    105 
    106                     res3.x = patch[1][1].w;
    107                     res3.y = t7;
    108                     res3.z = t6;
    109                 }
    110                 else
    111                 {
    112                     res0.x = t0;
    113                     res0.y = patch[1][1].x;
    114                     res0.z = t1;
    115 
    116                     res1.x = t2;
    117                     res1.y = t3;
    118                     res1.z = patch[1][1].y;
    119 
    120                     res2.x = t4;
    121                     res2.y = patch[1][1].z;
    122                     res2.z = t5;
    123 
    124                     res3.x = t6;
    125                     res3.y = t7;
    126                     res3.z = patch[1][1].w;
    127                 }
    128             }
    129             else
    130             {
    131                 const int t0 = (patch[0][0].w + patch[0][1].y + patch[2][0].w + patch[2][1].y + 2) >> 2;
    132                 const int t1 = (patch[0][1].x + patch[1][0].w + patch[1][1].y + patch[2][1].x + 2) >> 2;
    133 
    134                 const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1;
    135                 const int t3 = (patch[1][1].x + patch[1][1].z + 1) >> 1;
    136 
    137                 const int t4 = (patch[0][1].y + patch[0][1].w + patch[2][1].y + patch[2][1].w + 2) >> 2;
    138                 const int t5 = (patch[0][1].z + patch[1][1].y + patch[1][1].w + patch[2][1].z + 2) >> 2;
    139 
    140                 const int t6 = (patch[0][1].w + patch[2][1].w + 1) >> 1;
    141                 const int t7 = (patch[1][1].z + patch[1][2].x + 1) >> 1;
    142 
    143                 if ((s_y & 1) ^ blue_last)
    144                 {
    145                     res0.x = patch[1][1].x;
    146                     res0.y = t1;
    147                     res0.z = t0;
    148 
    149                     res1.x = t3;
    150                     res1.y = patch[1][1].y;
    151                     res1.z = t2;
    152 
    153                     res2.x = patch[1][1].z;
    154                     res2.y = t5;
    155                     res2.z = t4;
    156 
    157                     res3.x = t7;
    158                     res3.y = patch[1][1].w;
    159                     res3.z = t6;
    160                 }
    161                 else
    162                 {
    163                     res0.x = t0;
    164                     res0.y = t1;
    165                     res0.z = patch[1][1].x;
    166 
    167                     res1.x = t2;
    168                     res1.y = patch[1][1].y;
    169                     res1.z = t3;
    170 
    171                     res2.x = t4;
    172                     res2.y = t5;
    173                     res2.z = patch[1][1].z;
    174 
    175                     res3.x = t6;
    176                     res3.y = patch[1][1].w;
    177                     res3.z = t7;
    178                 }
    179             }
    180         }
    181     };
    182 
    183     template <typename D> __device__ __forceinline__ D toDst(const uchar3& pix);
    184     template <> __device__ __forceinline__ uchar toDst<uchar>(const uchar3& pix)
    185     {
    186         typename bgr_to_gray_traits<uchar>::functor_type f = bgr_to_gray_traits<uchar>::create_functor();
    187         return f(pix);
    188     }
    189     template <> __device__ __forceinline__ uchar3 toDst<uchar3>(const uchar3& pix)
    190     {
    191         return pix;
    192     }
    193     template <> __device__ __forceinline__ uchar4 toDst<uchar4>(const uchar3& pix)
    194     {
    195         return make_uchar4(pix.x, pix.y, pix.z, 255);
    196     }
    197 
    198     template <typename D>
    199     __global__ void Bayer2BGR_8u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
    200     {
    201         const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
    202         int s_y = blockIdx.y * blockDim.y + threadIdx.y;
    203 
    204         if (s_y >= src.rows || (s_x << 2) >= src.cols)
    205             return;
    206 
    207         s_y = ::min(::max(s_y, 1), src.rows - 2);
    208 
    209         Bayer2BGR<uchar> bayer;
    210         bayer.apply(src, s_x, s_y, blue_last, start_with_green);
    211 
    212         const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
    213         const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
    214 
    215         dst(d_y, d_x) = toDst<D>(bayer.res0);
    216         if (d_x + 1 < src.cols)
    217             dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
    218         if (d_x + 2 < src.cols)
    219             dst(d_y, d_x + 2) = toDst<D>(bayer.res2);
    220         if (d_x + 3 < src.cols)
    221             dst(d_y, d_x + 3) = toDst<D>(bayer.res3);
    222     }
    223 
    224     template <> struct Bayer2BGR<ushort>
    225     {
    226         ushort3 res0;
    227         ushort3 res1;
    228 
    229         __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
    230         {
    231             ushort2 patch[3][3];
    232             patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x];
    233             patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
    234             patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
    235 
    236             patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x];
    237             patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)];
    238             patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
    239 
    240             patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x];
    241             patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
    242             patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
    243 
    244             if ((s_y & 1) ^ start_with_green)
    245             {
    246                 const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1;
    247                 const int t1 = (patch[1][0].y + patch[1][1].y + 1) >> 1;
    248 
    249                 const int t2 = (patch[0][1].x + patch[0][2].x + patch[2][1].x + patch[2][2].x + 2) >> 2;
    250                 const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][2].x + patch[2][1].y + 2) >> 2;
    251 
    252                 if ((s_y & 1) ^ blue_last)
    253                 {
    254                     res0.x = t1;
    255                     res0.y = patch[1][1].x;
    256                     res0.z = t0;
    257 
    258                     res1.x = patch[1][1].y;
    259                     res1.y = t3;
    260                     res1.z = t2;
    261                 }
    262                 else
    263                 {
    264                     res0.x = t0;
    265                     res0.y = patch[1][1].x;
    266                     res0.z = t1;
    267 
    268                     res1.x = t2;
    269                     res1.y = t3;
    270                     res1.z = patch[1][1].y;
    271                 }
    272             }
    273             else
    274             {
    275                 const int t0 = (patch[0][0].y + patch[0][1].y + patch[2][0].y + patch[2][1].y + 2) >> 2;
    276                 const int t1 = (patch[0][1].x + patch[1][0].y + patch[1][1].y + patch[2][1].x + 2) >> 2;
    277 
    278                 const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1;
    279                 const int t3 = (patch[1][1].x + patch[1][2].x + 1) >> 1;
    280 
    281                 if ((s_y & 1) ^ blue_last)
    282                 {
    283                     res0.x = patch[1][1].x;
    284                     res0.y = t1;
    285                     res0.z = t0;
    286 
    287                     res1.x = t3;
    288                     res1.y = patch[1][1].y;
    289                     res1.z = t2;
    290                 }
    291                 else
    292                 {
    293                     res0.x = t0;
    294                     res0.y = t1;
    295                     res0.z = patch[1][1].x;
    296 
    297                     res1.x = t2;
    298                     res1.y = patch[1][1].y;
    299                     res1.z = t3;
    300                 }
    301             }
    302         }
    303     };
    304 
    305     template <typename D> __device__ __forceinline__ D toDst(const ushort3& pix);
    306     template <> __device__ __forceinline__ ushort toDst<ushort>(const ushort3& pix)
    307     {
    308         typename bgr_to_gray_traits<ushort>::functor_type f = bgr_to_gray_traits<ushort>::create_functor();
    309         return f(pix);
    310     }
    311     template <> __device__ __forceinline__ ushort3 toDst<ushort3>(const ushort3& pix)
    312     {
    313         return pix;
    314     }
    315     template <> __device__ __forceinline__ ushort4 toDst<ushort4>(const ushort3& pix)
    316     {
    317         return make_ushort4(pix.x, pix.y, pix.z, numeric_limits<ushort>::max());
    318     }
    319 
    320     template <typename D>
    321     __global__ void Bayer2BGR_16u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
    322     {
    323         const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
    324         int s_y = blockIdx.y * blockDim.y + threadIdx.y;
    325 
    326         if (s_y >= src.rows || (s_x << 1) >= src.cols)
    327             return;
    328 
    329         s_y = ::min(::max(s_y, 1), src.rows - 2);
    330 
    331         Bayer2BGR<ushort> bayer;
    332         bayer.apply(src, s_x, s_y, blue_last, start_with_green);
    333 
    334         const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
    335         const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
    336 
    337         dst(d_y, d_x) = toDst<D>(bayer.res0);
    338         if (d_x + 1 < src.cols)
    339             dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
    340     }
    341 
    342     template <int cn>
    343     void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
    344     {
    345         typedef typename TypeVec<uchar, cn>::vec_type dst_t;
    346 
    347         const dim3 block(32, 8);
    348         const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y));
    349 
    350         cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
    351 
    352         Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
    353         cudaSafeCall( cudaGetLastError() );
    354 
    355         if (stream == 0)
    356             cudaSafeCall( cudaDeviceSynchronize() );
    357     }
    358 
    359     template <int cn>
    360     void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
    361     {
    362         typedef typename TypeVec<ushort, cn>::vec_type dst_t;
    363 
    364         const dim3 block(32, 8);
    365         const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y));
    366 
    367         cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
    368 
    369         Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
    370         cudaSafeCall( cudaGetLastError() );
    371 
    372         if (stream == 0)
    373             cudaSafeCall( cudaDeviceSynchronize() );
    374     }
    375 
    376     template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
    377     template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
    378     template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
    379 
    380     template void Bayer2BGR_16u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
    381     template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
    382     template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
    383 
    384     //////////////////////////////////////////////////////////////
    385     // Bayer Demosaicing (Malvar, He, and Cutler)
    386     //
    387     // by Morgan McGuire, Williams College
    388     // http://graphics.cs.williams.edu/papers/BayerJGT09/#shaders
    389     //
    390     // ported to CUDA
    391 
    392     texture<uchar, cudaTextureType2D, cudaReadModeElementType> sourceTex(false, cudaFilterModePoint, cudaAddressModeClamp);
    393 
    394     template <typename DstType>
    395     __global__ void MHCdemosaic(PtrStepSz<DstType> dst, const int2 sourceOffset, const int2 firstRed)
    396     {
    397         const float   kAx = -1.0f / 8.0f,     kAy = -1.5f / 8.0f,     kAz =  0.5f / 8.0f    /*kAw = -1.0f / 8.0f*/;
    398         const float   kBx =  2.0f / 8.0f,   /*kBy =  0.0f / 8.0f,*/ /*kBz =  0.0f / 8.0f,*/   kBw =  4.0f / 8.0f  ;
    399         const float   kCx =  4.0f / 8.0f,     kCy =  6.0f / 8.0f,     kCz =  5.0f / 8.0f    /*kCw =  5.0f / 8.0f*/;
    400         const float /*kDx =  0.0f / 8.0f,*/   kDy =  2.0f / 8.0f,     kDz = -1.0f / 8.0f    /*kDw = -1.0f / 8.0f*/;
    401         const float   kEx = -1.0f / 8.0f,     kEy = -1.5f / 8.0f,   /*kEz = -1.0f / 8.0f,*/   kEw =  0.5f / 8.0f  ;
    402         const float   kFx =  2.0f / 8.0f,   /*kFy =  0.0f / 8.0f,*/   kFz =  4.0f / 8.0f    /*kFw =  0.0f / 8.0f*/;
    403 
    404         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    405         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    406 
    407         if (x == 0 || x >= dst.cols - 1 || y == 0 || y >= dst.rows - 1)
    408             return;
    409 
    410         int2 center;
    411         center.x = x + sourceOffset.x;
    412         center.y = y + sourceOffset.y;
    413 
    414         int4 xCoord;
    415         xCoord.x = center.x - 2;
    416         xCoord.y = center.x - 1;
    417         xCoord.z = center.x + 1;
    418         xCoord.w = center.x + 2;
    419 
    420         int4 yCoord;
    421         yCoord.x = center.y - 2;
    422         yCoord.y = center.y - 1;
    423         yCoord.z = center.y + 1;
    424         yCoord.w = center.y + 2;
    425 
    426         float C = tex2D(sourceTex, center.x, center.y); // ( 0, 0)
    427 
    428         float4 Dvec;
    429         Dvec.x = tex2D(sourceTex, xCoord.y, yCoord.y); // (-1,-1)
    430         Dvec.y = tex2D(sourceTex, xCoord.y, yCoord.z); // (-1, 1)
    431         Dvec.z = tex2D(sourceTex, xCoord.z, yCoord.y); // ( 1,-1)
    432         Dvec.w = tex2D(sourceTex, xCoord.z, yCoord.z); // ( 1, 1)
    433 
    434         float4 value;
    435         value.x = tex2D(sourceTex, center.x, yCoord.x); // ( 0,-2) A0
    436         value.y = tex2D(sourceTex, center.x, yCoord.y); // ( 0,-1) B0
    437         value.z = tex2D(sourceTex, xCoord.x, center.y); // (-2, 0) E0
    438         value.w = tex2D(sourceTex, xCoord.y, center.y); // (-1, 0) F0
    439 
    440         // (A0 + A1), (B0 + B1), (E0 + E1), (F0 + F1)
    441         value.x += tex2D(sourceTex, center.x, yCoord.w); // ( 0, 2) A1
    442         value.y += tex2D(sourceTex, center.x, yCoord.z); // ( 0, 1) B1
    443         value.z += tex2D(sourceTex, xCoord.w, center.y); // ( 2, 0) E1
    444         value.w += tex2D(sourceTex, xCoord.z, center.y); // ( 1, 0) F1
    445 
    446         float4 PATTERN;
    447         PATTERN.x = kCx * C;
    448         PATTERN.y = kCy * C;
    449         PATTERN.z = kCz * C;
    450         PATTERN.w = PATTERN.z;
    451 
    452         float D = Dvec.x + Dvec.y + Dvec.z + Dvec.w;
    453 
    454         // There are five filter patterns (identity, cross, checker,
    455         // theta, phi). Precompute the terms from all of them and then
    456         // use swizzles to assign to color channels.
    457         //
    458         // Channel Matches
    459         // x cross (e.g., EE G)
    460         // y checker (e.g., EE B)
    461         // z theta (e.g., EO R)
    462         // w phi (e.g., EO B)
    463 
    464         #define A value.x  // A0 + A1
    465         #define B value.y  // B0 + B1
    466         #define E value.z  // E0 + E1
    467         #define F value.w  // F0 + F1
    468 
    469         float3 temp;
    470 
    471         // PATTERN.yzw += (kD.yz * D).xyy;
    472         temp.x = kDy * D;
    473         temp.y = kDz * D;
    474         PATTERN.y += temp.x;
    475         PATTERN.z += temp.y;
    476         PATTERN.w += temp.y;
    477 
    478         // PATTERN += (kA.xyz * A).xyzx;
    479         temp.x = kAx * A;
    480         temp.y = kAy * A;
    481         temp.z = kAz * A;
    482         PATTERN.x += temp.x;
    483         PATTERN.y += temp.y;
    484         PATTERN.z += temp.z;
    485         PATTERN.w += temp.x;
    486 
    487         // PATTERN += (kE.xyw * E).xyxz;
    488         temp.x = kEx * E;
    489         temp.y = kEy * E;
    490         temp.z = kEw * E;
    491         PATTERN.x += temp.x;
    492         PATTERN.y += temp.y;
    493         PATTERN.z += temp.x;
    494         PATTERN.w += temp.z;
    495 
    496         // PATTERN.xw += kB.xw * B;
    497         PATTERN.x += kBx * B;
    498         PATTERN.w += kBw * B;
    499 
    500         // PATTERN.xz += kF.xz * F;
    501         PATTERN.x += kFx * F;
    502         PATTERN.z += kFz * F;
    503 
    504         // Determine which of four types of pixels we are on.
    505         int2 alternate;
    506         alternate.x = (x + firstRed.x) % 2;
    507         alternate.y = (y + firstRed.y) % 2;
    508 
    509         // in BGR sequence;
    510         uchar3 pixelColor =
    511             (alternate.y == 0) ?
    512                 ((alternate.x == 0) ?
    513                     make_uchar3(saturate_cast<uchar>(PATTERN.y), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(C)) :
    514                     make_uchar3(saturate_cast<uchar>(PATTERN.w), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.z))) :
    515                 ((alternate.x == 0) ?
    516                     make_uchar3(saturate_cast<uchar>(PATTERN.z), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.w)) :
    517                     make_uchar3(saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(PATTERN.y)));
    518 
    519         dst(y, x) = toDst<DstType>(pixelColor);
    520     }
    521 
    522     template <int cn>
    523     void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream)
    524     {
    525         typedef typename TypeVec<uchar, cn>::vec_type dst_t;
    526 
    527         const dim3 block(32, 8);
    528         const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
    529 
    530         bindTexture(&sourceTex, src);
    531 
    532         MHCdemosaic<dst_t><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, sourceOffset, firstRed);
    533         cudaSafeCall( cudaGetLastError() );
    534 
    535         if (stream == 0)
    536             cudaSafeCall( cudaDeviceSynchronize() );
    537     }
    538 
    539     template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
    540     template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
    541     template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
    542 }}}
    543 
    544 #endif /* CUDA_DISABLER */
    545