Home | History | Annotate | Download | only in cuda
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #if !defined CUDA_DISABLER
     44 
     45 #include "opencv2/core/cuda/common.hpp"
     46 #include "opencv2/core/cuda/border_interpolate.hpp"
     47 #include "opencv2/core/cuda/limits.hpp"
     48 
     49 using namespace cv::cuda;
     50 using namespace cv::cuda::device;
     51 
     52 ////////////////////////////////////////////////////////////
     53 // centeredGradient
     54 
     55 namespace tvl1flow
     56 {
     57     __global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
     58     {
     59         const int x = blockIdx.x * blockDim.x + threadIdx.x;
     60         const int y = blockIdx.y * blockDim.y + threadIdx.y;
     61 
     62         if (x >= src.cols || y >= src.rows)
     63             return;
     64 
     65         dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0)));
     66         dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
     67     }
     68 
     69     void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream)
     70     {
     71         const dim3 block(32, 8);
     72         const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
     73 
     74         centeredGradientKernel<<<grid, block, 0, stream>>>(src, dx, dy);
     75         cudaSafeCall( cudaGetLastError() );
     76 
     77         if (!stream)
     78             cudaSafeCall( cudaDeviceSynchronize() );
     79     }
     80 }
     81 
     82 ////////////////////////////////////////////////////////////
     83 // warpBackward
     84 
     85 namespace tvl1flow
     86 {
     87     static __device__ __forceinline__ float bicubicCoeff(float x_)
     88     {
     89         float x = fabsf(x_);
     90         if (x <= 1.0f)
     91         {
     92             return x * x * (1.5f * x - 2.5f) + 1.0f;
     93         }
     94         else if (x < 2.0f)
     95         {
     96             return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
     97         }
     98         else
     99         {
    100             return 0.0f;
    101         }
    102     }
    103 
    104     texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1 (false, cudaFilterModePoint, cudaAddressModeClamp);
    105     texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp);
    106     texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp);
    107 
    108     __global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
    109     {
    110         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    111         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    112 
    113         if (x >= I0.cols || y >= I0.rows)
    114             return;
    115 
    116         const float u1Val = u1(y, x);
    117         const float u2Val = u2(y, x);
    118 
    119         const float wx = x + u1Val;
    120         const float wy = y + u2Val;
    121 
    122         const int xmin = ::ceilf(wx - 2.0f);
    123         const int xmax = ::floorf(wx + 2.0f);
    124 
    125         const int ymin = ::ceilf(wy - 2.0f);
    126         const int ymax = ::floorf(wy + 2.0f);
    127 
    128         float sum  = 0.0f;
    129         float sumx = 0.0f;
    130         float sumy = 0.0f;
    131         float wsum = 0.0f;
    132 
    133         for (int cy = ymin; cy <= ymax; ++cy)
    134         {
    135             for (int cx = xmin; cx <= xmax; ++cx)
    136             {
    137                 const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
    138 
    139                 sum  += w * tex2D(tex_I1 , cx, cy);
    140                 sumx += w * tex2D(tex_I1x, cx, cy);
    141                 sumy += w * tex2D(tex_I1y, cx, cy);
    142 
    143                 wsum += w;
    144             }
    145         }
    146 
    147         const float coeff = 1.0f / wsum;
    148 
    149         const float I1wVal  = sum  * coeff;
    150         const float I1wxVal = sumx * coeff;
    151         const float I1wyVal = sumy * coeff;
    152 
    153         I1w(y, x)  = I1wVal;
    154         I1wx(y, x) = I1wxVal;
    155         I1wy(y, x) = I1wyVal;
    156 
    157         const float Ix2 = I1wxVal * I1wxVal;
    158         const float Iy2 = I1wyVal * I1wyVal;
    159 
    160         // store the |Grad(I1)|^2
    161         grad(y, x) = Ix2 + Iy2;
    162 
    163         // compute the constant part of the rho function
    164         const float I0Val = I0(y, x);
    165         rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
    166     }
    167 
    168     void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y,
    169                       PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx,
    170                       PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho,
    171                       cudaStream_t stream)
    172     {
    173         const dim3 block(32, 8);
    174         const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y));
    175 
    176         bindTexture(&tex_I1 , I1);
    177         bindTexture(&tex_I1x, I1x);
    178         bindTexture(&tex_I1y, I1y);
    179 
    180         warpBackwardKernel<<<grid, block, 0, stream>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
    181         cudaSafeCall( cudaGetLastError() );
    182 
    183         if (!stream)
    184             cudaSafeCall( cudaDeviceSynchronize() );
    185     }
    186 }
    187 
    188 ////////////////////////////////////////////////////////////
    189 // estimateU
    190 
    191 namespace tvl1flow
    192 {
    193     __device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x)
    194     {
    195         if (x > 0 && y > 0)
    196         {
    197             const float v1x = v1(y, x) - v1(y, x - 1);
    198             const float v2y = v2(y, x) - v2(y - 1, x);
    199             return v1x + v2y;
    200         }
    201         else
    202         {
    203             if (y > 0)
    204                 return v1(y, 0) + v2(y, 0) - v2(y - 1, 0);
    205             else
    206             {
    207                 if (x > 0)
    208                     return v1(0, x) - v1(0, x - 1) + v2(0, x);
    209                 else
    210                     return v1(0, 0) + v2(0, 0);
    211             }
    212         }
    213     }
    214 
    215     __global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy,
    216                               const PtrStepf grad, const PtrStepf rho_c,
    217                               const PtrStepf p11, const PtrStepf p12,
    218                               const PtrStepf p21, const PtrStepf p22,
    219                               const PtrStepf p31, const PtrStepf p32,
    220                               PtrStepf u1, PtrStepf u2, PtrStepf u3, PtrStepf error,
    221                               const float l_t, const float theta, const float gamma, const bool calcError)
    222     {
    223         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    224         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    225 
    226         if (x >= I1wx.cols || y >= I1wx.rows)
    227             return;
    228 
    229         const float I1wxVal = I1wx(y, x);
    230         const float I1wyVal = I1wy(y, x);
    231         const float gradVal = grad(y, x);
    232         const float u1OldVal = u1(y, x);
    233         const float u2OldVal = u2(y, x);
    234         const float u3OldVal = gamma ? u3(y, x) : 0;
    235 
    236         const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal + gamma * u3OldVal);
    237 
    238         // estimate the values of the variable (v1, v2) (thresholding operator TH)
    239 
    240         float d1 = 0.0f;
    241         float d2 = 0.0f;
    242         float d3 = 0.0f;
    243 
    244         if (rho < -l_t * gradVal)
    245         {
    246             d1 = l_t * I1wxVal;
    247             d2 = l_t * I1wyVal;
    248             if (gamma)
    249                 d3 = l_t * gamma;
    250         }
    251         else if (rho > l_t * gradVal)
    252         {
    253             d1 = -l_t * I1wxVal;
    254             d2 = -l_t * I1wyVal;
    255             if (gamma)
    256                 d3 = -l_t * gamma;
    257         }
    258         else if (gradVal > numeric_limits<float>::epsilon())
    259         {
    260             const float fi = -rho / gradVal;
    261             d1 = fi * I1wxVal;
    262             d2 = fi * I1wyVal;
    263             if (gamma)
    264                 d3 = fi * gamma;
    265         }
    266 
    267         const float v1 = u1OldVal + d1;
    268         const float v2 = u2OldVal + d2;
    269         const float v3 = u3OldVal + d3;
    270 
    271         // compute the divergence of the dual variable (p1, p2)
    272 
    273         const float div_p1 = divergence(p11, p12, y, x);
    274         const float div_p2 = divergence(p21, p22, y, x);
    275         const float div_p3 = gamma ? divergence(p31, p32, y, x) : 0;
    276 
    277         // estimate the values of the optical flow (u1, u2)
    278 
    279         const float u1NewVal = v1 + theta * div_p1;
    280         const float u2NewVal = v2 + theta * div_p2;
    281         const float u3NewVal = gamma ? v3 + theta * div_p3 : 0;
    282 
    283         u1(y, x) = u1NewVal;
    284         u2(y, x) = u2NewVal;
    285         if (gamma)
    286             u3(y, x) = u3NewVal;
    287 
    288         if (calcError)
    289         {
    290             const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
    291             const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
    292             error(y, x) = n1 + n2;
    293         }
    294     }
    295 
    296     void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
    297                    PtrStepSzf grad, PtrStepSzf rho_c,
    298                    PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
    299                    PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
    300                    float l_t, float theta, float gamma, bool calcError,
    301                    cudaStream_t stream)
    302     {
    303         const dim3 block(32, 8);
    304         const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
    305 
    306         estimateUKernel<<<grid, block, 0, stream>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
    307         cudaSafeCall( cudaGetLastError() );
    308 
    309         if (!stream)
    310             cudaSafeCall( cudaDeviceSynchronize() );
    311     }
    312 }
    313 
    314 ////////////////////////////////////////////////////////////
    315 // estimateDualVariables
    316 
    317 namespace tvl1flow
    318 {
    319     __global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, const PtrStepSzf u3,
    320                                                 PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, PtrStepf p31, PtrStepf p32, const float taut, const float gamma)
    321     {
    322         const int x = blockIdx.x * blockDim.x + threadIdx.x;
    323         const int y = blockIdx.y * blockDim.y + threadIdx.y;
    324 
    325         if (x >= u1.cols || y >= u1.rows)
    326             return;
    327 
    328         const float u1x = u1(y, ::min(x + 1, u1.cols - 1)) - u1(y, x);
    329         const float u1y = u1(::min(y + 1, u1.rows - 1), x) - u1(y, x);
    330 
    331         const float u2x = u2(y, ::min(x + 1, u1.cols - 1)) - u2(y, x);
    332         const float u2y = u2(::min(y + 1, u1.rows - 1), x) - u2(y, x);
    333 
    334         const float u3x = gamma ? u3(y, ::min(x + 1, u1.cols - 1)) - u3(y, x) : 0;
    335         const float u3y = gamma ? u3(::min(y + 1, u1.rows - 1), x) - u3(y, x) : 0;
    336 
    337         const float g1 = ::hypotf(u1x, u1y);
    338         const float g2 = ::hypotf(u2x, u2y);
    339         const float g3 = gamma ? ::hypotf(u3x, u3y) : 0;
    340 
    341         const float ng1 = 1.0f + taut * g1;
    342         const float ng2 = 1.0f + taut * g2;
    343         const float ng3 = gamma ? 1.0f + taut * g3 : 0;
    344 
    345         p11(y, x) = (p11(y, x) + taut * u1x) / ng1;
    346         p12(y, x) = (p12(y, x) + taut * u1y) / ng1;
    347         p21(y, x) = (p21(y, x) + taut * u2x) / ng2;
    348         p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
    349         if (gamma)
    350         {
    351             p31(y, x) = (p31(y, x) + taut * u3x) / ng3;
    352             p32(y, x) = (p32(y, x) + taut * u3y) / ng3;
    353         }
    354     }
    355 
    356     void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,
    357                                PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
    358                                float taut, float gamma,
    359                                cudaStream_t stream)
    360     {
    361         const dim3 block(32, 8);
    362         const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
    363 
    364         estimateDualVariablesKernel<<<grid, block, 0, stream>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
    365         cudaSafeCall( cudaGetLastError() );
    366 
    367         if (!stream)
    368             cudaSafeCall( cudaDeviceSynchronize() );
    369     }
    370 }
    371 
    372 #endif // !defined CUDA_DISABLER
    373