Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 
     45 using namespace cv;
     46 using namespace cv::cuda;
     47 
     48 #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
     49 
     50 Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr<BroxOpticalFlow>(); }
     51 
     52 #else
     53 
     54 #define MIN_SIZE 32
     55 
     56 // CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag
     57 // leads to an inefficient code. It's for debug purposes only.
     58 #define ENABLE_CUDA_RESIZE 1
     59 
     60 namespace cv { namespace cuda { namespace device { namespace optflow_farneback
     61 {
     62     void setPolynomialExpansionConsts(
     63             int polyN, const float *g, const float *xg, const float *xxg,
     64             float ig11, float ig03, float ig33, float ig55);
     65 
     66     void polynomialExpansionGpu(const PtrStepSzf &src, int polyN, PtrStepSzf dst, cudaStream_t stream);
     67 
     68     void setUpdateMatricesConsts();
     69 
     70     void updateMatricesGpu(
     71             const PtrStepSzf flowx, const PtrStepSzf flowy, const PtrStepSzf R0, const PtrStepSzf R1,
     72             PtrStepSzf M, cudaStream_t stream);
     73 
     74     void updateFlowGpu(
     75             const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream);
     76 
     77     void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
     78 
     79     void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
     80 
     81     void setGaussianBlurKernel(const float *gKer, int ksizeHalf);
     82 
     83     void gaussianBlurGpu(
     84             const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
     85 
     86     void gaussianBlur5Gpu(
     87             const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
     88 
     89     void gaussianBlur5Gpu_CC11(
     90             const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
     91 
     92 }}}}
     93 
     94 namespace
     95 {
     96     class FarnebackOpticalFlowImpl : public FarnebackOpticalFlow
     97     {
     98     public:
     99         FarnebackOpticalFlowImpl(int numLevels, double pyrScale, bool fastPyramids, int winSize,
    100                                  int numIters, int polyN, double polySigma, int flags) :
    101             numLevels_(numLevels), pyrScale_(pyrScale), fastPyramids_(fastPyramids), winSize_(winSize),
    102             numIters_(numIters), polyN_(polyN), polySigma_(polySigma), flags_(flags)
    103         {
    104         }
    105 
    106         virtual int getNumLevels() const { return numLevels_; }
    107         virtual void setNumLevels(int numLevels) { numLevels_ = numLevels; }
    108 
    109         virtual double getPyrScale() const { return pyrScale_; }
    110         virtual void setPyrScale(double pyrScale) { pyrScale_ = pyrScale; }
    111 
    112         virtual bool getFastPyramids() const { return fastPyramids_; }
    113         virtual void setFastPyramids(bool fastPyramids) { fastPyramids_ = fastPyramids; }
    114 
    115         virtual int getWinSize() const { return winSize_; }
    116         virtual void setWinSize(int winSize) { winSize_ = winSize; }
    117 
    118         virtual int getNumIters() const { return numIters_; }
    119         virtual void setNumIters(int numIters) { numIters_ = numIters; }
    120 
    121         virtual int getPolyN() const { return polyN_; }
    122         virtual void setPolyN(int polyN) { polyN_ = polyN; }
    123 
    124         virtual double getPolySigma() const { return polySigma_; }
    125         virtual void setPolySigma(double polySigma) { polySigma_ = polySigma; }
    126 
    127         virtual int getFlags() const { return flags_; }
    128         virtual void setFlags(int flags) { flags_ = flags; }
    129 
    130         virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);
    131 
    132     private:
    133         int numLevels_;
    134         double pyrScale_;
    135         bool fastPyramids_;
    136         int winSize_;
    137         int numIters_;
    138         int polyN_;
    139         double polySigma_;
    140         int flags_;
    141 
    142     private:
    143         void prepareGaussian(
    144                 int n, double sigma, float *g, float *xg, float *xxg,
    145                 double &ig11, double &ig03, double &ig33, double &ig55);
    146 
    147         void setPolynomialExpansionConsts(int n, double sigma);
    148 
    149         void updateFlow_boxFilter(
    150                 const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
    151                 GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
    152 
    153         void updateFlow_gaussianBlur(
    154                 const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
    155                 GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
    156 
    157         void calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream);
    158 
    159         GpuMat frames_[2];
    160         GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
    161         std::vector<GpuMat> pyramid0_, pyramid1_;
    162     };
    163 
    164     void FarnebackOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream)
    165     {
    166         const GpuMat frame0 = _frame0.getGpuMat();
    167         const GpuMat frame1 = _frame1.getGpuMat();
    168 
    169         BufferPool pool(stream);
    170         GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1);
    171         GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1);
    172 
    173         calcImpl(frame0, frame1, flowx, flowy, stream);
    174 
    175         GpuMat flows[] = {flowx, flowy};
    176         cuda::merge(flows, 2, _flow, stream);
    177     }
    178 
    179     GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
    180     {
    181         if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
    182             return mat(Rect(0, 0, cols, rows));
    183 
    184         return mat = GpuMat(rows, cols, type);
    185     }
    186 
    187     void FarnebackOpticalFlowImpl::prepareGaussian(
    188             int n, double sigma, float *g, float *xg, float *xxg,
    189             double &ig11, double &ig03, double &ig33, double &ig55)
    190     {
    191         double s = 0.;
    192         for (int x = -n; x <= n; x++)
    193         {
    194             g[x] = (float)std::exp(-x*x/(2*sigma*sigma));
    195             s += g[x];
    196         }
    197 
    198         s = 1./s;
    199         for (int x = -n; x <= n; x++)
    200         {
    201             g[x] = (float)(g[x]*s);
    202             xg[x] = (float)(x*g[x]);
    203             xxg[x] = (float)(x*x*g[x]);
    204         }
    205 
    206         Mat_<double> G(6, 6);
    207         G.setTo(0);
    208 
    209         for (int y = -n; y <= n; y++)
    210         {
    211             for (int x = -n; x <= n; x++)
    212             {
    213                 G(0,0) += g[y]*g[x];
    214                 G(1,1) += g[y]*g[x]*x*x;
    215                 G(3,3) += g[y]*g[x]*x*x*x*x;
    216                 G(5,5) += g[y]*g[x]*x*x*y*y;
    217             }
    218         }
    219 
    220         //G[0][0] = 1.;
    221         G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);
    222         G(4,4) = G(3,3);
    223         G(3,4) = G(4,3) = G(5,5);
    224 
    225         // invG:
    226         // [ x        e  e    ]
    227         // [    y             ]
    228         // [       y          ]
    229         // [ e        z       ]
    230         // [ e           z    ]
    231         // [                u ]
    232         Mat_<double> invG = G.inv(DECOMP_CHOLESKY);
    233 
    234         ig11 = invG(1,1);
    235         ig03 = invG(0,3);
    236         ig33 = invG(3,3);
    237         ig55 = invG(5,5);
    238     }
    239 
    240     void FarnebackOpticalFlowImpl::setPolynomialExpansionConsts(int n, double sigma)
    241     {
    242         std::vector<float> buf(n*6 + 3);
    243         float* g = &buf[0] + n;
    244         float* xg = g + n*2 + 1;
    245         float* xxg = xg + n*2 + 1;
    246 
    247         if (sigma < FLT_EPSILON)
    248             sigma = n*0.3;
    249 
    250         double ig11, ig03, ig33, ig55;
    251         prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);
    252 
    253         device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast<float>(ig11), static_cast<float>(ig03), static_cast<float>(ig33), static_cast<float>(ig55));
    254     }
    255 
    256     void FarnebackOpticalFlowImpl::updateFlow_boxFilter(
    257             const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
    258             GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
    259     {
    260         if (deviceSupports(FEATURE_SET_COMPUTE_12))
    261             device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0]));
    262         else
    263             device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0]));
    264         swap(M, bufM);
    265 
    266         for (int i = 1; i < 5; ++i)
    267             streams[i].waitForCompletion();
    268         device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0]));
    269 
    270         if (updateMatrices)
    271             device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0]));
    272     }
    273 
    274     void FarnebackOpticalFlowImpl::updateFlow_gaussianBlur(
    275             const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
    276             GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])
    277     {
    278         if (deviceSupports(FEATURE_SET_COMPUTE_12))
    279             device::optflow_farneback::gaussianBlur5Gpu(
    280                         M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0]));
    281         else
    282             device::optflow_farneback::gaussianBlur5Gpu_CC11(
    283                         M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0]));
    284         swap(M, bufM);
    285 
    286         device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0]));
    287 
    288         if (updateMatrices)
    289             device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0]));
    290     }
    291 
    292     void FarnebackOpticalFlowImpl::calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream)
    293     {
    294         CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);
    295         CV_Assert(frame0.size() == frame1.size());
    296         CV_Assert(polyN_ == 5 || polyN_ == 7);
    297         CV_Assert(!fastPyramids_ || std::abs(pyrScale_ - 0.5) < 1e-6);
    298 
    299         Stream streams[5];
    300         if (stream)
    301             streams[0] = stream;
    302 
    303         Size size = frame0.size();
    304         GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;
    305 
    306         flowx.create(size, CV_32F);
    307         flowy.create(size, CV_32F);
    308         GpuMat flowx0 = flowx;
    309         GpuMat flowy0 = flowy;
    310 
    311         // Crop unnecessary levels
    312         double scale = 1;
    313         int numLevelsCropped = 0;
    314         for (; numLevelsCropped < numLevels_; numLevelsCropped++)
    315         {
    316             scale *= pyrScale_;
    317             if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
    318                 break;
    319         }
    320 
    321         frame0.convertTo(frames_[0], CV_32F, streams[0]);
    322         frame1.convertTo(frames_[1], CV_32F, streams[1]);
    323 
    324         if (fastPyramids_)
    325         {
    326             // Build Gaussian pyramids using pyrDown()
    327             pyramid0_.resize(numLevelsCropped + 1);
    328             pyramid1_.resize(numLevelsCropped + 1);
    329             pyramid0_[0] = frames_[0];
    330             pyramid1_[0] = frames_[1];
    331             for (int i = 1; i <= numLevelsCropped; ++i)
    332             {
    333                 cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
    334                 cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
    335             }
    336         }
    337 
    338         setPolynomialExpansionConsts(polyN_, polySigma_);
    339         device::optflow_farneback::setUpdateMatricesConsts();
    340 
    341         for (int k = numLevelsCropped; k >= 0; k--)
    342         {
    343             streams[0].waitForCompletion();
    344 
    345             scale = 1;
    346             for (int i = 0; i < k; i++)
    347                 scale *= pyrScale_;
    348 
    349             double sigma = (1./scale - 1) * 0.5;
    350             int smoothSize = cvRound(sigma*5) | 1;
    351             smoothSize = std::max(smoothSize, 3);
    352 
    353             int width = cvRound(size.width*scale);
    354             int height = cvRound(size.height*scale);
    355 
    356             if (fastPyramids_)
    357             {
    358                 width = pyramid0_[k].cols;
    359                 height = pyramid0_[k].rows;
    360             }
    361 
    362             if (k > 0)
    363             {
    364                 curFlowX.create(height, width, CV_32F);
    365                 curFlowY.create(height, width, CV_32F);
    366             }
    367             else
    368             {
    369                 curFlowX = flowx0;
    370                 curFlowY = flowy0;
    371             }
    372 
    373             if (!prevFlowX.data)
    374             {
    375                 if (flags_ & OPTFLOW_USE_INITIAL_FLOW)
    376                 {
    377                     cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
    378                     cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
    379                     curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]);
    380                     curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]);
    381                 }
    382                 else
    383                 {
    384                     curFlowX.setTo(0, streams[0]);
    385                     curFlowY.setTo(0, streams[1]);
    386                 }
    387             }
    388             else
    389             {
    390                 cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
    391                 cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
    392                 curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale_, streams[0]);
    393                 curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale_, streams[1]);
    394             }
    395 
    396             GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);
    397             GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);
    398             GpuMat R[2] =
    399             {
    400                 allocMatFromBuf(5*height, width, CV_32F, R_[0]),
    401                 allocMatFromBuf(5*height, width, CV_32F, R_[1])
    402             };
    403 
    404             if (fastPyramids_)
    405             {
    406                 device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN_, R[0], StreamAccessor::getStream(streams[0]));
    407                 device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN_, R[1], StreamAccessor::getStream(streams[1]));
    408             }
    409             else
    410             {
    411                 GpuMat blurredFrame[2] =
    412                 {
    413                     allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),
    414                     allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])
    415                 };
    416                 GpuMat pyrLevel[2] =
    417                 {
    418                     allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),
    419                     allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])
    420                 };
    421 
    422                 Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);
    423                 device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);
    424 
    425                 for (int i = 0; i < 2; i++)
    426                 {
    427                     device::optflow_farneback::gaussianBlurGpu(
    428                             frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, StreamAccessor::getStream(streams[i]));
    429                     cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);
    430                     device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN_, R[i], StreamAccessor::getStream(streams[i]));
    431                 }
    432             }
    433 
    434             streams[1].waitForCompletion();
    435             device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, StreamAccessor::getStream(streams[0]));
    436 
    437             if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN)
    438             {
    439                 Mat g = getGaussianKernel(winSize_, winSize_/2*0.3f, CV_32F);
    440                 device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize_/2), winSize_/2);
    441             }
    442             for (int i = 0; i < numIters_; i++)
    443             {
    444                 if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN)
    445                     updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams);
    446                 else
    447                     updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams);
    448             }
    449 
    450             prevFlowX = curFlowX;
    451             prevFlowY = curFlowY;
    452         }
    453 
    454         flowx = curFlowX;
    455         flowy = curFlowY;
    456 
    457         if (!stream)
    458             streams[0].waitForCompletion();
    459     }
    460 }
    461 
    462 Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int numLevels, double pyrScale, bool fastPyramids, int winSize,
    463                                                                  int numIters, int polyN, double polySigma, int flags)
    464 {
    465     return makePtr<FarnebackOpticalFlowImpl>(numLevels, pyrScale, fastPyramids, winSize,
    466                                              numIters, polyN, polySigma, flags);
    467 }
    468 
    469 #endif
    470