Home | History | Annotate | Download | only in cuda
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #if !defined CUDA_DISABLER
     44 
     45 #include "opencv2/core/cuda/common.hpp"
     46 #include "opencv2/core/cuda/vec_traits.hpp"
     47 #include "opencv2/core/cuda/vec_math.hpp"
     48 #include "opencv2/core/cuda/limits.hpp"
     49 
     50 namespace cv { namespace cuda { namespace device
     51 {
     52     namespace mog2
     53     {
     54         ///////////////////////////////////////////////////////////////
     55         // Utility
     56 
     57         __device__ __forceinline__ float cvt(uchar val)
     58         {
     59             return val;
     60         }
     61         __device__ __forceinline__ float3 cvt(const uchar3& val)
     62         {
     63             return make_float3(val.x, val.y, val.z);
     64         }
     65         __device__ __forceinline__ float4 cvt(const uchar4& val)
     66         {
     67             return make_float4(val.x, val.y, val.z, val.w);
     68         }
     69 
     70         __device__ __forceinline__ float sqr(float val)
     71         {
     72             return val * val;
     73         }
     74         __device__ __forceinline__ float sqr(const float3& val)
     75         {
     76             return val.x * val.x + val.y * val.y + val.z * val.z;
     77         }
     78         __device__ __forceinline__ float sqr(const float4& val)
     79         {
     80             return val.x * val.x + val.y * val.y + val.z * val.z;
     81         }
     82 
     83         __device__ __forceinline__ float sum(float val)
     84         {
     85             return val;
     86         }
     87         __device__ __forceinline__ float sum(const float3& val)
     88         {
     89             return val.x + val.y + val.z;
     90         }
     91         __device__ __forceinline__ float sum(const float4& val)
     92         {
     93             return val.x + val.y + val.z;
     94         }
     95 
     96         template <class Ptr2D>
     97         __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
     98         {
     99             typename Ptr2D::elem_type val = ptr(k * rows + y, x);
    100             ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
    101             ptr((k + 1) * rows + y, x) = val;
    102         }
    103 
    104         ///////////////////////////////////////////////////////////////
    105         // MOG2
    106 
    107         __constant__ int           c_nmixtures;
    108         __constant__ float         c_Tb;
    109         __constant__ float         c_TB;
    110         __constant__ float         c_Tg;
    111         __constant__ float         c_varInit;
    112         __constant__ float         c_varMin;
    113         __constant__ float         c_varMax;
    114         __constant__ float         c_tau;
    115         __constant__ unsigned char c_shadowVal;
    116 
    117         void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
    118         {
    119             varMin = ::fminf(varMin, varMax);
    120             varMax = ::fmaxf(varMin, varMax);
    121 
    122             cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
    123             cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
    124             cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
    125             cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
    126             cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
    127             cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
    128             cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
    129             cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
    130             cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
    131         }
    132 
    133         template <bool detectShadows, typename SrcT, typename WorkT>
    134         __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
    135                              PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
    136                              const float alphaT, const float alpha1, const float prune)
    137         {
    138             const int x = blockIdx.x * blockDim.x + threadIdx.x;
    139             const int y = blockIdx.y * blockDim.y + threadIdx.y;
    140 
    141             if (x >= frame.cols || y >= frame.rows)
    142                 return;
    143 
    144             WorkT pix = cvt(frame(y, x));
    145 
    146             //calculate distances to the modes (+ sort)
    147             //here we need to go in descending order!!!
    148 
    149             bool background = false; // true - the pixel classified as background
    150 
    151             //internal:
    152 
    153             bool fitsPDF = false; //if it remains zero a new GMM mode will be added
    154 
    155             int nmodes = modesUsed(y, x);
    156             int nNewModes = nmodes; //current number of modes in GMM
    157 
    158             float totalWeight = 0.0f;
    159 
    160             //go through all modes
    161 
    162             for (int mode = 0; mode < nmodes; ++mode)
    163             {
    164                 //need only weight if fit is found
    165                 float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
    166                 int swap_count = 0;
    167                 //fit not found yet
    168                 if (!fitsPDF)
    169                 {
    170                     //check if it belongs to some of the remaining modes
    171                     float var = gmm_variance(mode * frame.rows + y, x);
    172 
    173                     WorkT mean = gmm_mean(mode * frame.rows + y, x);
    174 
    175                     //calculate difference and distance
    176                     WorkT diff = mean - pix;
    177                     float dist2 = sqr(diff);
    178 
    179                     //background? - Tb - usually larger than Tg
    180                     if (totalWeight < c_TB && dist2 < c_Tb * var)
    181                         background = true;
    182 
    183                     //check fit
    184                     if (dist2 < c_Tg * var)
    185                     {
    186                         //belongs to the mode
    187                         fitsPDF = true;
    188 
    189                         //update distribution
    190 
    191                         //update weight
    192                         weight += alphaT;
    193                         float k = alphaT / weight;
    194 
    195                         //update mean
    196                         gmm_mean(mode * frame.rows + y, x) = mean - k * diff;
    197 
    198                         //update variance
    199                         float varnew = var + k * (dist2 - var);
    200 
    201                         //limit the variance
    202                         varnew = ::fmaxf(varnew, c_varMin);
    203                         varnew = ::fminf(varnew, c_varMax);
    204 
    205                         gmm_variance(mode * frame.rows + y, x) = varnew;
    206 
    207                         //sort
    208                         //all other weights are at the same place and
    209                         //only the matched (iModes) is higher -> just find the new place for it
    210 
    211                         for (int i = mode; i > 0; --i)
    212                         {
    213                             //check one up
    214                             if (weight < gmm_weight((i - 1) * frame.rows + y, x))
    215                                 break;
    216 
    217                             swap_count++;
    218                             //swap one up
    219                             swap(gmm_weight, x, y, i - 1, frame.rows);
    220                             swap(gmm_variance, x, y, i - 1, frame.rows);
    221                             swap(gmm_mean, x, y, i - 1, frame.rows);
    222                         }
    223 
    224                         //belongs to the mode - bFitsPDF becomes 1
    225                     }
    226                 } // !fitsPDF
    227 
    228                 //check prune
    229                 if (weight < -prune)
    230                 {
    231                     weight = 0.0f;
    232                     nmodes--;
    233                 }
    234 
    235                 gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value
    236                 totalWeight += weight;
    237             }
    238 
    239             //renormalize weights
    240 
    241             totalWeight = 1.f / totalWeight;
    242             for (int mode = 0; mode < nmodes; ++mode)
    243                 gmm_weight(mode * frame.rows + y, x) *= totalWeight;
    244 
    245             nmodes = nNewModes;
    246 
    247             //make new mode if needed and exit
    248 
    249             if (!fitsPDF)
    250             {
    251                 // replace the weakest or add a new one
    252                 int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;
    253 
    254                 if (nmodes == 1)
    255                     gmm_weight(mode * frame.rows + y, x) = 1.f;
    256                 else
    257                 {
    258                     gmm_weight(mode * frame.rows + y, x) = alphaT;
    259 
    260                     // renormalize all other weights
    261 
    262                     for (int i = 0; i < nmodes - 1; ++i)
    263                         gmm_weight(i * frame.rows + y, x) *= alpha1;
    264                 }
    265 
    266                 // init
    267 
    268                 gmm_mean(mode * frame.rows + y, x) = pix;
    269                 gmm_variance(mode * frame.rows + y, x) = c_varInit;
    270 
    271                 //sort
    272                 //find the new place for it
    273 
    274                 for (int i = nmodes - 1; i > 0; --i)
    275                 {
    276                     // check one up
    277                     if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
    278                         break;
    279 
    280                     //swap one up
    281                     swap(gmm_weight, x, y, i - 1, frame.rows);
    282                     swap(gmm_variance, x, y, i - 1, frame.rows);
    283                     swap(gmm_mean, x, y, i - 1, frame.rows);
    284                 }
    285             }
    286 
    287             //set the number of modes
    288             modesUsed(y, x) = nmodes;
    289 
    290             bool isShadow = false;
    291             if (detectShadows && !background)
    292             {
    293                 float tWeight = 0.0f;
    294 
    295                 // check all the components  marked as background:
    296                 for (int mode = 0; mode < nmodes; ++mode)
    297                 {
    298                     WorkT mean = gmm_mean(mode * frame.rows + y, x);
    299 
    300                     WorkT pix_mean = pix * mean;
    301 
    302                     float numerator = sum(pix_mean);
    303                     float denominator = sqr(mean);
    304 
    305                     // no division by zero allowed
    306                     if (denominator == 0)
    307                         break;
    308 
    309                     // if tau < a < 1 then also check the color distortion
    310                     if (numerator <= denominator && numerator >= c_tau * denominator)
    311                     {
    312                         float a = numerator / denominator;
    313 
    314                         WorkT dD = a * mean - pix;
    315 
    316                         if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
    317                         {
    318                             isShadow = true;
    319                             break;
    320                         }
    321                     };
    322 
    323                     tWeight += gmm_weight(mode * frame.rows + y, x);
    324                     if (tWeight > c_TB)
    325                         break;
    326                 }
    327             }
    328 
    329             fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
    330         }
    331 
    332         template <typename SrcT, typename WorkT>
    333         void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
    334                          float alphaT, float prune, bool detectShadows, cudaStream_t stream)
    335         {
    336             dim3 block(32, 8);
    337             dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
    338 
    339             const float alpha1 = 1.0f - alphaT;
    340 
    341             if (detectShadows)
    342             {
    343                 cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
    344 
    345                 mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
    346                                                                     weight, variance, (PtrStepSz<WorkT>) mean,
    347                                                                     alphaT, alpha1, prune);
    348             }
    349             else
    350             {
    351                 cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
    352 
    353                 mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
    354                                                                     weight, variance, (PtrStepSz<WorkT>) mean,
    355                                                                     alphaT, alpha1, prune);
    356             }
    357 
    358             cudaSafeCall( cudaGetLastError() );
    359 
    360             if (stream == 0)
    361                 cudaSafeCall( cudaDeviceSynchronize() );
    362         }
    363 
    364         void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
    365                       float alphaT, float prune, bool detectShadows, cudaStream_t stream)
    366         {
    367             typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
    368 
    369             static const func_t funcs[] =
    370             {
    371                 0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
    372             };
    373 
    374             funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
    375         }
    376 
    377         template <typename WorkT, typename OutT>
    378         __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
    379         {
    380             const int x = blockIdx.x * blockDim.x + threadIdx.x;
    381             const int y = blockIdx.y * blockDim.y + threadIdx.y;
    382 
    383             if (x >= modesUsed.cols || y >= modesUsed.rows)
    384                 return;
    385 
    386             int nmodes = modesUsed(y, x);
    387 
    388             WorkT meanVal = VecTraits<WorkT>::all(0.0f);
    389             float totalWeight = 0.0f;
    390 
    391             for (int mode = 0; mode < nmodes; ++mode)
    392             {
    393                 float weight = gmm_weight(mode * modesUsed.rows + y, x);
    394 
    395                 WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
    396                 meanVal = meanVal + weight * mean;
    397 
    398                 totalWeight += weight;
    399 
    400                 if(totalWeight > c_TB)
    401                     break;
    402             }
    403 
    404             meanVal = meanVal * (1.f / totalWeight);
    405 
    406             dst(y, x) = saturate_cast<OutT>(meanVal);
    407         }
    408 
    409         template <typename WorkT, typename OutT>
    410         void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
    411         {
    412             dim3 block(32, 8);
    413             dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
    414 
    415             cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
    416 
    417             getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
    418             cudaSafeCall( cudaGetLastError() );
    419 
    420             if (stream == 0)
    421                 cudaSafeCall( cudaDeviceSynchronize() );
    422         }
    423 
    424         void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
    425         {
    426             typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
    427 
    428             static const func_t funcs[] =
    429             {
    430                 0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
    431             };
    432 
    433             funcs[cn](modesUsed, weight, mean, dst, stream);
    434         }
    435     }
    436 }}}
    437 
    438 
    439 #endif /* CUDA_DISABLER */
    440