1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/limits.hpp"

namespace cv { namespace cuda { namespace device
{
    namespace mog2
    {
        ///////////////////////////////////////////////////////////////
        // Utility
        //
        // All per-mode GMM buffers (weight, variance, mean) are stored as
        // c_nmixtures stacked row-blocks: the entry for mode k of pixel
        // (x, y) lives at row (k * rows + y), column x. This layout is
        // visible in every gmm_* access below.

        // Convert a 1-channel 8-bit pixel to the float working type.
        __device__ __forceinline__ float cvt(uchar val)
        {
            return val;
        }
        // Convert a 3-channel 8-bit pixel to the float3 working type.
        __device__ __forceinline__ float3 cvt(const uchar3& val)
        {
            return make_float3(val.x, val.y, val.z);
        }
        // Convert a 4-channel 8-bit pixel to the float4 working type.
        __device__ __forceinline__ float4 cvt(const uchar4& val)
        {
            return make_float4(val.x, val.y, val.z, val.w);
        }

        // Squared magnitude of a scalar value.
        __device__ __forceinline__ float sqr(float val)
        {
            return val * val;
        }
        // Squared L2 norm of a 3-channel value.
        __device__ __forceinline__ float sqr(const float3& val)
        {
            return val.x * val.x + val.y * val.y + val.z * val.z;
        }
        // Squared L2 norm of a 4-channel value. The .w component is NOT
        // included (presumably the alpha channel; consistent with the
        // sum(float4) overload below — TODO confirm intent).
        __device__ __forceinline__ float sqr(const float4& val)
        {
            return val.x * val.x + val.y * val.y + val.z * val.z;
        }

        // Channel sum of a scalar value (identity).
        __device__ __forceinline__ float sum(float val)
        {
            return val;
        }
        // Channel sum of a 3-channel value.
        __device__ __forceinline__ float sum(const float3& val)
        {
            return val.x + val.y + val.z;
        }
        // Channel sum of a 4-channel value. The .w component is NOT
        // included, matching sqr(float4) above.
        __device__ __forceinline__ float sum(const float4& val)
        {
            return val.x + val.y + val.z;
        }

        // Swap the entries of modes k and k+1 for pixel (x, y) in one of
        // the stacked-row-block GMM buffers described above. `rows` is the
        // frame height (height of one mode's row-block).
        template <class Ptr2D>
        __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, int rows)
        {
            typename Ptr2D::elem_type val = ptr(k * rows + y, x);
            ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x);
            ptr((k + 1) * rows + y, x) = val;
        }

        ///////////////////////////////////////////////////////////////
        // MOG2
        //
        // Algorithm parameters, broadcast to all threads via constant
        // memory. Set once from the host by loadConstants() before any
        // kernel launch.

        __constant__ int c_nmixtures;         // max number of Gaussian modes per pixel
        __constant__ float c_Tb;              // background threshold on squared distance (scaled by variance)
        __constant__ float c_TB;              // cumulative-weight threshold for the background model
        __constant__ float c_Tg;              // mode-fit threshold on squared distance (scaled by variance)
        __constant__ float c_varInit;         // initial variance for a newly created mode
        __constant__ float c_varMin;          // lower clamp for mode variance
        __constant__ float c_varMax;          // upper clamp for mode variance
        __constant__ float c_tau;             // shadow-detection ratio threshold
        __constant__ unsigned char c_shadowVal; // fgmask value used to label shadow pixels

        // Host-side: upload all algorithm parameters to constant memory.
        // varMin/varMax are normalized so that varMin <= varMax before
        // upload. Must be called before mog2_gpu / getBackgroundImage2_gpu.
        void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
        {
            varMin = ::fminf(varMin, varMax);
            varMax = ::fmaxf(varMin, varMax);

            cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
            cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
        }

        // MOG2 per-frame update kernel: one thread per pixel (2D grid,
        // bounds-checked against frame size). For each pixel it updates the
        // per-pixel Gaussian mixture (weights/variances/means, kept sorted
        // by descending weight via the swap() helper), classifies the pixel
        // as background/foreground, optionally runs shadow detection, and
        // writes the result to fgmask (0 = background, c_shadowVal = shadow,
        // 255 = foreground).
        //
        //   alphaT - learning rate for this frame
        //   alpha1 - precomputed 1 - alphaT (see mog2_caller)
        //   prune  - per-frame weight penalty; modes whose updated weight
        //            falls below -prune are dropped (its exact derivation is
        //            on the caller's side — not visible from this file)
        template <bool detectShadows, typename SrcT, typename WorkT>
        __global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
                             PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
                             const float alphaT, const float alpha1, const float prune)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x >= frame.cols || y >= frame.rows)
                return;

            WorkT pix = cvt(frame(y, x));

            //calculate distances to the modes (+ sort)
            //here we need to go in descending order!!!

            bool background = false; // true - the pixel classified as background

            //internal:

            bool fitsPDF = false; //if it remains zero a new GMM mode will be added

            int nmodes = modesUsed(y, x);
            int nNewModes = nmodes; //current number of modes in GMM

            float totalWeight = 0.0f;

            //go through all modes

            for (int mode = 0; mode < nmodes; ++mode)
            {
                //need only weight if fit is found
                float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune;
                // number of positions this mode bubbled up during the sort
                // below; used to write the weight back to the mode's NEW
                // location after swapping
                int swap_count = 0;
                //fit not found yet
                if (!fitsPDF)
                {
                    //check if it belongs to some of the remaining modes
                    float var = gmm_variance(mode * frame.rows + y, x);

                    WorkT mean = gmm_mean(mode * frame.rows + y, x);

                    //calculate difference and distance
                    WorkT diff = mean - pix;
                    float dist2 = sqr(diff);

                    //background? - Tb - usually larger than Tg
                    if (totalWeight < c_TB && dist2 < c_Tb * var)
                        background = true;

                    //check fit
                    if (dist2 < c_Tg * var)
                    {
                        //belongs to the mode
                        fitsPDF = true;

                        //update distribution

                        //update weight
                        weight += alphaT;
                        float k = alphaT / weight;

                        //update mean
                        gmm_mean(mode * frame.rows + y, x) = mean - k * diff;

                        //update variance
                        float varnew = var + k * (dist2 - var);

                        //limit the variance
                        varnew = ::fmaxf(varnew, c_varMin);
                        varnew = ::fminf(varnew, c_varMax);

                        gmm_variance(mode * frame.rows + y, x) = varnew;

                        //sort
                        //all other weights are at the same place and
                        //only the matched (iModes) is higher -> just find the new place for it

                        for (int i = mode; i > 0; --i)
                        {
                            //check one up
                            if (weight < gmm_weight((i - 1) * frame.rows + y, x))
                                break;

                            swap_count++;
                            //swap one up
                            swap(gmm_weight, x, y, i - 1, frame.rows);
                            swap(gmm_variance, x, y, i - 1, frame.rows);
                            swap(gmm_mean, x, y, i - 1, frame.rows);
                        }

                        //belongs to the mode - bFitsPDF becomes 1
                    }
                } // !fitsPDF

                //check prune
                if (weight < -prune)
                {
                    weight = 0.0f;
                    nmodes--;
                }

                gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value
                totalWeight += weight;
            }

            //renormalize weights
            // NOTE(review): if every mode was pruned, totalWeight can be 0
            // here and this division yields inf — matches the upstream
            // implementation; confirm whether callers guarantee nmodes > 0.

            totalWeight = 1.f / totalWeight;
            for (int mode = 0; mode < nmodes; ++mode)
                gmm_weight(mode * frame.rows + y, x) *= totalWeight;

            nmodes = nNewModes;

            //make new mode if needed and exit

            if (!fitsPDF)
            {
                // replace the weakest or add a new one
                int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++;

                if (nmodes == 1)
                    gmm_weight(mode * frame.rows + y, x) = 1.f;
                else
                {
                    gmm_weight(mode * frame.rows + y, x) = alphaT;

                    // renormalize all other weights

                    for (int i = 0; i < nmodes - 1; ++i)
                        gmm_weight(i * frame.rows + y, x) *= alpha1;
                }

                // init

                gmm_mean(mode * frame.rows + y, x) = pix;
                gmm_variance(mode * frame.rows + y, x) = c_varInit;

                //sort
                //find the new place for it

                for (int i = nmodes - 1; i > 0; --i)
                {
                    // check one up
                    if (alphaT < gmm_weight((i - 1) * frame.rows + y, x))
                        break;

                    //swap one up
                    swap(gmm_weight, x, y, i - 1, frame.rows);
                    swap(gmm_variance, x, y, i - 1, frame.rows);
                    swap(gmm_mean, x, y, i - 1, frame.rows);
                }
            }

            //set the number of modes
            modesUsed(y, x) = nmodes;

            // Shadow detection: a foreground pixel is relabeled as shadow if
            // it can be explained as a darkened version (ratio a in
            // [c_tau, 1]) of one of the background modes.
            bool isShadow = false;
            if (detectShadows && !background)
            {
                float tWeight = 0.0f;

                // check all the components marked as background:
                for (int mode = 0; mode < nmodes; ++mode)
                {
                    WorkT mean = gmm_mean(mode * frame.rows + y, x);

                    WorkT pix_mean = pix * mean;

                    float numerator = sum(pix_mean);
                    float denominator = sqr(mean);

                    // no division by zero allowed
                    if (denominator == 0)
                        break;

                    // if tau < a < 1 then also check the color distortion
                    if (numerator <= denominator && numerator >= c_tau * denominator)
                    {
                        // a = projection of pix onto mean (brightness ratio)
                        float a = numerator / denominator;

                        WorkT dD = a * mean - pix;

                        if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a)
                        {
                            isShadow = true;
                            break;
                        }
                    };

                    tWeight += gmm_weight(mode * frame.rows + y, x);
                    if (tWeight > c_TB)
                        break;
                }
            }

            fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255;
        }

        // Host-side launcher for the mog2 kernel: 32x8 thread blocks, 2D
        // grid covering the frame, prefer-L1 cache config. Synchronizes
        // when launched on the default (null) stream.
        template <typename SrcT, typename WorkT>
        void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
                         float alphaT, float prune, bool detectShadows, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

            // precompute 1 - alphaT once on the host; the kernel uses it for
            // the exponential-decay weight update
            const float alpha1 = 1.0f - alphaT;

            if (detectShadows)
            {
                cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );

                mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
                                                                    weight, variance, (PtrStepSz<WorkT>) mean,
                                                                    alphaT, alpha1, prune);
            }
            else
            {
                cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );

                mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
                                                                     weight, variance, (PtrStepSz<WorkT>) mean,
                                                                     alphaT, alpha1, prune);
            }

            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        // Public entry point: dispatch mog2_caller on the frame's channel
        // count. Supported cn values are 1, 3 and 4 (entries 0 and 2 of the
        // table are null — callers must not pass cn == 0 or cn == 2).
        void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
                      float alphaT, float prune, bool detectShadows, cudaStream_t stream)
        {
            typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);

            static const func_t funcs[] =
            {
                0, mog2_caller<uchar, float>, 0, mog2_caller<uchar3, float3>, mog2_caller<uchar4, float4>
            };

            funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream);
        }

        // Background-image kernel: one thread per pixel (2D grid,
        // bounds-checked). Computes the weighted mean of this pixel's modes,
        // accumulating in descending-weight order until the cumulative
        // weight exceeds c_TB, then normalizes by the accumulated weight and
        // writes the saturate_cast'ed result.
        template <typename WorkT, typename OutT>
        __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x >= modesUsed.cols || y >= modesUsed.rows)
                return;

            int nmodes = modesUsed(y, x);

            WorkT meanVal = VecTraits<WorkT>::all(0.0f);
            float totalWeight = 0.0f;

            for (int mode = 0; mode < nmodes; ++mode)
            {
                float weight = gmm_weight(mode * modesUsed.rows + y, x);

                WorkT mean = gmm_mean(mode * modesUsed.rows + y, x);
                meanVal = meanVal + weight * mean;

                totalWeight += weight;

                // modes are sorted by descending weight, so once the
                // background threshold is reached the rest can be skipped
                if(totalWeight > c_TB)
                    break;
            }

            // NOTE(review): nmodes == 0 leaves totalWeight at 0 and this
            // divides by zero — matches upstream; confirm callers initialize
            // modesUsed before requesting the background image.
            meanVal = meanVal * (1.f / totalWeight);

            dst(y, x) = saturate_cast<OutT>(meanVal);
        }

        // Host-side launcher for getBackgroundImage2: 32x8 thread blocks,
        // prefer-L1 cache config, synchronizes on the default stream.
        template <typename WorkT, typename OutT>
        void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
        {
            dim3 block(32, 8);
            dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));

            cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );

            getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
            cudaSafeCall( cudaGetLastError() );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

        // Public entry point: dispatch getBackgroundImage2_caller on the
        // channel count. Supported cn values are 1, 3 and 4 (entries 0 and 2
        // are null, as in mog2_gpu).
        void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
        {
            typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);

            static const func_t funcs[] =
            {
                0, getBackgroundImage2_caller<float, uchar>, 0, getBackgroundImage2_caller<float3, uchar3>, getBackgroundImage2_caller<float4, uchar4>
            };

            funcs[cn](modesUsed, weight, mean, dst, stream);
        }
    }
}}}


#endif /* CUDA_DISABLER */