Home | History | Annotate | Download | only in cuda
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #if !defined CUDA_DISABLER
     44 
     45 #include "opencv2/core/cuda/common.hpp"
     46 #include "opencv2/core/cuda/emulation.hpp"
     47 #include "opencv2/core/cuda/dynamic_smem.hpp"
     48 
     49 #include "opencv2/opencv_modules.hpp"
     50 
     51 #ifdef HAVE_OPENCV_CUDAFILTERS
     52 
     53 namespace cv { namespace cuda { namespace device
     54 {
     55     namespace hough_circles
     56     {
     57         __device__ int g_counter;
     58 
     59         ////////////////////////////////////////////////////////////////////////
     60         // circlesAccumCenters
     61 
     62         __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
     63                                             PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
     64         {
     65             const int SHIFT = 10;
     66             const int ONE = 1 << SHIFT;
     67 
     68             const int tid = blockIdx.x * blockDim.x + threadIdx.x;
     69 
     70             if (tid >= count)
     71                 return;
     72 
     73             const unsigned int val = list[tid];
     74 
     75             const int x = (val & 0xFFFF);
     76             const int y = (val >> 16) & 0xFFFF;
     77 
     78             const int vx = dx(y, x);
     79             const int vy = dy(y, x);
     80 
     81             if (vx == 0 && vy == 0)
     82                 return;
     83 
     84             const float mag = ::sqrtf(vx * vx + vy * vy);
     85 
     86             const int x0 = __float2int_rn((x * idp) * ONE);
     87             const int y0 = __float2int_rn((y * idp) * ONE);
     88 
     89             int sx = __float2int_rn((vx * idp) * ONE / mag);
     90             int sy = __float2int_rn((vy * idp) * ONE / mag);
     91 
     92             // Step from minRadius to maxRadius in both directions of the gradient
     93             for (int k1 = 0; k1 < 2; ++k1)
     94             {
     95                 int x1 = x0 + minRadius * sx;
     96                 int y1 = y0 + minRadius * sy;
     97 
     98                 for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
     99                 {
    100                     const int x2 = x1 >> SHIFT;
    101                     const int y2 = y1 >> SHIFT;
    102 
    103                     if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
    104                         break;
    105 
    106                     ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1);
    107                 }
    108 
    109                 sx = -sx;
    110                 sy = -sy;
    111             }
    112         }
    113 
    114         void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
    115         {
    116             const dim3 block(256);
    117             const dim3 grid(divUp(count, block.x));
    118 
    119             cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
    120 
    121             circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
    122             cudaSafeCall( cudaGetLastError() );
    123 
    124             cudaSafeCall( cudaDeviceSynchronize() );
    125         }
    126 
    127         ////////////////////////////////////////////////////////////////////////
    128         // buildCentersList
    129 
    130         __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
    131         {
    132             const int x = blockIdx.x * blockDim.x + threadIdx.x;
    133             const int y = blockIdx.y * blockDim.y + threadIdx.y;
    134 
    135             if (x < accum.cols - 2 && y < accum.rows - 2)
    136             {
    137                 const int top = accum(y, x + 1);
    138 
    139                 const int left = accum(y + 1, x);
    140                 const int cur = accum(y + 1, x + 1);
    141                 const int right = accum(y + 1, x + 2);
    142 
    143                 const int bottom = accum(y + 2, x + 1);
    144 
    145                 if (cur > threshold && cur > top && cur >= bottom && cur >  left && cur >= right)
    146                 {
    147                     const unsigned int val = (y << 16) | x;
    148                     const int idx = ::atomicAdd(&g_counter, 1);
    149                     centers[idx] = val;
    150                 }
    151             }
    152         }
    153 
    154         int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
    155         {
    156             void* counterPtr;
    157             cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
    158 
    159             cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
    160 
    161             const dim3 block(32, 8);
    162             const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
    163 
    164             cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
    165 
    166             buildCentersList<<<grid, block>>>(accum, centers, threshold);
    167             cudaSafeCall( cudaGetLastError() );
    168 
    169             cudaSafeCall( cudaDeviceSynchronize() );
    170 
    171             int totalCount;
    172             cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
    173 
    174             return totalCount;
    175         }
    176 
    177         ////////////////////////////////////////////////////////////////////////
    178         // circlesAccumRadius
    179 
    180         __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count,
    181                                            float3* circles, const int maxCircles, const float dp,
    182                                            const int minRadius, const int maxRadius, const int histSize, const int threshold)
    183         {
    184             int* smem = DynamicSharedMem<int>();
    185 
    186             for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x)
    187                 smem[i] = 0;
    188             __syncthreads();
    189 
    190             unsigned int val = centers[blockIdx.x];
    191 
    192             float cx = (val & 0xFFFF);
    193             float cy = (val >> 16) & 0xFFFF;
    194 
    195             cx = (cx + 0.5f) * dp;
    196             cy = (cy + 0.5f) * dp;
    197 
    198             for (int i = threadIdx.x; i < count; i += blockDim.x)
    199             {
    200                 val = list[i];
    201 
    202                 const int x = (val & 0xFFFF);
    203                 const int y = (val >> 16) & 0xFFFF;
    204 
    205                 const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y));
    206                 if (rad >= minRadius && rad <= maxRadius)
    207                 {
    208                     const int r = __float2int_rn(rad - minRadius);
    209 
    210                     Emulation::smem::atomicAdd(&smem[r + 1], 1);
    211                 }
    212             }
    213 
    214             __syncthreads();
    215 
    216             for (int i = threadIdx.x; i < histSize; i += blockDim.x)
    217             {
    218                 const int curVotes = smem[i + 1];
    219 
    220                 if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
    221                 {
    222                     const int ind = ::atomicAdd(&g_counter, 1);
    223                     if (ind < maxCircles)
    224                         circles[ind] = make_float3(cx, cy, i + minRadius);
    225                 }
    226             }
    227         }
    228 
    229         int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
    230                                    float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
    231         {
    232             void* counterPtr;
    233             cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
    234 
    235             cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
    236 
    237             const dim3 block(has20 ? 1024 : 512);
    238             const dim3 grid(centersCount);
    239 
    240             const int histSize = maxRadius - minRadius + 1;
    241             size_t smemSize = (histSize + 2) * sizeof(int);
    242 
    243             circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
    244             cudaSafeCall( cudaGetLastError() );
    245 
    246             cudaSafeCall( cudaDeviceSynchronize() );
    247 
    248             int totalCount;
    249             cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
    250 
    251             totalCount = ::min(totalCount, maxCircles);
    252 
    253             return totalCount;
    254         }
    255     }
    256 }}}
    257 
    258 #endif // HAVE_OPENCV_CUDAFILTERS
    259 
    260 #endif /* CUDA_DISABLER */
    261