Home | History | Annotate | Download | only in cuda
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #if !defined CUDA_DISABLER
     44 
     45 #include "opencv2/core/cuda/common.hpp"
     46 #include "opencv2/core/cuda/emulation.hpp"
     47 
     48 namespace cv { namespace cuda { namespace device
     49 {
     50     namespace hough
     51     {
     52         __device__ int g_counter;
     53 
     54         template <int PIXELS_PER_THREAD>
     55         __global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
     56         {
     57             __shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
     58             __shared__ int s_qsize[4];
     59             __shared__ int s_globStart[4];
     60 
     61             const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
     62             const int y = blockIdx.y * blockDim.y + threadIdx.y;
     63 
     64             if (threadIdx.x == 0)
     65                 s_qsize[threadIdx.y] = 0;
     66             __syncthreads();
     67 
     68             if (y < src.rows)
     69             {
     70                 // fill the queue
     71                 const uchar* srcRow = src.ptr(y);
     72                 for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
     73                 {
     74                     if (srcRow[xx])
     75                     {
     76                         const unsigned int val = (y << 16) | xx;
     77                         const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
     78                         s_queues[threadIdx.y][qidx] = val;
     79                     }
     80                 }
     81             }
     82 
     83             __syncthreads();
     84 
     85             // let one thread reserve the space required in the global list
     86             if (threadIdx.x == 0 && threadIdx.y == 0)
     87             {
     88                 // find how many items are stored in each list
     89                 int totalSize = 0;
     90                 for (int i = 0; i < blockDim.y; ++i)
     91                 {
     92                     s_globStart[i] = totalSize;
     93                     totalSize += s_qsize[i];
     94                 }
     95 
     96                 // calculate the offset in the global list
     97                 const int globalOffset = atomicAdd(&g_counter, totalSize);
     98                 for (int i = 0; i < blockDim.y; ++i)
     99                     s_globStart[i] += globalOffset;
    100             }
    101 
    102             __syncthreads();
    103 
    104             // copy local queues to global queue
    105             const int qsize = s_qsize[threadIdx.y];
    106             int gidx = s_globStart[threadIdx.y] + threadIdx.x;
    107             for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
    108                 list[gidx] = s_queues[threadIdx.y][i];
    109         }
    110 
    111         int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
    112         {
    113             const int PIXELS_PER_THREAD = 16;
    114 
    115             void* counterPtr;
    116             cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
    117 
    118             cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
    119 
    120             const dim3 block(32, 4);
    121             const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
    122 
    123             cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
    124 
    125             buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
    126             cudaSafeCall( cudaGetLastError() );
    127 
    128             cudaSafeCall( cudaDeviceSynchronize() );
    129 
    130             int totalCount;
    131             cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
    132 
    133             return totalCount;
    134         }
    135     }
    136 }}}
    137 
    138 #endif /* CUDA_DISABLER */
    139