Home | History | Annotate | Download | only in opencl
      1 // This file is part of OpenCV project.
      2 // It is subject to the license terms in the LICENSE file found in the top-level directory
      3 // of this distribution and at http://opencv.org/license.html.
      4 
      5 // Copyright (C) 2014, Itseez, Inc., all rights reserved.
      6 // Third party copyrights are property of their respective owners.
      7 
      8 #ifdef DOUBLE_SUPPORT
      9 #ifdef cl_amd_fp64
     10 #pragma OPENCL EXTENSION cl_amd_fp64:enable
     11 #elif defined (cl_khr_fp64)
     12 #pragma OPENCL EXTENSION cl_khr_fp64:enable
     13 #endif
     14 #endif
     15 
     16 #define noconvert
     17 
     18 #if cn != 3
     19 #define loadpix(addr) *(__global const srcT *)(addr)
     20 #define storepix(val, addr)  *(__global dstT *)(addr) = val
     21 #define storesqpix(val, addr)  *(__global sqdstT *)(addr) = val
     22 #define srcTSIZE (int)sizeof(srcT)
     23 #define dstTSIZE (int)sizeof(dstT)
     24 #define sqdstTSIZE (int)sizeof(sqdstT)
     25 #else
     26 #define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
     27 #define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
     28 #define storesqpix(val, addr) vstore3(val, 0, (__global sqdstT1 *)(addr))
     29 #define srcTSIZE ((int)sizeof(srcT1)*3)
     30 #define dstTSIZE ((int)sizeof(dstT1)*3)
     31 #define sqdstTSIZE ((int)sizeof(sqdstT1)*3)
     32 #endif
     33 
     34 __kernel void meanStdDev(__global const uchar * srcptr, int src_step, int src_offset, int cols,
     35                          int total, int groups, __global uchar * dstptr
     36  #ifdef HAVE_MASK
     37                          , __global const uchar * mask, int mask_step, int mask_offset
     38  #endif
     39                         )
     40 {
     41     int lid = get_local_id(0);
     42     int gid = get_group_id(0);
     43     int id = get_global_id(0);
     44 
     45     __local dstT localMemSum[WGS2_ALIGNED];
     46     __local sqdstT localMemSqSum[WGS2_ALIGNED];
     47 #ifdef HAVE_MASK
     48     __local int localMemNonZero[WGS2_ALIGNED];
     49 #endif
     50 
     51     dstT accSum = (dstT)(0);
     52     sqdstT accSqSum = (sqdstT)(0);
     53 #ifdef HAVE_MASK
     54     int accNonZero = 0;
     55     mask += mask_offset;
     56 #endif
     57     srcptr += src_offset;
     58 
     59     for (int grain = groups * WGS; id < total; id += grain)
     60     {
     61 #ifdef HAVE_MASK
     62 #ifdef HAVE_MASK_CONT
     63         int mask_index = id;
     64 #else
     65         int mask_index = mad24(id / cols, mask_step, id % cols);
     66 #endif
     67         if (mask[mask_index])
     68 #endif
     69         {
     70 #ifdef HAVE_SRC_CONT
     71             int src_index = id * srcTSIZE;
     72 #else
     73             int src_index = mad24(id / cols, src_step, mul24(id % cols, srcTSIZE));
     74 #endif
     75 
     76             srcT value = loadpix(srcptr + src_index);
     77             accSum += convertToDT(value);
     78             sqdstT dvalue = convertToSDT(value);
     79             accSqSum = fma(dvalue, dvalue, accSqSum);
     80 
     81 #ifdef HAVE_MASK
     82             ++accNonZero;
     83 #endif
     84         }
     85     }
     86 
     87     if (lid < WGS2_ALIGNED)
     88     {
     89         localMemSum[lid] = accSum;
     90         localMemSqSum[lid] = accSqSum;
     91 #ifdef HAVE_MASK
     92         localMemNonZero[lid] = accNonZero;
     93 #endif
     94     }
     95     barrier(CLK_LOCAL_MEM_FENCE);
     96 
     97     if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED)
     98     {
     99         localMemSum[lid - WGS2_ALIGNED] += accSum;
    100         localMemSqSum[lid - WGS2_ALIGNED] += accSqSum;
    101 #ifdef HAVE_MASK
    102         localMemNonZero[lid - WGS2_ALIGNED] += accNonZero;
    103 #endif
    104     }
    105     barrier(CLK_LOCAL_MEM_FENCE);
    106 
    107     for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1)
    108     {
    109         if (lid < lsize)
    110         {
    111             int lid2 = lsize + lid;
    112             localMemSum[lid] += localMemSum[lid2];
    113             localMemSqSum[lid] += localMemSqSum[lid2];
    114 #ifdef HAVE_MASK
    115             localMemNonZero[lid] += localMemNonZero[lid2];
    116 #endif
    117         }
    118         barrier(CLK_LOCAL_MEM_FENCE);
    119     }
    120 
    121     if (lid == 0)
    122     {
    123         storepix(localMemSum[0], dstptr + dstTSIZE * gid);
    124         storesqpix(localMemSqSum[0], dstptr + mad24(dstTSIZE, groups, sqdstTSIZE * gid));
    125 #ifdef HAVE_MASK
    126         *(__global int *)(dstptr + mad24(dstTSIZE + sqdstTSIZE, groups, (int)sizeof(int) * gid)) = localMemNonZero[0];
    127 #endif
    128     }
    129 }
    130