// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
      7 
      8 #ifdef DOUBLE_SUPPORT
      9 #ifdef cl_amd_fp64
     10 #pragma OPENCL EXTENSION cl_amd_fp64:enable
     11 #elif defined (cl_khr_fp64)
     12 #pragma OPENCL EXTENSION cl_khr_fp64:enable
     13 #endif
     14 #endif
     15 
     16 #define SRC_TSIZE cn * (int)sizeof(srcT1)
     17 #define DST_TSIZE cn * (int)sizeof(dstT1)
     18 
     19 #define noconvert
     20 
     21 __kernel void accumulate(__global const uchar * srcptr, int src_step, int src_offset,
     22 #ifdef ACCUMULATE_PRODUCT
     23                          __global const uchar * src2ptr, int src2_step, int src2_offset,
     24 #endif
     25                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols
     26 #ifdef ACCUMULATE_WEIGHTED
     27                          , dstT1 alpha
     28 #endif
     29 #ifdef HAVE_MASK
     30                          , __global const uchar * mask, int mask_step, int mask_offset
     31 #endif
     32                          )
     33 {
     34     int x = get_global_id(0);
     35     int y = get_global_id(1) * rowsPerWI;
     36 
     37     if (x < dst_cols)
     38     {
     39         int src_index = mad24(y, src_step, mad24(x, SRC_TSIZE, src_offset));
     40 #ifdef HAVE_MASK
     41         int mask_index = mad24(y, mask_step, mask_offset + x);
     42         mask += mask_index;
     43 #endif
     44 #ifdef ACCUMULATE_PRODUCT
     45         int src2_index = mad24(y, src2_step, mad24(x, SRC_TSIZE, src2_offset));
     46 #endif
     47         int dst_index = mad24(y, dst_step, mad24(x, DST_TSIZE, dst_offset));
     48 
     49         #pragma unroll
     50         for (int i = 0; i < rowsPerWI; ++i)
     51             if (y < dst_rows)
     52             {
     53                 __global const srcT1 * src = (__global const srcT1 *)(srcptr + src_index);
     54 #ifdef ACCUMULATE_PRODUCT
     55                 __global const srcT1 * src2 = (__global const srcT1 *)(src2ptr + src2_index);
     56 #endif
     57                 __global dstT1 * dst = (__global dstT1 *)(dstptr + dst_index);
     58 
     59 #ifdef HAVE_MASK
     60                 if (mask[0])
     61 #endif
     62                     #pragma unroll
     63                     for (int c = 0; c < cn; ++c)
     64                     {
     65 #ifdef ACCUMULATE
     66                         dst[c] += convertToDT(src[c]);
     67 #elif defined ACCUMULATE_SQUARE
     68                         dstT1 val = convertToDT(src[c]);
     69                         dst[c] = fma(val, val, dst[c]);
     70 #elif defined ACCUMULATE_PRODUCT
     71                         dst[c] = fma(convertToDT(src[c]), convertToDT(src2[c]), dst[c]);
     72 #elif defined ACCUMULATE_WEIGHTED
     73                         dst[c] = fma(1 - alpha, dst[c], src[c] * alpha);
     74 #else
     75 #error "Unknown accumulation type"
     76 #endif
     77                     }
     78 
     79                 src_index += src_step;
     80 #ifdef ACCUMULATE_PRODUCT
     81                 src2_index += src2_step;
     82 #endif
     83 #ifdef HAVE_MASK
     84                 mask += mask_step;
     85 #endif
     86                 dst_index += dst_step;
     87                 ++y;
     88             }
     89     }
     90 }
     91