// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.

#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif

// The following names are used below but are not defined in this file; they
// must be provided from outside (presumably as program build options -- TODO
// confirm against the host code):
//   srcT1, dstT1  - scalar element types of the source and destination
//   cn            - number of consecutive elements processed per x position
//   rowsPerWI     - number of rows handled by one work-item
//   convertToDT   - conversion applied to a source element before accumulation
// Exactly one of ACCUMULATE, ACCUMULATE_SQUARE, ACCUMULATE_PRODUCT or
// ACCUMULATE_WEIGHTED must be defined; HAVE_MASK and DOUBLE_SUPPORT are optional.

// Size in bytes of one x position in the source / destination.
// The replacement lists are fully parenthesized so the macros stay correct
// when used inside a larger expression.
#define SRC_TSIZE ((cn) * (int)sizeof(srcT1))
#define DST_TSIZE ((cn) * (int)sizeof(dstT1))

// Expands to nothing, so an expression such as noconvert(x) reduces to (x).
// Presumably convertToDT is set to this name when no conversion is needed.
#define noconvert

// Accumulates source data into dst, element by element.
//
// Each work-item handles column x = get_global_id(0) and up to rowsPerWI
// consecutive rows starting at y = get_global_id(1) * rowsPerWI. For every
// processed position the cn elements of dst are updated according to the
// accumulation mode selected at compile time:
//   ACCUMULATE           dst += convertToDT(src)
//   ACCUMULATE_SQUARE    dst  = convertToDT(src)^2 + dst
//   ACCUMULATE_PRODUCT   dst  = convertToDT(src) * convertToDT(src2) + dst
//   ACCUMULATE_WEIGHTED  dst  = (1 - alpha) * dst + src * alpha
//
// Parameters:
//   srcptr, src_step, src_offset     source base pointer, row step and start
//                                    offset; step and offset are added to a
//                                    uchar pointer, i.e. they are in bytes
//   src2ptr, src2_step, src2_offset  second source (ACCUMULATE_PRODUCT only),
//                                    same layout as the first one
//   dstptr, dst_step, dst_offset     destination base pointer, row step and
//                                    start offset, in bytes
//   dst_rows, dst_cols               bounds checked against y and x
//   alpha                            weight of the source (ACCUMULATE_WEIGHTED only)
//   mask, mask_step, mask_offset     optional mask (HAVE_MASK only), one byte
//                                    per x position; a position is updated
//                                    only when its mask byte is non-zero
__kernel void accumulate(__global const uchar * srcptr, int src_step, int src_offset,
#ifdef ACCUMULATE_PRODUCT
                         __global const uchar * src2ptr, int src2_step, int src2_offset,
#endif
                         __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols
#ifdef ACCUMULATE_WEIGHTED
                         , dstT1 alpha
#endif
#ifdef HAVE_MASK
                         , __global const uchar * mask, int mask_step, int mask_offset
#endif
                         )
{
    int x = get_global_id(0);
    int y = get_global_id(1) * rowsPerWI;

    if (x < dst_cols)
    {
        // Byte offsets of the first position handled by this work-item.
        int src_index = mad24(y, src_step, mad24(x, SRC_TSIZE, src_offset));
#ifdef HAVE_MASK
        int mask_index = mad24(y, mask_step, mask_offset + x);
        mask += mask_index;
#endif
#ifdef ACCUMULATE_PRODUCT
        int src2_index = mad24(y, src2_step, mad24(x, SRC_TSIZE, src2_offset));
#endif
        int dst_index = mad24(y, dst_step, mad24(x, DST_TSIZE, dst_offset));

        #pragma unroll
        for (int i = 0; i < rowsPerWI; ++i)
        {
            // y only advances inside this branch, so once it reaches dst_rows
            // the remaining iterations do nothing.
            if (y < dst_rows)
            {
                __global const srcT1 * src = (__global const srcT1 *)(srcptr + src_index);
#ifdef ACCUMULATE_PRODUCT
                __global const srcT1 * src2 = (__global const srcT1 *)(src2ptr + src2_index);
#endif
                __global dstT1 * dst = (__global dstT1 *)(dstptr + dst_index);

#ifdef HAVE_MASK
                // The mask guards only the element loop below; the row
                // advance that follows it always runs.
                if (mask[0])
#endif
                    #pragma unroll
                    for (int c = 0; c < cn; ++c)
                    {
#ifdef ACCUMULATE
                        dst[c] += convertToDT(src[c]);
#elif defined ACCUMULATE_SQUARE
                        dstT1 val = convertToDT(src[c]);
                        dst[c] = fma(val, val, dst[c]);
#elif defined ACCUMULATE_PRODUCT
                        dst[c] = fma(convertToDT(src[c]), convertToDT(src2[c]), dst[c]);
#elif defined ACCUMULATE_WEIGHTED
                        // src[c] is used without convertToDT here; the
                        // multiplication by alpha (of type dstT1) relies on
                        // the usual arithmetic conversions.
                        dst[c] = fma(1 - alpha, dst[c], src[c] * alpha);
#else
#error "Unknown accumulation type"
#endif
                    }

                // Step every cursor to the same x position in the next row.
                src_index += src_step;
#ifdef ACCUMULATE_PRODUCT
                src2_index += src2_step;
#endif
#ifdef HAVE_MASK
                mask += mask_step;
#endif
                dst_index += dst_step;
                ++y;
            }
        }
    }
}