Home | History | Annotate | Download | only in opencl
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of the copyright holders may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors as is and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the Intel Corporation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 #ifdef DOUBLE_SUPPORT
     43 #ifdef cl_amd_fp64
     44 #pragma OPENCL EXTENSION cl_amd_fp64:enable
     45 #elif defined (cl_khr_fp64)
     46 #pragma OPENCL EXTENSION cl_khr_fp64:enable
     47 #endif
     48 #endif
     49 
     50 #if cn != 3
     51 #define loadpix(addr) *(__global const ST *)(addr)
     52 #define storepix(val, addr)  *(__global DT *)(addr) = val
     53 #define SRCSIZE (int)sizeof(ST)
     54 #define DSTSIZE (int)sizeof(DT)
     55 #else
     56 #define loadpix(addr) vload3(0, (__global const ST1 *)(addr))
     57 #define storepix(val, addr) vstore3(val, 0, (__global DT1 *)(addr))
     58 #define SRCSIZE (int)sizeof(ST1)*cn
     59 #define DSTSIZE (int)sizeof(DT1)*cn
     60 #endif
     61 
     62 #ifdef BORDER_CONSTANT
     63 #elif defined BORDER_REPLICATE
     64 #define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
     65     { \
     66         x = max(min(x, maxX - 1), minX); \
     67         y = max(min(y, maxY - 1), minY); \
     68     }
     69 #elif defined BORDER_WRAP
     70 #define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
     71     { \
     72         if (x < minX) \
     73             x -= ((x - maxX + 1) / maxX) * maxX; \
     74         if (x >= maxX) \
     75             x %= maxX; \
     76         if (y < minY) \
     77             y -= ((y - maxY + 1) / maxY) * maxY; \
     78         if (y >= maxY) \
     79             y %= maxY; \
     80     }
     81 #elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
     82 #define EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, delta) \
     83     { \
     84         if (maxX - minX == 1) \
     85             x = minX; \
     86         else \
     87             do \
     88             { \
     89                 if (x < minX) \
     90                     x = minX - (x - minX) - 1 + delta; \
     91                 else \
     92                     x = maxX - 1 - (x - maxX) - delta; \
     93             } \
     94             while (x >= maxX || x < minX); \
     95         \
     96         if (maxY - minY == 1) \
     97             y = minY; \
     98         else \
     99             do \
    100             { \
    101                 if (y < minY) \
    102                     y = minY - (y - minY) - 1 + delta; \
    103                 else \
    104                     y = maxY - 1 - (y - maxY) - delta; \
    105             } \
    106             while (y >= maxY || y < minY); \
    107     }
    108 #ifdef BORDER_REFLECT
    109 #define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 0)
    110 #elif defined(BORDER_REFLECT_101)
    111 #define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 1)
    112 #endif
    113 #else
    114 #error No extrapolation method
    115 #endif
    116 
    117 #define noconvert
    118 
    119 #ifdef SQR
    120 #define PROCESS_ELEM(value) (value * value)
    121 #else
    122 #define PROCESS_ELEM(value) value
    123 #endif
    124 
    125 struct RectCoords
    126 {
    127     int x1, y1, x2, y2;
    128 };
    129 
    130 inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, const struct RectCoords srcCoords)
    131 {
    132 #ifdef BORDER_ISOLATED
    133     if (pos.x >= srcCoords.x1 && pos.y >= srcCoords.y1 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
    134 #else
    135     if (pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
    136 #endif
    137     {
    138         int src_index = mad24(pos.y, src_step, pos.x * SRCSIZE);
    139         WT value = convertToWT(loadpix(srcptr + src_index));
    140 
    141         return PROCESS_ELEM(value);
    142     }
    143     else
    144     {
    145 #ifdef BORDER_CONSTANT
    146         return (WT)(0);
    147 #else
    148         int selected_col = pos.x, selected_row = pos.y;
    149 
    150         EXTRAPOLATE(selected_col, selected_row,
    151 #ifdef BORDER_ISOLATED
    152             srcCoords.x1, srcCoords.y1,
    153 #else
    154             0, 0,
    155 #endif
    156             srcCoords.x2, srcCoords.y2);
    157 
    158         int src_index = mad24(selected_row, src_step, selected_col * SRCSIZE);
    159         WT value = convertToWT(loadpix(srcptr + src_index));
    160 
    161         return PROCESS_ELEM(value);
    162 #endif
    163     }
    164 }
    165 
    166 __kernel void boxFilter(__global const uchar * srcptr, int src_step, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY,
    167                         __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols
    168 #ifdef NORMALIZE
    169                         , float alpha
    170 #endif
    171                        )
    172 {
    173     const struct RectCoords srcCoords = { srcOffsetX, srcOffsetY, srcEndX, srcEndY }; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
    174 
    175     int x = get_local_id(0) + (LOCAL_SIZE_X - (KERNEL_SIZE_X - 1)) * get_group_id(0) - ANCHOR_X;
    176     int y = get_global_id(1) * BLOCK_SIZE_Y;
    177     int local_id = get_local_id(0);
    178 
    179     WT data[KERNEL_SIZE_Y];
    180     __local WT sumOfCols[LOCAL_SIZE_X];
    181     int2 srcPos = (int2)(srcCoords.x1 + x, srcCoords.y1 + y - ANCHOR_Y);
    182 
    183     #pragma unroll
    184     for (int sy = 0; sy < KERNEL_SIZE_Y; sy++, srcPos.y++)
    185         data[sy] = readSrcPixel(srcPos, srcptr, src_step, srcCoords);
    186 
    187     WT tmp_sum = (WT)(0);
    188     #pragma unroll
    189     for (int sy = 0; sy < KERNEL_SIZE_Y; sy++)
    190         tmp_sum += data[sy];
    191 
    192     sumOfCols[local_id] = tmp_sum;
    193     barrier(CLK_LOCAL_MEM_FENCE);
    194 
    195     int dst_index = mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset));
    196     __global DT * dst = (__global DT *)(dstptr + dst_index);
    197 
    198     int sy_index = 0; // current index in data[] array
    199     for (int i = 0, stepY = min(rows - y, BLOCK_SIZE_Y); i < stepY; ++i)
    200     {
    201         if (local_id >= ANCHOR_X && local_id < LOCAL_SIZE_X - (KERNEL_SIZE_X - 1 - ANCHOR_X) &&
    202             x >= 0 && x < cols)
    203         {
    204             WT total_sum = (WT)(0);
    205 
    206             #pragma unroll
    207             for (int sx = 0; sx < KERNEL_SIZE_X; sx++)
    208                 total_sum += sumOfCols[local_id + sx - ANCHOR_X];
    209 
    210 #ifdef NORMALIZE
    211             DT dstval = convertToDT((WT)(alpha) * total_sum);
    212 #else
    213             DT dstval = convertToDT(total_sum);
    214 #endif
    215             storepix(dstval, dst);
    216         }
    217         barrier(CLK_LOCAL_MEM_FENCE);
    218 
    219         tmp_sum = sumOfCols[local_id];
    220         tmp_sum -= data[sy_index];
    221 
    222         data[sy_index] = readSrcPixel(srcPos, srcptr, src_step, srcCoords);
    223         srcPos.y++;
    224 
    225         tmp_sum += data[sy_index];
    226         sumOfCols[local_id] = tmp_sum;
    227 
    228         sy_index = sy_index + 1 < KERNEL_SIZE_Y ? sy_index + 1 : 0;
    229         barrier(CLK_LOCAL_MEM_FENCE);
    230 
    231         dst = (__global DT *)((__global uchar *)dst + dst_step);
    232     }
    233 }
    234