Home | History | Annotate | Download | only in opencl
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
     14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
     15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
     16 // Third party copyrights are property of their respective owners.
     17 //
     18 // Redistribution and use in source and binary forms, with or without modification,
     19 // are permitted provided that the following conditions are met:
     20 //
     21 //   * Redistribution's of source code must retain the above copyright notice,
     22 //     this list of conditions and the following disclaimer.
     23 //
     24 //   * Redistribution's in binary form must reproduce the above copyright notice,
     25 //     this list of conditions and the following disclaimer in the documentation
     26 //     and/or other materials provided with the distribution.
     27 //
     28 //   * The name of the copyright holders may not be used to endorse or promote products
     29 //     derived from this software without specific prior written permission.
     30 //
     31 // This software is provided by the copyright holders and contributors as is and
     32 // any express or implied warranties, including, but not limited to, the implied
     33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     34 // In no event shall the copyright holders or contributors be liable for any direct,
     35 // indirect, incidental, special, exemplary, or consequential damages
     36 // (including, but not limited to, procurement of substitute goods or services;
     37 // loss of use, data, or profits; or business interruption) however caused
     38 // and on any theory of liability, whether in contract, strict liability,
     39 // or tort (including negligence or otherwise) arising in any way out of
     40 // the use of this software, even if advised of the possibility of such damage.
     41 //
     42 //M*/
     43 
     44 #ifdef DOUBLE_SUPPORT
     45 #ifdef cl_amd_fp64
     46 #pragma OPENCL EXTENSION cl_amd_fp64:enable
     47 #elif defined (cl_khr_fp64)
     48 #pragma OPENCL EXTENSION cl_khr_fp64:enable
     49 #endif
     50 #endif
     51 
     52 #if ddepth == 0
     53 #define MIN_VAL 0
     54 #define MAX_VAL 255
     55 #elif ddepth == 1
     56 #define MIN_VAL -128
     57 #define MAX_VAL 127
     58 #elif ddepth == 2
     59 #define MIN_VAL 0
     60 #define MAX_VAL 65535
     61 #elif ddepth == 3
     62 #define MIN_VAL -32768
     63 #define MAX_VAL 32767
     64 #elif ddepth == 4
     65 #define MIN_VAL INT_MIN
     66 #define MAX_VAL INT_MAX
     67 #elif ddepth == 5
     68 #define MIN_VAL (-FLT_MAX)
     69 #define MAX_VAL FLT_MAX
     70 #elif ddepth == 6
     71 #define MIN_VAL (-DBL_MAX)
     72 #define MAX_VAL DBL_MAX
     73 #else
     74 #error "Unsupported depth"
     75 #endif
     76 
     77 #define noconvert
     78 
     79 #if defined OCL_CV_REDUCE_SUM || defined OCL_CV_REDUCE_AVG
     80 #define INIT_VALUE 0
     81 #define PROCESS_ELEM(acc, value) acc += value
     82 #elif defined OCL_CV_REDUCE_MAX
     83 #define INIT_VALUE MIN_VAL
     84 #define PROCESS_ELEM(acc, value) acc = max(value, acc)
     85 #elif defined OCL_CV_REDUCE_MIN
     86 #define INIT_VALUE MAX_VAL
     87 #define PROCESS_ELEM(acc, value) acc = min(value, acc)
     88 #else
     89 #error "No operation is specified"
     90 #endif
     91 
     92 #ifdef OP_REDUCE_PRE
     93 
     94 __kernel void reduce_horz_opt(__global const uchar * srcptr, int src_step, int src_offset, int rows, int cols,
     95                      __global uchar * dstptr, int dst_step, int dst_offset
     96 #ifdef OCL_CV_REDUCE_AVG
     97                      , float fscale
     98 #endif
     99                      )
    100 {
    101     __local bufT lsmem[TILE_HEIGHT][BUF_COLS][cn];
    102 
    103     int x = get_global_id(0);
    104     int y = get_global_id(1);
    105     int liy = get_local_id(1);
    106     if ((x < BUF_COLS) && (y < rows))
    107     {
    108         int src_index = mad24(y, src_step, mad24(x, (int)sizeof(srcT) * cn, src_offset));
    109 
    110         __global const srcT * src = (__global const srcT *)(srcptr + src_index);
    111         bufT tmp[cn];
    112         #pragma unroll
    113         for (int c = 0; c < cn; ++c)
    114             tmp[c] = INIT_VALUE;
    115 
    116         int src_step_mul = BUF_COLS * cn;
    117         for (int idx = x; idx < cols; idx += BUF_COLS, src += src_step_mul)
    118         {
    119             #pragma unroll
    120             for (int c = 0; c < cn; ++c)
    121             {
    122                 bufT value = convertToBufT(src[c]);
    123                 PROCESS_ELEM(tmp[c], value);
    124             }
    125         }
    126 
    127         #pragma unroll
    128         for (int c = 0; c < cn; ++c)
    129             lsmem[liy][x][c] = tmp[c];
    130     }
    131     barrier(CLK_LOCAL_MEM_FENCE);
    132     if ((x < BUF_COLS / 2) && (y < rows))
    133     {
    134         #pragma unroll
    135         for (int c = 0; c < cn; ++c)
    136         {
    137             PROCESS_ELEM(lsmem[liy][x][c], lsmem[liy][x +  BUF_COLS / 2][c]);
    138         }
    139     }
    140     barrier(CLK_LOCAL_MEM_FENCE);
    141     if ((x == 0) && (y < rows))
    142     {
    143         int dst_index = mad24(y, dst_step, dst_offset);
    144 
    145         __global dstT * dst = (__global dstT *)(dstptr + dst_index);
    146         bufT tmp[cn];
    147         #pragma unroll
    148         for (int c = 0; c < cn; ++c)
    149             tmp[c] = INIT_VALUE;
    150 
    151         #pragma unroll
    152         for (int xin = 0; xin < BUF_COLS / 2; xin ++)
    153         {
    154             #pragma unroll
    155             for (int c = 0; c < cn; ++c)
    156             {
    157                 PROCESS_ELEM(tmp[c], lsmem[liy][xin][c]);
    158             }
    159         }
    160 
    161         #pragma unroll
    162         for (int c = 0; c < cn; ++c)
    163 #ifdef OCL_CV_REDUCE_AVG
    164             dst[c] = convertToDT(convertToWT(tmp[c]) * fscale);
    165 #else
    166             dst[c] = convertToDT(tmp[c]);
    167 #endif
    168     }
    169 }
    170 
    171 #else
    172 
    173 __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset, int rows, int cols,
    174                      __global uchar * dstptr, int dst_step, int dst_offset
    175 #ifdef OCL_CV_REDUCE_AVG
    176                      , float fscale
    177 #endif
    178                      )
    179 {
    180 #if dim == 0 // reduce to a single row
    181     int x = get_global_id(0);
    182     if (x < cols)
    183     {
    184         int src_index = mad24(x, (int)sizeof(srcT) * cn, src_offset);
    185         int dst_index = mad24(x, (int)sizeof(dstT0) * cn, dst_offset);
    186 
    187         __global dstT0 * dst = (__global dstT0 *)(dstptr + dst_index);
    188         dstT tmp[cn];
    189         #pragma unroll
    190         for (int c = 0; c < cn; ++c)
    191             tmp[c] = INIT_VALUE;
    192 
    193         for (int y = 0; y < rows; ++y, src_index += src_step)
    194         {
    195             __global const srcT * src = (__global const srcT *)(srcptr + src_index);
    196             #pragma unroll
    197             for (int c = 0; c < cn; ++c)
    198             {
    199                 dstT value = convertToDT(src[c]);
    200                 PROCESS_ELEM(tmp[c], value);
    201             }
    202         }
    203 
    204         #pragma unroll
    205         for (int c = 0; c < cn; ++c)
    206 #ifdef OCL_CV_REDUCE_AVG
    207             dst[c] = convertToDT0(convertToWT(tmp[c]) * fscale);
    208 #else
    209             dst[c] = convertToDT0(tmp[c]);
    210 #endif
    211     }
    212 #elif dim == 1 // reduce to a single column
    213     int y = get_global_id(0);
    214     if (y < rows)
    215     {
    216         int src_index = mad24(y, src_step, src_offset);
    217         int dst_index = mad24(y, dst_step, dst_offset);
    218 
    219         __global const srcT * src = (__global const srcT *)(srcptr + src_index);
    220         __global dstT * dst = (__global dstT *)(dstptr + dst_index);
    221         dstT tmp[cn];
    222         #pragma unroll
    223         for (int c = 0; c < cn; ++c)
    224             tmp[c] = INIT_VALUE;
    225 
    226         for (int x = 0; x < cols; ++x, src += cn)
    227         {
    228             #pragma unroll
    229             for (int c = 0; c < cn; ++c)
    230             {
    231                 dstT value = convertToDT(src[c]);
    232                 PROCESS_ELEM(tmp[c], value);
    233             }
    234         }
    235 
    236         #pragma unroll
    237         for (int c = 0; c < cn; ++c)
    238 #ifdef OCL_CV_REDUCE_AVG
    239             dst[c] = convertToDT0(convertToWT(tmp[c]) * fscale);
    240 #else
    241             dst[c] = convertToDT0(tmp[c]);
    242 #endif
    243     }
    244 #else
    245 #error "Dims must be either 0 or 1"
    246 #endif
    247 }
    248 
    249 #endif
    250