Home | History | Annotate | Download | only in opencl
      1 //                           License Agreement
      2 //                For Open Source Computer Vision Library
      3 //
      4 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
      5 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
      6 // Third party copyrights are property of their respective owners.
      7 //
      8 // @Authors
      9 //    Rock Li, Rock.li@amd.com
     10 //
     11 // Redistribution and use in source and binary forms, with or without modification,
     12 // are permitted provided that the following conditions are met:
     13 //
     14 //   * Redistribution's of source code must retain the above copyright notice,
     15 //     this list of conditions and the following disclaimer.
     16 //
     17 //   * Redistribution's in binary form must reproduce the above copyright notice,
     18 //     this list of conditions and the following disclaimer in the documentation
     19 //     and/or other materials provided with the distribution.
     20 //
     21 //   * The name of the copyright holders may not be used to endorse or promote products
     22 //     derived from this software without specific prior written permission.
     23 //
     24 // This software is provided by the copyright holders and contributors as is and
     25 // any express or implied warranties, including, but not limited to, the implied
     26 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     27 // In no event shall the Intel Corporation or contributors be liable for any direct,
     28 // indirect, incidental, special, exemplary, or consequential damages
     29 // (including, but not limited to, procurement of substitute goods or services;
     30 // loss of use, data, or profits; or business interruption) however caused
     31 // and on any theory of liability, whether in contract, strict liability,
     32 // or tort (including negligence or otherwise) arising in any way out of
     33 // the use of this software, even if advised of the possibility of such damage.
     34 
     35 #if cn != 3
     36 #define loadpix(addr) *(__global const uchar_t *)(addr)
     37 #define storepix(val, addr)  *(__global uchar_t *)(addr) = val
     38 #define TSIZE cn
     39 #else
     40 #define loadpix(addr) vload3(0, (__global const uchar *)(addr))
     41 #define storepix(val, addr) vstore3(val, 0, (__global uchar *)(addr))
     42 #define TSIZE 3
     43 #endif
     44 
     45 #if cn == 1
     46 #define SUM(a) a
     47 #elif cn == 2
     48 #define SUM(a) a.x + a.y
     49 #elif cn == 3
     50 #define SUM(a) a.x + a.y + a.z
     51 #elif cn == 4
     52 #define SUM(a) a.x + a.y + a.z + a.w
     53 #else
     54 #error "cn should be <= 4"
     55 #endif
     56 
     57 //Read pixels as integers
     58 // Intel Device - Read Pixels as floats
     59 __kernel void bilateral(__global const uchar * src, int src_step, int src_offset,
     60                         __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
     61                         __constant float * space_weight, __constant int * space_ofs)
     62 {
     63     int x = get_global_id(0);
     64     int y = get_global_id(1);
     65 
     66     if (y < dst_rows && x < dst_cols)
     67     {
     68         int src_index = mad24(y + radius, src_step, mad24(x + radius, TSIZE, src_offset));
     69         int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset));
     70 
     71         float_t sum = (float_t)(0.0f);
     72         float wsum = 0.0f;
     73         #ifdef INTEL_DEVICE
     74         float_t val0 = convert_float_t(loadpix(src + src_index));
     75         #else
     76         int_t val0 = convert_int_t(loadpix(src + src_index));
     77         #endif
     78         #pragma unroll
     79         for (int k = 0; k < maxk; k++ )
     80         {
     81             #ifdef INTEL_DEVICE
     82             float_t val = convert_float_t(loadpix(src + src_index + space_ofs[k]));
     83             float diff = SUM(fabs(val - val0));
     84             #else
     85             int_t val = convert_int_t(loadpix(src + src_index + space_ofs[k]));
     86             int diff = SUM(abs(val - val0));
     87             #endif
     88             float w = space_weight[k] * native_exp((float)(diff * diff * gauss_color_coeff));
     89             sum += convert_float_t(val) * (float_t)(w);
     90             wsum += w;
     91         }
     92         storepix(convert_uchar_t(sum / (float_t)(wsum)), dst + dst_index);
     93     }
     94 }
     95 
     96 #ifdef INTEL_DEVICE
     97 #if cn == 1
     98 //for single channgel x4 sized images.
     99 __kernel void bilateral_float4(__global const uchar * src, int src_step, int src_offset,
    100                                __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
    101                                __constant float * space_weight, __constant int * space_ofs)
    102 {
    103     int x = get_global_id(0);
    104     int y = get_global_id(1);
    105     if (y < dst_rows && x < dst_cols / 4 )
    106     {
    107         int src_index = ((y + radius) * src_step) + x * 4  + (radius + src_offset);
    108         int dst_index = (y  * dst_step) +  x * 4 + dst_offset ;
    109         float4 sum = 0.f, wsum = 0.f;
    110         float4 val0 = convert_float4(vload4(0, src + src_index));
    111         #pragma unroll
    112         for (int k = 0; k < maxk; k++ )
    113         {
    114             float4 val = convert_float4(vload4(0, src + src_index + space_ofs[k]));
    115             float4 w = space_weight[k] * native_exp((val - val0) * (val - val0) * gauss_color_coeff);
    116             sum += val * w;
    117             wsum += w;
    118         }
    119         sum = sum / wsum + .5f;
    120         vstore4(convert_uchar4_rtz(sum), 0, dst + dst_index);
    121     }
    122 }
    123 #endif
    124 #endif