Home | History | Annotate | Download | only in opencl
      1 //                           License Agreement
      2 //                For Open Source Computer Vision Library
      3 //
      4 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
      5 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
      6 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
      7 // Third party copyrights are property of their respective owners.
      8 //
      9 // @Authors
     10 //    Niko Li, newlife20080214 (a] gmail.com
     11 //    Jia Haipeng, jiahaipeng95 (a] gmail.com
     12 //    Xu Pang, pangxu010 (a] 163.com
     13 //    Wenju He, wenju (a] multicorewareinc.com
     14 // Redistribution and use in source and binary forms, with or without modification,
     15 // are permitted provided that the following conditions are met:
     16 //
     17 //   * Redistribution's of source code must retain the above copyright notice,
     18 //     this list of conditions and the following disclaimer.
     19 //
     20 //   * Redistribution's in binary form must reproduce the above copyright notice,
     21 //     this list of conditions and the following disclaimer in the documentation
     22 //     and/or other materials provided with the distribution.
     23 //
     24 //   * The name of the copyright holders may not be used to endorse or promote products
     25 //     derived from this software without specific prior written permission.
     26 //
     27 // This software is provided by the copyright holders and contributors as is and
     28 // any express or implied warranties, including, but not limited to, the implied
     29 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     30 // In no event shall the Intel Corporation or contributors be liable for any direct,
     31 // indirect, incidental, special, exemplary, or consequential damages
     32 // (including, but not limited to, procurement of substitute goods or services;
     33 // loss of use, data, or profits; or business interruption) however caused
     34 // and on any theory of liability, whether in contract, strict liability,
     35 // or tort (including negligence or otherwise) arising in any way out of
     36 // the use of this software, even if advised of the possibility of such damage.
     37 //
     38 //
     39 
     40 #ifndef kercn
     41 #define kercn 1
     42 #endif
     43 
     44 #ifndef T
     45 #define T uchar
     46 #endif
     47 
     48 #define noconvert
     49 
     50 __kernel void calculate_histogram(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
     51                                   __global uchar * histptr, int total)
     52 {
     53     int lid = get_local_id(0);
     54     int id = get_global_id(0) * kercn;
     55     int gid = get_group_id(0);
     56 
     57     __local int localhist[BINS];
     58 
     59     #pragma unroll
     60     for (int i = lid; i < BINS; i += WGS)
     61         localhist[i] = 0;
     62     barrier(CLK_LOCAL_MEM_FENCE);
     63 
     64     __global const uchar * src = src_ptr + src_offset;
     65     int src_index;
     66 
     67     for (int grain = HISTS_COUNT * WGS * kercn; id < total; id += grain)
     68     {
     69 #ifdef HAVE_SRC_CONT
     70         src_index = id;
     71 #else
     72         src_index = mad24(id / src_cols, src_step, id % src_cols);
     73 #endif
     74 
     75 #if kercn == 1
     76         atomic_inc(localhist + convert_int(src[src_index]));
     77 #elif kercn == 4
     78         int value = *(__global const int *)(src + src_index);
     79         atomic_inc(localhist + (value & 0xff));
     80         atomic_inc(localhist + ((value >> 8) & 0xff));
     81         atomic_inc(localhist + ((value >> 16) & 0xff));
     82         atomic_inc(localhist + ((value >> 24) & 0xff));
     83 #elif kercn >= 2
     84         T value = *(__global const T *)(src + src_index);
     85         atomic_inc(localhist + value.s0);
     86         atomic_inc(localhist + value.s1);
     87 #if kercn >= 4
     88         atomic_inc(localhist + value.s2);
     89         atomic_inc(localhist + value.s3);
     90 #if kercn >= 8
     91         atomic_inc(localhist + value.s4);
     92         atomic_inc(localhist + value.s5);
     93         atomic_inc(localhist + value.s6);
     94         atomic_inc(localhist + value.s7);
     95 #if kercn == 16
     96         atomic_inc(localhist + value.s8);
     97         atomic_inc(localhist + value.s9);
     98         atomic_inc(localhist + value.sA);
     99         atomic_inc(localhist + value.sB);
    100         atomic_inc(localhist + value.sC);
    101         atomic_inc(localhist + value.sD);
    102         atomic_inc(localhist + value.sE);
    103         atomic_inc(localhist + value.sF);
    104 #endif
    105 #endif
    106 #endif
    107 #endif
    108     }
    109     barrier(CLK_LOCAL_MEM_FENCE);
    110 
    111     __global int * hist = (__global int *)(histptr + gid * BINS * (int)sizeof(int));
    112     #pragma unroll
    113     for (int i = lid; i < BINS; i += WGS)
    114         hist[i] = localhist[i];
    115 }
    116 
    117 #ifndef HT
    118 #define HT int
    119 #endif
    120 
    121 #ifndef convertToHT
    122 #define convertToHT noconvert
    123 #endif
    124 
    125 __kernel void merge_histogram(__global const int * ghist, __global uchar * histptr, int hist_step, int hist_offset)
    126 {
    127     int lid = get_local_id(0);
    128 
    129     __global HT * hist = (__global HT *)(histptr + hist_offset);
    130 #if WGS >= BINS
    131     HT res = (HT)(0);
    132 #else
    133     #pragma unroll
    134     for (int i = lid; i < BINS; i += WGS)
    135         hist[i] = (HT)(0);
    136 #endif
    137 
    138     #pragma unroll
    139     for (int i = 0; i < HISTS_COUNT; ++i)
    140     {
    141         #pragma unroll
    142         for (int j = lid; j < BINS; j += WGS)
    143 #if WGS >= BINS
    144             res += convertToHT(ghist[j]);
    145 #else
    146             hist[j] += convertToHT(ghist[j]);
    147 #endif
    148         ghist += BINS;
    149     }
    150 
    151 #if WGS >= BINS
    152     if (lid < BINS)
    153         *(__global HT *)(histptr + mad24(lid, hist_step, hist_offset)) = res;
    154 #endif
    155 }
    156 
    157 __kernel void calcLUT(__global uchar * dst, __global const int * ghist, int total)
    158 {
    159     int lid = get_local_id(0);
    160     __local int sumhist[BINS];
    161     __local float scale;
    162 
    163 #if WGS >= BINS
    164     int res = 0;
    165 #else
    166     #pragma unroll
    167     for (int i = lid; i < BINS; i += WGS)
    168         sumhist[i] = 0;
    169 #endif
    170 
    171     #pragma unroll
    172     for (int i = 0; i < HISTS_COUNT; ++i)
    173     {
    174         #pragma unroll
    175         for (int j = lid; j < BINS; j += WGS)
    176 #if WGS >= BINS
    177             res += ghist[j];
    178 #else
    179             sumhist[j] += ghist[j];
    180 #endif
    181         ghist += BINS;
    182     }
    183 
    184 #if WGS >= BINS
    185     if (lid < BINS)
    186         sumhist[lid] = res;
    187 #endif
    188     barrier(CLK_LOCAL_MEM_FENCE);
    189 
    190     if (lid == 0)
    191     {
    192         int sum = 0, i = 0;
    193         while (!sumhist[i])
    194             ++i;
    195 
    196         if (total == sumhist[i])
    197         {
    198             scale = 1;
    199             for (int j = 0; j < BINS; ++j)
    200                 sumhist[i] = i;
    201         }
    202         else
    203         {
    204             scale = 255.f / (total - sumhist[i]);
    205 
    206             for (sumhist[i++] = 0; i < BINS; i++)
    207             {
    208                 sum += sumhist[i];
    209                 sumhist[i] = sum;
    210             }
    211         }
    212     }
    213     barrier(CLK_LOCAL_MEM_FENCE);
    214 
    215     #pragma unroll
    216     for (int i = lid; i < BINS; i += WGS)
    217         dst[i]= convert_uchar_sat_rte(convert_float(sumhist[i]) * scale);
    218 }
    219