1 // License Agreement 2 // For Open Source Computer Vision Library 3 // 4 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. 5 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. 6 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. 7 // Third party copyrights are property of their respective owners. 8 // 9 // @Authors 10 // Niko Li, newlife20080214 (a] gmail.com 11 // Jia Haipeng, jiahaipeng95 (a] gmail.com 12 // Xu Pang, pangxu010 (a] 163.com 13 // Wenju He, wenju (a] multicorewareinc.com 14 // Redistribution and use in source and binary forms, with or without modification, 15 // are permitted provided that the following conditions are met: 16 // 17 // * Redistribution's of source code must retain the above copyright notice, 18 // this list of conditions and the following disclaimer. 19 // 20 // * Redistribution's in binary form must reproduce the above copyright notice, 21 // this list of conditions and the following disclaimer in the documentation 22 // and/or other materials provided with the distribution. 23 // 24 // * The name of the copyright holders may not be used to endorse or promote products 25 // derived from this software without specific prior written permission. 26 // 27 // This software is provided by the copyright holders and contributors as is and 28 // any express or implied warranties, including, but not limited to, the implied 29 // warranties of merchantability and fitness for a particular purpose are disclaimed. 30 // In no event shall the Intel Corporation or contributors be liable for any direct, 31 // indirect, incidental, special, exemplary, or consequential damages 32 // (including, but not limited to, procurement of substitute goods or services; 33 // loss of use, data, or profits; or business interruption) however caused 34 // and on any theory of liability, whether in contract, strict liability, 35 // or tort (including negligence or otherwise) arising in any way out of 36 // the use of this software, even if advised of the possibility of such damage. 37 // 38 // 39 40 #ifndef kercn 41 #define kercn 1 42 #endif 43 44 #ifndef T 45 #define T uchar 46 #endif 47 48 #define noconvert 49 50 __kernel void calculate_histogram(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols, 51 __global uchar * histptr, int total) 52 { 53 int lid = get_local_id(0); 54 int id = get_global_id(0) * kercn; 55 int gid = get_group_id(0); 56 57 __local int localhist[BINS]; 58 59 #pragma unroll 60 for (int i = lid; i < BINS; i += WGS) 61 localhist[i] = 0; 62 barrier(CLK_LOCAL_MEM_FENCE); 63 64 __global const uchar * src = src_ptr + src_offset; 65 int src_index; 66 67 for (int grain = HISTS_COUNT * WGS * kercn; id < total; id += grain) 68 { 69 #ifdef HAVE_SRC_CONT 70 src_index = id; 71 #else 72 src_index = mad24(id / src_cols, src_step, id % src_cols); 73 #endif 74 75 #if kercn == 1 76 atomic_inc(localhist + convert_int(src[src_index])); 77 #elif kercn == 4 78 int value = *(__global const int *)(src + src_index); 79 atomic_inc(localhist + (value & 0xff)); 80 atomic_inc(localhist + ((value >> 8) & 0xff)); 81 atomic_inc(localhist + ((value >> 16) & 0xff)); 82 atomic_inc(localhist + ((value >> 24) & 0xff)); 83 #elif kercn >= 2 84 T value = *(__global const T *)(src + src_index); 85 atomic_inc(localhist + value.s0); 86 atomic_inc(localhist + value.s1); 87 #if kercn >= 4 88 atomic_inc(localhist + value.s2); 89 atomic_inc(localhist + value.s3); 90 #if kercn >= 8 91 atomic_inc(localhist + value.s4); 92 atomic_inc(localhist + value.s5); 93 atomic_inc(localhist + value.s6); 94 atomic_inc(localhist + value.s7); 95 #if kercn == 16 96 atomic_inc(localhist + value.s8); 97 atomic_inc(localhist + value.s9); 98 atomic_inc(localhist + value.sA); 99 atomic_inc(localhist + value.sB); 100 atomic_inc(localhist + value.sC); 101 atomic_inc(localhist + value.sD); 102 atomic_inc(localhist + value.sE); 103 atomic_inc(localhist + value.sF); 104 #endif 105 #endif 106 #endif 107 #endif 108 } 109 barrier(CLK_LOCAL_MEM_FENCE); 110 111 __global int * hist = (__global int *)(histptr + gid * BINS * (int)sizeof(int)); 112 #pragma unroll 113 for (int i = lid; i < BINS; i += WGS) 114 hist[i] = localhist[i]; 115 } 116 117 #ifndef HT 118 #define HT int 119 #endif 120 121 #ifndef convertToHT 122 #define convertToHT noconvert 123 #endif 124 125 __kernel void merge_histogram(__global const int * ghist, __global uchar * histptr, int hist_step, int hist_offset) 126 { 127 int lid = get_local_id(0); 128 129 __global HT * hist = (__global HT *)(histptr + hist_offset); 130 #if WGS >= BINS 131 HT res = (HT)(0); 132 #else 133 #pragma unroll 134 for (int i = lid; i < BINS; i += WGS) 135 hist[i] = (HT)(0); 136 #endif 137 138 #pragma unroll 139 for (int i = 0; i < HISTS_COUNT; ++i) 140 { 141 #pragma unroll 142 for (int j = lid; j < BINS; j += WGS) 143 #if WGS >= BINS 144 res += convertToHT(ghist[j]); 145 #else 146 hist[j] += convertToHT(ghist[j]); 147 #endif 148 ghist += BINS; 149 } 150 151 #if WGS >= BINS 152 if (lid < BINS) 153 *(__global HT *)(histptr + mad24(lid, hist_step, hist_offset)) = res; 154 #endif 155 } 156 157 __kernel void calcLUT(__global uchar * dst, __global const int * ghist, int total) 158 { 159 int lid = get_local_id(0); 160 __local int sumhist[BINS]; 161 __local float scale; 162 163 #if WGS >= BINS 164 int res = 0; 165 #else 166 #pragma unroll 167 for (int i = lid; i < BINS; i += WGS) 168 sumhist[i] = 0; 169 #endif 170 171 #pragma unroll 172 for (int i = 0; i < HISTS_COUNT; ++i) 173 { 174 #pragma unroll 175 for (int j = lid; j < BINS; j += WGS) 176 #if WGS >= BINS 177 res += ghist[j]; 178 #else 179 sumhist[j] += ghist[j]; 180 #endif 181 ghist += BINS; 182 } 183 184 #if WGS >= BINS 185 if (lid < BINS) 186 sumhist[lid] = res; 187 #endif 188 barrier(CLK_LOCAL_MEM_FENCE); 189 190 if (lid == 0) 191 { 192 int sum = 0, i = 0; 193 while (!sumhist[i]) 194 ++i; 195 196 if (total == sumhist[i]) 197 { 198 scale = 1; 199 for (int j = 0; j < BINS; ++j) 200 sumhist[i] = i; 201 } 202 else 203 { 204 scale = 255.f / (total - sumhist[i]); 205 206 for (sumhist[i++] = 0; i < BINS; i++) 207 { 208 sum += sumhist[i]; 209 sumhist[i] = sum; 210 } 211 } 212 } 213 barrier(CLK_LOCAL_MEM_FENCE); 214 215 #pragma unroll 216 for (int i = lid; i < BINS; i += WGS) 217 dst[i]= convert_uchar_sat_rte(convert_float(sumhist[i]) * scale); 218 } 219