Lines Matching full:smem
72 __global float* block_hists, __local float* smem)
87 __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *
172 __local float* smem = squares + boffset;
173 float sum = smem[hid];
175 smem[hid] = sum = sum + smem[hid + 18];
178 smem[hid] = sum = sum + smem[hid + 9];
181 smem[hid] = sum + smem[hid + 4];
183 sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
192 sum = smem[hid];
194 smem[hid] = sum = sum + smem[hid + 18];
197 smem[hid] = sum = sum + smem[hid + 9];
200 smem[hid] = sum + smem[hid + 4];
202 sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8];
210 inline float reduce_smem(volatile __local float* smem, int size)
213 float sum = smem[tid];
215 if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256];
217 if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128];
219 if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64];
222 if (size >= 64) { if (tid < 32) smem[tid] = sum = sum + smem[tid + 32];
224 if (size >= 32) { if (tid < 16) smem[tid] = sum = sum + smem[tid + 16];
226 if (size >= 16) { if (tid < 8) smem[tid] = sum = sum + smem[tid + 8];
228 if (size >= 8) { if (tid < 4) smem[tid] = sum = sum + smem[tid + 4];
230 if (size >= 4) { if (tid < 2) smem[tid] = sum = sum + smem[tid + 2];
232 if (size >= 2) { if (tid < 1) smem[tid] = sum = sum + smem[tid + 1];
237 if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
242 if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
243 if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
244 if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
245 if (size >= 4) smem[tid] = sum = sum + smem[tid + 2];
246 if (size >= 2) smem[tid] = sum = sum + smem[tid + 1];
325 volatile __local float* smem = products;
327 if (tid < 13) smem[tid] = product = product + smem[tid + 32];
329 if (tid < 16) smem[tid] = product = product + smem[tid + 16];
331 if(tid<8) smem[tid] = product = product + smem[tid + 8];
333 if(tid<4) smem[tid] = product = product + smem[tid + 4];
335 if(tid<2) smem[tid] = product = product + smem[tid + 2];
340 smem[tid] = product = product + smem[tid + 32];
347 smem[tid] = product = product + smem[tid + 16];
348 smem[tid] = product = product + smem[tid + 8];
349 smem[tid] = product = product + smem[tid + 4];
350 smem[tid] = product = product + smem[tid + 2];
355 product = product + smem[tid + 1];
398 volatile __local float* smem = products;
400 if(tid<32) smem[tid] = product = product + smem[tid + 32];
402 if(tid<16) smem[tid] = product = product + smem[tid + 16];
404 if(tid<8) smem[tid] = product = product + smem[tid + 8];
406 if(tid<4) smem[tid] = product = product + smem[tid + 4];
408 if(tid<2) smem[tid] = product = product + smem[tid + 2];
413 smem[tid] = product = product + smem[tid + 32];
418 smem[tid] = product = product + smem[tid + 16];
419 smem[tid] = product = product + smem[tid + 8];
420 smem[tid] = product = product + smem[tid + 4];
421 smem[tid] = product = product + smem[tid + 2];
425 product = product + smem[tid + 1];
468 volatile __local float* smem = products;
470 if(tid<32) smem[tid] = product = product + smem[tid + 32];
472 if(tid<16) smem[tid] = product = product + smem[tid + 16];
474 if(tid<8) smem[tid] = product = product + smem[tid + 8];
476 if(tid<4) smem[tid] = product = product + smem[tid + 4];
478 if(tid<2) smem[tid] = product = product + smem[tid + 2];
483 smem[tid] = product = product + smem[tid + 32];
488 smem[tid] = product = product + smem[tid + 16];
489 smem[tid] = product = product + smem[tid + 8];
490 smem[tid] = product = product + smem[tid + 4];
491 smem[tid] = product = product + smem[tid + 2];
495 smem[tid] = product = product + smem[tid + 1];