Lines Matching full:smem
492 "inline float3 sobel(int idx, __local const floatN *smem)\n"
495 "floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],\n"
496 "smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]);\n"
497 "floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],\n"
498 "smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]);\n"
529 "__local floatN smem[(GRP_SIZEX + 4) * (GRP_SIZEY + 4)];\n"
539 "smem[j] = loadpix(src + mad24(y, src_step, mad24(x, cn * (int)sizeof(TYPE), src_offset)));\n"
548 "mag[i] = (sobel(i, smem)).z;\n"
549 "mag[i + grp_sizey * (GRP_SIZEX + 2)] = (sobel(i + grp_sizey * (GRP_SIZEX + 4), smem)).z;\n"
554 "mag[i * (GRP_SIZEX + 2)] = (sobel(i * (GRP_SIZEX + 4), smem)).z;\n"
555 "mag[i * (GRP_SIZEX + 2) + grp_sizex] = (sobel(i * (GRP_SIZEX + 4) + grp_sizex, smem)).z;\n"
559 "float3 res = sobel(idx, smem);\n"
787 "inline int calc_lut(__local int* smem, int val, int tid)\n"
789 "smem[tid] = val;\n"
793 "smem[i] += smem[i - 1];\n"
795 "return smem[tid];\n"
798 "inline void reduce(volatile __local int* smem, int val, int tid)\n"
800 "smem[tid] = val;\n"
803 "smem[tid] = val += smem[tid + 128];\n"
806 "smem[tid] = val += smem[tid + 64];\n"
809 "smem[tid] += smem[tid + 32];\n"
812 "smem[tid] += smem[tid + 16];\n"
815 "smem[tid] += smem[tid + 8];\n"
818 "smem[tid] += smem[tid + 4];\n"
821 "smem[tid] += smem[tid + 2];\n"
824 "smem[256] = smem[tid] + smem[tid + 1];\n"
828 "inline void reduce(__local volatile int* smem, int val, int tid)\n"
830 "smem[tid] = val;\n"
833 "smem[tid] = val += smem[tid + 128];\n"
836 "smem[tid] = val += smem[tid + 64];\n"
840 "smem[tid] += smem[tid + 32];\n"
846 "smem[tid] += smem[tid + 16];\n"
853 "smem[tid] += smem[tid + 8];\n"
854 "smem[tid] += smem[tid + 4];\n"
855 "smem[tid] += smem[tid + 2];\n"
856 "smem[tid] += smem[tid + 1];\n"
866 "__local int smem[512];\n"
871 "smem[tid] = 0;\n"
879 "atomic_inc(&smem[data]);\n"
883 "int tHistVal = smem[tid];\n"
893 "reduce(smem, clipped, tid);\n"
896 "clipped = smem[256];\n"
898 "clipped = smem[0];\n"
910 "const int lutVal = calc_lut(smem, tHistVal, tid);\n"
6338 "smem[0][col_lcl] = sum0; \\\n"
6341 "smem[1][col_lcl] = sum1;\n"
6355 "vstore4(sum40, col_lcl, (__local float*) &smem[0][2]); \\\n"
6358 smem[1][2]);\n"
6366 "__local FT smem[2][LOCAL_SIZE + 4];\n"
6451 "FT sum = dot(vload4(0, (__local float*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (float4)(co3, co2, co1, co2));\n"
6453 "FT sum = dot(vload4(0, (__local double*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (double4)(co3, co2, co1, co2));\n"
6456 "FT sum = co3 * smem[yin - y][2 + tid2 - 2];\n"
6457 "sum = MAD(co2, smem[yin - y][2 + tid2 - 1], sum);\n"
6458 "sum = MAD(co1, smem[yin - y][2 + tid2 ], sum);\n"
6459 "sum = MAD(co2, smem[yin - y][2 + tid2 + 1], sum);\n"
6461 "sum = MAD(co3, smem[yin - y][2 + tid2 + 2], sum);\n"
6473 "FT sum = co3* smem[yin - y][2 + tid4 + 2];\n"
6474 "sum = MAD(co3, smem[yin - y][2 + tid4 - 2], sum);\n"
6475 "sum = MAD(co2, smem[yin - y][2 + tid4 - 1], sum);\n"
6476 "sum = MAD(co1, smem[yin - y][2 + tid4 ], sum);\n"
6477 "sum = MAD(co2, smem[yin - y][2 + tid4 + 1], sum);\n"
6480 "sum = co3* smem[yin - y][2 + tid4 + 4];\n"
6481 "sum = MAD(co3, smem[yin - y][2 + tid4 ], sum);\n"
6482 "sum = MAD(co2, smem[yin - y][2 + tid4 + 1], sum);\n"
6483 "sum = MAD(co1, smem[yin - y][2 + tid4 + 2], sum);\n"
6484 "sum = MAD(co2, smem[yin - y][2 + tid4 + 3], sum);\n"
6493 "FT sum = co3* smem[yin - y][2 + tid4 + 2];\n"
6494 "sum = MAD(co3, smem[yin - y][2 + tid4 - 2], sum);\n"
6495 "sum = MAD(co2, smem[yin - y][2 + tid4 - 1], sum);\n"
6496 "sum = MAD(co1, smem[yin - y][2 + tid4 ], sum);\n"
6497 "sum = MAD(co2, smem[yin - y][2 + tid4 + 1], sum);\n"