HomeSort by relevance Sort by last modified time
    Searched full:smem (Results 1 - 25 of 76) sorted by null

1 2 3 4

  /external/opencv3/modules/core/src/opencl/
fft.cl 34 void butterfly2(CT a0, CT a1, __local CT* smem, __global const CT* twiddles,
41 smem[dst_ind] = a0 + a1;
42 smem[dst_ind+block_size] = a0 - a1;
46 void butterfly4(CT a0, CT a1, CT a2, CT a3, __local CT* smem, __global const CT* twiddles,
61 smem[dst_ind] = b0 + b1;
62 smem[dst_ind + block_size] = a2 + a3;
63 smem[dst_ind + 2*block_size] = b0 - b1;
64 smem[dst_ind + 3*block_size] = a2 - a3;
68 void butterfly3(CT a0, CT a1, CT a2, __local CT* smem, __global const CT* twiddles,
80 smem[dst_ind] = a0 + b1
    [all...]
  /external/opencv3/modules/imgproc/src/opencl/
clahe.cl 50 inline int calc_lut(__local int* smem, int val, int tid)
52 smem[tid] = val;
57 smem[i] += smem[i - 1];
60 return smem[tid];
64 inline void reduce(volatile __local int* smem, int val, int tid)
66 smem[tid] = val;
70 smem[tid] = val += smem[tid + 128];
74 smem[tid] = val += smem[tid + 64]
    [all...]
pyr_down.cl 104 smem[0][col_lcl] = sum0; \
107 smem[1][col_lcl] = sum1;
123 vstore4(sum40, col_lcl, (__local float*) &smem[0][2]); \
126 vstore4(sum41, col_lcl, (__local float*) &smem[1][2]);
137 __local FT smem[2][LOCAL_SIZE + 4];
229 FT sum = dot(vload4(0, (__local float*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (float4)(co3, co2, co1, co2));
231 FT sum = dot(vload4(0, (__local double*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (double4)(co3, co2, co1, co2));
234 FT sum = co3 * smem[yin - y][2 + tid2 - 2];
235 sum = MAD(co2, smem[yin - y][2 + tid2 - 1], sum);
236 sum = MAD(co1, smem[yin - y][2 + tid2 ], sum)
    [all...]
canny.cl 80 inline float3 sobel(int idx, __local const floatN *smem)
85 floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],
86 smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]);
88 floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],
89 smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8])
    [all...]
  /external/opencv3/modules/cudev/include/opencv2/cudev/block/detail/
reduce.hpp 83 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
85 get<I>(smem)[tid] = get<I>(val);
87 For<I + 1, N>::loadToSmem(smem, val, tid);
91 __device__ static void loadFromSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
93 get<I>(val) = get<I>(smem)[tid];
95 For<I + 1, N>::loadFromSmem(smem, val, tid);
99 __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op)
101 typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta];
102 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg);
104 For<I + 1, N>::merge(smem, val, tid, delta, op)
    [all...]
  /external/opencv3/modules/cudev/include/opencv2/cudev/warp/
scan.hpp 59 __device__ T warpScanInclusive(T data, volatile T* smem, uint tid)
62 (void) smem;
79 smem[pos] = 0;
82 smem[pos] = data;
84 smem[pos] += smem[pos - 1];
85 smem[pos] += smem[pos - 2];
86 smem[pos] += smem[pos - 4]
    [all...]
  /external/opencv3/modules/cudev/include/opencv2/cudev/block/
vec_distance.hpp 75 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
77 warpReduce(smem, mySum, tid, plus<result_type>());
80 template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
82 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
104 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
106 warpReduce(smem, mySum, tid, plus<result_type>());
109 template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
111 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
137 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
139 warpReduce(smem, mySum, tid, plus<result_type>())
    [all...]
scan.hpp 58 __device__ T blockScanInclusive(T data, volatile T* smem, uint tid)
63 T warpResult = warpScanInclusive(data, smem, tid);
71 smem[tid >> LOG_WARP_SIZE] = warpResult;
79 T val = smem[tid];
82 smem[tid] = warpScanExclusive(val, smem, tid);
88 return warpResult + smem[tid >> LOG_WARP_SIZE];
92 return warpScanInclusive(data, smem, tid);
97 __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid)
99 return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data
    [all...]
  /external/opencv3/modules/objdetect/src/opencl/
objdetect_hog.cl 72 __global float* block_hists, __local float* smem)
87 __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *
172 __local float* smem = squares + boffset;
173 float sum = smem[hid];
175 smem[hid] = sum = sum + smem[hid + 18];
178 smem[hid] = sum = sum + smem[hid + 9];
181 smem[hid] = sum + smem[hid + 4]
    [all...]
  /external/opencv3/modules/core/include/opencv2/core/cuda/detail/
reduce.hpp 74 static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
76 thrust::get<I>(smem)[tid] = thrust::get<I>(val);
78 For<I + 1, N>::loadToSmem(smem, val, tid);
81 static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
83 thrust::get<I>(val) = thrust::get<I>(smem)[tid];
85 For<I + 1, N>::loadFromSmem(smem, val, tid);
89 static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
91 typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
92 thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
94 For<I + 1, N>::merge(smem, val, tid, delta, op)
    [all...]
  /external/opencv3/modules/core/include/opencv2/core/cuda/
vec_distance.hpp 70 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
72 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
94 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
96 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
120 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
122 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
145 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
147 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
160 __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
173 dist.reduceAll<THREAD_DIM>(smem, tid)
    [all...]
  /external/opencv3/modules/cudev/include/opencv2/cudev/warp/detail/
reduce.hpp 81 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
83 get<I>(smem)[tid] = get<I>(val);
85 For<I + 1, N>::loadToSmem(smem, val, tid);
89 __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op)
91 typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta];
92 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg);
94 For<I + 1, N>::merge(smem, val, tid, delta, op);
132 __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid)
134 smem[tid] = val;
139 __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
    [all...]
  /hardware/intel/common/libmix/videoencoder/
IntelMetadataBuffer.cpp 127 static status_t WriteMemObjToBinder(Parcel& data, ShareMemMap* smem) {
129 if (smem->type >= ST_MAX)
134 data.writeInt32(smem->type);
136 if (smem->type == ST_MEMBASE) /*offset, size, heap*/
140 sp<IMemoryHeap> heap = smem->membase->getMemory(&offset, &size);
146 heap->getHeapID(), smem->membase->pointer(), *((int *)(smem->membase->pointer())));
149 else if (smem->type == ST_GFX) /*graphicbuffer*/
150 data.write(*(smem->gbuffer));
222 static void PushShareMem(ShareMemMap* &smem)
311 ShareMemMap* smem = FindShareMem(sessionflag, value, false); local
706 ShareMemMap smem; local
722 ShareMemMap* smem = new ShareMemMap; local
758 ShareMemMap smem; local
774 ShareMemMap* smem = new ShareMemMap; local
    [all...]
  /external/opencv3/modules/cudev/include/opencv2/cudev/grid/detail/
reduce_to_column.hpp 61 __device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal)
64 blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op);
70 __device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal)
73 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op));
79 __device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal)
82 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op));
88 __device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal
102 __shared__ work_elem_type smem[cn][BLOCK_SIZE]; local
    [all...]
pyr_down.hpp 68 __shared__ work_type smem[256 + 4]; local
86 smem[2 + threadIdx.x] = sum;
101 smem[threadIdx.x] = sum;
116 smem[4 + threadIdx.x] = sum;
130 smem[2 + threadIdx.x] = sum;
145 smem[threadIdx.x] = sum;
160 smem[4 + threadIdx.x] = sum;
172 sum = 0.0625f * smem[2 + tid2 - 2];
173 sum = sum + 0.25f * smem[2 + tid2 - 1];
174 sum = sum + 0.375f * smem[2 + tid2 ]
    [all...]
histogram.hpp 60 __shared__ ResType smem[BIN_COUNT]; local
66 smem[i] = 0;
77 atomicAdd(&smem[data % BIN_COUNT], 1);
86 const ResType histVal = smem[i];
reduce_to_row.hpp 62 __shared__ work_type smem[BLOCK_SIZE_X * BLOCK_SIZE_Y]; local
81 smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal;
85 volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X;
96 dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows));
  /external/opencv3/modules/objdetect/
opencl_kernels_objdetect.cpp 606 "__global float* block_hists, __local float* smem)\n"
618 "__local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *\n"
685 "__local float* smem = squares + boffset;\n"
686 "float sum = smem[hid];\n"
688 "smem[hid] = sum = sum + smem[hid + 18];\n"
691 "smem[hid] = sum = sum + smem[hid + 9];\n"
694 "smem[hid] = sum + smem[hid + 4];\n
    [all...]
  /external/opencv3/modules/cudaimgproc/src/cuda/
canny.cu 249 __shared__ volatile int smem[18][18];
254 smem[threadIdx.y + 1][threadIdx.x + 1] = checkIdx(y, x, map.rows, map.cols) ? map(y, x) : 0;
256 smem[0][threadIdx.x + 1] = checkIdx(y - 1, x, map.rows, map.cols) ? map(y - 1, x) : 0;
258 smem[blockDim.y + 1][threadIdx.x + 1] = checkIdx(y + 1, x, map.rows, map.cols) ? map(y + 1, x) : 0;
260 smem[threadIdx.y + 1][0] = checkIdx(y, x - 1, map.rows, map.cols) ? map(y, x - 1) : 0;
262 smem[threadIdx.y + 1][blockDim.x + 1] = checkIdx(y, x + 1, map.rows, map.cols) ? map(y, x + 1) : 0;
264 smem[0][0] = checkIdx(y - 1, x - 1, map.rows, map.cols) ? map(y - 1, x - 1) : 0;
266 smem[0][blockDim.x + 1] = checkIdx(y - 1, x + 1, map.rows, map.cols) ? map(y - 1, x + 1) : 0;
268 smem[blockDim.y + 1][0] = checkIdx(y + 1, x - 1, map.rows, map.cols) ? map(y + 1, x - 1) : 0;
270 smem[blockDim.y + 1][blockDim.x + 1] = checkIdx(y + 1, x + 1, map.rows, map.cols) ? map(y + 1, x + 1) : (…)
    [all...]
clahe.cu 61 __shared__ int smem[512];
67 smem[tid] = 0;
76 Emulation::smem::atomicAdd(&smem[data], 1);
82 int tHistVal = smem[tid];
99 reduce<256>(smem, clipped, tid, plus<int>());
119 const int lutVal = blockScanInclusive<256>(tHistVal, smem, tid);
  /external/opencv3/modules/core/src/
opencl_kernels_core.cpp 636 "void butterfly2(CT a0, CT a1, __local CT* smem, __global const CT* twiddles,\n"
642 "smem[dst_ind] = a0 + a1;\n"
643 "smem[dst_ind+block_size] = a0 - a1;\n"
646 "void butterfly4(CT a0, CT a1, CT a2, CT a3, __local CT* smem, __global const CT* twiddles,\n"
658 "smem[dst_ind] = b0 + b1;\n"
659 "smem[dst_ind + block_size] = a2 + a3;\n"
660 "smem[dst_ind + 2*block_size] = b0 - b1;\n"
661 "smem[dst_ind + 3*block_size] = a2 - a3;\n"
664 "void butterfly3(CT a0, CT a1, CT a2, __local CT* smem, __global const CT* twiddles,\n"
674 "smem[dst_ind] = a0 + b1;\n
    [all...]
  /toolchain/binutils/binutils-2.25/gas/testsuite/gas/tic54x/
opcodes.s 11 add *ar0+, a ; Smem, src
12 add *ar1+, ts, a ; Smem, TS, src
13 add *ar2+, 16, a ; Smem, 16, src [,dst]
14 add *ar3+, a, b ; Smem [,SHIFT], src [,dst] (-16<=SHIFT<=15)
28 and *ar3+,a ; Smem,src
100 ld *ar0+,a ; Smem,dst
101 ld *ar1+,ts,a ; Smem,TS,dst
102 ld *ar2+,16,a ; Smem,16,dst
103 ld *ar3+,1,a ; Smem[,SHIFT],dst
  /external/opencv3/modules/cudawarping/src/cuda/
pyr_down.cu 59 __shared__ work_t smem[256 + 4];
77 smem[2 + threadIdx.x] = sum;
92 smem[threadIdx.x] = sum;
107 smem[4 + threadIdx.x] = sum;
121 smem[2 + threadIdx.x] = sum;
136 smem[threadIdx.x] = sum;
151 smem[4 + threadIdx.x] = sum;
163 sum = 0.0625f * smem[2 + tid2 - 2];
164 sum = sum + 0.25f * smem[2 + tid2 - 1];
165 sum = sum + 0.375f * smem[2 + tid2 ]
    [all...]
  /external/opencv3/modules/cudafeatures2d/src/cuda/
bf_match.cu 141 extern __shared__ int smem[];
145 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
146 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN);
157 float* s_distance = (float*)(smem);
158 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE);
190 extern __shared__ int smem[];
194 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
195 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN);
214 float* s_distance = (float*)(smem);
215 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE)
    [all...]
  /external/opencv3/modules/cudaoptflow/src/cuda/
farneback.cu 75 extern __shared__ float smem[];
76 volatile float *row = smem + tx;
140 int smem = 3 * block.x * sizeof(float);
143 polynomialExpansion<5><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);
145 polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);
310 extern __shared__ float smem[];
311 volatile float *row = smem + ty * (bdx + 2*ksizeHalf);
345 int smem = (block.x + 2*ksizeHalf) * block.y * sizeof(float);
348 boxFilter<<<grid, block, smem, stream>>>(src.rows, src.cols, src, ksizeHalf, boxAreaInv, dst);
364 extern __shared__ float smem[];
    [all...]

Completed in 531 milliseconds

1 2 3 4