HomeSort by relevance Sort by last modified time
    Searched refs:smem (Results 1 - 25 of 56) sorted by null

1 2 3

  /external/opencv3/modules/cudev/include/opencv2/cudev/warp/
scan.hpp 59 __device__ T warpScanInclusive(T data, volatile T* smem, uint tid)
62 (void) smem;
79 smem[pos] = 0;
82 smem[pos] = data;
84 smem[pos] += smem[pos - 1];
85 smem[pos] += smem[pos - 2];
86 smem[pos] += smem[pos - 4]
    [all...]
  /external/opencv3/modules/cudev/include/opencv2/cudev/block/
scan.hpp 58 __device__ T blockScanInclusive(T data, volatile T* smem, uint tid)
63 T warpResult = warpScanInclusive(data, smem, tid);
71 smem[tid >> LOG_WARP_SIZE] = warpResult;
79 T val = smem[tid];
82 smem[tid] = warpScanExclusive(val, smem, tid);
88 return warpResult + smem[tid >> LOG_WARP_SIZE];
92 return warpScanInclusive(data, smem, tid);
97 __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid)
99 return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data
    [all...]
vec_distance.hpp 75 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
77 warpReduce(smem, mySum, tid, plus<result_type>());
80 template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
82 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
104 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
106 warpReduce(smem, mySum, tid, plus<result_type>());
109 template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid)
111 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>());
137 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid)
139 warpReduce(smem, mySum, tid, plus<result_type>())
    [all...]
  /external/opencv3/modules/imgproc/src/opencl/
clahe.cl 50 inline int calc_lut(__local int* smem, int val, int tid)
52 smem[tid] = val;
57 smem[i] += smem[i - 1];
60 return smem[tid];
64 inline void reduce(volatile __local int* smem, int val, int tid)
66 smem[tid] = val;
70 smem[tid] = val += smem[tid + 128];
74 smem[tid] = val += smem[tid + 64]
    [all...]
pyr_down.cl 104 smem[0][col_lcl] = sum0; \
107 smem[1][col_lcl] = sum1;
123 vstore4(sum40, col_lcl, (__local float*) &smem[0][2]); \
126 vstore4(sum41, col_lcl, (__local float*) &smem[1][2]);
137 __local FT smem[2][LOCAL_SIZE + 4];
229 FT sum = dot(vload4(0, (__local float*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (float4)(co3, co2, co1, co2));
231 FT sum = dot(vload4(0, (__local double*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (double4)(co3, co2, co1, co2));
234 FT sum = co3 * smem[yin - y][2 + tid2 - 2];
235 sum = MAD(co2, smem[yin - y][2 + tid2 - 1], sum);
236 sum = MAD(co1, smem[yin - y][2 + tid2 ], sum)
    [all...]
canny.cl 80 inline float3 sobel(int idx, __local const floatN *smem)
85 floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],
86 smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]);
88 floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],
89 smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8])
    [all...]
  /external/opencv3/modules/objdetect/src/opencl/
objdetect_hog.cl 72 __global float* block_hists, __local float* smem)
87 __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *
172 __local float* smem = squares + boffset;
173 float sum = smem[hid];
175 smem[hid] = sum = sum + smem[hid + 18];
178 smem[hid] = sum = sum + smem[hid + 9];
181 smem[hid] = sum + smem[hid + 4]
    [all...]
  /external/opencv3/modules/cudev/include/opencv2/cudev/grid/detail/
reduce_to_column.hpp 61 __device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal)
64 blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op);
70 __device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal)
73 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op));
79 __device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal)
82 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op));
88 __device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal
102 __shared__ work_elem_type smem[cn][BLOCK_SIZE]; local
    [all...]
histogram.hpp 60 __shared__ ResType smem[BIN_COUNT]; local
66 smem[i] = 0;
77 atomicAdd(&smem[data % BIN_COUNT], 1);
86 const ResType histVal = smem[i];
reduce_to_row.hpp 62 __shared__ work_type smem[BLOCK_SIZE_X * BLOCK_SIZE_Y]; local
81 smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal;
85 volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X;
96 dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows));
pyr_down.hpp 68 __shared__ work_type smem[256 + 4]; local
86 smem[2 + threadIdx.x] = sum;
101 smem[threadIdx.x] = sum;
116 smem[4 + threadIdx.x] = sum;
130 smem[2 + threadIdx.x] = sum;
145 smem[threadIdx.x] = sum;
160 smem[4 + threadIdx.x] = sum;
172 sum = 0.0625f * smem[2 + tid2 - 2];
173 sum = sum + 0.25f * smem[2 + tid2 - 1];
174 sum = sum + 0.375f * smem[2 + tid2 ]
    [all...]
integral.hpp 63 __shared__ D smem[NUM_SCAN_THREADS * 2]; local
84 const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x);
105 __shared__ D smem[NUM_SCAN_THREADS * 2]; local
127 const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x);
445 // place into SMEM
446 // shfl scan reduce the SMEM, reformating so the column
481 __shared__ T smem[32][32];
484 volatile T* smem_row = &smem[0][0] + 64 * threadIdx.y;
505 smem[threadIdx.y + 0][threadIdx.x] = 0.0f;
506 smem[threadIdx.y + 8][threadIdx.x] = 0.0f
    [all...]
  /external/opencv3/modules/core/include/opencv2/core/cuda/detail/
reduce.hpp 74 static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
76 thrust::get<I>(smem)[tid] = thrust::get<I>(val);
78 For<I + 1, N>::loadToSmem(smem, val, tid);
81 static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
83 thrust::get<I>(val) = thrust::get<I>(smem)[tid];
85 For<I + 1, N>::loadFromSmem(smem, val, tid);
89 static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
91 typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
92 thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
94 For<I + 1, N>::merge(smem, val, tid, delta, op)
    [all...]
  /external/opencv3/modules/cudev/include/opencv2/cudev/block/detail/
reduce.hpp 83 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
85 get<I>(smem)[tid] = get<I>(val);
87 For<I + 1, N>::loadToSmem(smem, val, tid);
91 __device__ static void loadFromSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
93 get<I>(val) = get<I>(smem)[tid];
95 For<I + 1, N>::loadFromSmem(smem, val, tid);
99 __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op)
101 typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta];
102 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg);
104 For<I + 1, N>::merge(smem, val, tid, delta, op)
    [all...]
  /external/opencv3/modules/core/include/opencv2/core/cuda/
vec_distance.hpp 70 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
72 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
94 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
96 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
120 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
122 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
145 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
147 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
160 __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
173 dist.reduceAll<THREAD_DIM>(smem, tid)
    [all...]
  /external/opencv3/modules/cudev/include/opencv2/cudev/warp/detail/
reduce.hpp 81 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid)
83 get<I>(smem)[tid] = get<I>(val);
85 For<I + 1, N>::loadToSmem(smem, val, tid);
89 __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op)
91 typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta];
92 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg);
94 For<I + 1, N>::merge(smem, val, tid, delta, op);
132 __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid)
134 smem[tid] = val;
139 __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
    [all...]
  /hardware/intel/common/libmix/videoencoder/
IntelMetadataBuffer.cpp 127 static status_t WriteMemObjToBinder(Parcel& data, ShareMemMap* smem) {
129 if (smem->type >= ST_MAX)
134 data.writeInt32(smem->type);
136 if (smem->type == ST_MEMBASE) /*offset, size, heap*/
140 sp<IMemoryHeap> heap = smem->membase->getMemory(&offset, &size);
146 heap->getHeapID(), smem->membase->pointer(), *((int *)(smem->membase->pointer())));
149 else if (smem->type == ST_GFX) /*graphicbuffer*/
150 data.write(*(smem->gbuffer));
222 static void PushShareMem(ShareMemMap* &smem)
311 ShareMemMap* smem = FindShareMem(sessionflag, value, false); local
706 ShareMemMap smem; local
722 ShareMemMap* smem = new ShareMemMap; local
758 ShareMemMap smem; local
774 ShareMemMap* smem = new ShareMemMap; local
    [all...]
  /external/opencv3/modules/cudaimgproc/src/cuda/
canny.cu 249 __shared__ volatile int smem[18][18];
254 smem[threadIdx.y + 1][threadIdx.x + 1] = checkIdx(y, x, map.rows, map.cols) ? map(y, x) : 0;
256 smem[0][threadIdx.x + 1] = checkIdx(y - 1, x, map.rows, map.cols) ? map(y - 1, x) : 0;
258 smem[blockDim.y + 1][threadIdx.x + 1] = checkIdx(y + 1, x, map.rows, map.cols) ? map(y + 1, x) : 0;
260 smem[threadIdx.y + 1][0] = checkIdx(y, x - 1, map.rows, map.cols) ? map(y, x - 1) : 0;
262 smem[threadIdx.y + 1][blockDim.x + 1] = checkIdx(y, x + 1, map.rows, map.cols) ? map(y, x + 1) : 0;
264 smem[0][0] = checkIdx(y - 1, x - 1, map.rows, map.cols) ? map(y - 1, x - 1) : 0;
266 smem[0][blockDim.x + 1] = checkIdx(y - 1, x + 1, map.rows, map.cols) ? map(y - 1, x + 1) : 0;
268 smem[blockDim.y + 1][0] = checkIdx(y + 1, x - 1, map.rows, map.cols) ? map(y + 1, x - 1) : 0;
270 smem[blockDim.y + 1][blockDim.x + 1] = checkIdx(y + 1, x + 1, map.rows, map.cols) ? map(y + 1, x + 1) : (…)
    [all...]
clahe.cu 61 __shared__ int smem[512];
67 smem[tid] = 0;
76 Emulation::smem::atomicAdd(&smem[data], 1);
82 int tHistVal = smem[tid];
99 reduce<256>(smem, clipped, tid, plus<int>());
119 const int lutVal = blockScanInclusive<256>(tHistVal, smem, tid);
hist.cu 74 Emulation::smem::atomicAdd(&shist[(data >> 0) & 0xFFU], 1);
75 Emulation::smem::atomicAdd(&shist[(data >> 8) & 0xFFU], 1);
76 Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1);
77 Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1);
85 Emulation::smem::atomicAdd(&shist[data], 1);
119 Emulation::smem::atomicAdd(shist + ind, 1);
hough_lines.cu 91 int* smem = DynamicSharedMem<int>();
94 smem[i] = 0;
119 Emulation::smem::atomicAdd(&smem[r + 1], 1);
126 accumRow[i] = smem[i];
  /external/opencv3/modules/cudawarping/src/cuda/
pyr_down.cu 59 __shared__ work_t smem[256 + 4];
77 smem[2 + threadIdx.x] = sum;
92 smem[threadIdx.x] = sum;
107 smem[4 + threadIdx.x] = sum;
121 smem[2 + threadIdx.x] = sum;
136 smem[threadIdx.x] = sum;
151 smem[4 + threadIdx.x] = sum;
163 sum = 0.0625f * smem[2 + tid2 - 2];
164 sum = sum + 0.25f * smem[2 + tid2 - 1];
165 sum = sum + 0.375f * smem[2 + tid2 ]
    [all...]
  /external/opencv3/modules/cudafeatures2d/src/cuda/
bf_match.cu 141 extern __shared__ int smem[];
145 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
146 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN);
157 float* s_distance = (float*)(smem);
158 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE);
190 extern __shared__ int smem[];
194 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
195 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN);
214 float* s_distance = (float*)(smem);
215 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE)
    [all...]
  /external/opencv3/modules/cudaobjdetect/src/cuda/
lbp.cu 150 Emulation::smem::atomicAdd((rrects + cls * 4 + 0), candidates[tid].x);
151 Emulation::smem::atomicAdd((rrects + cls * 4 + 1), candidates[tid].y);
152 Emulation::smem::atomicAdd((rrects + cls * 4 + 2), candidates[tid].z);
153 Emulation::smem::atomicAdd((rrects + cls * 4 + 3), candidates[tid].w);
159 Emulation::smem::atomicInc((unsigned int*)labels + cls, n);
181 int aidx = Emulation::smem::atomicInc(nclasses, n);
190 int smem = block * ( sizeof(int) + sizeof(int4) );
191 disjoin<InSameComponint><<<1, block, smem>>>(candidates, objects, ncandidates, groupThreshold, grouping_eps, nclasses);
lbp.hpp 97 Emulation::smem::atomicMin(labels + id, p);
101 Emulation::smem::atomicMin(labels + tid, q);

Completed in 4524 milliseconds

1 2 3