Search

Home	Sort by relevance Sort by last modified time

Searched refs:smem (Results 1 - 25 of 56) sorted by null

1 2 3

/external/opencv3/modules/cudev/include/opencv2/cudev/warp/
scan.hpp	`59 __device__ T warpScanInclusive(T data, volatile T* smem, uint tid) 62 (void) smem; 79 smem[pos] = 0; 82 smem[pos] = data; 84 smem[pos] += smem[pos - 1]; 85 smem[pos] += smem[pos - 2]; 86 smem[pos] += smem[pos - 4] [all...]`
/external/opencv3/modules/cudev/include/opencv2/cudev/block/
scan.hpp	58 __device__ T blockScanInclusive(T data, volatile T* smem, uint tid) 63 T warpResult = warpScanInclusive(data, smem, tid); 71 smem[tid >> LOG_WARP_SIZE] = warpResult; 79 T val = smem[tid]; 82 smem[tid] = warpScanExclusive(val, smem, tid); 88 return warpResult + smem[tid >> LOG_WARP_SIZE]; 92 return warpScanInclusive(data, smem, tid); 97 __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid) 99 return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data [all...]
vec_distance.hpp	75 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) 77 warpReduce(smem, mySum, tid, plus<result_type>()); 80 template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) 82 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); 104 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) 106 warpReduce(smem, mySum, tid, plus<result_type>()); 109 template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) 111 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); 137 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) 139 warpReduce(smem, mySum, tid, plus<result_type>()) [all...]
/external/opencv3/modules/imgproc/src/opencl/
clahe.cl	`50 inline int calc_lut(__local int* smem, int val, int tid) 52 smem[tid] = val; 57 smem[i] += smem[i - 1]; 60 return smem[tid]; 64 inline void reduce(volatile __local int* smem, int val, int tid) 66 smem[tid] = val; 70 smem[tid] = val += smem[tid + 128]; 74 smem[tid] = val += smem[tid + 64] [all...]`
pyr_down.cl	104 smem[0][col_lcl] = sum0; \ 107 smem[1][col_lcl] = sum1; 123 vstore4(sum40, col_lcl, (__local float*) &smem[0][2]); \ 126 vstore4(sum41, col_lcl, (__local float*) &smem[1][2]); 137 __local FT smem[2][LOCAL_SIZE + 4]; 229 FT sum = dot(vload4(0, (__local float*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (float4)(co3, co2, co1, co2)); 231 FT sum = dot(vload4(0, (__local double*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (double4)(co3, co2, co1, co2)); 234 FT sum = co3 * smem[yin - y][2 + tid2 - 2]; 235 sum = MAD(co2, smem[yin - y][2 + tid2 - 1], sum); 236 sum = MAD(co1, smem[yin - y][2 + tid2 ], sum) [all...]
canny.cl	`80 inline float3 sobel(int idx, __local const floatN *smem) 85 floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4], 86 smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]); 88 floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9], 89 smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]) [all...]`
/external/opencv3/modules/objdetect/src/opencl/
objdetect_hog.cl	`72 __global float* block_hists, __local float* smem) 87 __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X * 172 __local float* smem = squares + boffset; 173 float sum = smem[hid]; 175 smem[hid] = sum = sum + smem[hid + 18]; 178 smem[hid] = sum = sum + smem[hid + 9]; 181 smem[hid] = sum + smem[hid + 4] [all...]`
/external/opencv3/modules/cudev/include/opencv2/cudev/grid/detail/
reduce_to_column.hpp	61 __device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal) 64 blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op); 70 __device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal) 73 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op)); 79 __device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal) 82 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op)); 88 __device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal 102 __shared__ work_elem_type smem[cn][BLOCK_SIZE]; local [all...]
histogram.hpp	`60 __shared__ ResType smem[BIN_COUNT]; local 66 smem[i] = 0; 77 atomicAdd(&smem[data % BIN_COUNT], 1); 86 const ResType histVal = smem[i];`
reduce_to_row.hpp	`62 __shared__ work_type smem[BLOCK_SIZE_X * BLOCK_SIZE_Y]; local 81 smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal; 85 volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X; 96 dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows));`
pyr_down.hpp	`68 __shared__ work_type smem[256 + 4]; local 86 smem[2 + threadIdx.x] = sum; 101 smem[threadIdx.x] = sum; 116 smem[4 + threadIdx.x] = sum; 130 smem[2 + threadIdx.x] = sum; 145 smem[threadIdx.x] = sum; 160 smem[4 + threadIdx.x] = sum; 172 sum = 0.0625f * smem[2 + tid2 - 2]; 173 sum = sum + 0.25f * smem[2 + tid2 - 1]; 174 sum = sum + 0.375f * smem[2 + tid2 ] [all...]`
integral.hpp	63 __shared__ D smem[NUM_SCAN_THREADS * 2]; local 84 const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x); 105 __shared__ D smem[NUM_SCAN_THREADS * 2]; local 127 const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x); 445 // place into SMEM 446 // shfl scan reduce the SMEM, reformating so the column 481 __shared__ T smem[32][32]; 484 volatile T* smem_row = &smem[0][0] + 64 * threadIdx.y; 505 smem[threadIdx.y + 0][threadIdx.x] = 0.0f; 506 smem[threadIdx.y + 8][threadIdx.x] = 0.0f [all...]
/external/opencv3/modules/core/include/opencv2/core/cuda/detail/
reduce.hpp	74 static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) 76 thrust::get<I>(smem)[tid] = thrust::get<I>(val); 78 For<I + 1, N>::loadToSmem(smem, val, tid); 81 static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) 83 thrust::get<I>(val) = thrust::get<I>(smem)[tid]; 85 For<I + 1, N>::loadFromSmem(smem, val, tid); 89 static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op) 91 typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta]; 92 thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg); 94 For<I + 1, N>::merge(smem, val, tid, delta, op) [all...]
/external/opencv3/modules/cudev/include/opencv2/cudev/block/detail/
reduce.hpp	83 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) 85 get<I>(smem)[tid] = get<I>(val); 87 For<I + 1, N>::loadToSmem(smem, val, tid); 91 __device__ static void loadFromSmem(const PointerTuple& smem, const ValTuple& val, uint tid) 93 get<I>(val) = get<I>(smem)[tid]; 95 For<I + 1, N>::loadFromSmem(smem, val, tid); 99 __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op) 101 typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta]; 102 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); 104 For<I + 1, N>::merge(smem, val, tid, delta, op) [all...]
/external/opencv3/modules/core/include/opencv2/core/cuda/
vec_distance.hpp	70 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) 72 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); 94 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) 96 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); 120 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) 122 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); 145 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) 147 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); 160 __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) 173 dist.reduceAll<THREAD_DIM>(smem, tid) [all...]
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/detail/
reduce.hpp	81 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) 83 get<I>(smem)[tid] = get<I>(val); 85 For<I + 1, N>::loadToSmem(smem, val, tid); 89 __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op) 91 typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta]; 92 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); 94 For<I + 1, N>::merge(smem, val, tid, delta, op); 132 __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid) 134 smem[tid] = val; 139 __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, [all...]
/hardware/intel/common/libmix/videoencoder/
IntelMetadataBuffer.cpp	127 static status_t WriteMemObjToBinder(Parcel& data, ShareMemMap* smem) { 129 if (smem->type >= ST_MAX) 134 data.writeInt32(smem->type); 136 if (smem->type == ST_MEMBASE) /offset, size, heap/ 140 sp<IMemoryHeap> heap = smem->membase->getMemory(&offset, &size); 146 heap->getHeapID(), smem->membase->pointer(), ((int )(smem->membase->pointer()))); 149 else if (smem->type == ST_GFX) /graphicbuffer/ 150 data.write((smem->gbuffer)); 222 static void PushShareMem(ShareMemMap &smem) 311 ShareMemMap* smem = FindShareMem(sessionflag, value, false); local 706 ShareMemMap smem; local 722 ShareMemMap* smem = new ShareMemMap; local 758 ShareMemMap smem; local 774 ShareMemMap* smem = new ShareMemMap; local [all...]
/external/opencv3/modules/cudaimgproc/src/cuda/
canny.cu	249 __shared__ volatile int smem[18][18]; 254 smem[threadIdx.y + 1][threadIdx.x + 1] = checkIdx(y, x, map.rows, map.cols) ? map(y, x) : 0; 256 smem[0][threadIdx.x + 1] = checkIdx(y - 1, x, map.rows, map.cols) ? map(y - 1, x) : 0; 258 smem[blockDim.y + 1][threadIdx.x + 1] = checkIdx(y + 1, x, map.rows, map.cols) ? map(y + 1, x) : 0; 260 smem[threadIdx.y + 1][0] = checkIdx(y, x - 1, map.rows, map.cols) ? map(y, x - 1) : 0; 262 smem[threadIdx.y + 1][blockDim.x + 1] = checkIdx(y, x + 1, map.rows, map.cols) ? map(y, x + 1) : 0; 264 smem[0][0] = checkIdx(y - 1, x - 1, map.rows, map.cols) ? map(y - 1, x - 1) : 0; 266 smem[0][blockDim.x + 1] = checkIdx(y - 1, x + 1, map.rows, map.cols) ? map(y - 1, x + 1) : 0; 268 smem[blockDim.y + 1][0] = checkIdx(y + 1, x - 1, map.rows, map.cols) ? map(y + 1, x - 1) : 0; 270 smem[blockDim.y + 1][blockDim.x + 1] = checkIdx(y + 1, x + 1, map.rows, map.cols) ? map(y + 1, x + 1) : (…) [all...]
clahe.cu	`61 __shared__ int smem[512]; 67 smem[tid] = 0; 76 Emulation::smem::atomicAdd(&smem[data], 1); 82 int tHistVal = smem[tid]; 99 reduce<256>(smem, clipped, tid, plus<int>()); 119 const int lutVal = blockScanInclusive<256>(tHistVal, smem, tid);`
hist.cu	`74 Emulation::smem::atomicAdd(&shist[(data >> 0) & 0xFFU], 1); 75 Emulation::smem::atomicAdd(&shist[(data >> 8) & 0xFFU], 1); 76 Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1); 77 Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1); 85 Emulation::smem::atomicAdd(&shist[data], 1); 119 Emulation::smem::atomicAdd(shist + ind, 1);`
hough_lines.cu	`91 int* smem = DynamicSharedMem<int>(); 94 smem[i] = 0; 119 Emulation::smem::atomicAdd(&smem[r + 1], 1); 126 accumRow[i] = smem[i];`
/external/opencv3/modules/cudawarping/src/cuda/
pyr_down.cu	`59 __shared__ work_t smem[256 + 4]; 77 smem[2 + threadIdx.x] = sum; 92 smem[threadIdx.x] = sum; 107 smem[4 + threadIdx.x] = sum; 121 smem[2 + threadIdx.x] = sum; 136 smem[threadIdx.x] = sum; 151 smem[4 + threadIdx.x] = sum; 163 sum = 0.0625f * smem[2 + tid2 - 2]; 164 sum = sum + 0.25f * smem[2 + tid2 - 1]; 165 sum = sum + 0.375f * smem[2 + tid2 ] [all...]`
/external/opencv3/modules/cudafeatures2d/src/cuda/
bf_match.cu	141 extern __shared__ int smem[]; 145 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); 146 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN); 157 float* s_distance = (float*)(smem); 158 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE); 190 extern __shared__ int smem[]; 194 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); 195 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN); 214 float* s_distance = (float*)(smem); 215 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE) [all...]
/external/opencv3/modules/cudaobjdetect/src/cuda/
lbp.cu	150 Emulation::smem::atomicAdd((rrects + cls * 4 + 0), candidates[tid].x); 151 Emulation::smem::atomicAdd((rrects + cls * 4 + 1), candidates[tid].y); 152 Emulation::smem::atomicAdd((rrects + cls * 4 + 2), candidates[tid].z); 153 Emulation::smem::atomicAdd((rrects + cls * 4 + 3), candidates[tid].w); 159 Emulation::smem::atomicInc((unsigned int*)labels + cls, n); 181 int aidx = Emulation::smem::atomicInc(nclasses, n); 190 int smem = block * ( sizeof(int) + sizeof(int4) ); 191 disjoin<InSameComponint><<<1, block, smem>>>(candidates, objects, ncandidates, groupThreshold, grouping_eps, nclasses);
lbp.hpp	`97 Emulation::smem::atomicMin(labels + id, p); 101 Emulation::smem::atomicMin(labels + tid, q);`

Completed in 4524 milliseconds

1 2 3

OpenGrok