Search

Home	Sort by relevance Sort by last modified time

Searched full:smem (Results 1 - 25 of 76) sorted by null

1 2 3 4

/external/opencv3/modules/core/src/opencl/
fft.cl	34 void butterfly2(CT a0, CT a1, __local CT* smem, __global const CT* twiddles, 41 smem[dst_ind] = a0 + a1; 42 smem[dst_ind+block_size] = a0 - a1; 46 void butterfly4(CT a0, CT a1, CT a2, CT a3, __local CT* smem, __global const CT* twiddles, 61 smem[dst_ind] = b0 + b1; 62 smem[dst_ind + block_size] = a2 + a3; 63 smem[dst_ind + 2block_size] = b0 - b1; 64 smem[dst_ind + 3block_size] = a2 - a3; 68 void butterfly3(CT a0, CT a1, CT a2, __local CT* smem, __global const CT* twiddles, 80 smem[dst_ind] = a0 + b1 [all...]
/external/opencv3/modules/imgproc/src/opencl/
clahe.cl	`50 inline int calc_lut(__local int* smem, int val, int tid) 52 smem[tid] = val; 57 smem[i] += smem[i - 1]; 60 return smem[tid]; 64 inline void reduce(volatile __local int* smem, int val, int tid) 66 smem[tid] = val; 70 smem[tid] = val += smem[tid + 128]; 74 smem[tid] = val += smem[tid + 64] [all...]`
pyr_down.cl	104 smem[0][col_lcl] = sum0; \ 107 smem[1][col_lcl] = sum1; 123 vstore4(sum40, col_lcl, (__local float) &smem[0][2]); \ 126 vstore4(sum41, col_lcl, (__local float) &smem[1][2]); 137 __local FT smem[2][LOCAL_SIZE + 4]; 229 FT sum = dot(vload4(0, (__local float) (&smem) + tid2 + (yin - y) (LOCAL_SIZE + 4)), (float4)(co3, co2, co1, co2)); 231 FT sum = dot(vload4(0, (__local double) (&smem) + tid2 + (yin - y) (LOCAL_SIZE + 4)), (double4)(co3, co2, co1, co2)); 234 FT sum = co3 * smem[yin - y][2 + tid2 - 2]; 235 sum = MAD(co2, smem[yin - y][2 + tid2 - 1], sum); 236 sum = MAD(co1, smem[yin - y][2 + tid2 ], sum) [all...]
canny.cl	`80 inline float3 sobel(int idx, __local const floatN smem) 85 floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4], 86 smem[idx + 2] - smem[idx] + smem[idx + 2 GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]); 88 floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9], 89 smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]) [all...]`
/external/opencv3/modules/cudev/include/opencv2/cudev/block/detail/
reduce.hpp	83 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) 85 get<I>(smem)[tid] = get<I>(val); 87 For<I + 1, N>::loadToSmem(smem, val, tid); 91 __device__ static void loadFromSmem(const PointerTuple& smem, const ValTuple& val, uint tid) 93 get<I>(val) = get<I>(smem)[tid]; 95 For<I + 1, N>::loadFromSmem(smem, val, tid); 99 __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op) 101 typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta]; 102 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); 104 For<I + 1, N>::merge(smem, val, tid, delta, op) [all...]
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/
scan.hpp	`59 __device__ T warpScanInclusive(T data, volatile T* smem, uint tid) 62 (void) smem; 79 smem[pos] = 0; 82 smem[pos] = data; 84 smem[pos] += smem[pos - 1]; 85 smem[pos] += smem[pos - 2]; 86 smem[pos] += smem[pos - 4] [all...]`
/external/opencv3/modules/cudev/include/opencv2/cudev/block/
vec_distance.hpp	75 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) 77 warpReduce(smem, mySum, tid, plus<result_type>()); 80 template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) 82 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); 104 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) 106 warpReduce(smem, mySum, tid, plus<result_type>()); 109 template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) 111 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); 137 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) 139 warpReduce(smem, mySum, tid, plus<result_type>()) [all...]
scan.hpp	58 __device__ T blockScanInclusive(T data, volatile T* smem, uint tid) 63 T warpResult = warpScanInclusive(data, smem, tid); 71 smem[tid >> LOG_WARP_SIZE] = warpResult; 79 T val = smem[tid]; 82 smem[tid] = warpScanExclusive(val, smem, tid); 88 return warpResult + smem[tid >> LOG_WARP_SIZE]; 92 return warpScanInclusive(data, smem, tid); 97 __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid) 99 return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data [all...]
/external/opencv3/modules/objdetect/src/opencl/
objdetect_hog.cl	`72 __global float* block_hists, __local float* smem) 87 __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X * 172 __local float* smem = squares + boffset; 173 float sum = smem[hid]; 175 smem[hid] = sum = sum + smem[hid + 18]; 178 smem[hid] = sum = sum + smem[hid + 9]; 181 smem[hid] = sum + smem[hid + 4] [all...]`
/external/opencv3/modules/core/include/opencv2/core/cuda/detail/
reduce.hpp	74 static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) 76 thrust::get<I>(smem)[tid] = thrust::get<I>(val); 78 For<I + 1, N>::loadToSmem(smem, val, tid); 81 static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) 83 thrust::get<I>(val) = thrust::get<I>(smem)[tid]; 85 For<I + 1, N>::loadFromSmem(smem, val, tid); 89 static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op) 91 typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta]; 92 thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg); 94 For<I + 1, N>::merge(smem, val, tid, delta, op) [all...]
/external/opencv3/modules/core/include/opencv2/core/cuda/
vec_distance.hpp	70 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) 72 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); 94 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) 96 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); 120 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) 122 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); 145 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) 147 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); 160 __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) 173 dist.reduceAll<THREAD_DIM>(smem, tid) [all...]
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/detail/
reduce.hpp	81 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) 83 get<I>(smem)[tid] = get<I>(val); 85 For<I + 1, N>::loadToSmem(smem, val, tid); 89 __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op) 91 typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta]; 92 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); 94 For<I + 1, N>::merge(smem, val, tid, delta, op); 132 __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid) 134 smem[tid] = val; 139 __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, [all...]
/hardware/intel/common/libmix/videoencoder/
IntelMetadataBuffer.cpp	127 static status_t WriteMemObjToBinder(Parcel& data, ShareMemMap* smem) { 129 if (smem->type >= ST_MAX) 134 data.writeInt32(smem->type); 136 if (smem->type == ST_MEMBASE) /offset, size, heap/ 140 sp<IMemoryHeap> heap = smem->membase->getMemory(&offset, &size); 146 heap->getHeapID(), smem->membase->pointer(), ((int )(smem->membase->pointer()))); 149 else if (smem->type == ST_GFX) /graphicbuffer/ 150 data.write((smem->gbuffer)); 222 static void PushShareMem(ShareMemMap &smem) 311 ShareMemMap* smem = FindShareMem(sessionflag, value, false); local 706 ShareMemMap smem; local 722 ShareMemMap* smem = new ShareMemMap; local 758 ShareMemMap smem; local 774 ShareMemMap* smem = new ShareMemMap; local [all...]
/external/opencv3/modules/cudev/include/opencv2/cudev/grid/detail/
reduce_to_column.hpp	61 __device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal) 64 blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op); 70 __device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal) 73 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op)); 79 __device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal) 82 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op)); 88 __device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal 102 __shared__ work_elem_type smem[cn][BLOCK_SIZE]; local [all...]
pyr_down.hpp	`68 __shared__ work_type smem[256 + 4]; local 86 smem[2 + threadIdx.x] = sum; 101 smem[threadIdx.x] = sum; 116 smem[4 + threadIdx.x] = sum; 130 smem[2 + threadIdx.x] = sum; 145 smem[threadIdx.x] = sum; 160 smem[4 + threadIdx.x] = sum; 172 sum = 0.0625f * smem[2 + tid2 - 2]; 173 sum = sum + 0.25f * smem[2 + tid2 - 1]; 174 sum = sum + 0.375f * smem[2 + tid2 ] [all...]`
histogram.hpp	`60 __shared__ ResType smem[BIN_COUNT]; local 66 smem[i] = 0; 77 atomicAdd(&smem[data % BIN_COUNT], 1); 86 const ResType histVal = smem[i];`
reduce_to_row.hpp	`62 __shared__ work_type smem[BLOCK_SIZE_X * BLOCK_SIZE_Y]; local 81 smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal; 85 volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X; 96 dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows));`
/external/opencv3/modules/objdetect/
opencl_kernels_objdetect.cpp	`606 "__global float* block_hists, __local float* smem)\n" 618 "__local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X \n" 685 "__local float smem = squares + boffset;\n" 686 "float sum = smem[hid];\n" 688 "smem[hid] = sum = sum + smem[hid + 18];\n" 691 "smem[hid] = sum = sum + smem[hid + 9];\n" 694 "smem[hid] = sum + smem[hid + 4];\n [all...]`
/external/opencv3/modules/cudaimgproc/src/cuda/
canny.cu	249 __shared__ volatile int smem[18][18]; 254 smem[threadIdx.y + 1][threadIdx.x + 1] = checkIdx(y, x, map.rows, map.cols) ? map(y, x) : 0; 256 smem[0][threadIdx.x + 1] = checkIdx(y - 1, x, map.rows, map.cols) ? map(y - 1, x) : 0; 258 smem[blockDim.y + 1][threadIdx.x + 1] = checkIdx(y + 1, x, map.rows, map.cols) ? map(y + 1, x) : 0; 260 smem[threadIdx.y + 1][0] = checkIdx(y, x - 1, map.rows, map.cols) ? map(y, x - 1) : 0; 262 smem[threadIdx.y + 1][blockDim.x + 1] = checkIdx(y, x + 1, map.rows, map.cols) ? map(y, x + 1) : 0; 264 smem[0][0] = checkIdx(y - 1, x - 1, map.rows, map.cols) ? map(y - 1, x - 1) : 0; 266 smem[0][blockDim.x + 1] = checkIdx(y - 1, x + 1, map.rows, map.cols) ? map(y - 1, x + 1) : 0; 268 smem[blockDim.y + 1][0] = checkIdx(y + 1, x - 1, map.rows, map.cols) ? map(y + 1, x - 1) : 0; 270 smem[blockDim.y + 1][blockDim.x + 1] = checkIdx(y + 1, x + 1, map.rows, map.cols) ? map(y + 1, x + 1) : (…) [all...]
clahe.cu	`61 __shared__ int smem[512]; 67 smem[tid] = 0; 76 Emulation::smem::atomicAdd(&smem[data], 1); 82 int tHistVal = smem[tid]; 99 reduce<256>(smem, clipped, tid, plus<int>()); 119 const int lutVal = blockScanInclusive<256>(tHistVal, smem, tid);`
/external/opencv3/modules/core/src/
opencl_kernels_core.cpp	636 "void butterfly2(CT a0, CT a1, __local CT* smem, __global const CT* twiddles,\n" 642 "smem[dst_ind] = a0 + a1;\n" 643 "smem[dst_ind+block_size] = a0 - a1;\n" 646 "void butterfly4(CT a0, CT a1, CT a2, CT a3, __local CT* smem, __global const CT* twiddles,\n" 658 "smem[dst_ind] = b0 + b1;\n" 659 "smem[dst_ind + block_size] = a2 + a3;\n" 660 "smem[dst_ind + 2block_size] = b0 - b1;\n" 661 "smem[dst_ind + 3block_size] = a2 - a3;\n" 664 "void butterfly3(CT a0, CT a1, CT a2, __local CT* smem, __global const CT* twiddles,\n" 674 "smem[dst_ind] = a0 + b1;\n [all...]
/toolchain/binutils/binutils-2.25/gas/testsuite/gas/tic54x/
opcodes.s	`11 add ar0+, a ; Smem, src 12 add ar1+, ts, a ; Smem, TS, src 13 add ar2+, 16, a ; Smem, 16, src [,dst] 14 add ar3+, a, b ; Smem [,SHIFT], src [,dst] (-16<=SHIFT<=15) 28 and ar3+,a ; Smem,src 100 ld ar0+,a ; Smem,dst 101 ld ar1+,ts,a ; Smem,TS,dst 102 ld ar2+,16,a ; Smem,16,dst 103 ld *ar3+,1,a ; Smem[,SHIFT],dst`
/external/opencv3/modules/cudawarping/src/cuda/
pyr_down.cu	`59 __shared__ work_t smem[256 + 4]; 77 smem[2 + threadIdx.x] = sum; 92 smem[threadIdx.x] = sum; 107 smem[4 + threadIdx.x] = sum; 121 smem[2 + threadIdx.x] = sum; 136 smem[threadIdx.x] = sum; 151 smem[4 + threadIdx.x] = sum; 163 sum = 0.0625f * smem[2 + tid2 - 2]; 164 sum = sum + 0.25f * smem[2 + tid2 - 1]; 165 sum = sum + 0.375f * smem[2 + tid2 ] [all...]`
/external/opencv3/modules/cudafeatures2d/src/cuda/
bf_match.cu	141 extern __shared__ int smem[]; 145 typename Dist::value_type* s_query = (typename Dist::value_type)(smem); 146 typename Dist::value_type s_train = (typename Dist::value_type)(smem + BLOCK_SIZE MAX_DESC_LEN); 157 float* s_distance = (float)(smem); 158 int s_trainIdx = (int)(smem + BLOCK_SIZE BLOCK_SIZE); 190 extern __shared__ int smem[]; 194 typename Dist::value_type* s_query = (typename Dist::value_type)(smem); 195 typename Dist::value_type s_train = (typename Dist::value_type)(smem + BLOCK_SIZE MAX_DESC_LEN); 214 float* s_distance = (float)(smem); 215 int s_trainIdx = (int)(smem + BLOCK_SIZE BLOCK_SIZE) [all...]
/external/opencv3/modules/cudaoptflow/src/cuda/
farneback.cu	75 extern __shared__ float smem[]; 76 volatile float row = smem + tx; 140 int smem = 3 block.x * sizeof(float); 143 polynomialExpansion<5><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst); 145 polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst); 310 extern __shared__ float smem[]; 311 volatile float row = smem + ty (bdx + 2ksizeHalf); 345 int smem = (block.x + 2ksizeHalf) * block.y * sizeof(float); 348 boxFilter<<<grid, block, smem, stream>>>(src.rows, src.cols, src, ksizeHalf, boxAreaInv, dst); 364 extern __shared__ float smem[]; [all...]

Completed in 531 milliseconds

1 2 3 4

OpenGrok