/external/opencv3/modules/core/src/opencl/ |
fft.cl | 34 void butterfly2(CT a0, CT a1, __local CT* smem, __global const CT* twiddles, 41 smem[dst_ind] = a0 + a1; 42 smem[dst_ind+block_size] = a0 - a1; 46 void butterfly4(CT a0, CT a1, CT a2, CT a3, __local CT* smem, __global const CT* twiddles, 61 smem[dst_ind] = b0 + b1; 62 smem[dst_ind + block_size] = a2 + a3; 63 smem[dst_ind + 2*block_size] = b0 - b1; 64 smem[dst_ind + 3*block_size] = a2 - a3; 68 void butterfly3(CT a0, CT a1, CT a2, __local CT* smem, __global const CT* twiddles, 80 smem[dst_ind] = a0 + b1 [all...] |
/external/opencv3/modules/imgproc/src/opencl/ |
clahe.cl | 50 inline int calc_lut(__local int* smem, int val, int tid) 52 smem[tid] = val; 57 smem[i] += smem[i - 1]; 60 return smem[tid]; 64 inline void reduce(volatile __local int* smem, int val, int tid) 66 smem[tid] = val; 70 smem[tid] = val += smem[tid + 128]; 74 smem[tid] = val += smem[tid + 64] [all...] |
pyr_down.cl | 104 smem[0][col_lcl] = sum0; \ 107 smem[1][col_lcl] = sum1; 123 vstore4(sum40, col_lcl, (__local float*) &smem[0][2]); \ 126 vstore4(sum41, col_lcl, (__local float*) &smem[1][2]); 137 __local FT smem[2][LOCAL_SIZE + 4]; 229 FT sum = dot(vload4(0, (__local float*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (float4)(co3, co2, co1, co2)); 231 FT sum = dot(vload4(0, (__local double*) (&smem) + tid2 + (yin - y) * (LOCAL_SIZE + 4)), (double4)(co3, co2, co1, co2)); 234 FT sum = co3 * smem[yin - y][2 + tid2 - 2]; 235 sum = MAD(co2, smem[yin - y][2 + tid2 - 1], sum); 236 sum = MAD(co1, smem[yin - y][2 + tid2 ], sum) [all...] |
canny.cl | 80 inline float3 sobel(int idx, __local const floatN *smem) 85 floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4], 86 smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]); 88 floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9], 89 smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]) [all...] |
/external/opencv3/modules/cudev/include/opencv2/cudev/block/detail/ |
reduce.hpp | 83 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) 85 get<I>(smem)[tid] = get<I>(val); 87 For<I + 1, N>::loadToSmem(smem, val, tid); 91 __device__ static void loadFromSmem(const PointerTuple& smem, const ValTuple& val, uint tid) 93 get<I>(val) = get<I>(smem)[tid]; 95 For<I + 1, N>::loadFromSmem(smem, val, tid); 99 __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op) 101 typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta]; 102 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); 104 For<I + 1, N>::merge(smem, val, tid, delta, op) [all...] |
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/ |
scan.hpp | 59 __device__ T warpScanInclusive(T data, volatile T* smem, uint tid) 62 (void) smem; 79 smem[pos] = 0; 82 smem[pos] = data; 84 smem[pos] += smem[pos - 1]; 85 smem[pos] += smem[pos - 2]; 86 smem[pos] += smem[pos - 4] [all...] |
/external/opencv3/modules/cudev/include/opencv2/cudev/block/ |
vec_distance.hpp | 75 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) 77 warpReduce(smem, mySum, tid, plus<result_type>()); 80 template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) 82 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); 104 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) 106 warpReduce(smem, mySum, tid, plus<result_type>()); 109 template <int THREAD_DIM> __device__ __forceinline__ void reduceBlock(result_type* smem, uint tid) 111 blockReduce<THREAD_DIM>(smem, mySum, tid, plus<result_type>()); 137 __device__ __forceinline__ void reduceWarp(result_type* smem, uint tid) 139 warpReduce(smem, mySum, tid, plus<result_type>()) [all...] |
scan.hpp | 58 __device__ T blockScanInclusive(T data, volatile T* smem, uint tid) 63 T warpResult = warpScanInclusive(data, smem, tid); 71 smem[tid >> LOG_WARP_SIZE] = warpResult; 79 T val = smem[tid]; 82 smem[tid] = warpScanExclusive(val, smem, tid); 88 return warpResult + smem[tid >> LOG_WARP_SIZE]; 92 return warpScanInclusive(data, smem, tid); 97 __device__ __forceinline__ T blockScanExclusive(T data, volatile T* smem, uint tid) 99 return blockScanInclusive<THREADS_NUM>(data, smem, tid) - data [all...] |
/external/opencv3/modules/objdetect/src/opencl/ |
objdetect_hog.cl | 72 __global float* block_hists, __local float* smem) 87 __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X * 172 __local float* smem = squares + boffset; 173 float sum = smem[hid]; 175 smem[hid] = sum = sum + smem[hid + 18]; 178 smem[hid] = sum = sum + smem[hid + 9]; 181 smem[hid] = sum + smem[hid + 4] [all...] |
/external/opencv3/modules/core/include/opencv2/core/cuda/detail/ |
reduce.hpp | 74 static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) 76 thrust::get<I>(smem)[tid] = thrust::get<I>(val); 78 For<I + 1, N>::loadToSmem(smem, val, tid); 81 static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid) 83 thrust::get<I>(val) = thrust::get<I>(smem)[tid]; 85 For<I + 1, N>::loadFromSmem(smem, val, tid); 89 static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op) 91 typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta]; 92 thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg); 94 For<I + 1, N>::merge(smem, val, tid, delta, op) [all...] |
/external/opencv3/modules/core/include/opencv2/core/cuda/ |
vec_distance.hpp | 70 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) 72 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); 94 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) 96 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); 120 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid) 122 reduce<THREAD_DIM>(smem, mySum, tid, plus<float>()); 145 template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid) 147 reduce<THREAD_DIM>(smem, mySum, tid, plus<int>()); 160 __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) 173 dist.reduceAll<THREAD_DIM>(smem, tid) [all...] |
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/detail/ |
reduce.hpp | 81 __device__ static void loadToSmem(const PointerTuple& smem, const ValTuple& val, uint tid) 83 get<I>(smem)[tid] = get<I>(val); 85 For<I + 1, N>::loadToSmem(smem, val, tid); 89 __device__ static void merge(const PointerTuple& smem, const ValTuple& val, uint tid, uint delta, const OpTuple& op) 91 typename GetType<typename tuple_element<I, PointerTuple>::type>::type reg = get<I>(smem)[tid + delta]; 92 get<I>(smem)[tid] = get<I>(val) = get<I>(op)(get<I>(val), reg); 94 For<I + 1, N>::merge(smem, val, tid, delta, op); 132 __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, uint tid) 134 smem[tid] = val; 139 __device__ __forceinline__ void loadToSmem(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem, [all...] |
/hardware/intel/common/libmix/videoencoder/ |
IntelMetadataBuffer.cpp | 127 static status_t WriteMemObjToBinder(Parcel& data, ShareMemMap* smem) { 129 if (smem->type >= ST_MAX) 134 data.writeInt32(smem->type); 136 if (smem->type == ST_MEMBASE) /*offset, size, heap*/ 140 sp<IMemoryHeap> heap = smem->membase->getMemory(&offset, &size); 146 heap->getHeapID(), smem->membase->pointer(), *((int *)(smem->membase->pointer()))); 149 else if (smem->type == ST_GFX) /*graphicbuffer*/ 150 data.write(*(smem->gbuffer)); 222 static void PushShareMem(ShareMemMap* &smem) 311 ShareMemMap* smem = FindShareMem(sessionflag, value, false); local 706 ShareMemMap smem; local 722 ShareMemMap* smem = new ShareMemMap; local 758 ShareMemMap smem; local 774 ShareMemMap* smem = new ShareMemMap; local [all...] |
/external/opencv3/modules/cudev/include/opencv2/cudev/grid/detail/ |
reduce_to_column.hpp | 61 __device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal) 64 blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op); 70 __device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal) 73 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op)); 79 __device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal) 82 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op)); 88 __device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal 102 __shared__ work_elem_type smem[cn][BLOCK_SIZE]; local [all...] |
pyr_down.hpp | 68 __shared__ work_type smem[256 + 4]; local 86 smem[2 + threadIdx.x] = sum; 101 smem[threadIdx.x] = sum; 116 smem[4 + threadIdx.x] = sum; 130 smem[2 + threadIdx.x] = sum; 145 smem[threadIdx.x] = sum; 160 smem[4 + threadIdx.x] = sum; 172 sum = 0.0625f * smem[2 + tid2 - 2]; 173 sum = sum + 0.25f * smem[2 + tid2 - 1]; 174 sum = sum + 0.375f * smem[2 + tid2 ] [all...] |
histogram.hpp | 60 __shared__ ResType smem[BIN_COUNT]; local 66 smem[i] = 0; 77 atomicAdd(&smem[data % BIN_COUNT], 1); 86 const ResType histVal = smem[i];
|
reduce_to_row.hpp | 62 __shared__ work_type smem[BLOCK_SIZE_X * BLOCK_SIZE_Y]; local 81 smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal; 85 volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X; 96 dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows));
|
/external/opencv3/modules/objdetect/ |
opencl_kernels_objdetect.cpp | 606 "__global float* block_hists, __local float* smem)\n" 618 "__local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *\n" 685 "__local float* smem = squares + boffset;\n" 686 "float sum = smem[hid];\n" 688 "smem[hid] = sum = sum + smem[hid + 18];\n" 691 "smem[hid] = sum = sum + smem[hid + 9];\n" 694 "smem[hid] = sum + smem[hid + 4];\n [all...] |
/external/opencv3/modules/cudaimgproc/src/cuda/ |
canny.cu | 249 __shared__ volatile int smem[18][18]; 254 smem[threadIdx.y + 1][threadIdx.x + 1] = checkIdx(y, x, map.rows, map.cols) ? map(y, x) : 0; 256 smem[0][threadIdx.x + 1] = checkIdx(y - 1, x, map.rows, map.cols) ? map(y - 1, x) : 0; 258 smem[blockDim.y + 1][threadIdx.x + 1] = checkIdx(y + 1, x, map.rows, map.cols) ? map(y + 1, x) : 0; 260 smem[threadIdx.y + 1][0] = checkIdx(y, x - 1, map.rows, map.cols) ? map(y, x - 1) : 0; 262 smem[threadIdx.y + 1][blockDim.x + 1] = checkIdx(y, x + 1, map.rows, map.cols) ? map(y, x + 1) : 0; 264 smem[0][0] = checkIdx(y - 1, x - 1, map.rows, map.cols) ? map(y - 1, x - 1) : 0; 266 smem[0][blockDim.x + 1] = checkIdx(y - 1, x + 1, map.rows, map.cols) ? map(y - 1, x + 1) : 0; 268 smem[blockDim.y + 1][0] = checkIdx(y + 1, x - 1, map.rows, map.cols) ? map(y + 1, x - 1) : 0; 270 smem[blockDim.y + 1][blockDim.x + 1] = checkIdx(y + 1, x + 1, map.rows, map.cols) ? map(y + 1, x + 1) : (…) [all...] |
clahe.cu | 61 __shared__ int smem[512]; 67 smem[tid] = 0; 76 Emulation::smem::atomicAdd(&smem[data], 1); 82 int tHistVal = smem[tid]; 99 reduce<256>(smem, clipped, tid, plus<int>()); 119 const int lutVal = blockScanInclusive<256>(tHistVal, smem, tid);
|
/external/opencv3/modules/core/src/ |
opencl_kernels_core.cpp | 636 "void butterfly2(CT a0, CT a1, __local CT* smem, __global const CT* twiddles,\n" 642 "smem[dst_ind] = a0 + a1;\n" 643 "smem[dst_ind+block_size] = a0 - a1;\n" 646 "void butterfly4(CT a0, CT a1, CT a2, CT a3, __local CT* smem, __global const CT* twiddles,\n" 658 "smem[dst_ind] = b0 + b1;\n" 659 "smem[dst_ind + block_size] = a2 + a3;\n" 660 "smem[dst_ind + 2*block_size] = b0 - b1;\n" 661 "smem[dst_ind + 3*block_size] = a2 - a3;\n" 664 "void butterfly3(CT a0, CT a1, CT a2, __local CT* smem, __global const CT* twiddles,\n" 674 "smem[dst_ind] = a0 + b1;\n [all...] |
/toolchain/binutils/binutils-2.25/gas/testsuite/gas/tic54x/ |
opcodes.s | 11 add *ar0+, a ; Smem, src
12 add *ar1+, ts, a ; Smem, TS, src
13 add *ar2+, 16, a ; Smem, 16, src [,dst]
14 add *ar3+, a, b ; Smem [,SHIFT], src [,dst] (-16<=SHIFT<=15)
28 and *ar3+,a ; Smem,src
100 ld *ar0+,a ; Smem,dst
101 ld *ar1+,ts,a ; Smem,TS,dst
102 ld *ar2+,16,a ; Smem,16,dst
103 ld *ar3+,1,a ; Smem[,SHIFT],dst
|
/external/opencv3/modules/cudawarping/src/cuda/ |
pyr_down.cu | 59 __shared__ work_t smem[256 + 4]; 77 smem[2 + threadIdx.x] = sum; 92 smem[threadIdx.x] = sum; 107 smem[4 + threadIdx.x] = sum; 121 smem[2 + threadIdx.x] = sum; 136 smem[threadIdx.x] = sum; 151 smem[4 + threadIdx.x] = sum; 163 sum = 0.0625f * smem[2 + tid2 - 2]; 164 sum = sum + 0.25f * smem[2 + tid2 - 1]; 165 sum = sum + 0.375f * smem[2 + tid2 ] [all...] |
/external/opencv3/modules/cudafeatures2d/src/cuda/ |
bf_match.cu | 141 extern __shared__ int smem[]; 145 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); 146 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN); 157 float* s_distance = (float*)(smem); 158 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE); 190 extern __shared__ int smem[]; 194 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem); 195 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN); 214 float* s_distance = (float*)(smem); 215 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE) [all...] |
/external/opencv3/modules/cudaoptflow/src/cuda/ |
farneback.cu | 75 extern __shared__ float smem[]; 76 volatile float *row = smem + tx; 140 int smem = 3 * block.x * sizeof(float); 143 polynomialExpansion<5><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst); 145 polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst); 310 extern __shared__ float smem[]; 311 volatile float *row = smem + ty * (bdx + 2*ksizeHalf); 345 int smem = (block.x + 2*ksizeHalf) * block.y * sizeof(float); 348 boxFilter<<<grid, block, smem, stream>>>(src.rows, src.cols, src, ksizeHalf, boxAreaInv, dst); 364 extern __shared__ float smem[]; [all...] |