/external/opencv3/modules/photo/src/cuda/
  nlm.cu
    189  static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*> smem_tuple(float* smem)
    191      return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE);
    208  static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
    210      return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
    227  static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
    229      return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE)
    [all...]
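Here smem_tuple carves one flat shared array into BLOCK_SIZE-sized lanes, one per accumulator, so a tuple-aware block reduction can fold several running sums in a single pass. A minimal CUDA sketch of the same partitioning idea (kernel and names are illustrative, not OpenCV's):

```cuda
// One shared array of 2 * BLOCK_SIZE floats split into two lanes, so a single
// tree reduction folds two running sums at once. OpenCV's smem_tuple simply
// packages such lane pointers into a thrust::tuple.
#define BLOCK_SIZE 256

__global__ void reduce2(const float* a, const float* b, float* outA, float* outB, int n)
{
    __shared__ float smem[2 * BLOCK_SIZE];       // two lanes back to back
    volatile float* lane0 = smem;                // running sum of a[]
    volatile float* lane1 = smem + BLOCK_SIZE;   // running sum of b[]

    const int tid = threadIdx.x;
    lane0[tid] = (tid < n) ? a[tid] : 0.0f;
    lane1[tid] = (tid < n) ? b[tid] : 0.0f;
    __syncthreads();

    for (int s = BLOCK_SIZE / 2; s > 0; s >>= 1)   // tree reduction, both lanes per step
    {
        if (tid < s)
        {
            lane0[tid] += lane0[tid + s];
            lane1[tid] += lane1[tid + s];
        }
        __syncthreads();
    }

    if (tid == 0) { *outA = lane0[0]; *outB = lane1[0]; }
}
```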
/external/opencv3/modules/cudaimgproc/src/cuda/
  hough_circles.cu
    184  int* smem = DynamicSharedMem<int>();
    187  smem[i] = 0;
    210  Emulation::smem::atomicAdd(&smem[r + 1], 1);
    218  const int curVotes = smem[i + 1];
    220  if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
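The pattern in hough_circles.cu: zero a dynamically sized shared histogram, let every thread cast votes with an atomic add, then keep a bin only if it beats a threshold and both neighbours (a 1-D local maximum). A simplified stand-in kernel with hypothetical names; hist[] has maxRadius + 2 bins so bin r + 1 always has two valid neighbours for the local-maximum test:

```cuda
__global__ void voteAndPick(const int* radii, int count, int maxRadius,
                            int threshold, int* winners)
{
    extern __shared__ int hist[];                  // sized at launch, see below

    for (int i = threadIdx.x; i < maxRadius + 2; i += blockDim.x)
        hist[i] = 0;                               // zero the histogram
    __syncthreads();

    for (int i = threadIdx.x; i < count; i += blockDim.x)
        atomicAdd(&hist[radii[i] + 1], 1);         // cast votes
    __syncthreads();

    for (int r = threadIdx.x; r < maxRadius; r += blockDim.x)
    {
        const int curVotes = hist[r + 1];
        if (curVotes >= threshold && curVotes > hist[r] && curVotes >= hist[r + 2])
            winners[r] = curVotes;                 // r is a thresholded local maximum
    }
}
// Launch with the histogram bytes as the dynamic smem argument:
//   voteAndPick<<<1, 256, (maxRadius + 2) * sizeof(int)>>>(radii, count, maxRadius, thr, winners);
```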
  build_point_list.cu
    77  const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
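This single line implements shared-memory stream compaction: atomicAdd on a shared counter hands each passing thread a unique queue slot. A self-contained sketch of that idiom, with a block-level flush to global memory (assumes blockDim.x <= 256 and a zero-initialised outCount):

```cuda
__global__ void collectNonZero(const unsigned char* mask, int n, int* outList, int* outCount)
{
    __shared__ int s_qsize;          // items queued by this block so far
    __shared__ int s_queue[256];     // per-block staging buffer
    __shared__ int s_globalBase;     // this block's slice of the global list

    if (threadIdx.x == 0) s_qsize = 0;
    __syncthreads();

    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < n && mask[idx])
    {
        const int qidx = atomicAdd(&s_qsize, 1);   // reserve a unique slot
        s_queue[qidx] = idx;
    }
    __syncthreads();

    // One global reservation for the whole block, then a coalesced flush.
    if (threadIdx.x == 0) s_globalBase = atomicAdd(outCount, s_qsize);
    __syncthreads();

    if (threadIdx.x < s_qsize)
        outList[s_globalBase + threadIdx.x] = s_queue[threadIdx.x];
}
```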
/external/opencv3/modules/cudev/include/opencv2/cudev/block/detail/
  reduce_key_val.hpp
    82  __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
    84      get<I>(smem)[tid] = get<I>(data);
    86      For<I + 1, N>::loadToSmem(smem, data, tid);
    90  __device__ static void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
    92      get<I>(data) = get<I>(smem)[tid];
    94      For<I + 1, N>::loadFromSmem(smem, data, tid);
    152  __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid)
    154      smem[tid] = data;
    158  __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, uint tid)
    160      data = smem[tid]
    [all...]
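The For<I, N> helper is compile-time recursion over a tuple: instantiation I copies element I and delegates to I + 1 until a terminating partial specialization. A stand-alone sketch of the same device-side pattern, simplified from the header (thrust types assumed):

```cuda
#include <thrust/tuple.h>

template <int I, int N>
struct For
{
    template <class PointerTuple, class ValueTuple>
    __device__ static void loadToSmem(const PointerTuple& smem, const ValueTuple& data, unsigned int tid)
    {
        thrust::get<I>(smem)[tid] = thrust::get<I>(data);   // store element I
        For<I + 1, N>::loadToSmem(smem, data, tid);         // next element
    }
};

template <int N>
struct For<N, N>   // base case: all elements handled
{
    template <class PointerTuple, class ValueTuple>
    __device__ static void loadToSmem(const PointerTuple&, const ValueTuple&, unsigned int) {}
};

// Typical call, staging a key and a value in one shot:
//   For<0, 2>::loadToSmem(thrust::make_tuple(skeys, svals), thrust::tie(key, val), tid);
```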
/external/opencv3/modules/cudaoptflow/src/cuda/
  farneback.cu
    75  extern __shared__ float smem[];
    76  volatile float *row = smem + tx;
    140  int smem = 3 * block.x * sizeof(float);
    143  polynomialExpansion<5><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);
    145  polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);
    310  extern __shared__ float smem[];
    311  volatile float *row = smem + ty * (bdx + 2*ksizeHalf);
    345  int smem = (block.x + 2*ksizeHalf) * block.y * sizeof(float);
    348  boxFilter<<<grid, block, smem, stream>>>(src.rows, src.cols, src, ksizeHalf, boxAreaInv, dst);
    364  extern __shared__ float smem[];
    [all...]
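farneback.cu pairs `extern __shared__` arrays with a byte count computed on the host and passed as the third <<<>>> launch parameter, as in lines 140 and 143 above. A minimal sketch of that host/device contract (kernel body is illustrative):

```cuda
__global__ void rowScale(const float* in, float* out, int width)
{
    extern __shared__ float row[];   // length fixed at launch, not compile time
    const int x = threadIdx.x;
    if (x < width) row[x] = in[blockIdx.x * width + x];
    __syncthreads();
    if (x < width) out[blockIdx.x * width + x] = row[x] * 0.5f;
}

void launchRowScale(const float* in, float* out, int rows, int width, cudaStream_t stream)
{
    dim3 block(width);   // assumes width <= 1024, one row per block
    dim3 grid(rows);
    const int smem = width * (int)sizeof(float);   // like `3 * block.x * sizeof(float)` above
    rowScale<<<grid, block, smem, stream>>>(in, out, width);
}
```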
/external/opencv3/modules/cudafeatures2d/src/cuda/
  bf_knnmatch.cu
    379  extern __shared__ int smem[];
    383  typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
    384  typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN);
    397  float* s_distance = (float*)(smem);
    398  int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE);
    429  extern __shared__ int smem[];
    433  typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
    434  typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN);
    456  float* s_distance = (float*)(smem);
    457  int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE)
    [all...]
  bf_radius_match.cu
    62  extern __shared__ int smem[];
    67  typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
    68  typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);
    170  extern __shared__ int smem[];
    175  typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
    176  typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);
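Both matchers declare a single untyped extern __shared__ buffer and carve typed views out of it, reusing the same bytes for successive phases of the kernel (descriptor tiles first, then distance/index scratch). A compact sketch of the aliasing idiom, with made-up sizes and a trivial workload:

```cuda
#define BLOCK_SIZE 16   // made-up tile edge; launch with BLOCK_SIZE * BLOCK_SIZE threads

__global__ void twoPhase(const float* query, const float* train, float* outDist, int* outIdx)
{
    extern __shared__ int smem[];   // sized at launch for the larger layout

    // Phase 1: two descriptor tiles share the buffer.
    float* s_query = (float*)(smem);
    float* s_train = (float*)(smem + BLOCK_SIZE * BLOCK_SIZE);
    s_query[threadIdx.x] = query[threadIdx.x];
    s_train[threadIdx.x] = train[threadIdx.x];
    __syncthreads();

    float myDist = fabsf(s_query[threadIdx.x] - s_train[threadIdx.x]);
    __syncthreads();   // all phase-1 reads done; the bytes may be reused

    // Phase 2: the same bytes, reinterpreted as reduction scratch.
    float* s_distance = (float*)(smem);
    int*   s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE);
    s_distance[threadIdx.x] = myDist;
    s_trainIdx[threadIdx.x] = threadIdx.x;
    __syncthreads();

    if (threadIdx.x == 0) { *outDist = s_distance[0]; *outIdx = s_trainIdx[0]; }
}
// Launch: twoPhase<<<1, BLOCK_SIZE * BLOCK_SIZE, 2 * BLOCK_SIZE * BLOCK_SIZE * sizeof(float)>>>(...);
```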
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/detail/
  reduce_key_val.hpp
    80  __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
    82      get<I>(smem)[tid] = get<I>(data);
    84      For<I + 1, N>::loadToSmem(smem, data, tid);
    137  __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid)
    139      smem[tid] = data;
    144  __device__ __forceinline__ void loadToSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
    148      For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
/external/elfutils/src/
  ldscript.y
    230  obstack_alloc (&ld_state.smem,
    282  obstack_alloc (&ld_state.smem, sizeof (*$$));
    467  obstack_alloc (&ld_state.smem, sizeof (*newp));
    478  obstack_alloc (&ld_state.smem, sizeof (*newp));
    490  obstack_alloc (&ld_state.smem, sizeof (*newp));
    502  memset (obstack_alloc (&ld_state.smem, sizeof (*newp)),
    516  obstack_alloc (&ld_state.smem, sizeof (*newp));
    534  = (struct output_segment *) obstack_alloc (&ld_state.smem, sizeof (*newp));
    631  newp = (struct id_list *) obstack_alloc (&ld_state.smem, sizeof (*newp));
    650  newp = (struct version *) obstack_alloc (&ld_state.smem, sizeof (*newp))
    [all...]
  ld.c
    307  obstack_init (&ld_state.smem);
    [all...]
  ldlex.l
    186  {ID} { ldlval.str = obstack_strndup (&ld_state.smem,
    190  {FILENAMECHAR1}{FILENAMECHAR} { ldlval.str = obstack_strndup (&ld_state.smem,
  ldgeneric.c
    410  fileinfo->rfname = obstack_strdup (&ld_state.smem, rfname);
    [all...]
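Note the change of meaning: in elfutils, ld_state.smem is a glibc obstack (a region allocator initialised once in ld.c and grown by the parser and lexer), not GPU shared memory; obstack_strdup/obstack_strndup appear to be local wrappers over the obstack primitives. A minimal C sketch of that allocation pattern:

```c
#include <obstack.h>
#include <stdio.h>
#include <stdlib.h>

#define obstack_chunk_alloc malloc   /* obstack.h requires these two macros */
#define obstack_chunk_free  free

struct id_list { struct id_list *next; const char *id; };

int main(void)
{
    struct obstack smem;
    obstack_init(&smem);                                 /* cf. ld.c:307 */

    /* cf. ldscript.y:631: carve a node out of the region */
    struct id_list *newp = obstack_alloc(&smem, sizeof(*newp));
    newp->next = NULL;

    /* cf. ldlex.l:186: a bounded copy into the same region (5 bytes + NUL) */
    newp->id = obstack_copy0(&smem, "foo.o", 5);

    printf("%s\n", newp->id);
    obstack_free(&smem, NULL);                           /* free the whole region */
    return 0;
}
```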
/external/opencv3/modules/cudev/include/opencv2/cudev/grid/detail/
  reduce.hpp
    71  __device__ __forceinline__ static volatile R* smem(R* ptr)   [function in struct:cv::cudev::grid_reduce_detail::Unroll]
    92  __device__ __forceinline__ static tuple<volatile R*, volatile R*> smem(R* ptr)   [function in struct:cv::cudev::grid_reduce_detail::Unroll]
    113  __device__ __forceinline__ static tuple<volatile R*, volatile R*, volatile R*> smem(R* ptr)   [function in struct:cv::cudev::grid_reduce_detail::Unroll]
    136  __device__ __forceinline__ static tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem(R* ptr)   [function in struct:cv::cudev::grid_reduce_detail::Unroll]
    279  __shared__ work_elem_type smem[BLOCK_SIZE * cn];   [local]
    281  blockReduce<BLOCK_SIZE>(Unroll<cn>::template smem<BLOCK_SIZE>(smem), Unroll<cn>::res(sum), tid, Unroll<cn>::op(plus<work_elem_type>()));
    339  __shared__ work_type smem[BLOCK_SIZE];   [local]
    343  blockReduce<BLOCK_SIZE>(smem, myval, tid, op);
/external/opencv3/modules/core/include/opencv2/core/cuda/detail/
  reduce_key_val.hpp
    74  static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
    76      thrust::get<I>(smem)[tid] = thrust::get<I>(data);
    78      For<I + 1, N>::loadToSmem(smem, data, tid);
    81  static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
    83      thrust::get<I>(data) = thrust::get<I>(smem)[tid];
    85      For<I + 1, N>::loadFromSmem(smem, data, tid);
    171  __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
    173      smem[tid] = data;
    176  __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
    178      data = smem[tid]
    [all...]
/external/opencv3/modules/cudafilters/src/cuda/
  column_filter.hpp
    74  __shared__ sum_t smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X];   [local]
    90  smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, x));
    97  smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_low(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, src_col, src.step));
    105  smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + j * BLOCK_DIM_Y, x));
    110  smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, x));
    117  smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + j * BLOCK_DIM_Y, src_col, src.step));
    122  smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, src_col, src.step));
    138  sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k];
  row_filter.hpp
    74  __shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X];   [local]
    90  smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X]);
    97  smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_low(xStart - (HALO_SIZE - j) * BLOCK_DIM_X, src_row));
    105  smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + j * BLOCK_DIM_X]);
    110  smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X]);
    117  smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + j * BLOCK_DIM_X, src_row));
    122  smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X, src_row));
    138  sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k];
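Both filters stage PATCH_PER_BLOCK tiles plus HALO_SIZE guard tiles per side into shared memory, then convolve entirely out of smem. A stripped-down row version with one halo tile per side, border handling reduced to clamping, and made-up constants:

```cuda
#define BLOCK_DIM_X 32
#define KSIZE 9
#define ANCHOR (KSIZE / 2)

__constant__ float c_kernel[KSIZE];   // filled by the host via cudaMemcpyToSymbol

__global__ void rowFilterSketch(const float* src, float* dst, int width)
{
    __shared__ float smem[3 * BLOCK_DIM_X];   // left halo | main tile | right halo
    const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;

    // Clamp-to-edge loads; the real code dispatches to brd.at_low/at_high.
    smem[threadIdx.x]                   = src[max(x - BLOCK_DIM_X, 0)];
    smem[threadIdx.x +     BLOCK_DIM_X] = src[min(x, width - 1)];
    smem[threadIdx.x + 2 * BLOCK_DIM_X] = src[min(x + BLOCK_DIM_X, width - 1)];
    __syncthreads();

    if (x < width)
    {
        float sum = 0.0f;
        for (int k = 0; k < KSIZE; ++k)   // convolve entirely from shared memory
            sum += smem[threadIdx.x + BLOCK_DIM_X - ANCHOR + k] * c_kernel[k];
        dst[x] = sum;
    }
}
```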
/external/opencv3/modules/cudev/include/opencv2/cudev/block/
  reduce.hpp
    63  __device__ __forceinline__ void blockReduce(volatile T* smem, T& val, uint tid, const Op& op)
    65      block_reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
    72  __device__ __forceinline__ void blockReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
    80      const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
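A usage sketch for the scalar blockReduce overload, mirroring the call at grid/detail/reduce.hpp line 343 above: every thread contributes one value, and after the call thread 0's val holds the block-wide result (kernel is illustrative):

```cuda
#include <opencv2/cudev.hpp>

template <int BLOCK_SIZE>
__global__ void sumKernel(const float* data, float* result, int n)
{
    __shared__ float smem[BLOCK_SIZE];
    const unsigned int tid = threadIdx.x;

    float val = (tid < (unsigned int)n) ? data[tid] : 0.0f;
    cv::cudev::blockReduce<BLOCK_SIZE>(smem, val, tid, cv::cudev::plus<float>());

    if (tid == 0) *result = val;   // thread 0 holds the block total
}
```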
/external/opencv3/modules/video/src/opencl/
  optical_flow_farneback.cl
    78  __local float * smem,
    85  __local float *row = smem + tx;
    163  __local float * smem)
    168  __local float *row = smem + ty * (bdx + 2*ksizeHalf);
    202  __local float * smem)
    208  __local volatile float *row = smem + 5 * ty * smw;
    354  __local float * smem)
    361  __local float *row = smem + 5 * ty * smw;
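The OpenCL counterpart of CUDA's dynamic shared memory: the kernel takes local memory as a `__local float * smem` parameter, and the host sizes it with clSetKernelArg passing a NULL value. A hedged host-side sketch (function name and argument index are hypothetical):

```c
#include <CL/cl.h>

/* Mirrors the (bdx + 2*ksizeHalf) row layout used by the kernels above. */
cl_int setLocalRowBuffer(cl_kernel kernel, cl_uint argIndex,
                         size_t bdx, size_t ksizeHalf, size_t rows)
{
    size_t bytes = rows * (bdx + 2 * ksizeHalf) * sizeof(float);
    return clSetKernelArg(kernel, argIndex, bytes, NULL);   /* NULL => __local */
}
```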
/external/opencv3/modules/cudalegacy/src/cuda/
  NCVBroxOpticalFlow.cu
    261  ///\param smem pointer to shared memory array
    269  __forceinline__ __device__ void load_array_element(float *smem, int is, int js, int i, int j, int w, int h, int p)
    281  smem[ijs] = tex1Dfetch(tex_u, pos);
    284  smem[ijs] = tex1Dfetch(tex_v, pos);
    287  smem[ijs] = tex1Dfetch(tex_du, pos);
    290  smem[ijs] = tex1Dfetch(tex_dv, pos);
    297  ///\param smem pointer to target shared memory array
    305  __forceinline__ __device__ void load_array(float *smem, int ig, int jg, int w, int h, int p)
    309  load_array_element<tex>(smem, i, j, ig, jg, w, h, p); // load current pixel
    314  load_array_element<tex>(smem, i, j-2, ig, jg-2, w, h, p)
    [all...]
  bm_fast.cu
    283  size_t smem = search_window * search_window * sizeof(int);
    285  optflowbm_fast_kernel<<<grid, block, smem, stream>>>(fbm, velx, vely);
  needle_map.cu
    56  __shared__ float smem[2 * NEEDLE_MAP_SCALE];
    58  volatile float* u_col_sum = smem;
/external/opencv3/modules/cudaobjdetect/src/cuda/
  hog.cu
    129  extern __shared__ float smem[];
    130  float* hists = smem;
    131  float* final_hist = smem + cnbins * 48 * nblocks;
    217  int smem = hists_size + final_hists_size;
    218  compute_hists_kernel_many_blocks<nblocks><<<grid, threads, smem>>>(
    232  __device__ float reduce_smem(float* smem, float val)
    237  reduce<size>(smem, sum, tid, plus<float>());
    244  return smem[0];
    251  smem[0] = sum;
    256  return smem[0]
    [all...]
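reduce_smem in hog.cu shows the broadcast-after-reduce idiom: the reduction leaves the block total in smem[0] (line 251), and every thread reads it back (lines 244 and 256), so the whole block leaves with the same value. A stand-alone sketch:

```cuda
#define BLOCK_SIZE 128   // illustrative; hog.cu derives its sizes from nthreads

__device__ float reduceAndBroadcast(volatile float* smem, float val)
{
    const unsigned int tid = threadIdx.x;

    smem[tid] = val;
    __syncthreads();

    for (unsigned int s = BLOCK_SIZE / 2; s > 0; s >>= 1)   // tree reduction
    {
        if (tid < s) smem[tid] += smem[tid + s];
        __syncthreads();
    }

    // The loop's final barrier makes smem[0] safe for every thread to read,
    // so all threads return the same sum, exactly like reduce_smem.
    return smem[0];
}
```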
/external/opencv3/modules/core/include/opencv2/core/cuda/
  reduce.hpp
    59  __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
    61      reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
    67  __device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
    75      const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
  emulation.hpp
    84  struct smem   [struct in struct:cv::cuda::device::Emulation]
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/
  reduce.hpp
    62  __device__ __forceinline__ void warpReduce(volatile T* smem, T& val, uint tid, const Op& op)
    64      warp_reduce_detail::WarpReductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
    70  __device__ __forceinline__ void warpReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
    78      const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
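warpReduce covers a single warp, so the classic implementation needs no __syncthreads(): the volatile qualifier keeps each store to smem visible to the lock-step warp. A hand-rolled sketch of that idea; on current GPUs with independent thread scheduling, __shfl_down_sync is the safer formulation:

```cuda
// Assumes tid is the lane index of a full 32-thread warp and smem has at
// least 32 elements; only legacy lock-step execution makes this barrier-free.
__device__ float warpReduceSum(volatile float* smem, float val, unsigned int tid)
{
    smem[tid] = val;

    if (tid < 16)
    {
        smem[tid] = val = val + smem[tid + 16];
        smem[tid] = val = val + smem[tid + 8];
        smem[tid] = val = val + smem[tid + 4];
        smem[tid] = val = val + smem[tid + 2];
        smem[tid] = val = val + smem[tid + 1];
    }
    return smem[0];   // warp total, written by lane 0
}
```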