    Searched refs: smem (Results 26 - 50 of 56)


  /external/opencv3/modules/photo/src/cuda/
nlm.cu 189 static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*> smem_tuple(float* smem)
191 return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE);
208 static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
210 return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
227 static __device__ __forceinline__ thrust::tuple<volatile float*, volatile float*, volatile float*, volatile float*> smem_tuple(float* smem)
229 return cv::cuda::device::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE)
    [all...]
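
The nlm.cu hits show one contiguous shared float array being split into BLOCK_SIZE-sized slices whose pointers are packed into a thrust::tuple by cv::cuda::device::smem_tuple, so several partial sums can be reduced together. A minimal sketch of the same slicing idea, with the reduction written out inline rather than via the OpenCV helpers (the function name reduce_two_sums and the inline tree reduction are illustrative, not the library's code):

    template <int BLOCK_SIZE>
    __device__ void reduce_two_sums(float* smem, float& sum1, float& sum2, int tid)
    {
        volatile float* s1 = smem;               // first BLOCK_SIZE-sized slice
        volatile float* s2 = smem + BLOCK_SIZE;  // second slice of the same buffer

        s1[tid] = sum1;
        s2[tid] = sum2;
        __syncthreads();

        // plain tree reduction over both slices at once
        for (int stride = BLOCK_SIZE / 2; stride > 0; stride /= 2)
        {
            if (tid < stride)
            {
                s1[tid] = sum1 = sum1 + s1[tid + stride];
                s2[tid] = sum2 = sum2 + s2[tid + stride];
            }
            __syncthreads();
        }
        // after the loop, thread 0 holds the block-wide sums in sum1 / sum2
    }
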
  /external/opencv3/modules/cudaimgproc/src/cuda/
hough_circles.cu 184 int* smem = DynamicSharedMem<int>();
187 smem[i] = 0;
210 Emulation::smem::atomicAdd(&smem[r + 1], 1);
218 const int curVotes = smem[i + 1];
220 if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
build_point_list.cu 77 const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
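
hough_circles.cu zero-fills a dynamically sized shared histogram, has each thread vote with a shared-memory atomicAdd (wrapped as Emulation::smem::atomicAdd for older architectures), and then looks for bins whose vote count is a local maximum above a threshold. A reduced sketch of that per-block voting pattern using plain atomicAdd; the kernel name and parameters below are illustrative, and OpenCV's DynamicSharedMem/Emulation helpers are not reproduced:

    // Sketch: per-block histogram voting in dynamically allocated shared memory.
    // Launch as vote_kernel<<<grid, block, numBins * sizeof(int), stream>>>(...).
    __global__ void vote_kernel(const int* binIdx, int count, int numBins, int* globalHist)
    {
        extern __shared__ int smem[];                  // numBins ints, sized at launch

        for (int i = threadIdx.x; i < numBins; i += blockDim.x)
            smem[i] = 0;                               // zero the histogram
        __syncthreads();

        for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += gridDim.x * blockDim.x)
            atomicAdd(&smem[binIdx[i]], 1);            // shared-memory vote
        __syncthreads();

        for (int i = threadIdx.x; i < numBins; i += blockDim.x)
            atomicAdd(&globalHist[i], smem[i]);        // merge into the global histogram
    }
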
  /external/opencv3/modules/cudev/include/opencv2/cudev/block/detail/
reduce_key_val.hpp 82 __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
84 get<I>(smem)[tid] = get<I>(data);
86 For<I + 1, N>::loadToSmem(smem, data, tid);
90 __device__ static void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
92 get<I>(data) = get<I>(smem)[tid];
94 For<I + 1, N>::loadFromSmem(smem, data, tid);
152 __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid)
154 smem[tid] = data;
158 __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, uint tid)
160 data = smem[tid]
    [all...]
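
Both reduce_key_val.hpp headers copy a tuple of values into a matching tuple of shared-memory arrays with a compile-time For<I, N> loop. The sketch below reproduces that mechanism for thrust tuples, stripped of the surrounding reduction machinery:

    #include <thrust/tuple.h>

    // Compile-time loop: element I of the value tuple goes to slot tid of the
    // I-th shared-memory array, then the loop recurses on I + 1.
    template <unsigned int I, unsigned int N>
    struct For
    {
        template <class PointerTuple, class ReferenceTuple>
        __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
        {
            thrust::get<I>(smem)[tid] = thrust::get<I>(data);
            For<I + 1, N>::loadToSmem(smem, data, tid);
        }
    };

    template <unsigned int N>
    struct For<N, N>   // terminator: nothing left to copy
    {
        template <class PointerTuple, class ReferenceTuple>
        __device__ static void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int) {}
    };
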
  /external/opencv3/modules/cudaoptflow/src/cuda/
farneback.cu 75 extern __shared__ float smem[];
76 volatile float *row = smem + tx;
140 int smem = 3 * block.x * sizeof(float);
143 polynomialExpansion<5><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);
145 polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);
310 extern __shared__ float smem[];
311 volatile float *row = smem + ty * (bdx + 2*ksizeHalf);
345 int smem = (block.x + 2*ksizeHalf) * block.y * sizeof(float);
348 boxFilter<<<grid, block, smem, stream>>>(src.rows, src.cols, src, ksizeHalf, boxAreaInv, dst);
364 extern __shared__ float smem[];
    [all...]
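
In farneback.cu the host computes the number of shared-memory bytes a kernel needs and passes it as the third launch-configuration argument, which sizes the kernel's extern __shared__ float smem[] at run time. A minimal sketch of that launch pattern; the kernel body and the helper launch_box_filter are illustrative stand-ins:

    // Device side: the extern array is as large as the launch configuration says.
    __global__ void box_filter_kernel(const float* src, float* dst, int cols, int ksizeHalf)
    {
        extern __shared__ float smem[];   // (blockDim.x + 2*ksizeHalf) * blockDim.y floats
        volatile float* row = smem + threadIdx.y * (blockDim.x + 2 * ksizeHalf);
        // ... load a padded row into `row`, filter it, write the result to dst ...
        (void)src; (void)dst; (void)cols; (void)row;
    }

    // Host side: compute the byte count and pass it as the third launch argument.
    void launch_box_filter(const float* src, float* dst, int rows, int cols,
                           int ksizeHalf, cudaStream_t stream)
    {
        dim3 block(32, 8);
        dim3 grid((cols + block.x - 1) / block.x, (rows + block.y - 1) / block.y);
        size_t smem = (block.x + 2 * ksizeHalf) * block.y * sizeof(float);
        box_filter_kernel<<<grid, block, smem, stream>>>(src, dst, cols, ksizeHalf);
    }
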
  /external/opencv3/modules/cudafeatures2d/src/cuda/
bf_knnmatch.cu 379 extern __shared__ int smem[];
383 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
384 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN);
397 float* s_distance = (float*)(smem);
398 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE);
429 extern __shared__ int smem[];
433 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
434 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * MAX_DESC_LEN);
456 float* s_distance = (float*)(smem);
457 int* s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE)
    [all...]
bf_radius_match.cu 62 extern __shared__ int smem[];
67 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
68 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);
170 extern __shared__ int smem[];
175 typename Dist::value_type* s_query = (typename Dist::value_type*)(smem);
176 typename Dist::value_type* s_train = (typename Dist::value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);
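
The bf_knnmatch.cu and bf_radius_match.cu kernels declare a single extern __shared__ int smem[] buffer and reinterpret it per phase: first as query/train descriptor tiles, later as a float distance array plus an int index array at the same addresses. A compact sketch of that reuse; the function name and phase comments are illustrative:

    // Sketch: one dynamic shared buffer, reinterpreted per matching phase.
    template <int BLOCK_SIZE, typename value_type>
    __device__ void match_tile_phases()
    {
        extern __shared__ int smem[];

        // Phase 1: descriptor tiles for the distance accumulation loop.
        value_type* s_query = (value_type*)(smem);
        value_type* s_train = (value_type*)(smem + BLOCK_SIZE * BLOCK_SIZE);
        // ... fill the tiles and accumulate distances ...
        __syncthreads();   // all phase-1 reads must finish before the buffer is reused

        // Phase 2: the same bytes now hold per-candidate distances and train indices.
        float* s_distance = (float*)(smem);
        int*   s_trainIdx = (int*)(smem + BLOCK_SIZE * BLOCK_SIZE);
        // ... run the best-match reduction over s_distance / s_trainIdx ...
        (void)s_query; (void)s_train; (void)s_distance; (void)s_trainIdx;
    }
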
  /external/opencv3/modules/cudev/include/opencv2/cudev/warp/detail/
reduce_key_val.hpp 80 __device__ static void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, uint tid)
82 get<I>(smem)[tid] = get<I>(data);
84 For<I + 1, N>::loadToSmem(smem, data, tid);
137 __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, uint tid)
139 smem[tid] = data;
144 __device__ __forceinline__ void loadToSmem(const tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
148 For<0, tuple_size<tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
  /external/elfutils/src/
ldscript.y 230 obstack_alloc (&ld_state.smem,
282 obstack_alloc (&ld_state.smem, sizeof (*$$));
467 obstack_alloc (&ld_state.smem, sizeof (*newp));
478 obstack_alloc (&ld_state.smem, sizeof (*newp));
490 obstack_alloc (&ld_state.smem, sizeof (*newp));
502 memset (obstack_alloc (&ld_state.smem, sizeof (*newp)),
516 obstack_alloc (&ld_state.smem, sizeof (*newp));
534 = (struct output_segment *) obstack_alloc (&ld_state.smem, sizeof (*newp));
631 newp = (struct id_list *) obstack_alloc (&ld_state.smem, sizeof (*newp));
650 newp = (struct version *) obstack_alloc (&ld_state.smem, sizeof (*newp))
    [all...]
ld.c 307 obstack_init (&ld_state.smem);
    [all...]
ldlex.l 186 {ID} { ldlval.str = obstack_strndup (&ld_state.smem,
190 {FILENAMECHAR1}{FILENAMECHAR} { ldlval.str = obstack_strndup (&ld_state.smem,
ldgeneric.c 410 fileinfo->rfname = obstack_strdup (&ld_state.smem, rfname);
    [all...]
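
On the elfutils side, ld_state.smem is not GPU memory at all but an obstack: ld.c initializes it once with obstack_init, and ldscript.y/ldlex.l carve every parsed node and string out of it with obstack_alloc and string helpers, so the whole script representation can be released in one call. A minimal standalone obstack example against the standard glibc API; the id_list struct and the string literal are illustrative:

    #include <obstack.h>
    #include <stdlib.h>
    #include <string.h>

    /* obstack requires these two macros before its allocation macros are used */
    #define obstack_chunk_alloc malloc
    #define obstack_chunk_free  free

    struct id_list { const char *id; struct id_list *next; };

    int main(void)
    {
        struct obstack smem;
        obstack_init(&smem);                     /* like obstack_init(&ld_state.smem) in ld.c */

        /* allocate a list node out of the obstack, as ldscript.y does */
        struct id_list *newp = (struct id_list *) obstack_alloc(&smem, sizeof(*newp));
        newp->id = (const char *) obstack_copy0(&smem, "text", strlen("text")); /* copy + NUL */
        newp->next = NULL;

        obstack_free(&smem, NULL);               /* the whole arena goes away at once */
        return 0;
    }
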
  /external/opencv3/modules/cudev/include/opencv2/cudev/grid/detail/
reduce.hpp 71 __device__ __forceinline__ static volatile R* smem(R* ptr) function in struct:cv::cudev::grid_reduce_detail::Unroll
92 __device__ __forceinline__ static tuple<volatile R*, volatile R*> smem(R* ptr) function in struct:cv::cudev::grid_reduce_detail::Unroll
113 __device__ __forceinline__ static tuple<volatile R*, volatile R*, volatile R*> smem(R* ptr) function in struct:cv::cudev::grid_reduce_detail::Unroll
136 __device__ __forceinline__ static tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem(R* ptr) function in struct:cv::cudev::grid_reduce_detail::Unroll
279 __shared__ work_elem_type smem[BLOCK_SIZE * cn]; local
281 blockReduce<BLOCK_SIZE>(Unroll<cn>::template smem<BLOCK_SIZE>(smem), Unroll<cn>::res(sum), tid, Unroll<cn>::op(plus<work_elem_type>()));
339 __shared__ work_type smem[BLOCK_SIZE]; local
343 blockReduce<BLOCK_SIZE>(smem, myval, tid, op);
  /external/opencv3/modules/core/include/opencv2/core/cuda/detail/
reduce_key_val.hpp 74 static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
76 thrust::get<I>(smem)[tid] = thrust::get<I>(data);
78 For<I + 1, N>::loadToSmem(smem, data, tid);
81 static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
83 thrust::get<I>(data) = thrust::get<I>(smem)[tid];
85 For<I + 1, N>::loadFromSmem(smem, data, tid);
171 __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
173 smem[tid] = data;
176 __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
178 data = smem[tid]
    [all...]
  /external/opencv3/modules/cudafilters/src/cuda/
column_filter.hpp 74 __shared__ sum_t smem[(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_Y][BLOCK_DIM_X]; local
90 smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, x));
97 smem[threadIdx.y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_low(yStart - (HALO_SIZE - j) * BLOCK_DIM_Y, src_col, src.step));
105 smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + j * BLOCK_DIM_Y, x));
110 smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(src(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, x));
117 smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + j * BLOCK_DIM_Y, src_col, src.step));
122 smem[threadIdx.y + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_Y + j * BLOCK_DIM_Y][threadIdx.x] = saturate_cast<sum_t>(brd.at_high(yStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_Y, src_col, src.step));
138 sum = sum + smem[threadIdx.y + HALO_SIZE * BLOCK_DIM_Y + j * BLOCK_DIM_Y - anchor + k][threadIdx.x] * c_kernel[k];
row_filter.hpp 74 __shared__ sum_t smem[BLOCK_DIM_Y][(PATCH_PER_BLOCK + 2 * HALO_SIZE) * BLOCK_DIM_X]; local
90 smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart - (HALO_SIZE - j) * BLOCK_DIM_X]);
97 smem[threadIdx.y][threadIdx.x + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_low(xStart - (HALO_SIZE - j) * BLOCK_DIM_X, src_row));
105 smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + j * BLOCK_DIM_X]);
110 smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(src_row[xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X]);
117 smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + j * BLOCK_DIM_X, src_row));
122 smem[threadIdx.y][threadIdx.x + (PATCH_PER_BLOCK + HALO_SIZE) * BLOCK_DIM_X + j * BLOCK_DIM_X] = saturate_cast<sum_t>(brd.at_high(xStart + (PATCH_PER_BLOCK + j) * BLOCK_DIM_X, src_row));
138 sum = sum + smem[threadIdx.y][threadIdx.x + HALO_SIZE * BLOCK_DIM_X + j * BLOCK_DIM_X - anchor + k] * c_kernel[k];
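
column_filter.hpp and row_filter.hpp stage each tile plus HALO_SIZE extra blocks of border pixels into a 2D shared array so the convolution against c_kernel reads only shared memory. A stripped-down 1D sketch of the same main-plus-halo staging, assuming an odd kernel size and a block at least as wide as the halo (all names below are illustrative):

    // Sketch: stage BLOCK_DIM_X pixels plus a halo of KSIZE/2 on each side into
    // shared memory, then convolve from shared memory only. Assumes KSIZE is odd
    // and KSIZE/2 <= BLOCK_DIM_X; borders are clamped for simplicity.
    template <int BLOCK_DIM_X, int KSIZE>
    __global__ void row_filter_sketch(const float* src, float* dst, int width, const float* kernel)
    {
        __shared__ float smem[BLOCK_DIM_X + 2 * (KSIZE / 2)];

        const int halo = KSIZE / 2;
        const int x = blockIdx.x * BLOCK_DIM_X + threadIdx.x;

        // main pixel
        smem[halo + threadIdx.x] = src[min(x, width - 1)];

        // left and right halo pixels, loaded by the first `halo` threads
        if (threadIdx.x < halo)
        {
            smem[threadIdx.x] = src[max(x - halo, 0)];
            smem[halo + BLOCK_DIM_X + threadIdx.x] = src[min(x + BLOCK_DIM_X, width - 1)];
        }
        __syncthreads();

        if (x < width)
        {
            float sum = 0.f;
            for (int k = 0; k < KSIZE; ++k)
                sum += smem[threadIdx.x + k] * kernel[k];
            dst[x] = sum;
        }
    }
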
  /external/opencv3/modules/cudev/include/opencv2/cudev/block/
reduce.hpp 63 __device__ __forceinline__ void blockReduce(volatile T* smem, T& val, uint tid, const Op& op)
65 block_reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
72 __device__ __forceinline__ void blockReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
80 const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
  /external/opencv3/modules/video/src/opencl/
optical_flow_farneback.cl 78 __local float * smem,
85 __local float *row = smem + tx;
163 __local float * smem)
168 __local float *row = smem + ty * (bdx + 2*ksizeHalf);
202 __local float * smem)
208 __local volatile float *row = smem + 5 * ty * smw;
354 __local float * smem)
361 __local float *row = smem + 5 * ty * smw;
  /external/opencv3/modules/cudalegacy/src/cuda/
NCVBroxOpticalFlow.cu 261 ///\param smem pointer to shared memory array
269 __forceinline__ __device__ void load_array_element(float *smem, int is, int js, int i, int j, int w, int h, int p)
281 smem[ijs] = tex1Dfetch(tex_u, pos);
284 smem[ijs] = tex1Dfetch(tex_v, pos);
287 smem[ijs] = tex1Dfetch(tex_du, pos);
290 smem[ijs] = tex1Dfetch(tex_dv, pos);
297 ///\param smem pointer to target shared memory array
305 __forceinline__ __device__ void load_array(float *smem, int ig, int jg, int w, int h, int p)
309 load_array_element<tex>(smem, i, j, ig, jg, w, h, p);//load current pixel
314 load_array_element<tex>(smem, i, j-2, ig, jg-2, w, h, p)
    [all...]
bm_fast.cu 283 size_t smem = search_window * search_window * sizeof(int);
285 optflowbm_fast_kernel<<<grid, block, smem, stream>>>(fbm, velx, vely);
needle_map.cu 56 __shared__ float smem[2 * NEEDLE_MAP_SCALE];
58 volatile float* u_col_sum = smem;
  /external/opencv3/modules/cudaobjdetect/src/cuda/
hog.cu 129 extern __shared__ float smem[];
130 float* hists = smem;
131 float* final_hist = smem + cnbins * 48 * nblocks;
217 int smem = hists_size + final_hists_size;
218 compute_hists_kernel_many_blocks<nblocks><<<grid, threads, smem>>>(
232 __device__ float reduce_smem(float* smem, float val)
237 reduce<size>(smem, sum, tid, plus<float>());
244 return smem[0];
251 smem[0] = sum;
256 return smem[0]
    [all...]
  /external/opencv3/modules/core/include/opencv2/core/cuda/
reduce.hpp 59 __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
61 reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
67 __device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
75 const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
emulation.hpp 84 struct smem struct in struct:cv::cuda::device::Emulation
  /external/opencv3/modules/cudev/include/opencv2/cudev/warp/
reduce.hpp 62 __device__ __forceinline__ void warpReduce(volatile T* smem, T& val, uint tid, const Op& op)
64 warp_reduce_detail::WarpReductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
70 __device__ __forceinline__ void warpReduce(const tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
78 const tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
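
warp/reduce.hpp forwards to a warp-level reductor that combines the 32 lanes of a warp through a volatile shared-memory slice (or a tuple of slices). The classic single-value form of such a reductor looks roughly like the sketch below; the cudev version additionally handles tuples of values and operators, and newer code would prefer __syncwarp() or shuffle intrinsics:

    // Sketch of a 32-lane warp reduction through a volatile shared-memory slice.
    // tid is the lane id; no __syncthreads() is needed because all lanes belong to
    // one warp, but on Volta and later the lockstep assumption no longer holds and
    // __syncwarp() or shuffles should be used instead.
    template <typename T, class Op>
    __device__ __forceinline__ void warp_reduce_sketch(volatile T* smem, T& val, unsigned int tid, const Op& op)
    {
        smem[tid] = val;

        if (tid < 16)
        {
        #pragma unroll
            for (int delta = 16; delta >= 1; delta /= 2)
            {
                T other = smem[tid + delta];       // read before any lane overwrites it
                smem[tid] = val = op(val, other);  // lane 0 ends up with the full result
            }
        }
    }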

