    Searched refs:threadIdx (Results 1 - 25 of 95)


  /external/opencv3/modules/cudalegacy/src/cuda/
NCVAlg.hpp 117 functor.assign(reduceArr + threadIdx.x, &threadElem);
120 if (nThreads >= 256 && threadIdx.x < 128)
122 functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 128]);
126 if (nThreads >= 128 && threadIdx.x < 64)
128 functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 64]);
132 if (threadIdx.x < 32)
136 functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 32])
    [all...]
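
The NCVAlg.hpp hits are the classic shared-memory tree reduction: each halving step is guarded by the compile-time thread count, and the last 32 values are folded inside a single warp. A self-contained sketch of that shape, assuming a power-of-two block of at least 64 threads and an input length that is a multiple of the block size (the kernel name blockSum is mine, not OpenCV's):

    template <int NTHREADS>                      // power of two, >= 64
    __global__ void blockSum(const float* in, float* out)
    {
        __shared__ float smem[NTHREADS];
        smem[threadIdx.x] = in[blockIdx.x * NTHREADS + threadIdx.x];
        __syncthreads();

        // Halve the active thread count each step, mirroring
        // "if (nThreads >= 256 && threadIdx.x < 128) ..." above.
        for (int s = NTHREADS / 2; s > 32; s >>= 1)
        {
            if (threadIdx.x < s)
                smem[threadIdx.x] += smem[threadIdx.x + s];
            __syncthreads();
        }

        // Finish the last 64 partials in one warp with shuffles, the
        // post-Volta replacement for the old volatile-smem idiom.
        if (threadIdx.x < 32)
        {
            float v = smem[threadIdx.x] + smem[threadIdx.x + 32];
            for (int offset = 16; offset > 0; offset >>= 1)
                v += __shfl_down_sync(0xffffffffu, v, offset);
            if (threadIdx.x == 0)
                out[blockIdx.x] = v;
        }
    }

Launched as blockSum<256><<<n / 256, 256>>>(d_in, d_out), it leaves one partial sum per block; the OpenCV functor version generalizes the += to any associative operation.
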
needle_map.cu 61 const int x = blockIdx.x * NEEDLE_MAP_SCALE + threadIdx.x;
64 u_col_sum[threadIdx.x] = 0;
65 v_col_sum[threadIdx.x] = 0;
70 u_col_sum[threadIdx.x] += u(::min(y + i, u.rows - 1), x);
71 v_col_sum[threadIdx.x] += v(::min(y + i, u.rows - 1), x);
74 if (threadIdx.x < 8)
77 const uint X = threadIdx.x;
81 u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 1];
82 v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 1]
    [all...]
  /external/opencv3/modules/cudev/include/opencv2/cudev/grid/detail/
integral.hpp 77 const int curElemOffs = offsetX + threadIdx.x;
84 const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x);
93 if (threadIdx.x == NUM_SCAN_THREADS - 1)
120 const int curElemOffs = offsetX + threadIdx.x;
127 const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x);
136 if (threadIdx.x == NUM_SCAN_THREADS - 1)
174 const int id = threadIdx.x;
238 if (threadIdx.x % warpSize == warpSize - 1)
329 if (threadIdx.x % 4 == 0)
332 if (threadIdx.x % 4 == 1
    [all...]
pyr_up.hpp 68 const int x = blockIdx.x * blockDim.x + threadIdx.x;
69 const int y = blockIdx.y * blockDim.y + threadIdx.y;
74 if (threadIdx.x < 10 && threadIdx.y < 10)
76 int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
77 int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;
85 s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<work_type>(src(srcy, srcx));
92 const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
93 const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0)
    [all...]
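
pyr_up.hpp upsamples a half-resolution tile: for a 16x16 output block, only the first 10x10 threads fetch an 8x8 source patch plus a one-pixel border into shared memory, and the (threadIdx.x & 1) even/odd flags later select the interpolation taps. A sketch of just the clamped halo load, assuming a 16x16 block and row-major float input (loadHaloPatch is an invented name; the interpolation is elided):

    __global__ void loadHaloPatch(const float* src, int rows, int cols, int stride)
    {
        __shared__ float s_srcPatch[10][10];

        if (threadIdx.x < 10 && threadIdx.y < 10)   // 10x10 loaders: 8x8 patch + border
        {
            // Source coordinates at half the output resolution, shifted
            // up/left by one for the border.
            int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
            int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;

            srcx = ::max(0, ::min(srcx, cols - 1)); // replicate edge pixels
            srcy = ::max(0, ::min(srcy, rows - 1));

            s_srcPatch[threadIdx.y][threadIdx.x] = src[srcy * stride + srcx];
        }
        __syncthreads();

        // pyr_up.hpp continues here: the even/odd flags weight the taps so
        // each source pixel expands into a 2x2 output neighbourhood.
    }
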
reduce_to_row.hpp 64 const int x = blockIdx.x * BLOCK_SIZE_X + threadIdx.x;
72 for (int y = threadIdx.y; y < rows; y += BLOCK_SIZE_Y)
81 smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal;
85 volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X;
87 myVal = srow[threadIdx.x];
88 blockReduce<BLOCK_SIZE_X>(srow, myVal, threadIdx.x, op);
90 if (threadIdx.x == 0)
95 if (threadIdx.y == 0 && x < cols)
96 dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows))
    [all...]
transpose.hpp 80 int xIndex = blockIdx_x * TILE_DIM + threadIdx.x;
81 int yIndex = blockIdx_y * TILE_DIM + threadIdx.y;
89 tile[threadIdx.y + i][threadIdx.x] = src(yIndex + i, xIndex);
96 xIndex = blockIdx_y * TILE_DIM + threadIdx.x;
97 yIndex = blockIdx_x * TILE_DIM + threadIdx.y;
105 dst(yIndex + i, xIndex) = saturate_cast<DstType>(tile[threadIdx.x][threadIdx.y + i]);
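
transpose.hpp is the textbook coalesced transpose: a TILE_DIM-square tile is read row-wise into shared memory, then written out with the block coordinates swapped so the stores coalesce as well. The same idea simplified to a square 16x16 thread block (the OpenCV kernel loops over i so a flatter block covers the tile; the +1 padding against bank conflicts is the usual companion trick):

    constexpr int TILE_DIM = 16;

    __global__ void transposeTiled(const float* src, float* dst, int rows, int cols)
    {
        __shared__ float tile[TILE_DIM][TILE_DIM + 1];  // +1 pad: no bank conflicts

        int x = blockIdx.x * TILE_DIM + threadIdx.x;
        int y = blockIdx.y * TILE_DIM + threadIdx.y;
        if (x < cols && y < rows)
            tile[threadIdx.y][threadIdx.x] = src[y * cols + x];

        __syncthreads();

        // Swap the blockIdx roles for the write, as the snippet above
        // does with blockIdx_y / blockIdx_x, so output stays coalesced.
        x = blockIdx.y * TILE_DIM + threadIdx.x;
        y = blockIdx.x * TILE_DIM + threadIdx.y;
        if (x < rows && y < cols)
            dst[y * rows + x] = tile[threadIdx.x][threadIdx.y];
    }
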
pyr_down.hpp 70 const int x = blockIdx.x * blockDim.x + threadIdx.x;
86 smem[2 + threadIdx.x] = sum;
89 if (threadIdx.x < 2)
101 smem[threadIdx.x] = sum;
104 if (threadIdx.x > 253)
116 smem[4 + threadIdx.x] = sum;
130 smem[2 + threadIdx.x] = sum;
133 if (threadIdx.x < 2)
145 smem[threadIdx.x] = sum;
148 if (threadIdx.x > 253
    [all...]
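
pyr_down.hpp stages one block-row of 256 pixels plus a two-pixel apron per side in shared memory, so the 5-tap Gaussian window never leaves the tile: threadIdx.x < 2 fills the left apron and threadIdx.x > 253 the right one. A cut-down version of that staging for a generic 5-tap row filter, assuming blockDim.x == 256 (row5tap and the uniform weights are placeholders for the 1-4-6-4-1 Gaussian, and the 2x decimation step is omitted):

    __global__ void row5tap(const float* src, float* dst, int cols)
    {
        __shared__ float smem[256 + 4];              // 2-pixel apron each side

        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        auto clampX = [cols](int v) { return ::max(0, ::min(v, cols - 1)); };

        smem[2 + threadIdx.x] = src[clampX(x)];
        if (threadIdx.x < 2)                         // left apron
            smem[threadIdx.x] = src[clampX(x - 2)];
        if (threadIdx.x > 253)                       // right apron
            smem[4 + threadIdx.x] = src[clampX(x + 2)];
        __syncthreads();

        if (x < cols)
        {
            const int c = 2 + threadIdx.x;           // centre of the window
            dst[x] = 0.2f * (smem[c - 2] + smem[c - 1] + smem[c] +
                             smem[c + 1] + smem[c + 2]);
        }
    }
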
histogram.hpp 62 const int y = blockIdx.x * blockDim.y + threadIdx.y;
63 const int tid = threadIdx.y * blockDim.x + threadIdx.x;
72 for (int x = threadIdx.x; x < cols; x += blockDim.x)
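
histogram.hpp (and hist.cu further down) assigns one block row per image row, clears the 256-bin shared histogram cooperatively through the flattened tid, strides threads across the row, and merges per-block counts with atomics. A condensed sketch under those assumptions, minus the uchar4 vectorization visible in the hits (gray8Hist is a made-up name; it needs blockDim.x * blockDim.y >= 256):

    __global__ void gray8Hist(const unsigned char* img, int rows, int cols,
                              int stride, unsigned int* hist)
    {
        __shared__ unsigned int shist[256];

        const int y   = blockIdx.x * blockDim.y + threadIdx.y;
        const int tid = threadIdx.y * blockDim.x + threadIdx.x;

        if (tid < 256)                      // cooperative clear
            shist[tid] = 0;
        __syncthreads();

        if (y < rows)
        {
            const unsigned char* row = img + y * stride;
            for (int x = threadIdx.x; x < cols; x += blockDim.x)
                atomicAdd(&shist[row[x]], 1u);
        }
        __syncthreads();

        if (tid < 256 && shist[tid] > 0)    // one global atomic per live bin
            atomicAdd(hist + tid, shist[tid]);
    }
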
reduce_to_column.hpp 64 blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op);
73 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op));
82 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op));
91 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2], smem[3]), tie(myVal.x, myVal.y, myVal.z, myVal.w), threadIdx.x, make_tuple(op, op, op, op));
110 for (int x = threadIdx.x; x < cols; x += BLOCK_SIZE)
120 if (threadIdx.x == 0)
minmaxloc.hpp 67 const int x0 = blockIdx.x * blockDim.x * patch_x + threadIdx.x;
68 const int y0 = blockIdx.y * blockDim.y * patch_y + threadIdx.y;
98 const int tid = threadIdx.y * blockDim.x + threadIdx.x;
124 const int idx = ::min(threadIdx.x, count - 1);
133 threadIdx.x,
136 if (threadIdx.x == 0)
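
minmaxloc.hpp makes each thread scan a patch of the grid and then runs a key/value reduction, so the winning value drags its location along. A single-block argmin sketch of the value-plus-index idea, built on warp shuffles rather than OpenCV's smem-tuple reduce (blockArgMin is illustrative; OpenCV tracks min and max together):

    #include <cfloat>

    __global__ void blockArgMin(const float* data, int n,
                                float* bestVal, int* bestIdx)
    {
        __shared__ float sVal[32];
        __shared__ int   sIdx[32];

        float myVal = FLT_MAX;
        int   myIdx = -1;
        for (int i = threadIdx.x; i < n; i += blockDim.x)
            if (data[i] < myVal) { myVal = data[i]; myIdx = i; }

        // Key/value warp reduction: value and index move together.
        for (int off = 16; off > 0; off >>= 1)
        {
            const float v = __shfl_down_sync(0xffffffffu, myVal, off);
            const int   j = __shfl_down_sync(0xffffffffu, myIdx, off);
            if (v < myVal) { myVal = v; myIdx = j; }
        }
        if ((threadIdx.x & 31) == 0)        // lane 0 of each warp
        {
            sVal[threadIdx.x >> 5] = myVal;
            sIdx[threadIdx.x >> 5] = myIdx;
        }
        __syncthreads();

        if (threadIdx.x < 32)               // first warp folds the champions
        {
            const int nw = (blockDim.x + 31) >> 5;
            myVal = (threadIdx.x < nw) ? sVal[threadIdx.x] : FLT_MAX;
            myIdx = (threadIdx.x < nw) ? sIdx[threadIdx.x] : -1;
            for (int off = 16; off > 0; off >>= 1)
            {
                const float v = __shfl_down_sync(0xffffffffu, myVal, off);
                const int   j = __shfl_down_sync(0xffffffffu, myIdx, off);
                if (v < myVal) { myVal = v; myIdx = j; }
            }
            if (threadIdx.x == 0) { *bestVal = myVal; *bestIdx = myIdx; }
        }
    }
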
  /external/opencv3/modules/cudaimgproc/src/cuda/
canny.cu 108 const int x = blockIdx.x * blockDim.x + threadIdx.x;
109 const int y = blockIdx.y * blockDim.y + threadIdx.y;
174 const int x = blockIdx.x * blockDim.x + threadIdx.x;
175 const int y = blockIdx.y * blockDim.y + threadIdx.y;
251 const int x = blockIdx.x * blockDim.x + threadIdx.x;
252 const int y = blockIdx.y * blockDim.y + threadIdx.y;
254 smem[threadIdx.y + 1][threadIdx.x + 1] = checkIdx(y, x, map.rows, map.cols) ? map(y, x) : 0;
255 if (threadIdx.y == 0)
256 smem[0][threadIdx.x + 1] = checkIdx(y - 1, x, map.rows, map.cols) ? map(y - 1, x) : 0
    [all...]
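
The canny.cu hits at lines 254-256 stage the block's map tile plus a one-pixel halo in shared memory: every thread loads its own cell, and border threads (threadIdx.y == 0, and so on) fetch the halo, with checkIdx zero-filling out-of-range reads. The staging in isolation, assuming a 16x16 block (haloLoad is an invented wrapper; corners and the edge-following loop are left out):

    __device__ __forceinline__ bool checkIdx(int y, int x, int rows, int cols)
    {
        return y >= 0 && y < rows && x >= 0 && x < cols;
    }

    __global__ void haloLoad(const int* map, int rows, int cols, int stride)
    {
        __shared__ int smem[18][18];        // 16x16 tile + 1-pixel halo

        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        auto fetch = [&](int yy, int xx) {
            return checkIdx(yy, xx, rows, cols) ? map[yy * stride + xx] : 0;
        };

        smem[threadIdx.y + 1][threadIdx.x + 1] = fetch(y, x);
        if (threadIdx.y == 0)                       // top halo row
            smem[0][threadIdx.x + 1] = fetch(y - 1, x);
        if (threadIdx.y == blockDim.y - 1)          // bottom halo row
            smem[blockDim.y + 1][threadIdx.x + 1] = fetch(y + 1, x);
        if (threadIdx.x == 0)                       // left halo column
            smem[threadIdx.y + 1][0] = fetch(y, x - 1);
        if (threadIdx.x == blockDim.x - 1)          // right halo column
            smem[threadIdx.y + 1][blockDim.x + 1] = fetch(y, x + 1);
        __syncthreads();

        // 8-connected hysteresis over smem would run from here.
    }
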
build_point_list.cu 61 const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
62 const int y = blockIdx.y * blockDim.y + threadIdx.y;
64 if (threadIdx.x == 0)
65 s_qsize[threadIdx.y] = 0;
77 const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
78 s_queues[threadIdx.y][qidx] = val;
86 if (threadIdx.x == 0 && threadIdx.y == 0)
105 const int qsize = s_qsize[threadIdx.y];
106 int gidx = s_globStart[threadIdx.y] + threadIdx.x
    [all...]
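
build_point_list.cu compacts edge pixels through a two-level queue: each of the four block rows appends to its own shared queue with a shared-memory atomicAdd on s_qsize, then a single thread reserves global space for all queues at once, and every row copies its queue out. A hedged reconstruction with plain CUDA atomics in place of OpenCV's Emulation::smem helpers (compactNonZero and MAX_Q are mine; it assumes blockDim == (32, 4)):

    constexpr int PIXELS_PER_THREAD = 16;
    constexpr int MAX_Q = 32 * PIXELS_PER_THREAD;   // worst case per row

    __global__ void compactNonZero(const unsigned char* src, int rows, int cols,
                                   int stride, unsigned int* list,
                                   unsigned int* counter)
    {
        __shared__ unsigned int s_queues[4][MAX_Q];
        __shared__ int s_qsize[4];
        __shared__ int s_globStart[4];

        const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (threadIdx.x == 0)
            s_qsize[threadIdx.y] = 0;
        __syncthreads();

        if (y < rows)
        {
            const unsigned char* row = src + y * stride;
            for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < cols;
                 ++i, xx += blockDim.x)
            {
                if (row[xx])
                {
                    const unsigned int val = (y << 16) | xx;   // packed (y, x)
                    const int qidx = atomicAdd(&s_qsize[threadIdx.y], 1);
                    s_queues[threadIdx.y][qidx] = val;
                }
            }
        }
        __syncthreads();

        if (threadIdx.x == 0 && threadIdx.y == 0)   // reserve once per block
        {
            int total = 0;
            for (int i = 0; i < 4; ++i)
            {
                s_globStart[i] = total;
                total += s_qsize[i];
            }
            const unsigned int base = atomicAdd(counter, (unsigned int)total);
            for (int i = 0; i < 4; ++i)
                s_globStart[i] += (int)base;
        }
        __syncthreads();

        const int qsize = s_qsize[threadIdx.y];     // row-parallel copy-out
        for (int i = threadIdx.x; i < qsize; i += blockDim.x)
            list[s_globStart[threadIdx.y] + i] = s_queues[threadIdx.y][i];
    }
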
hist.cu 59 const int y = blockIdx.x * blockDim.y + threadIdx.y;
60 const int tid = threadIdx.y * blockDim.x + threadIdx.x;
70 for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
80 if (cols % 4 != 0 && threadIdx.x == 0)
128 const int y = blockIdx.x * blockDim.y + threadIdx.y;
129 const int tid = threadIdx.y * blockDim.x + threadIdx.x;
142 for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
152 if (cols % 4 != 0 && threadIdx.x == 0
    [all...]
clahe.cu 65 const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x;
70 for (int i = threadIdx.y; i < tileSize.y; i += blockDim.y)
73 for (int j = threadIdx.x; j < tileSize.x; j += blockDim.x)
139 const int x = blockIdx.x * blockDim.x + threadIdx.x;
140 const int y = blockIdx.y * blockDim.y + threadIdx.y;
hough_lines.cu 75 for (int i = threadIdx.x; i < count; i += blockDim.x)
93 for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
109 for (int i = threadIdx.x; i < count; i += blockDim.x)
125 for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
151 const int r = blockIdx.x * blockDim.x + threadIdx.x;
152 const int n = blockIdx.y * blockDim.y + threadIdx.y;
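
Most of the hough_lines.cu hits are the block-stride loop `for (int i = threadIdx.x; i < count; i += blockDim.x)`, which lets one block of any width walk a whole list; the last two map (r, n) accumulator cells onto a 2D grid. The stride idiom on its own (countAbove is a toy reduction over the vote list):

    __global__ void countAbove(const int* votes, int count, int threshold,
                               unsigned int* result)
    {
        unsigned int mine = 0;

        // Thread i visits elements i, i + blockDim.x, i + 2*blockDim.x, ...
        for (int i = threadIdx.x; i < count; i += blockDim.x)
            if (votes[i] > threshold)
                ++mine;

        atomicAdd(result, mine);    // a blockReduce would batch this in practice
    }
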
  /external/opencv3/modules/cudawarping/src/cuda/
pyr_up.cu 59 const int x = blockIdx.x * blockDim.x + threadIdx.x;
60 const int y = blockIdx.y * blockDim.y + threadIdx.y;
65 if (threadIdx.x < 10 && threadIdx.y < 10)
67 int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1;
68 int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1;
76 s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<sum_t>(src(srcy, srcx));
83 const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0);
84 const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0)
    [all...]
pyr_down.cu 61 const int x = blockIdx.x * blockDim.x + threadIdx.x;
77 smem[2 + threadIdx.x] = sum;
80 if (threadIdx.x < 2)
92 smem[threadIdx.x] = sum;
95 if (threadIdx.x > 253)
107 smem[4 + threadIdx.x] = sum;
121 smem[2 + threadIdx.x] = sum;
124 if (threadIdx.x < 2)
136 smem[threadIdx.x] = sum;
139 if (threadIdx.x > 253
    [all...]
  /external/opencv3/modules/cudafeatures2d/src/cuda/
bf_knnmatch.cu 109 s_distance += threadIdx.y * BLOCK_SIZE;
110 s_trainIdx += threadIdx.y * BLOCK_SIZE;
112 s_distance[threadIdx.x] = bestDistance1;
113 s_trainIdx[threadIdx.x] = bestTrainIdx1;
117 if (threadIdx.x == 0)
142 s_distance[threadIdx.x] = bestDistance2;
143 s_trainIdx[threadIdx.x] = bestTrainIdx2;
147 if (threadIdx.x == 0)
230 s_distance += threadIdx.y * BLOCK_SIZE;
231 s_trainIdx += threadIdx.y * BLOCK_SIZE
    [all...]
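
bf_knnmatch.cu (and bf_match.cu below) give every query handled by a block row a private slice of shared memory by offsetting the base pointers with threadIdx.y * BLOCK_SIZE before reducing. The slicing idiom, assuming blockDim == (32, R) launched with R * BLOCK_SIZE floats of dynamic shared memory so each row is exactly one warp (perRowMin is illustrative):

    #include <cfloat>

    constexpr int BLOCK_SIZE = 32;

    __global__ void perRowMin(const float* cand, int numQueries, int n, float* out)
    {
        extern __shared__ float smem[];
        float* s_distance = smem;
        s_distance += threadIdx.y * BLOCK_SIZE;     // this row's private slice

        const int queryIdx = blockIdx.x * blockDim.y + threadIdx.y;
        if (queryIdx >= numQueries)
            return;                                 // whole warp exits together

        float best = FLT_MAX;
        for (int i = threadIdx.x; i < n; i += BLOCK_SIZE)
            best = fminf(best, cand[queryIdx * n + i]);

        s_distance[threadIdx.x] = best;
        __syncwarp();

        // One row == one warp, so warp syncs suffice for the reduce.
        for (int off = BLOCK_SIZE / 2; off > 0; off >>= 1)
        {
            if (threadIdx.x < off)
                s_distance[threadIdx.x] = fminf(s_distance[threadIdx.x],
                                                s_distance[threadIdx.x + off]);
            __syncwarp();
        }
        if (threadIdx.x == 0)
            out[queryIdx] = s_distance[0];
    }

A launch would look like perRowMin<<<grid, dim3(32, R), R * 32 * sizeof(float)>>>(...); OpenCV's version additionally carries the train index through the reduce.
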
bf_match.cu 62 s_distance += threadIdx.y * BLOCK_SIZE;
63 s_trainIdx += threadIdx.y * BLOCK_SIZE;
65 reduceKeyVal<BLOCK_SIZE>(s_distance, bestDistance, s_trainIdx, bestTrainIdx, threadIdx.x, less<float>());
71 s_distance += threadIdx.y * BLOCK_SIZE;
72 s_trainIdx += threadIdx.y * BLOCK_SIZE;
73 s_imgIdx += threadIdx.y * BLOCK_SIZE;
75 reduceKeyVal<BLOCK_SIZE>(s_distance, bestDistance, smem_tuple(s_trainIdx, s_imgIdx), thrust::tie(bestTrainIdx, bestImgIdx), threadIdx.x, less<float>());
87 const int loadX = threadIdx.x + i * BLOCK_SIZE;
88 s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(::min(queryIdx, query.rows - 1))[loadX] : 0;
104 const int loadX = threadIdx.x + i * BLOCK_SIZE
    [all...]
orb.cu 78 const int ptidx = blockIdx.x * blockDim.y + threadIdx.y;
90 for (int ind = threadIdx.x; ind < blockSize * blockSize; ind += blockDim.x)
108 int* srow0 = smem0 + threadIdx.y * blockDim.x;
109 int* srow1 = smem1 + threadIdx.y * blockDim.x;
110 int* srow2 = smem2 + threadIdx.y * blockDim.x;
113 reduce<32>(smem_tuple(srow0, srow1, srow2), thrust::tie(a, b, c), threadIdx.x, thrust::make_tuple(op, op, op));
115 if (threadIdx.x == 0)
156 int* srow0 = smem0 + threadIdx.y * blockDim.x;
157 int* srow1 = smem1 + threadIdx.y * blockDim.x;
161 const int ptidx = blockIdx.x * blockDim.y + threadIdx.y
    [all...]
bf_radius_match.cu 64 const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
65 const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
75 const int loadX = threadIdx.x + i * BLOCK_SIZE;
77 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
78 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
85 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
87 ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val)
    [all...]
  /external/opencv3/modules/cudaobjdetect/src/cuda/
hog.cu 121 const int block_x = threadIdx.z;
122 const int cell_x = threadIdx.x / 16;
123 const int cell_y = threadIdx.y;
124 const int cell_thread_x = threadIdx.x & 0xF;
151 const int dist_y_begin = -4 - 4 * (int)threadIdx.y;
234 unsigned int tid = threadIdx.x;
250 if (threadIdx.x == 0)
267 if (blockIdx.x * blockDim.z + threadIdx.z >= img_block_width)
271 blockIdx.x * blockDim.z + threadIdx.z) *
272 block_hist_size + threadIdx.x
    [all...]
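
hog.cu packs several HOG blocks into one CUDA block, using threadIdx.z to select the block and splitting threadIdx.x into a cell column (/ 16) and an intra-cell lane (& 0xF). The index decomposition by itself, with the gradient-voting body elided (hogIndexing is a stub name):

    __global__ void hogIndexing(/* image and histogram buffers */)
    {
        const int block_x       = threadIdx.z;        // HOG block within this CUDA block
        const int cell_x        = threadIdx.x / 16;   // cell column (0 or 1 for 32 lanes)
        const int cell_y        = threadIdx.y;        // cell row
        const int cell_thread_x = threadIdx.x & 0xF;  // lane within the cell, 0..15

        // The real kernel accumulates gradient votes for
        // (block_x, cell_x, cell_y) using lane cell_thread_x.
        (void)block_x; (void)cell_x; (void)cell_y; (void)cell_thread_x;
    }
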
  /external/opencv3/modules/core/include/opencv2/core/cuda/
warp_reduce.hpp 55 __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
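
warp_reduce.hpp defaults its tid argument to threadIdx.x and reduces over a volatile shared pointer, the lock-step idiom that predates Volta's independent thread scheduling. An equivalent with the same signature plus the __syncwarp() calls modern GPUs require (a sketch, not OpenCV's actual body):

    template <typename T>
    __device__ __forceinline__ T warp_reduce(volatile T* ptr,
                                             const unsigned int tid = threadIdx.x)
    {
        const unsigned int lane = tid & 31;        // position within the warp
        if (lane < 16) ptr[tid] += ptr[tid + 16];  __syncwarp();
        if (lane <  8) ptr[tid] += ptr[tid +  8];  __syncwarp();
        if (lane <  4) ptr[tid] += ptr[tid +  4];  __syncwarp();
        if (lane <  2) ptr[tid] += ptr[tid +  2];  __syncwarp();
        if (lane <  1) ptr[tid] += ptr[tid +  1];  __syncwarp();
        return ptr[tid - lane];                    // warp total at the segment base
    }
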
  /external/opencv3/modules/cudaoptflow/src/cuda/
pyrlk.cu 133 const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x;
162 for (int yBase = threadIdx.y, i = 0; yBase < c_winSize_y; yBase += blockDim.y, ++i)
164 for (int xBase = threadIdx.x, j = 0; xBase < c_winSize_x; xBase += blockDim.x, ++j)
241 for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
243 for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
284 for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i)
286 for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j)
344 for (int i = threadIdx.y; i < patchHeight; i += blockDim.y)
346 for (int j = threadIdx.x; j < patchWidth; j += blockDim.x
    [all...]
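
pyrlk.cu covers an arbitrary c_winSize_x by c_winSize_y tracking window with a fixed block by striding each thread across a lattice of window cells, while the secondary counters (i, j) give every thread a dense index into its private cache of visited cells. The loop shape with an explicit per-thread cache (windowSum is invented; the bounds assume the window spans at most 8x8 cells per thread):

    __global__ void windowSum(const float* patch, int winW, int winH,
                              int stride, float* out)
    {
        float cache[8][8];      // per-thread slice of the window
        float acc = 0.f;

        for (int y = threadIdx.y, i = 0; y < winH; y += blockDim.y, ++i)
            for (int x = threadIdx.x, j = 0; x < winW; x += blockDim.x, ++j)
            {
                cache[i][j] = patch[y * stride + x];   // revisit later for free
                acc += cache[i][j];
            }

        atomicAdd(out, acc);    // pyrlk.cu reduces with smem tuples instead
    }
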
  /external/opencv3/modules/cudev/include/opencv2/cudev/block/
block.hpp 70 return (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;
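
block.hpp's helper flattens the 3D thread index into a single rank with x varying fastest, which matches how the hardware numbers threads within a block. Stated as a standalone function (the name flatThreadId is mine):

    __device__ __forceinline__ unsigned int flatThreadId()
    {
        // rank = ((z * dimY) + y) * dimX + x
        return (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;
    }

This is the generic tid used for cooperative work such as the histogram clear sketched earlier.
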

