/external/opencv3/modules/cudalegacy/src/cuda/ |
NCVAlg.hpp | 117 functor.assign(reduceArr + threadIdx.x, &threadElem); 120 if (nThreads >= 256 && threadIdx.x < 128) 122 functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 128]); 126 if (nThreads >= 128 && threadIdx.x < 64) 128 functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 64]); 132 if (threadIdx.x < 32) 136 functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 32]) [all...] |
needle_map.cu | 61 const int x = blockIdx.x * NEEDLE_MAP_SCALE + threadIdx.x; 64 u_col_sum[threadIdx.x] = 0; 65 v_col_sum[threadIdx.x] = 0; 70 u_col_sum[threadIdx.x] += u(::min(y + i, u.rows - 1), x); 71 v_col_sum[threadIdx.x] += v(::min(y + i, u.rows - 1), x); 74 if (threadIdx.x < 8) 77 const uint X = threadIdx.x; 81 u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 1]; 82 v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 1] [all...] |
/external/opencv3/modules/cudev/include/opencv2/cudev/grid/detail/ |
integral.hpp | 77 const int curElemOffs = offsetX + threadIdx.x; 84 const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x); 93 if (threadIdx.x == NUM_SCAN_THREADS - 1) 120 const int curElemOffs = offsetX + threadIdx.x; 127 const D curScanElem = blockScanInclusive<NUM_SCAN_THREADS>(curElem, smem, threadIdx.x); 136 if (threadIdx.x == NUM_SCAN_THREADS - 1) 174 const int id = threadIdx.x; 238 if (threadIdx.x % warpSize == warpSize - 1) 329 if (threadIdx.x % 4 == 0) 332 if (threadIdx.x % 4 == 1 [all...] |
pyr_up.hpp | 68 const int x = blockIdx.x * blockDim.x + threadIdx.x; 69 const int y = blockIdx.y * blockDim.y + threadIdx.y; 74 if (threadIdx.x < 10 && threadIdx.y < 10) 76 int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1; 77 int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1; 85 s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<work_type>(src(srcy, srcx)); 92 const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0); 93 const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0) [all...] |
reduce_to_row.hpp | 64 const int x = blockIdx.x * BLOCK_SIZE_X + threadIdx.x; 72 for (int y = threadIdx.y; y < rows; y += BLOCK_SIZE_Y) 81 smem[threadIdx.x * BLOCK_SIZE_Y + threadIdx.y] = myVal; 85 volatile work_type* srow = smem + threadIdx.y * BLOCK_SIZE_X; 87 myVal = srow[threadIdx.x]; 88 blockReduce<BLOCK_SIZE_X>(srow, myVal, threadIdx.x, op); 90 if (threadIdx.x == 0) 95 if (threadIdx.y == 0 && x < cols) 96 dst[x] = saturate_cast<ResType>(Reductor::result(smem[threadIdx.x * BLOCK_SIZE_X], rows)) [all...] |
transpose.hpp | 80 int xIndex = blockIdx_x * TILE_DIM + threadIdx.x; 81 int yIndex = blockIdx_y * TILE_DIM + threadIdx.y; 89 tile[threadIdx.y + i][threadIdx.x] = src(yIndex + i, xIndex); 96 xIndex = blockIdx_y * TILE_DIM + threadIdx.x; 97 yIndex = blockIdx_x * TILE_DIM + threadIdx.y; 105 dst(yIndex + i, xIndex) = saturate_cast<DstType>(tile[threadIdx.x][threadIdx.y + i]);
|
pyr_down.hpp | 70 const int x = blockIdx.x * blockDim.x + threadIdx.x; 86 smem[2 + threadIdx.x] = sum; 89 if (threadIdx.x < 2) 101 smem[threadIdx.x] = sum; 104 if (threadIdx.x > 253) 116 smem[4 + threadIdx.x] = sum; 130 smem[2 + threadIdx.x] = sum; 133 if (threadIdx.x < 2) 145 smem[threadIdx.x] = sum; 148 if (threadIdx.x > 253 [all...] |
histogram.hpp | 62 const int y = blockIdx.x * blockDim.y + threadIdx.y; 63 const int tid = threadIdx.y * blockDim.x + threadIdx.x; 72 for (int x = threadIdx.x; x < cols; x += blockDim.x)
|
reduce_to_column.hpp | 64 blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op); 73 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op)); 82 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op)); 91 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2], smem[3]), tie(myVal.x, myVal.y, myVal.z, myVal.w), threadIdx.x, make_tuple(op, op, op, op)); 110 for (int x = threadIdx.x; x < cols; x += BLOCK_SIZE) 120 if (threadIdx.x == 0)
|
minmaxloc.hpp | 67 const int x0 = blockIdx.x * blockDim.x * patch_x + threadIdx.x; 68 const int y0 = blockIdx.y * blockDim.y * patch_y + threadIdx.y; 98 const int tid = threadIdx.y * blockDim.x + threadIdx.x; 124 const int idx = ::min(threadIdx.x, count - 1); 133 threadIdx.x, 136 if (threadIdx.x == 0)
|
/external/opencv3/modules/cudaimgproc/src/cuda/ |
canny.cu | 108 const int x = blockIdx.x * blockDim.x + threadIdx.x; 109 const int y = blockIdx.y * blockDim.y + threadIdx.y; 174 const int x = blockIdx.x * blockDim.x + threadIdx.x; 175 const int y = blockIdx.y * blockDim.y + threadIdx.y; 251 const int x = blockIdx.x * blockDim.x + threadIdx.x; 252 const int y = blockIdx.y * blockDim.y + threadIdx.y; 254 smem[threadIdx.y + 1][threadIdx.x + 1] = checkIdx(y, x, map.rows, map.cols) ? map(y, x) : 0; 255 if (threadIdx.y == 0) 256 smem[0][threadIdx.x + 1] = checkIdx(y - 1, x, map.rows, map.cols) ? map(y - 1, x) : 0 [all...] |
build_point_list.cu | 61 const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x; 62 const int y = blockIdx.y * blockDim.y + threadIdx.y; 64 if (threadIdx.x == 0) 65 s_qsize[threadIdx.y] = 0; 77 const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1); 78 s_queues[threadIdx.y][qidx] = val; 86 if (threadIdx.x == 0 && threadIdx.y == 0) 105 const int qsize = s_qsize[threadIdx.y]; 106 int gidx = s_globStart[threadIdx.y] + threadIdx.x [all...] |
hist.cu | 59 const int y = blockIdx.x * blockDim.y + threadIdx.y; 60 const int tid = threadIdx.y * blockDim.x + threadIdx.x; 70 for (int x = threadIdx.x; x < cols_4; x += blockDim.x) 80 if (cols % 4 != 0 && threadIdx.x == 0) 128 const int y = blockIdx.x * blockDim.y + threadIdx.y; 129 const int tid = threadIdx.y * blockDim.x + threadIdx.x; 142 for (int x = threadIdx.x; x < cols_4; x += blockDim.x) 152 if (cols % 4 != 0 && threadIdx.x == 0 [all...] |
clahe.cu | 65 const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x; 70 for (int i = threadIdx.y; i < tileSize.y; i += blockDim.y) 73 for (int j = threadIdx.x; j < tileSize.x; j += blockDim.x) 139 const int x = blockIdx.x * blockDim.x + threadIdx.x; 140 const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
hough_lines.cu | 75 for (int i = threadIdx.x; i < count; i += blockDim.x) 93 for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x) 109 for (int i = threadIdx.x; i < count; i += blockDim.x) 125 for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x) 151 const int r = blockIdx.x * blockDim.x + threadIdx.x; 152 const int n = blockIdx.y * blockDim.y + threadIdx.y;
|
/external/opencv3/modules/cudawarping/src/cuda/ |
pyr_up.cu | 59 const int x = blockIdx.x * blockDim.x + threadIdx.x; 60 const int y = blockIdx.y * blockDim.y + threadIdx.y; 65 if (threadIdx.x < 10 && threadIdx.y < 10) 67 int srcx = static_cast<int>((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1; 68 int srcy = static_cast<int>((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1; 76 s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast<sum_t>(src(srcy, srcx)); 83 const int evenFlag = static_cast<int>((threadIdx.x & 1) == 0); 84 const int oddFlag = static_cast<int>((threadIdx.x & 1) != 0) [all...] |
pyr_down.cu | 61 const int x = blockIdx.x * blockDim.x + threadIdx.x; 77 smem[2 + threadIdx.x] = sum; 80 if (threadIdx.x < 2) 92 smem[threadIdx.x] = sum; 95 if (threadIdx.x > 253) 107 smem[4 + threadIdx.x] = sum; 121 smem[2 + threadIdx.x] = sum; 124 if (threadIdx.x < 2) 136 smem[threadIdx.x] = sum; 139 if (threadIdx.x > 253 [all...] |
/external/opencv3/modules/cudafeatures2d/src/cuda/ |
bf_knnmatch.cu | 109 s_distance += threadIdx.y * BLOCK_SIZE; 110 s_trainIdx += threadIdx.y * BLOCK_SIZE; 112 s_distance[threadIdx.x] = bestDistance1; 113 s_trainIdx[threadIdx.x] = bestTrainIdx1; 117 if (threadIdx.x == 0) 142 s_distance[threadIdx.x] = bestDistance2; 143 s_trainIdx[threadIdx.x] = bestTrainIdx2; 147 if (threadIdx.x == 0) 230 s_distance += threadIdx.y * BLOCK_SIZE; 231 s_trainIdx += threadIdx.y * BLOCK_SIZE [all...] |
bf_match.cu | 62 s_distance += threadIdx.y * BLOCK_SIZE; 63 s_trainIdx += threadIdx.y * BLOCK_SIZE; 65 reduceKeyVal<BLOCK_SIZE>(s_distance, bestDistance, s_trainIdx, bestTrainIdx, threadIdx.x, less<float>()); 71 s_distance += threadIdx.y * BLOCK_SIZE; 72 s_trainIdx += threadIdx.y * BLOCK_SIZE; 73 s_imgIdx += threadIdx.y * BLOCK_SIZE; 75 reduceKeyVal<BLOCK_SIZE>(s_distance, bestDistance, smem_tuple(s_trainIdx, s_imgIdx), thrust::tie(bestTrainIdx, bestImgIdx), threadIdx.x, less<float>()); 87 const int loadX = threadIdx.x + i * BLOCK_SIZE; 88 s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(::min(queryIdx, query.rows - 1))[loadX] : 0; 104 const int loadX = threadIdx.x + i * BLOCK_SIZE [all...] |
orb.cu | 78 const int ptidx = blockIdx.x * blockDim.y + threadIdx.y; 90 for (int ind = threadIdx.x; ind < blockSize * blockSize; ind += blockDim.x) 108 int* srow0 = smem0 + threadIdx.y * blockDim.x; 109 int* srow1 = smem1 + threadIdx.y * blockDim.x; 110 int* srow2 = smem2 + threadIdx.y * blockDim.x; 113 reduce<32>(smem_tuple(srow0, srow1, srow2), thrust::tie(a, b, c), threadIdx.x, thrust::make_tuple(op, op, op)); 115 if (threadIdx.x == 0) 156 int* srow0 = smem0 + threadIdx.y * blockDim.x; 157 int* srow1 = smem1 + threadIdx.y * blockDim.x; 161 const int ptidx = blockIdx.x * blockDim.y + threadIdx.y [all...] |
bf_radius_match.cu | 64 const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y; 65 const int trainIdx = blockIdx.x * BLOCK_SIZE + threadIdx.x; 75 const int loadX = threadIdx.x + i * BLOCK_SIZE; 77 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0; 78 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0; 85 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val; 87 ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val) [all...] |
/external/opencv3/modules/cudaobjdetect/src/cuda/ |
hog.cu | 121 const int block_x = threadIdx.z; 122 const int cell_x = threadIdx.x / 16; 123 const int cell_y = threadIdx.y; 124 const int cell_thread_x = threadIdx.x & 0xF; 151 const int dist_y_begin = -4 - 4 * (int)threadIdx.y; 234 unsigned int tid = threadIdx.x; 250 if (threadIdx.x == 0) 267 if (blockIdx.x * blockDim.z + threadIdx.z >= img_block_width) 271 blockIdx.x * blockDim.z + threadIdx.z) * 272 block_hist_size + threadIdx.x [all...] |
/external/opencv3/modules/core/include/opencv2/core/cuda/ |
warp_reduce.hpp | 55 __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
|
/external/opencv3/modules/cudaoptflow/src/cuda/ |
pyrlk.cu | 133 const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x; 162 for (int yBase = threadIdx.y, i = 0; yBase < c_winSize_y; yBase += blockDim.y, ++i) 164 for (int xBase = threadIdx.x, j = 0; xBase < c_winSize_x; xBase += blockDim.x, ++j) 241 for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i) 243 for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j) 284 for (int y = threadIdx.y, i = 0; y < c_winSize_y; y += blockDim.y, ++i) 286 for (int x = threadIdx.x, j = 0; x < c_winSize_x; x += blockDim.x, ++j) 344 for (int i = threadIdx.y; i < patchHeight; i += blockDim.y) 346 for (int j = threadIdx.x; j < patchWidth; j += blockDim.x [all...] |
/external/opencv3/modules/cudev/include/opencv2/cudev/block/ |
block.hpp | 70 return (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;
|