Home | History | Annotate | Download | only in cuda

Lines Matching refs:threadIdx

62             s_distance += threadIdx.y * BLOCK_SIZE;
63 s_trainIdx += threadIdx.y * BLOCK_SIZE;
65 reduceKeyVal<BLOCK_SIZE>(s_distance, bestDistance, s_trainIdx, bestTrainIdx, threadIdx.x, less<float>());
71 s_distance += threadIdx.y * BLOCK_SIZE;
72 s_trainIdx += threadIdx.y * BLOCK_SIZE;
73 s_imgIdx += threadIdx.y * BLOCK_SIZE;
75 reduceKeyVal<BLOCK_SIZE>(s_distance, bestDistance, smem_tuple(s_trainIdx, s_imgIdx), thrust::tie(bestTrainIdx, bestImgIdx), threadIdx.x, less<float>());
87 const int loadX = threadIdx.x + i * BLOCK_SIZE;
88 s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(::min(queryIdx, query.rows - 1))[loadX] : 0;
104 const int loadX = threadIdx.x + i * BLOCK_SIZE;
106 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
112 ForceGlob<T>::Load(train.ptr(::min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
113 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
120 dist.reduceIter(s_query[threadIdx.y * MAX_DESC_LEN + i * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]);
127 const int trainIdx = t * BLOCK_SIZE + threadIdx.x;
143 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
162 if (queryIdx < query.rows && threadIdx.x == 0)
192 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
220 if (queryIdx < query.rows && threadIdx.x == 0)
260 const int loadX = threadIdx.x + i * BLOCK_SIZE;
262 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
263 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
270 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
272 ForceGlob<T>::Load(train.ptr(::min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
273 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
280 dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]);
287 const int trainIdx = t * BLOCK_SIZE + threadIdx.x;
303 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
320 if (queryIdx < query.rows && threadIdx.x == 0)
350 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
376 if (queryIdx < query.rows && threadIdx.x == 0)
415 const int loadX = threadIdx.x + i * BLOCK_SIZE;
417 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = 0;
418 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = 0;
425 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
427 ForceGlob<T>::Load(train.ptr(::min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
428 s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
435 dist.reduceIter(s_query[threadIdx.y * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + threadIdx.x]);
442 const int trainIdx = t * BLOCK_SIZE + threadIdx.x;
458 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
475 if (queryIdx < query.rows && threadIdx.x == 0)
505 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
530 if (queryIdx < query.rows && threadIdx.x == 0)