Home | History | Annotate | Download | only in cuda

Lines Matching full:queryidx

305         __device__ void loadQueryToSmem(int queryIdx, const PtrStepSz<T>& query, U* s_query)
311 s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(::min(queryIdx, query.rows - 1))[loadX] : 0;
316 __device__ void loopUnrolledCached(int queryIdx, const PtrStepSz<T>& query, int imgIdx, const PtrStepSz<T>& train, const Mask& mask,
354 if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx))
381 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
386 loadQueryToSmem<BLOCK_SIZE, MAX_DESC_LEN>(queryIdx, query, s_query);
393 loopUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, 0, train, mask, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestTrainIdx1, myBestTrainIdx2);
402 if (queryIdx < query.rows && threadIdx.x == 0)
404 bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
405 bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
431 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
436 loadQueryToSmem<BLOCK_SIZE, MAX_DESC_LEN>(queryIdx, query, s_query);
451 loopUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2);
462 if (queryIdx < query.rows && threadIdx.x == 0)
464 bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
465 bestImgIdx[queryIdx] = make_int2(myBestImgIdx1, myBestImgIdx2);
466 bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
491 __device__ void loopUnrolled(int queryIdx, const PtrStepSz<T>& query, int imgIdx, const PtrStepSz<T>& train, const Mask& mask,
513 ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
533 if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx))
560 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
570 loopUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, 0, train, mask, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestTrainIdx1, myBestTrainIdx2);
579 if (queryIdx < query.rows && threadIdx.x == 0)
581 bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
582 bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
608 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
626 loopUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2);
637 if (queryIdx < query.rows && threadIdx.x == 0)
639 bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
640 bestImgIdx[queryIdx] = make_int2(myBestImgIdx1, myBestImgIdx2);
641 bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
666 __device__ void loop(int queryIdx, const PtrStepSz<T>& query, int imgIdx, const PtrStepSz<T>& train, const Mask& mask,
687 ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
707 if (queryIdx < query.rows && trainIdx < train.rows && mask(queryIdx, trainIdx))
734 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
744 loop<BLOCK_SIZE, Dist>(queryIdx, query, 0, train, mask, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestTrainIdx1, myBestTrainIdx2);
753 if (queryIdx < query.rows && threadIdx.x == 0)
755 bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
756 bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
782 const int queryIdx = blockIdx.x * BLOCK_SIZE + threadIdx.y;
800 loop<BLOCK_SIZE, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2);
811 if (queryIdx < query.rows && threadIdx.x == 0)
813 bestTrainIdx[queryIdx] = make_int2(myBestTrainIdx1, myBestTrainIdx2);
814 bestImgIdx[queryIdx] = make_int2(myBestImgIdx1, myBestImgIdx2);
815 bestDistance[queryIdx] = make_float2(myBestDistance1, myBestDistance2);
909 const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
924 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(::min(queryIdx, query.rows - 1))[loadX];
942 if (queryIdx < query.rows && trainIdx < train.rows)
946 if (mask(queryIdx, trainIdx))
949 allDist.ptr(queryIdx)[trainIdx] = distVal;
973 const int queryIdx = blockIdx.y * BLOCK_SIZE + threadIdx.y;
987 s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(::min(queryIdx, query.rows - 1))[loadX];
1005 if (queryIdx < query.rows && trainIdx < train.rows)
1009 if (mask(queryIdx, trainIdx))
1012 allDist.ptr(queryIdx)[trainIdx] = distVal;
1075 const int queryIdx = blockIdx.x;
1077 float* allDistRow = allDist.ptr(queryIdx);
1103 trainIdx.ptr(queryIdx)[i] = bestIdx;
1104 distance.ptr(queryIdx)[i] = dist;