Lines Matching full:nblocks
117 template <int nblocks> // Number of histogram blocks processed by a single GPU thread block
131 float* final_hist = smem + cnbins * 48 * nblocks;
147 hist[bin_id * 48 * nblocks] = 0.f;
168 hist[bin.x * 48 * nblocks] += gaussian * interp_weight * vote.x;
169 hist[bin.y * 48 * nblocks] += gaussian * interp_weight * vote.y;
173 for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48 * nblocks)
199 const int nblocks = 1;
206 dim3 grid(divUp(img_block_width, nblocks), img_block_height);
207 dim3 threads(32, 2, nblocks);
209 cudaSafeCall(cudaFuncSetCacheConfig(compute_hists_kernel_many_blocks<nblocks>,
215 int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12 * nblocks) * sizeof(float);
216 int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * nblocks) * sizeof(float);
218 compute_hists_kernel_many_blocks<nblocks><<<grid, threads, smem>>>(
262 int nblocks> // Number of block histograms processed by one GPU thread block
274 __shared__ float sh_squares[nthreads * nblocks];
298 const int nblocks = 1;
302 dim3 threads(nthreads, 1, nblocks);
306 dim3 grid(divUp(img_block_width, nblocks), img_block_height);
309 normalize_hists_kernel_many_blocks<32, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
311 normalize_hists_kernel_many_blocks<64, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
313 normalize_hists_kernel_many_blocks<64, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
315 normalize_hists_kernel_many_blocks<256, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
317 normalize_hists_kernel_many_blocks<512, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
333 int nblocks> // Number of histogram blocks processed by a single GPU thread block
355 __shared__ float products[nthreads * nblocks];
371 const int nblocks = 1;
378 dim3 threads(nthreads, 1, nblocks);
379 dim3 grid(divUp(img_win_width, nblocks), img_win_height);
381 cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
386 compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
395 int nblocks> // Number of histogram blocks processed by a single GPU thread block
417 __shared__ float products[nthreads * nblocks];
433 const int nblocks = 1;
440 dim3 threads(nthreads, 1, nblocks);
441 dim3 grid(divUp(img_win_width, nblocks), img_win_height);
443 cudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFuncCachePreferL1));
446 classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(