Home | History | Annotate | Download | only in cuda

Lines Matching full:nthreads

261         template <int nthreads, // Number of threads which process one block histogram
274 __shared__ float sh_squares[nthreads * nblocks];
275 float* squares = sh_squares + threadIdx.z * nthreads;
281 float sum = reduce_smem<nthreads>(squares, elem * elem);
286 sum = reduce_smem<nthreads>(squares, elem * elem);
301 int nthreads = power_2up(block_hist_size);
302 dim3 threads(nthreads, 1, nblocks);
308 if (nthreads == 32)
310 else if (nthreads == 64)
312 else if (nthreads == 128)
314 else if (nthreads == 256)
316 else if (nthreads == 512)
332 template <int nthreads, // Number of threads per one histogram block
348 for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
355 __shared__ float products[nthreads * nblocks];
357 const int tid = threadIdx.z * nthreads + threadIdx.x;
359 nthreads>(products, product, tid, plus<float>());
370 const int nthreads = 256;
378 dim3 threads(nthreads, 1, nblocks);
381 cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
386 compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
394 template <int nthreads, // Number of threads per one histogram block
410 for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
417 __shared__ float products[nthreads * nblocks];
419 const int tid = threadIdx.z * nthreads + threadIdx.x;
421 reduce<nthreads>(products, product, tid, plus<float>());
432 const int nthreads = 256;
440 dim3 threads(nthreads, 1, nblocks);
443 cudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFuncCachePreferL1));
446 classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
458 template <int nthreads>
470 for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
482 const int nthreads = 256;
488 dim3 threads(nthreads, 1);
492 extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
500 template <int nthreads>
513 for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
531 const int nthreads = 256;
537 dim3 threads(nthreads, 1);
541 extract_descrs_by_cols_kernel<nthreads><<<grid, threads>>>(
552 template <int nthreads, int correct_gamma>
560 __shared__ float sh_row[(nthreads + 2) * 3];
569 sh_row[threadIdx.x + 1 + (nthreads + 2)] = val.y;
570 sh_row[threadIdx.x + 1 + 2 * (nthreads + 2)] = val.z;
576 sh_row[(nthreads + 2)] = val.y;
577 sh_row[2 * (nthreads + 2)] = val.z;
584 sh_row[blockDim.x + 1 + (nthreads + 2)] = val.y;
585 sh_row[blockDim.x + 1 + 2 * (nthreads + 2)] = val.z;
594 b.y = sh_row[threadIdx.x + 2 + (nthreads + 2)];
595 b.z = sh_row[threadIdx.x + 2 + 2 * (nthreads + 2)];
597 a.y = sh_row[threadIdx.x + (nthreads + 2)];
598 a.z = sh_row[threadIdx.x + 2 * (nthreads + 2)];
659 const int nthreads = 256;
661 dim3 bdim(nthreads, 1);
665 compute_gradients_8UC4_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
667 compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
674 template <int nthreads, int correct_gamma>
682 __shared__ float sh_row[nthreads + 2];
732 const int nthreads = 256;
734 dim3 bdim(nthreads, 1);
738 compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
740 compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);