Home | History | Annotate | Download | only in kernels

Lines Matching refs:cu_stream

451                            const cudaStream_t& cu_stream) {
457 <<<num_blocks, num_threads, 0, cu_stream>>>(in, out, in_size, op, init);
477 <<<num_blocks, num_threads, 0, cu_stream>>>(
484 CleanupSegments<<<1, 32, 0, cu_stream>>>(
497 temp_storage_bytes, in, out, in_size, op, init, cu_stream);
515 const cudaStream_t& cu_stream) {
521 RowReduceKernel<<<num_blocks, threads_per_block, 0, cu_stream>>>(
538 transform_iter + 1, op, init, cu_stream);
556 const cudaStream_t& cu_stream) {
572 ColumnReduceMax16ColumnsKernel<<<grid_dim, block_dim, 0, cu_stream>>>(
581 ColumnReduceMax16ColumnsKernel<<<grid_dim, block_dim, 0, cu_stream>>>(
587 CleanupSegments<<<new_grid_dim, num_threads, 0, cu_stream>>>(
596 T init, const cudaStream_t& cu_stream) {
608 ColumnReduceKernel<<<grid_dim, block_dim, 0, cu_stream>>>(
618 ColumnReduceKernel<<<grid_dim, block_dim, 0, cu_stream>>>(
624 CleanupSegments<<<new_grid_dim, block_dim, 0, cu_stream>>>(
633 const cudaStream_t& cu_stream) {
636 cu_stream);
639 init, cu_stream);
644 ColumnReduceSimpleKernel<<<num_blocks, threads_per_block, 0, cu_stream>>>(
652 const cudaStream_t& cu_stream) {
659 ColumnReduceSimpleKernel<<<num_blocks, threads_per_block, 0, cu_stream>>>(
666 const cudaStream_t& cu_stream) {
689 transform_iter + 1, op, init, cu_stream);
787 const cudaStream_t& cu_stream = GetCudaStream(ctx);
790 LaunchScalarReduction(ctx, out, in, in_size, op, init, cu_stream);
793 LaunchRowReduction(ctx, out, in, in_dim0, in_dim1, op, init, cu_stream);
796 LaunchColumnReduction(ctx, out, in, in_dim0, in_dim1, op, init, cu_stream);
799 cu_stream);
803 cu_stream);