/external/tensorflow/tensorflow/contrib/image/kernels/ |
adjust_hsv_in_yiq_op_gpu.cu.cc | 50 auto* cu_stream = ctx->eigen_device<GPUDevice>().stream(); local 51 OP_REQUIRES(ctx, cu_stream, errors::Internal("No GPU stream available.")); 58 internal::compute_tranformation_matrix_cuda<<<1, 1, 0, cu_stream>>>(
|
/external/tensorflow/tensorflow/core/kernels/ |
where_op_gpu.cu.h | 139 const cudaStream_t& cu_stream = GetCudaStream(ctx); local 152 /*stream*/ cu_stream); 171 /*stream*/ cu_stream); 266 const cudaStream_t& cu_stream = GetCudaStream(ctx); local 289 /*stream*/ cu_stream); 308 /*stream*/ cu_stream);
|
reduction_gpu_kernels.cu.h | 451 const cudaStream_t& cu_stream) { 457 <<<num_blocks, num_threads, 0, cu_stream>>>(in, out, in_size, op, init); 477 <<<num_blocks, num_threads, 0, cu_stream>>>( 484 CleanupSegments<<<1, 32, 0, cu_stream>>>( 497 temp_storage_bytes, in, out, in_size, op, init, cu_stream); 515 const cudaStream_t& cu_stream) { 521 RowReduceKernel<<<num_blocks, threads_per_block, 0, cu_stream>>>( 538 transform_iter + 1, op, init, cu_stream); 556 const cudaStream_t& cu_stream) { 572 ColumnReduceMax16ColumnsKernel<<<grid_dim, block_dim, 0, cu_stream>>>( 787 const cudaStream_t& cu_stream = GetCudaStream(ctx); local [all...] |
dynamic_partition_op_gpu.cu.cc | 328 const cudaStream_t& cu_stream = GetCudaStream(c); local 342 indices_in_ptr, indices_out_ptr, N, 0, sizeof(int32) * 8, cu_stream); 354 0, sizeof(int32) * 8, cu_stream); 361 const cudaStream_t& cu_stream = GetCudaStream(c); local 412 num_runs_ptr, reduction_op, N, cu_stream); 427 num_runs_ptr, reduction_op, N, cu_stream);
|
softmax_op_gpu.cu.cc | 104 const cudaStream_t& cu_stream = GetCudaStream(context); variable 137 GenerateNormalizedProb<<<numBlocks, numThreads, 0, cu_stream>>>(
|
topk_op_gpu.cu.cc | 435 const cudaStream_t& cu_stream = GetCudaStream(ctx); 484 /* stream */ cu_stream); 509 /* stream */ cu_stream); 549 const cudaStream_t& cu_stream = GetCudaStream(context); 550 auto err = impl::LaunchTopKKernel(cu_stream, /* num_shards */ 0,
|
/external/tensorflow/tensorflow/contrib/rnn/kernels/ |
lstm_ops_gpu.cu.cc | 189 const cudaStream_t& cu_stream = GetCudaStream(ctx); local 199 concat_xh<<<grid_dim, block_dim, 0, cu_stream>>>( 217 lstm_gates<T, true><<<grid_dim_2d, block_dim_2d, 0, cu_stream>>>( 222 lstm_gates<T, false><<<grid_dim_2d, block_dim_2d, 0, cu_stream>>>( 324 const cudaStream_t& cu_stream = GetCudaStream(ctx); local 330 lstm_gates_bprop<<<grid_dim_2d, block_dim_2d, 0, cu_stream>>>(
|
/external/tensorflow/tensorflow/contrib/nccl/kernels/ |
nccl_manager.cc | 447 const cudaStream_t* cu_stream = reinterpret_cast<const cudaStream_t*>( local 481 collective->reduction_op, nccl_comm, *cu_stream); 488 collective->root_rank, nccl_comm, *cu_stream); 498 collective->root_rank, nccl_comm, *cu_stream);
|