Search

Home	Sort by relevance Sort by last modified time

Searched defs:threads_per_block (Results 1 - 6 of 6) sorted by null

/external/tensorflow/tensorflow/compiler/xla/service/gpu/
partition_assignment.cc	`41 launch_dims.threads_per_block()); 66 int64 threads_per_block; local 68 threads_per_block = device_desc.threads_per_core_limit() / 78 threads_per_block = device_desc.threads_per_warp(); 79 if (threads_per_block == 0) { 81 threads_per_block = 32; 85 if (num_elements < threads_per_block) { 86 threads_per_block = num_elements; 88 << threads_per_block << ") because the latter is smaller."; 91 int64 block_count = CeilOfRatio(num_elements, threads_per_block); [all...]`
partition_assignment.h	`41 LaunchDimensions(int64 block_count, int64 threads_per_block) 42 : block_count_(block_count), threads_per_block_(threads_per_block) {} 49 int64 threads_per_block() const { return threads_per_block_; } function in class:xla::gpu::LaunchDimensions`
elemental_ir_emitter.cc	`308 llvm::Value* threads_per_block = ir_builder_->CreateIntCast( local 311 ir_builder_->getIntNTy(128), /*isSigned=*/true, "threads_per_block"); 313 ir_builder_->CreateNSWMul(block_id, threads_per_block),`
/external/tensorflow/tensorflow/core/kernels/
adjust_hue_op_gpu.cu.cc	`33 const int threads_per_block = config.thread_per_block; local 35 (number_of_elements + threads_per_block - 1) / threads_per_block; 37 <<<block_count, threads_per_block, 0, stream>>>(`
adjust_saturation_op_gpu.cu.cc	`35 const int threads_per_block = config.thread_per_block; local 37 (number_of_elements + threads_per_block - 1) / threads_per_block; 39 <<<block_count, threads_per_block, 0, stream>>>(`
reduction_gpu_kernels.cu.h	`517 const int threads_per_block = 128; local 518 const int warps_per_block = threads_per_block / 32; 521 RowReduceKernel<<<num_blocks, threads_per_block, 0, cu_stream>>>( 641 int threads_per_block = 128; local 642 int num_blocks = Eigen::divup(extent_y, threads_per_block); 644 ColumnReduceSimpleKernel<<<num_blocks, threads_per_block, 0, cu_stream>>>( 653 int threads_per_block = 128; local 655 (extent_x * extent_z + threads_per_block - 1) / threads_per_block; 659 ColumnReduceSimpleKernel<<<num_blocks, threads_per_block, 0, cu_stream>>> [all...]`

Completed in 304 milliseconds