/external/tensorflow/tensorflow/contrib/reduce_slice_ops/kernels/ |
reduce_slice_ops_gpu.cu.cc | 37 CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count.x, X) { \ 38 CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count.y, Y) { \ 39 CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count.z, Z) { \ 40 Index outidx = x * config.virtual_thread_count.y * \ 41 config.virtual_thread_count.z + \ 42 y * config.virtual_thread_count.z + z; \ 47 Index inidx = x * bound * config.virtual_thread_count.z + \ 48 yin * config.virtual_thread_count.z + z; \
|
/external/tensorflow/tensorflow/core/util/ |
cuda_launch_config.h | 51 CUDA_1D_KERNEL_LOOP(x, config.virtual_thread_count) { 57 CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) { 58 CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) { 65 CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) { 66 CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) { 67 CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count, z) { 114 int virtual_thread_count = -1; member in struct:tensorflow::CudaLaunchConfig 129 const int virtual_thread_count = work_element_count; local 132 virtual_thread_count); 138 config.virtual_thread_count = virtual_thread_count 197 dim3 virtual_thread_count = dim3(0, 0, 0); member in struct:tensorflow::Cuda2DLaunchConfig [all...] |
cuda_kernel_helper_test.cu.cc | 42 CUDA_1D_KERNEL_LOOP(x, config.virtual_thread_count) { outbuf[x] = 0; } 47 CUDA_1D_KERNEL_LOOP(x, config.virtual_thread_count) { 55 CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count.x, X) { 59 CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count.y, Y) { 63 int idx = x * config.virtual_thread_count.y + y; 69 CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count.x, X) { 73 CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count.y, Y) { 77 CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count.z, Z) { 82 x * config.virtual_thread_count.y * config.virtual_thread_count.z [all...] |
/external/tensorflow/tensorflow/core/kernels/ |
diag_op_gpu.cu.cc | 52 // CudaLaunchConfig uses an int for virtual_thread_count, 58 int virtual_thread_count = int(size * size); local 63 GetCudaLaunchConfig(virtual_thread_count, device); 65 device.stream()>>>(diag_config.virtual_thread_count, size, 105 0, device.stream()>>>(diag_config.virtual_thread_count,
|
fused_batch_norm_op.cu.cc | 42 d.stream()>>>(config.virtual_thread_count, 64 d.stream()>>>(config.virtual_thread_count,
|
matrix_set_diag_op_gpu.cu.cc | 78 config.virtual_thread_count, m, n, minsize, diag.data(), 84 config.virtual_thread_count, m, n, minsize, input.data(),
|
eye_functor_gpu.cu.cc | 57 device.stream()>>>(config.virtual_thread_count, batch_size, m,
|
matrix_band_part_op_gpu.cu.cc | 63 config.virtual_thread_count, batch_size, m, n, num_lower_diags,
|
inplace_ops_functor_gpu.cu.cc | 54 cfg.virtual_thread_count, nrows, ncols, loc, src, dst);
|
resize_bilinear_op_gpu.cu.cc | 169 config.virtual_thread_count, images.data(), height_scale, 198 config.virtual_thread_count, output_grad.data()); 205 config.virtual_thread_count, input_grad.data(), height_scale,
|
tile_functor_gpu.cu.cc | 76 cfg.virtual_thread_count, p, reinterpret_cast<const int32*>(dev_buf),
|
depthtospace_op_gpu.cu.cc | 163 config.virtual_thread_count, input.data(), block_size, batch_size, 218 config.virtual_thread_count, input.data(), block_size, input_width,
|
determinant_op_gpu.cu.cc | 134 config.virtual_thread_count, n, lu_factor.data(), pivots, nullptr, 155 config.virtual_thread_count, n, lu_factor.data(), pivots,
|
spacetodepth_op_gpu.cu.cc | 159 config.virtual_thread_count, input.data(), block_size, batch_size, 214 config.virtual_thread_count, input.data(), block_size, output_width,
|
crop_and_resize_op_gpu.cu.cc | 345 config.virtual_thread_count, image.data(), boxes.data(), 377 config.virtual_thread_count, grads_image.data()); 386 config.virtual_thread_count, grads.data(), boxes.data(), 419 config.virtual_thread_count, grads_boxes.data()); 428 config.virtual_thread_count, grads.data(), image.data(), boxes.data(),
|
dilation_ops_gpu.cu.cc | 201 config.virtual_thread_count, input.data(), filter.data(), batch, 241 config.virtual_thread_count, input.data(), filter.data(), 281 config.virtual_thread_count, input.data(), filter.data(),
|
svd_op_gpu.cu.cc | 66 CUDA_AXIS_KERNEL_LOOP(batch, config.virtual_thread_count.x, X) { 67 CUDA_AXIS_KERNEL_LOOP(i, config.virtual_thread_count.y, Y) { 78 CUDA_1D_KERNEL_LOOP(i, config.virtual_thread_count) {
|
avgpooling_op_gpu.cu.cc | 95 config.virtual_thread_count, top_diff, num, height, width, channels,
|
multinomial_op_gpu.cu.cc | 109 d.stream()>>>(config.virtual_thread_count, num_classes,
|
spacetobatch_functor_gpu.cu.cc | 146 config.virtual_thread_count, const_cast<T*>(space_tensor.data()),
|
bias_op_gpu.cu.cc | 82 config.virtual_thread_count, input, bias, output, bias_size); 86 config.virtual_thread_count, input, bias, output, bias_size,
|
conv_ops_gpu_3.cu.cc | 449 config.virtual_thread_count, in.data(), combined_dims, out.data()); 469 config.virtual_thread_count, in.data(), combined_dims, out.data()); 499 config.virtual_thread_count, in.data(), input_dims, out.data(), 504 config.virtual_thread_count, in.data(), input_dims, out.data(), [all...] |
resize_nearest_neighbor_op_gpu.cu.cc | 158 d.stream()>>>(input_config.virtual_thread_count, input.data(),
|
transpose_functor_gpu.cu.cc | 85 cfg.virtual_thread_count, p, reinterpret_cast<const int32*>(dev_buf),
|