/external/eigen/unsupported/Eigen/CXX11/src/Tensor/ |
TensorContractionCuda.h | 57 // index is shared by both sides, then the contracting index should be in threadIdx.x. 67 const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z; 68 const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x; 89 // threadIdx.x: the vertical position in an 8x8 block 90 // threadIdx.y: the vertical index of the 8x8 block in the grid 91 // threadIdx.z: the horizontal position in an 8x8 bloc [all...] |
TensorReductionCuda.h | 115 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; 129 const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; 136 if (threadIdx.x == 0) { 174 if ((threadIdx.x & (warpSize - 1)) == 0) { 178 if (gridDim.x > 1 && threadIdx.x == 0) { 205 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; 223 const Index first_index = blockIdx.x * BlockSize * NumPerThread + 2*threadIdx.x; 250 if ((threadIdx.x & (warpSize - 1)) == 0) { 265 eigen_assert(threadIdx.x == 1); 390 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x [all...] |
/external/tensorflow/tensorflow/examples/adding_an_op/ |
cuda_op_kernel.cu.cc | 21 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
|
/external/tensorflow/tensorflow/tools/ci_build/builds/user_ops/ |
cuda_op_kernel.cu.cc | 21 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
|
/external/tensorflow/tensorflow/core/kernels/ |
bias_op_gpu.cu.cc | 121 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { 126 for (int32 index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; 133 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) { 147 for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) { 158 for (int32 index = group_index * blockDim.x + threadIdx.x; 169 int bias_offset = threadIdx.x % 32; 175 int32 thread_index = threadIdx.x;
|
concat_lib_gpu_impl.cu.cc | 40 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; 43 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y; 68 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; 76 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; 107 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y;
|
bucketize_op_gpu.cu.cc | 46 int32 lidx = threadIdx.y * blockDim.x + threadIdx.x;
|
check_numerics_op_gpu.cu.cc | 39 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
|
reduction_gpu_kernels.cu.h | 181 const int tid = threadIdx.x; 218 const int row = (blockIdx.x * blockDim.x + threadIdx.x) / 32; 219 const int lane = threadIdx.x % 32; 222 int gid = threadIdx.x + blockIdx.x * blockDim.x; 256 const int lane = threadIdx.x % 32; 260 rows_per_warp * (blockIdx.y * blockDim.y + threadIdx.y); 284 cub::ShuffleIndex(sum, threadIdx.x + i * num_cols, 32, 0xffffffff); 288 if (lane < num_cols) partial_sums[lane * 33 + threadIdx.y] = sum; 292 if (threadIdx.y == 0 && threadIdx.x < num_cols) [all...] |
split_lib_gpu.cu.cc | 118 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x; 126 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x; 157 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y;
|
/external/eigen/test/ |
cuda_common.h | 11 dim3 threadIdx, blockDim, blockIdx; 26 int i = threadIdx.x + blockIdx.x*blockDim.x;
|
/external/clang/lib/Headers/ |
cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/external/tensorflow/tensorflow/contrib/mpi_collectives/kernels/ |
ring.cu.cc | 90 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
|
/prebuilts/clang/host/darwin-x86/clang-3289846/lib64/clang/3.8/include/ |
cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/darwin-x86/clang-3859424/lib64/clang/4.0/include/ |
__clang_cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/darwin-x86/clang-4053586/lib64/clang/5.0/include/ |
__clang_cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/darwin-x86/clang-4393122/lib64/clang/5.0.1/include/ |
__clang_cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/darwin-x86/clang-4479392/lib64/clang/5.0.2/include/ |
__clang_cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/darwin-x86/clang-4579689/lib64/clang/6.0.1/include/ |
__clang_cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/darwin-x86/clang-4630689/lib64/clang/6.0.1/include/ |
__clang_cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/darwin-x86/clang-4639204/lib64/clang/6.0.1/include/ |
__clang_cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/darwin-x86/clang-4691093/lib64/clang/6.0.2/include/ |
__clang_cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/linux-x86/clang-3289846/lib64/clang/3.8/include/ |
cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/linux-x86/clang-3859424/lib64/clang/4.0/include/ |
__clang_cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|
/prebuilts/clang/host/linux-x86/clang-4053586/lib64/clang/5.0/include/ |
__clang_cuda_builtin_vars.h | 38 // int x = threadIdx.x; 70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
|