Home | Sort by relevance | Sort by last modified time
    Searched refs:threadIdx (Results 1 - 25 of 47) sorted by null

1 2

  /external/eigen/unsupported/Eigen/CXX11/src/Tensor/
TensorContractionCuda.h 57 // index is shared by both sides, then the contracting index should be in threadIdx.x.
67 const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z;
68 const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x;
89 // threadIdx.x: the vertical position in an 8x8 block
90 // threadIdx.y: the vertical index of the 8x8 block in the grid
91 // threadIdx.z: the horizontal position in an 8x8 block
    [all...]
TensorReductionCuda.h 115 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
129 const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
136 if (threadIdx.x == 0) {
174 if ((threadIdx.x & (warpSize - 1)) == 0) {
178 if (gridDim.x > 1 && threadIdx.x == 0) {
205 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
223 const Index first_index = blockIdx.x * BlockSize * NumPerThread + 2*threadIdx.x;
250 if ((threadIdx.x & (warpSize - 1)) == 0) {
265 eigen_assert(threadIdx.x == 1);
390 const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x
    [all...]
  /external/tensorflow/tensorflow/examples/adding_an_op/
cuda_op_kernel.cu.cc 21 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
  /external/tensorflow/tensorflow/tools/ci_build/builds/user_ops/
cuda_op_kernel.cu.cc 21 for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
  /external/tensorflow/tensorflow/core/kernels/
bias_op_gpu.cu.cc 121 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) {
126 for (int32 index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
133 for (int32 index = threadIdx.x; index < bias_size; index += blockDim.x) {
147 for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) {
158 for (int32 index = group_index * blockDim.x + threadIdx.x;
169 int bias_offset = threadIdx.x % 32;
175 int32 thread_index = threadIdx.x;
concat_lib_gpu_impl.cu.cc 40 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x;
43 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y;
68 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x;
76 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x;
107 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y;
bucketize_op_gpu.cu.cc 46 int32 lidx = threadIdx.y * blockDim.x + threadIdx.x;
check_numerics_op_gpu.cu.cc 39 const int32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;
reduction_gpu_kernels.cu.h 181 const int tid = threadIdx.x;
218 const int row = (blockIdx.x * blockDim.x + threadIdx.x) / 32;
219 const int lane = threadIdx.x % 32;
222 int gid = threadIdx.x + blockIdx.x * blockDim.x;
256 const int lane = threadIdx.x % 32;
260 rows_per_warp * (blockIdx.y * blockDim.y + threadIdx.y);
284 cub::ShuffleIndex(sum, threadIdx.x + i * num_cols, 32, 0xffffffff);
288 if (lane < num_cols) partial_sums[lane * 33 + threadIdx.y] = sum;
292 if (threadIdx.y == 0 && threadIdx.x < num_cols)
    [all...]
split_lib_gpu.cu.cc 118 IntType gidx = blockIdx.x * blockDim.x + threadIdx.x;
126 IntType lidx = threadIdx.y * blockDim.x + threadIdx.x;
157 IntType gidy = blockIdx.y * blockDim.y + threadIdx.y;
  /external/eigen/test/
cuda_common.h 11 dim3 threadIdx, blockDim, blockIdx;
26 int i = threadIdx.x + blockIdx.x*blockDim.x;
  /external/clang/lib/Headers/
cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /external/tensorflow/tensorflow/contrib/mpi_collectives/kernels/
ring.cu.cc 90 for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
  /prebuilts/clang/host/darwin-x86/clang-3289846/lib64/clang/3.8/include/
cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/darwin-x86/clang-3859424/lib64/clang/4.0/include/
__clang_cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/darwin-x86/clang-4053586/lib64/clang/5.0/include/
__clang_cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/darwin-x86/clang-4393122/lib64/clang/5.0.1/include/
__clang_cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/darwin-x86/clang-4479392/lib64/clang/5.0.2/include/
__clang_cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/darwin-x86/clang-4579689/lib64/clang/6.0.1/include/
__clang_cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/darwin-x86/clang-4630689/lib64/clang/6.0.1/include/
__clang_cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/darwin-x86/clang-4639204/lib64/clang/6.0.1/include/
__clang_cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/darwin-x86/clang-4691093/lib64/clang/6.0.2/include/
__clang_cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/linux-x86/clang-3289846/lib64/clang/3.8/include/
cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/linux-x86/clang-3859424/lib64/clang/4.0/include/
__clang_cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
  /prebuilts/clang/host/linux-x86/clang-4053586/lib64/clang/5.0/include/
__clang_cuda_builtin_vars.h 38 // int x = threadIdx.x;
70 // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
112 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;

Completed in 357 milliseconds

1 2