/external/tensorflow/tensorflow/stream_executor/cuda/ |
cuda_dnn.h | 67 const DeviceMemory<Eigen::half>& input_data, 69 const DeviceMemory<Eigen::half>& input_h_data, 71 const DeviceMemory<Eigen::half>& input_c_data, 72 const DeviceMemory<Eigen::half>& params, 74 DeviceMemory<Eigen::half>* output_data, 76 DeviceMemory<Eigen::half>* output_h_data, 78 DeviceMemory<Eigen::half>* output_c_data, bool is_training, 84 const DeviceMemory<float>& input_data, 86 const DeviceMemory<float>& input_h_data, 88 const DeviceMemory<float>& input_c_data [all...] |
cuda_rng.h | 32 class DeviceMemory; 58 bool DoPopulateRandUniform(Stream *stream, DeviceMemory<float> *v) override; 59 bool DoPopulateRandUniform(Stream *stream, DeviceMemory<double> *v) override; 61 DeviceMemory<std::complex<float>> *v) override; 63 DeviceMemory<std::complex<double>> *v) override; 65 DeviceMemory<float> *v) override; 67 DeviceMemory<double> *v) override; 75 bool DoPopulateRandUniformInternal(Stream *stream, DeviceMemory<T> *v); 78 DeviceMemory<ElemT> *v, FuncT func);
|
cuda_blas.cc | 642 const DeviceMemory<float> &x, int incx, 643 DeviceMemory<float> *result) { 650 const DeviceMemory<double> &x, int incx, 651 DeviceMemory<double> *result) { 658 const DeviceMemory<std::complex<float>> &x, int incx, 659 DeviceMemory<float> *result) { 666 const DeviceMemory<std::complex<double>> &x, int incx, 667 DeviceMemory<double> *result) { 674 const DeviceMemory<float> &x, int incx, 675 DeviceMemory<float> *y, int incy) [all...] |
cuda_blas.h | 114 const port::ArraySlice<DeviceMemory<T> *> &a_array, int lda, 115 const port::ArraySlice<DeviceMemory<T> *> &b_array, int ldb, T beta, 116 const port::ArraySlice<DeviceMemory<T> *> &c_array, int ldc, 128 uint64 n, uint64 k, const CompT &alpha, const DeviceMemory<InT> &a, 129 int lda, const DeviceMemory<InT> &b, int ldb, const CompT &beta, 130 DeviceMemory<OutT> *c, int ldc, blas::ComputationType computation_type, 138 uint64 n, uint64 k, const ParamType &alpha, const DeviceMemory<T> &a, 139 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta, 140 DeviceMemory<T> *c, int ldc, blas::ProfileResult *output_profile_result); 146 const DeviceMemory<T> &a, int lda [all...] |
cuda_fft.h | 90 DeviceMemory<uint8> scratch_; 122 const DeviceMemory<InputT> &input, 123 DeviceMemory<OutputT> *output); 129 const DeviceMemory<InputT> &input, 130 DeviceMemory<OutputT> *output);
|
cuda_dnn.cc | [all...] |
cuda_helpers.h | 34 class DeviceMemory; 38 // Converts a const DeviceMemory reference to its underlying typed pointer in 42 const T *CUDAMemory(const DeviceMemory<T> &mem) { 46 // Converts a (non-const) DeviceMemory pointer reference to its underlying typed 49 T *CUDAMemoryMutable(DeviceMemory<T> *mem) {
|
/external/tensorflow/tensorflow/stream_executor/ |
blas.h | 27 // DeviceMemory<float> x = stream_exec->AllocateArray<float>(1024); 28 // DeviceMemory<float> y = stream_exec->AllocateArray<float>(1024); 59 class DeviceMemory; 179 const DeviceMemory<float> &x, int incx, 180 DeviceMemory<float> *result) = 0; 182 const DeviceMemory<double> &x, int incx, 183 DeviceMemory<double> *result) = 0; 185 const DeviceMemory<std::complex<float>> &x, int incx, 186 DeviceMemory<float> *result) = 0; 188 const DeviceMemory<std::complex<double>> &x, int incx [all...] |
stream.h | 61 class DeviceMemory; 141 // like DeviceMemory or primitive types such as int. What arguments you may 152 // `const DeviceMemory<T>` is considered "pack compatible" with a 153 // `const DeviceMemory<T>&` formal parameter; in part, because we don't have 222 const DeviceMemory<float> &x, const DeviceMemory<float> &scale, 223 const DeviceMemory<float> &offset, 224 const DeviceMemory<float> &estimated_mean, 225 const DeviceMemory<float> &estimated_variance, 228 DeviceMemory<float> *y, DeviceMemory<float> *batch_mean [all...] |
dnn.h | 920 Stream* stream, const DeviceMemory<float>& x, 921 const DeviceMemory<float>& scale, const DeviceMemory<float>& offset, 922 const DeviceMemory<float>& estimated_mean, 923 const DeviceMemory<float>& estimated_variance, [all...] |
rng.h | 30 class DeviceMemory; 52 // DeviceMemory element type; i.e. populates DeviceMemory<float> with random 55 DeviceMemory<float> *v) = 0; 57 DeviceMemory<double> *v) = 0; 59 DeviceMemory<std::complex<float>> *v) = 0; 61 DeviceMemory<std::complex<double>> *v) = 0; 66 DeviceMemory<float> *v) { 72 double stddev, DeviceMemory<double> *v) {
|
stream.cc | 315 const DeviceMemory<float> &x, const DeviceMemory<float> &scale, 316 const DeviceMemory<float> &offset, 317 const DeviceMemory<float> &estimated_mean, 318 const DeviceMemory<float> &estimated_variance, 321 DeviceMemory<float> *y, DeviceMemory<float> *batch_mean, 322 DeviceMemory<float> *batch_var, DeviceMemory<float> *saved_mean, 323 DeviceMemory<float> *saved_inv_var, bool is_training [all...] |
device_memory.h | 22 // DeviceMemory<T>. 38 // DeviceMemory<T>. 105 // For example, DeviceMemory<int> is a simple wrapper around DeviceMemoryBase 110 class DeviceMemory final : public DeviceMemoryBase { 113 DeviceMemory() : DeviceMemoryBase(nullptr, 0) {} 114 DeviceMemory(std::nullptr_t) : DeviceMemory() {} 118 explicit DeviceMemory(const DeviceMemoryBase &other) 129 // Create a typed area of DeviceMemory with a given opaque pointer and the 132 static DeviceMemory<ElemT> MakeFromByteSize(void *opaque, uint64 bytes) [all...] |
temporary_device_memory.h | 113 DeviceMemory<T>* mutable_device_memory() { 115 return reinterpret_cast<DeviceMemory<T>*>( 120 const DeviceMemory<T>& device_memory() const { 122 return reinterpret_cast<const DeviceMemory<T>&>(
|
fft.h | 27 // DeviceMemory<std::complex<float>> x = 29 // DeviceMemory<std::complex<float>> y = 56 class DeviceMemory; 182 const DeviceMemory<std::complex<float>> &input, 183 DeviceMemory<std::complex<float>> *output) = 0; 185 const DeviceMemory<std::complex<double>> &input, 186 DeviceMemory<std::complex<double>> *output) = 0; 190 const DeviceMemory<float> &input, 191 DeviceMemory<std::complex<float>> *output) = 0; 193 const DeviceMemory<double> &input [all...] |
scratch_allocator.h | 55 virtual port::StatusOr<DeviceMemory<uint8>> AllocateBytes( 71 port::StatusOr<DeviceMemory<uint8>> AllocateBytes(Stream* stream,
|
scratch_allocator.cc | 33 port::StatusOr<DeviceMemory<uint8>> OneTimeScratchAllocator::AllocateBytes(
|
stream_executor_pimpl.h | 113 DeviceMemory<T> AllocateArray(uint64 element_count); 124 DeviceMemory<T> AllocateScalar() { 137 DeviceMemory<T> AllocateZeroed(); 154 DeviceMemory<T> AllocateSubBuffer(DeviceMemory<T> *parent, 160 ScopedDeviceMemory<T> AllocateOwnedSubBuffer(DeviceMemory<T> *parent, 174 port::StatusOr<DeviceMemory<T>> GetSymbol(const string &symbol_name); 176 // Deallocate the DeviceMemory previously allocated via this interface. 263 port::Status SynchronousMemcpyD2H(const DeviceMemory<T> &device_src, 668 inline DeviceMemory<T> StreamExecutor::AllocateArray(uint64 element_count) [all...] |
/external/tensorflow/tensorflow/core/util/ |
stream_executor_util.h | 30 // Map a Tensor as a DeviceMemory object wrapping the given typed 33 static perftools::gputools::DeviceMemory<T> AsDeviceMemory(const Tensor& t) { 35 return perftools::gputools::DeviceMemory<T>(
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
cudnn_batchnorm_thunk.cc | 109 se::DeviceMemory<float> output(buffer_allocations.GetDeviceAddress(output_)); 111 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(operand_)), 112 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(scale_)), 113 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(offset_)), 114 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(mean_)), 115 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(variance_)), 171 se::DeviceMemory<float> output_data( 173 se::DeviceMemory<float> output_mean( 175 se::DeviceMemory<float> output_inv_stddev( 178 se::DeviceMemory<float> null_device_ptr(nullptr) [all...] |
fft_thunk.cc | 52 se::port::StatusOr<se::DeviceMemory<uint8>> FftScratchAllocator::AllocateBytes( 74 return se::DeviceMemory<uint8>(allocated_buffer); 175 se::DeviceMemory<complex64> input_data( 177 se::DeviceMemory<complex64> output_data( 184 se::DeviceMemory<complex64> input_data( 186 se::DeviceMemory<complex64> output_data( 200 se::DeviceMemory<float> input_data( 202 se::DeviceMemory<complex64> output_data( 209 se::DeviceMemory<complex64> input_data( 211 se::DeviceMemory<float> output_data [all...] |
convolution_thunk.h | 76 perftools::gputools::DeviceMemory<float> input_data, 78 perftools::gputools::DeviceMemory<float> filter_data, 80 perftools::gputools::DeviceMemory<float> output_data,
|
cudnn_convolution_runner.cc | 27 using se::DeviceMemory; 52 se::port::StatusOr<DeviceMemory<uint8>> AllocateBytes( 65 return se::DeviceMemory<uint8>(scratch_); 76 const Shape& output_shape, DeviceMemory<T> input_buf, 77 DeviceMemory<T> filter_buf, DeviceMemory<T> output_buf, 249 se::DeviceMemory<float>(input_buf), se::DeviceMemory<float>(filter_buf), 250 se::DeviceMemory<float>(output_buf), scratch_allocator, window, dnums, 254 se::DeviceMemory<Eigen::half>(input_buf) [all...] |
tuple_thunk.cc | 32 se::DeviceMemory<void*> dest_buffer_address(
|
/external/tensorflow/tensorflow/contrib/rnn/kernels/ |
blas_gemm.cc | 29 perftools::gputools::DeviceMemory<T> AsDeviceMemory(const T* cuda_memory) { 31 perftools::gputools::DeviceMemory<T> typed(wrapped);
|