HomeSort by relevance Sort by last modified time
    Searched refs:DeviceMemory (Results 1 - 25 of 58) sorted by null

1 2 3

  /external/tensorflow/tensorflow/stream_executor/cuda/
cuda_dnn.h 67 const DeviceMemory<Eigen::half>& input_data,
69 const DeviceMemory<Eigen::half>& input_h_data,
71 const DeviceMemory<Eigen::half>& input_c_data,
72 const DeviceMemory<Eigen::half>& params,
74 DeviceMemory<Eigen::half>* output_data,
76 DeviceMemory<Eigen::half>* output_h_data,
78 DeviceMemory<Eigen::half>* output_c_data, bool is_training,
84 const DeviceMemory<float>& input_data,
86 const DeviceMemory<float>& input_h_data,
88 const DeviceMemory<float>& input_c_data
    [all...]
cuda_rng.h 32 class DeviceMemory;
58 bool DoPopulateRandUniform(Stream *stream, DeviceMemory<float> *v) override;
59 bool DoPopulateRandUniform(Stream *stream, DeviceMemory<double> *v) override;
61 DeviceMemory<std::complex<float>> *v) override;
63 DeviceMemory<std::complex<double>> *v) override;
65 DeviceMemory<float> *v) override;
67 DeviceMemory<double> *v) override;
75 bool DoPopulateRandUniformInternal(Stream *stream, DeviceMemory<T> *v);
78 DeviceMemory<ElemT> *v, FuncT func);
cuda_blas.cc 642 const DeviceMemory<float> &x, int incx,
643 DeviceMemory<float> *result) {
650 const DeviceMemory<double> &x, int incx,
651 DeviceMemory<double> *result) {
658 const DeviceMemory<std::complex<float>> &x, int incx,
659 DeviceMemory<float> *result) {
666 const DeviceMemory<std::complex<double>> &x, int incx,
667 DeviceMemory<double> *result) {
674 const DeviceMemory<float> &x, int incx,
675 DeviceMemory<float> *y, int incy)
    [all...]
cuda_blas.h 114 const port::ArraySlice<DeviceMemory<T> *> &a_array, int lda,
115 const port::ArraySlice<DeviceMemory<T> *> &b_array, int ldb, T beta,
116 const port::ArraySlice<DeviceMemory<T> *> &c_array, int ldc,
128 uint64 n, uint64 k, const CompT &alpha, const DeviceMemory<InT> &a,
129 int lda, const DeviceMemory<InT> &b, int ldb, const CompT &beta,
130 DeviceMemory<OutT> *c, int ldc, blas::ComputationType computation_type,
138 uint64 n, uint64 k, const ParamType &alpha, const DeviceMemory<T> &a,
139 int lda, const DeviceMemory<T> &b, int ldb, const ParamType &beta,
140 DeviceMemory<T> *c, int ldc, blas::ProfileResult *output_profile_result);
146 const DeviceMemory<T> &a, int lda
    [all...]
cuda_fft.h 90 DeviceMemory<uint8> scratch_;
122 const DeviceMemory<InputT> &input,
123 DeviceMemory<OutputT> *output);
129 const DeviceMemory<InputT> &input,
130 DeviceMemory<OutputT> *output);
cuda_dnn.cc     [all...]
cuda_helpers.h 34 class DeviceMemory;
38 // Converts a const DeviceMemory reference to its underlying typed pointer in
42 const T *CUDAMemory(const DeviceMemory<T> &mem) {
46 // Converts a (non-const) DeviceMemory pointer reference to its underlying typed
49 T *CUDAMemoryMutable(DeviceMemory<T> *mem) {
  /external/tensorflow/tensorflow/stream_executor/
blas.h 27 // DeviceMemory<float> x = stream_exec->AllocateArray<float>(1024);
28 // DeviceMemory<float> y = stream_exec->AllocateArray<float>(1024);
59 class DeviceMemory;
179 const DeviceMemory<float> &x, int incx,
180 DeviceMemory<float> *result) = 0;
182 const DeviceMemory<double> &x, int incx,
183 DeviceMemory<double> *result) = 0;
185 const DeviceMemory<std::complex<float>> &x, int incx,
186 DeviceMemory<float> *result) = 0;
188 const DeviceMemory<std::complex<double>> &x, int incx
    [all...]
stream.h 61 class DeviceMemory;
141 // like DeviceMemory or primitive types such as int. What arguments you may
152 // `const DeviceMemory<T>` is considered "pack compatible" with a
153 // `const DeviceMemory<T>&` formal parameter; in part, because we don't have
222 const DeviceMemory<float> &x, const DeviceMemory<float> &scale,
223 const DeviceMemory<float> &offset,
224 const DeviceMemory<float> &estimated_mean,
225 const DeviceMemory<float> &estimated_variance,
228 DeviceMemory<float> *y, DeviceMemory<float> *batch_mean
    [all...]
dnn.h 920 Stream* stream, const DeviceMemory<float>& x,
921 const DeviceMemory<float>& scale, const DeviceMemory<float>& offset,
922 const DeviceMemory<float>& estimated_mean,
923 const DeviceMemory<float>& estimated_variance,
    [all...]
rng.h 30 class DeviceMemory;
52 // DeviceMemory element type; i.e. populates DeviceMemory<float> with random
55 DeviceMemory<float> *v) = 0;
57 DeviceMemory<double> *v) = 0;
59 DeviceMemory<std::complex<float>> *v) = 0;
61 DeviceMemory<std::complex<double>> *v) = 0;
66 DeviceMemory<float> *v) {
72 double stddev, DeviceMemory<double> *v) {
stream.cc 315 const DeviceMemory<float> &x, const DeviceMemory<float> &scale,
316 const DeviceMemory<float> &offset,
317 const DeviceMemory<float> &estimated_mean,
318 const DeviceMemory<float> &estimated_variance,
321 DeviceMemory<float> *y, DeviceMemory<float> *batch_mean,
322 DeviceMemory<float> *batch_var, DeviceMemory<float> *saved_mean,
323 DeviceMemory<float> *saved_inv_var, bool is_training
    [all...]
device_memory.h 22 // DeviceMemory<T>.
38 // DeviceMemory<T>.
105 // For example, DeviceMemory<int> is a simple wrapper around DeviceMemoryBase
110 class DeviceMemory final : public DeviceMemoryBase {
113 DeviceMemory() : DeviceMemoryBase(nullptr, 0) {}
114 DeviceMemory(std::nullptr_t) : DeviceMemory() {}
118 explicit DeviceMemory(const DeviceMemoryBase &other)
129 // Create a typed area of DeviceMemory with a given opaque pointer and the
132 static DeviceMemory<ElemT> MakeFromByteSize(void *opaque, uint64 bytes)
    [all...]
temporary_device_memory.h 113 DeviceMemory<T>* mutable_device_memory() {
115 return reinterpret_cast<DeviceMemory<T>*>(
120 const DeviceMemory<T>& device_memory() const {
122 return reinterpret_cast<const DeviceMemory<T>&>(
fft.h 27 // DeviceMemory<std::complex<float>> x =
29 // DeviceMemory<std::complex<float>> y =
56 class DeviceMemory;
182 const DeviceMemory<std::complex<float>> &input,
183 DeviceMemory<std::complex<float>> *output) = 0;
185 const DeviceMemory<std::complex<double>> &input,
186 DeviceMemory<std::complex<double>> *output) = 0;
190 const DeviceMemory<float> &input,
191 DeviceMemory<std::complex<float>> *output) = 0;
193 const DeviceMemory<double> &input
    [all...]
scratch_allocator.h 55 virtual port::StatusOr<DeviceMemory<uint8>> AllocateBytes(
71 port::StatusOr<DeviceMemory<uint8>> AllocateBytes(Stream* stream,
scratch_allocator.cc 33 port::StatusOr<DeviceMemory<uint8>> OneTimeScratchAllocator::AllocateBytes(
stream_executor_pimpl.h 113 DeviceMemory<T> AllocateArray(uint64 element_count);
124 DeviceMemory<T> AllocateScalar() {
137 DeviceMemory<T> AllocateZeroed();
154 DeviceMemory<T> AllocateSubBuffer(DeviceMemory<T> *parent,
160 ScopedDeviceMemory<T> AllocateOwnedSubBuffer(DeviceMemory<T> *parent,
174 port::StatusOr<DeviceMemory<T>> GetSymbol(const string &symbol_name);
176 // Deallocate the DeviceMemory previously allocated via this interface.
263 port::Status SynchronousMemcpyD2H(const DeviceMemory<T> &device_src,
668 inline DeviceMemory<T> StreamExecutor::AllocateArray(uint64 element_count)
    [all...]
  /external/tensorflow/tensorflow/core/util/
stream_executor_util.h 30 // Map a Tensor as a DeviceMemory object wrapping the given typed
33 static perftools::gputools::DeviceMemory<T> AsDeviceMemory(const Tensor& t) {
35 return perftools::gputools::DeviceMemory<T>(
  /external/tensorflow/tensorflow/compiler/xla/service/gpu/
cudnn_batchnorm_thunk.cc 109 se::DeviceMemory<float> output(buffer_allocations.GetDeviceAddress(output_));
111 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(operand_)),
112 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(scale_)),
113 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(offset_)),
114 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(mean_)),
115 se::DeviceMemory<float>(buffer_allocations.GetDeviceAddress(variance_)),
171 se::DeviceMemory<float> output_data(
173 se::DeviceMemory<float> output_mean(
175 se::DeviceMemory<float> output_inv_stddev(
178 se::DeviceMemory<float> null_device_ptr(nullptr)
    [all...]
fft_thunk.cc 52 se::port::StatusOr<se::DeviceMemory<uint8>> FftScratchAllocator::AllocateBytes(
74 return se::DeviceMemory<uint8>(allocated_buffer);
175 se::DeviceMemory<complex64> input_data(
177 se::DeviceMemory<complex64> output_data(
184 se::DeviceMemory<complex64> input_data(
186 se::DeviceMemory<complex64> output_data(
200 se::DeviceMemory<float> input_data(
202 se::DeviceMemory<complex64> output_data(
209 se::DeviceMemory<complex64> input_data(
211 se::DeviceMemory<float> output_data
    [all...]
convolution_thunk.h 76 perftools::gputools::DeviceMemory<float> input_data,
78 perftools::gputools::DeviceMemory<float> filter_data,
80 perftools::gputools::DeviceMemory<float> output_data,
cudnn_convolution_runner.cc 27 using se::DeviceMemory;
52 se::port::StatusOr<DeviceMemory<uint8>> AllocateBytes(
65 return se::DeviceMemory<uint8>(scratch_);
76 const Shape& output_shape, DeviceMemory<T> input_buf,
77 DeviceMemory<T> filter_buf, DeviceMemory<T> output_buf,
249 se::DeviceMemory<float>(input_buf), se::DeviceMemory<float>(filter_buf),
250 se::DeviceMemory<float>(output_buf), scratch_allocator, window, dnums,
254 se::DeviceMemory<Eigen::half>(input_buf)
    [all...]
tuple_thunk.cc 32 se::DeviceMemory<void*> dest_buffer_address(
  /external/tensorflow/tensorflow/contrib/rnn/kernels/
blas_gemm.cc 29 perftools::gputools::DeviceMemory<T> AsDeviceMemory(const T* cuda_memory) {
31 perftools::gputools::DeviceMemory<T> typed(wrapped);

Completed in 351 milliseconds

1 2 3