1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // Defines the CUDAStream type - the CUDA-specific implementation of the generic 17 // StreamExecutor Stream interface. 18 19 #ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_STREAM_H_ 20 #define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_STREAM_H_ 21 22 #include "tensorflow/stream_executor/cuda/cuda_driver.h" 23 #include "tensorflow/stream_executor/platform/thread_annotations.h" 24 #include "tensorflow/stream_executor/stream_executor_internal.h" 25 26 namespace perftools { 27 namespace gputools { 28 namespace cuda { 29 30 class CUDAExecutor; 31 32 // Wraps a CUstream in order to satisfy the platform-independent 33 // StreamInterface. 34 // 35 // Thread-safe post-initialization. 36 class CUDAStream : public internal::StreamInterface { 37 public: 38 explicit CUDAStream(CUDAExecutor *parent) 39 : parent_(parent), cuda_stream_(nullptr), completed_event_(nullptr) {} 40 41 // Note: teardown is handled by a parent's call to DeallocateStream. 42 ~CUDAStream() override {} 43 44 void *CudaStreamHack() override { return cuda_stream_; } 45 void **CudaStreamMemberHack() override { 46 return reinterpret_cast<void **>(&cuda_stream_); 47 } 48 49 // Explicitly initialize the CUDA resources associated with this stream, used 50 // by StreamExecutor::AllocateStream(). 51 bool Init(); 52 53 // Explicitly destroy the CUDA resources associated with this stream, used by 54 // StreamExecutor::DeallocateStream(). 55 void Destroy(); 56 57 // Returns true if no work is pending or executing on the stream. 58 bool IsIdle() const; 59 60 // Retrieves an event which indicates that all work enqueued into the stream 61 // has completed. Ownership of the event is not transferred to the caller, the 62 // event is owned by this stream. 63 CUevent* completed_event() { return &completed_event_; } 64 65 // Returns the CUstream value for passing to the CUDA API. 66 // 67 // Precond: this CUDAStream has been allocated (otherwise passing a nullptr 68 // into the NVIDIA library causes difficult-to-understand faults). 69 CUstream cuda_stream() const { 70 DCHECK(cuda_stream_ != nullptr); 71 return const_cast<CUstream>(cuda_stream_); 72 } 73 74 CUDAExecutor *parent() const { return parent_; } 75 76 private: 77 CUDAExecutor *parent_; // Executor that spawned this stream. 78 CUstream cuda_stream_; // Wrapped CUDA stream handle. 79 80 // Event that indicates this stream has completed. 81 CUevent completed_event_ = nullptr; 82 }; 83 84 // Helper functions to simplify extremely common flows. 85 // Converts a Stream to the underlying CUDAStream implementation. 86 CUDAStream *AsCUDAStream(Stream *stream); 87 88 // Extracts a CUstream from a CUDAStream-backed Stream object. 89 CUstream AsCUDAStreamValue(Stream *stream); 90 91 } // namespace cuda 92 } // namespace gputools 93 } // namespace perftools 94 95 #endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_STREAM_H_ 96