Home | History | Annotate | Download | only in cuda
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // Defines the CUDAStream type - the CUDA-specific implementation of the generic
     17 // StreamExecutor Stream interface.
     18 
     19 #ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_STREAM_H_
     20 #define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_STREAM_H_
     21 
     22 #include "tensorflow/stream_executor/cuda/cuda_driver.h"
     23 #include "tensorflow/stream_executor/platform/thread_annotations.h"
     24 #include "tensorflow/stream_executor/stream_executor_internal.h"
     25 
     26 namespace perftools {
     27 namespace gputools {
     28 namespace cuda {
     29 
     30 class CUDAExecutor;
     31 
     32 // Wraps a CUstream in order to satisfy the platform-independent
     33 // StreamInterface.
     34 //
     35 // Thread-safe post-initialization.
     36 class CUDAStream : public internal::StreamInterface {
     37  public:
     38   explicit CUDAStream(CUDAExecutor *parent)
     39       : parent_(parent), cuda_stream_(nullptr), completed_event_(nullptr) {}
     40 
     41   // Note: teardown is handled by a parent's call to DeallocateStream.
     42   ~CUDAStream() override {}
     43 
     44   void *CudaStreamHack() override { return cuda_stream_; }
     45   void **CudaStreamMemberHack() override {
     46     return reinterpret_cast<void **>(&cuda_stream_);
     47   }
     48 
     49   // Explicitly initialize the CUDA resources associated with this stream, used
     50   // by StreamExecutor::AllocateStream().
     51   bool Init();
     52 
     53   // Explicitly destroy the CUDA resources associated with this stream, used by
     54   // StreamExecutor::DeallocateStream().
     55   void Destroy();
     56 
     57   // Returns true if no work is pending or executing on the stream.
     58   bool IsIdle() const;
     59 
     60   // Retrieves an event which indicates that all work enqueued into the stream
     61   // has completed. Ownership of the event is not transferred to the caller, the
     62   // event is owned by this stream.
     63   CUevent* completed_event() { return &completed_event_; }
     64 
     65   // Returns the CUstream value for passing to the CUDA API.
     66   //
     67   // Precond: this CUDAStream has been allocated (otherwise passing a nullptr
     68   // into the NVIDIA library causes difficult-to-understand faults).
     69   CUstream cuda_stream() const {
     70     DCHECK(cuda_stream_ != nullptr);
     71     return const_cast<CUstream>(cuda_stream_);
     72   }
     73 
     74   CUDAExecutor *parent() const { return parent_; }
     75 
     76  private:
     77   CUDAExecutor *parent_;  // Executor that spawned this stream.
     78   CUstream cuda_stream_;  // Wrapped CUDA stream handle.
     79 
     80   // Event that indicates this stream has completed.
     81   CUevent completed_event_ = nullptr;
     82 };
     83 
// Helper functions to simplify extremely common flows.

// Converts a Stream to the underlying CUDAStream implementation.
// NOTE(review): the definition is not in this header; presumably `stream` must
// be non-null and backed by a CUDAStream -- confirm against the definition.
CUDAStream *AsCUDAStream(Stream *stream);

// Extracts a CUstream from a CUDAStream-backed Stream object.
// NOTE(review): presumably the same handle CUDAStream::cuda_stream() returns
// for that stream, so the stream would need to be allocated first -- confirm
// against the definition.
CUstream AsCUDAStreamValue(Stream *stream);
     90 
     91 }  // namespace cuda
     92 }  // namespace gputools
     93 }  // namespace perftools
     94 
     95 #endif  // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_STREAM_H_
     96