Home | History | Annotate | Download | only in cuda
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_H_
     17 #define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_H_
     18 
     19 #include <memory>
     20 #include "tensorflow/stream_executor/platform/port.h"
     21 #include <vector>
     22 
     23 #include "tensorflow/stream_executor/executor_cache.h"
     24 #include "tensorflow/stream_executor/lib/statusor.h"
     25 #include "tensorflow/stream_executor/multi_platform_manager.h"
     26 #include "tensorflow/stream_executor/platform.h"
     27 #include "tensorflow/stream_executor/platform/mutex.h"
     28 #include "tensorflow/stream_executor/platform/port.h"
     29 #include "tensorflow/stream_executor/platform/thread_annotations.h"
     30 #include "tensorflow/stream_executor/stream_executor_internal.h"
     31 #include "tensorflow/stream_executor/stream_executor_pimpl.h"
     32 #include "tensorflow/stream_executor/trace_listener.h"
     33 
     34 namespace perftools {
     35 namespace gputools {
     36 namespace cuda {
     37 
     38 // Opaque and unique identifier for the CUDA platform plugin.
     39 // This is needed so that plugins can refer to/identify this platform without
     40 // instantiating a CudaPlatform object.
        // Only declared here (`extern`); the definition is not in this header.
     41 extern const Platform::Id kCudaPlatformId;
     42 
     43 // Cuda-specific platform plugin, registered as a singleton value via module
     44 // initializer.
        //
        // Overrides the Platform interface (the methods marked `override` below) and
        // adds bus / NUMA-node queries of its own. All member functions are only
        // declared here; their definitions are not in this header.
        // NOTE(review): thread-safety cannot be determined from this header — check
        // the implementation before calling these methods concurrently.
     45 class CudaPlatform : public Platform {
     46  public:
     47   CudaPlatform();
     48   ~CudaPlatform() override;
     49 
     50   // CudaPlatform-specific functionality:
          //
     51   // Returns the number of distinct buses / NUMA nodes on the machine.
          // NOTE(review): presumably derived from min_numa_node_ and limit_numa_node_
          // (see their comments below) — confirm against the implementation.
     52   int BusCount();
     53 
     54   // Returns the bus/NUMA node for the specified device ordinal.
          // NOTE(review): behavior for an out-of-range ordinal is not visible here.
     55   int DeviceToBus(int device_ordinal);
     56 
     57   // Returns the lowest-ordinal-number StreamExecutor on the specified bus.
          // The result is wrapped in a StatusOr, so failure is reported as a status
          // rather than a null pointer; the failure conditions are not visible here.
     58   port::StatusOr<StreamExecutor*> FirstExecutorForBus(int bus_ordinal);
     59 
     60   // Platform interface implementation:
          //
     61   // Returns the same value as kCudaPlatformId above.
     62   Platform::Id id() const override;
     63 
     64   // Returns -1 as a sentinel on internal failure (and logs the error).
          // Callers must therefore check for a negative result before using the
          // value as a count.
     65   int VisibleDeviceCount() const override;
     66 
          // Returns this platform's name by const reference.
          // NOTE(review): presumably a reference to name_ — confirm.
     67   const string& Name() const override;
     68 
          // Returns a StreamExecutor for the device with the given ordinal, or an
          // error status. The raw pointer return means the caller does not receive
          // ownership through the type.
          // NOTE(review): presumably served from executor_cache_ — confirm.
     69   port::StatusOr<StreamExecutor*> ExecutorForDevice(int ordinal) override;
     70 
          // Same as ExecutorForDevice, but additionally takes a PluginConfig.
          // NOTE(review): how `config` influences the returned executor is not
          // visible from this header.
     71   port::StatusOr<StreamExecutor*> ExecutorForDeviceWithPluginConfig(
     72       int ordinal, const PluginConfig& config) override;
     73 
          // Returns a StreamExecutor matching `config`, or an error status.
          // NOTE(review): presumably looks up / populates executor_cache_ — confirm.
     74   port::StatusOr<StreamExecutor*> GetExecutor(
     75       const StreamExecutorConfig& config) override;
     76 
          // Returns a StreamExecutor matching `config` wrapped in a std::unique_ptr,
          // so on success the caller owns the returned executor.
          // NOTE(review): the name suggests executor_cache_ is bypassed — confirm.
     77   port::StatusOr<std::unique_ptr<StreamExecutor>> GetUncachedExecutor(
     78       const StreamExecutorConfig& config) override;
     79 
          // Registers a trace listener. The listener is passed as a std::unique_ptr
          // by value, so ownership is transferred away from the caller.
     80   void RegisterTraceListener(std::unique_ptr<TraceListener> listener) override;
     81 
          // Unregisters a trace listener identified by raw pointer.
          // NOTE(review): what happens to the listener object afterwards is not
          // visible from this header.
     82   void UnregisterTraceListener(TraceListener* listener) override;
     83 
     84  private:
     85   // Determines the number of NUMA nodes and the assignment of executor to each.
     86   void InspectNumaNodes();
     87 
     88   // This platform's name.
     89   string name_;
     90 
     91   // Cache of created executors.
     92   ExecutorCache executor_cache_;
     93 
     94   // The smallest NUMA node value for any device managed by this machine
     95   // manager. Used, along with limit_numa_node_, to convert NUMA nodes into bus
     96   // ordinals. The NUMA node space occupied by GPUs is assumed to be dense.
          // No in-class initializer: the value is whatever the constructor (defined
          // elsewhere) or later code assigns.
     97   int min_numa_node_;
     98 
     99   // Larger than the NUMA node value for any device managed by this machine
    100   // manager.
          // Like min_numa_node_, this has no in-class initializer.
    101   int limit_numa_node_;
    102 
          // Macro defined outside this file; presumably disables copy construction
          // and copy assignment for this class, as its name indicates.
    103   SE_DISALLOW_COPY_AND_ASSIGN(CudaPlatform);
    104 };
    105 
    106 }  // namespace cuda
    107 }  // namespace gputools
    108 }  // namespace perftools
    109 
    110 #endif  // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_PLATFORM_H_
    111