/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMMON_RUNTIME_GPU_PROCESS_STATE_H_
#define TENSORFLOW_COMMON_RUNTIME_GPU_PROCESS_STATE_H_

#include <functional>
#include <map>
#include <unordered_map>
#include <vector>

#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/protobuf/config.pb.h"

namespace tensorflow {

class Allocator;
class VisitableAllocator;
class PoolAllocator;

// Singleton that manages per-process state, e.g. allocation
// of shared resources.
class ProcessState {
 public:
  static ProcessState* singleton();

  // Descriptor for memory allocation attributes, used by optional
  // runtime correctness analysis logic.
  struct MemDesc {
    enum MemLoc { CPU, GPU };
    MemLoc loc;
    int dev_index;
    bool gpu_registered;
    bool nic_registered;
    MemDesc()
        : loc(CPU),
          dev_index(0),
          gpu_registered(false),
          nic_registered(false) {}
    string DebugString();
  };

  // Query whether any GPU device has been created so far.
  // Disable thread safety analysis since a race is benign here.
  bool HasGPUDevice() const NO_THREAD_SAFETY_ANALYSIS {
    return gpu_device_enabled_;
  }

  // Set the flag to indicate a GPU device has been created.
  // Disable thread safety analysis since a race is benign here.
  void EnableGPUDevice() NO_THREAD_SAFETY_ANALYSIS {
    gpu_device_enabled_ = true;
  }

  // Returns what we know about the memory at ptr.
  // If nothing is known, the result is CPU 0 with no other attributes set.
  MemDesc PtrType(const void* ptr);
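
  // Illustrative sketch (not part of the original header): querying the
  // recorded attributes of a pointer, assuming the optional recording
  // allocators are active.
  //
  //   ProcessState::MemDesc md = ProcessState::singleton()->PtrType(ptr);
  //   if (md.loc == ProcessState::MemDesc::GPU) {
  //     LOG(INFO) << "ptr is GPU memory on device " << md.dev_index;
  //   }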

  // Returns the one CPUAllocator used for the given numa_node.
  // TEMPORARY: ignores numa_node.
  Allocator* GetCPUAllocator(int numa_node);
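
  // Illustrative sketch: fetching the process-wide CPU allocator and using
  // it for a raw allocation.  The sizes below are arbitrary example values.
  //
  //   Allocator* cpu_alloc = ProcessState::singleton()->GetCPUAllocator(0);
  //   void* buf = cpu_alloc->AllocateRaw(Allocator::kAllocatorAlignment, 1024);
  //   ...
  //   cpu_alloc->DeallocateRaw(buf);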

  // Returns the one GPU allocator used for the indexed GPU.
  // Note that this is a system GPU index, not (necessarily) a brain
  // device index.
  //
  // 'total_bytes' is the total number of bytes that should be made
  // available to the allocator.  The first call to this function for
  // a given tf_gpu_id creates the allocator, so only the total_bytes
  // passed on that first call has any effect.
  //
  // "Allocator type" describes the type of algorithm to use for the
  // underlying allocator.  REQUIRES: Must be a valid type (see
  // config.proto for the list of supported strings).
  //
  // REQUIRES: tf_gpu_id must be a valid id for a BaseGPUDevice available in
  // the current system environment.  Otherwise returns nullptr.
  virtual Allocator* GetGPUAllocator(const GPUOptions& options,
                                     TfGpuId tf_gpu_id, size_t total_bytes);
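
  // Illustrative sketch: creating (or fetching) the allocator for GPU 0 with
  // a 1GiB limit.  The GPUOptions value and the 1GiB figure are arbitrary
  // example choices, not requirements of this interface.
  //
  //   GPUOptions options;
  //   Allocator* gpu_alloc = ProcessState::singleton()->GetGPUAllocator(
  //       options, TfGpuId(0), 1ULL << 30);
  //   if (gpu_alloc == nullptr) { /* TfGpuId(0) was not a valid device */ }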

  // Returns an allocator for CUDA host (pinned) memory for the given
  // numa_node.
  virtual Allocator* GetCUDAHostAllocator(int numa_node);
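
  // Illustrative sketch: allocating CUDA-pinned host memory, e.g. for a
  // host<->device staging buffer.  numa_node 0 and the size are arbitrary
  // example values.
  //
  //   Allocator* host_alloc =
  //       ProcessState::singleton()->GetCUDAHostAllocator(0);
  //   void* staging =
  //       host_alloc->AllocateRaw(Allocator::kAllocatorAlignment, 1 << 20);
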
  // Registers a function to be called once on every new Region
  // allocated by every GPURegionAllocator proximate to the specified
  // bus.  The AllocVisitor is provided with a memory pointer and the
  // size of the area it identifies.  The pointer is not guaranteed to
  // be valid after the call terminates.  The intention is for this
  // interface to be used for network device memory registration.
  // "bus_id" is platform-specific.  On many platforms it
  // should be 0.  On machines with multiple PCIe buses, it should be
  // the index of one of the PCIe buses.  If the bus_id is invalid,
  // results are undefined.
  typedef std::function<void(void*, size_t)> AllocVisitor;
  virtual void AddGPUAllocVisitor(int bus_id, AllocVisitor visitor);
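
  // Illustrative sketch: registering a visitor that pins every new GPU
  // region on bus 0 for a network device.  RegisterRdmaRegion is a
  // hypothetical hook, not an API provided by TensorFlow.
  //
  //   ProcessState::singleton()->AddGPUAllocVisitor(
  //       0 /* bus_id */, [](void* ptr, size_t num_bytes) {
  //         RegisterRdmaRegion(ptr, num_bytes);  // hypothetical NIC hook
  //       });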

  typedef std::unordered_map<const void*, MemDesc> MDMap;

 protected:
  ProcessState();

  static ProcessState* instance_;
  bool gpu_device_enabled_;

  mutex mu_;

  std::vector<Allocator*> cpu_allocators_ GUARDED_BY(mu_);
  std::vector<VisitableAllocator*> gpu_allocators_ GUARDED_BY(mu_);
  std::vector<std::vector<AllocVisitor>> gpu_visitors_ GUARDED_BY(mu_);
  std::vector<Allocator*> cuda_host_allocators_ GUARDED_BY(mu_);

  virtual ~ProcessState();

  // Optional RecordingAllocators that wrap the corresponding
  // Allocators for runtime attribute use analysis.
  MDMap mem_desc_map_;
  std::vector<Allocator*> cpu_al_ GUARDED_BY(mu_);
  std::vector<Allocator*> gpu_al_ GUARDED_BY(mu_);
  std::vector<Allocator*> cuda_al_ GUARDED_BY(mu_);
};

namespace internal {
class RecordingAllocator : public Allocator {
 public:
  RecordingAllocator(ProcessState::MDMap* mm, Allocator* a,
                     ProcessState::MemDesc md, mutex* mu)
      : mm_(mm), a_(a), md_(md), mu_(mu) {}

  string Name() override { return a_->Name(); }
  void* AllocateRaw(size_t alignment, size_t num_bytes) override {
    void* p = a_->AllocateRaw(alignment, num_bytes);
    mutex_lock l(*mu_);
    (*mm_)[p] = md_;
    return p;
  }
  void DeallocateRaw(void* p) override {
    mutex_lock l(*mu_);
    auto iter = mm_->find(p);
    mm_->erase(iter);
    a_->DeallocateRaw(p);
  }
  bool TracksAllocationSizes() override { return a_->TracksAllocationSizes(); }
  size_t RequestedSize(const void* p) override { return a_->RequestedSize(p); }
  size_t AllocatedSize(const void* p) override { return a_->AllocatedSize(p); }
  void GetStats(AllocatorStats* stats) override { a_->GetStats(stats); }
  void ClearStats() override { a_->ClearStats(); }
  ProcessState::MDMap* mm_;  // not owned
  Allocator* a_;             // not owned
  ProcessState::MemDesc md_;
  mutex* mu_;
};
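
// Illustrative sketch (not part of the original header): ProcessState wraps
// its allocators in RecordingAllocator so that PtrType() can later look up a
// pointer's MemDesc.  base_allocator below is a placeholder for the wrapped
// allocator; mem_desc_map_ and mu_ are ProcessState's own members.
//
//   ProcessState::MemDesc md;
//   md.loc = ProcessState::MemDesc::GPU;
//   md.dev_index = 0;
//   Allocator* recording =
//       new RecordingAllocator(&mem_desc_map_, base_allocator, md, &mu_);
//   void* p = recording->AllocateRaw(Allocator::kAllocatorAlignment, 256);
//   // mem_desc_map_[p] now holds md; DeallocateRaw(p) removes the entry.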
}  // namespace internal
}  // namespace tensorflow
#endif  // TENSORFLOW_COMMON_RUNTIME_GPU_PROCESS_STATE_H_