1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMMON_RUNTIME_GPU_PROCESS_STATE_H_ 17 #define TENSORFLOW_COMMON_RUNTIME_GPU_PROCESS_STATE_H_ 18 19 #include <functional> 20 #include <map> 21 #include <unordered_map> 22 #include <vector> 23 24 #include "tensorflow/core/common_runtime/gpu/gpu_id.h" 25 #include "tensorflow/core/framework/allocator.h" 26 #include "tensorflow/core/platform/mutex.h" 27 #include "tensorflow/core/platform/thread_annotations.h" 28 #include "tensorflow/core/platform/types.h" 29 #include "tensorflow/core/protobuf/config.pb.h" 30 31 namespace tensorflow { 32 33 class Allocator; 34 class VisitableAllocator; 35 class PoolAllocator; 36 37 // Singleton that manages per-process state, e.g. allocation 38 // of shared resources. 39 class ProcessState { 40 public: 41 static ProcessState* singleton(); 42 43 // Descriptor for memory allocation attributes, used by optional 44 // runtime correctness analysis logic. 45 struct MemDesc { 46 enum MemLoc { CPU, GPU }; 47 MemLoc loc; 48 int dev_index; 49 bool gpu_registered; 50 bool nic_registered; 51 MemDesc() 52 : loc(CPU), 53 dev_index(0), 54 gpu_registered(false), 55 nic_registered(false) {} 56 string DebugString(); 57 }; 58 59 // Query whether any GPU device has been created so far. 60 // Disable thread safety analysis since a race is benign here. 61 bool HasGPUDevice() const NO_THREAD_SAFETY_ANALYSIS { 62 return gpu_device_enabled_; 63 } 64 65 // Set the flag to indicate a GPU device has been created. 66 // Disable thread safety analysis since a race is benign here. 67 void EnableGPUDevice() NO_THREAD_SAFETY_ANALYSIS { 68 gpu_device_enabled_ = true; 69 } 70 71 // Returns what we know about the memory at ptr. 72 // If we know nothing, it's called CPU 0 with no other attributes. 73 MemDesc PtrType(const void* ptr); 74 75 // Returns the one CPUAllocator used for the given numa_node. 76 // TEMPORARY: ignores numa_node. 77 Allocator* GetCPUAllocator(int numa_node); 78 79 // Returns the one GPU allocator used for the indexed GPU. 80 // Note that this is a system GPU index, not (necessarily) a brain 81 // device index. 82 // 83 // 'total_bytes' is the total number of bytes that should be made 84 // available to the allocator. The first call to this function for 85 // a given tf_gpu_id creates the allocator, so only the total_bytes 86 // used on that first call is used. 87 // 88 // "Allocator type" describes the type of algorithm to use for the 89 // underlying allocator. REQUIRES: Must be a valid type (see 90 // config.proto for the list of supported strings.). 91 // 92 // REQUIRES: tf_gpu_id must be a valid id for a BaseGPUDevice available in the 93 // current system environment. Otherwise returns nullptr. 94 virtual Allocator* GetGPUAllocator(const GPUOptions& options, 95 TfGpuId tf_gpu_id, size_t total_bytes); 96 97 virtual Allocator* GetCUDAHostAllocator(int numa_node); 98 99 // Registers a function to be called once on every new Region 100 // allocated by every GPURegionAllocator proximate to the specified 101 // bus. The AllocVisitor is provided with a memory pointer and the 102 // size of the area it identifies. The pointer is not guaranteed to 103 // be valid after the call terminates. The intention is for this 104 // interface to be used for network device memory registration. 105 // "bus_id" is platform-specific. On many platforms it 106 // should be 0. On machines with multiple PCIe buses, it should be 107 // the index of one of the PCIe buses. If the bus_id is invalid, 108 // results are undefined. 109 typedef std::function<void(void*, size_t)> AllocVisitor; 110 virtual void AddGPUAllocVisitor(int bus_id, AllocVisitor visitor); 111 112 typedef std::unordered_map<const void*, MemDesc> MDMap; 113 114 protected: 115 ProcessState(); 116 117 static ProcessState* instance_; 118 bool gpu_device_enabled_; 119 120 mutex mu_; 121 122 std::vector<Allocator*> cpu_allocators_ GUARDED_BY(mu_); 123 std::vector<VisitableAllocator*> gpu_allocators_ GUARDED_BY(mu_); 124 std::vector<std::vector<AllocVisitor>> gpu_visitors_ GUARDED_BY(mu_); 125 std::vector<Allocator*> cuda_host_allocators_ GUARDED_BY(mu_); 126 127 virtual ~ProcessState(); 128 129 // Optional RecordingAllocators that wrap the corresponding 130 // Allocators for runtime attribute use analysis. 131 MDMap mem_desc_map_; 132 std::vector<Allocator*> cpu_al_ GUARDED_BY(mu_); 133 std::vector<Allocator*> gpu_al_ GUARDED_BY(mu_); 134 std::vector<Allocator*> cuda_al_ GUARDED_BY(mu_); 135 }; 136 137 namespace internal { 138 class RecordingAllocator : public Allocator { 139 public: 140 RecordingAllocator(ProcessState::MDMap* mm, Allocator* a, 141 ProcessState::MemDesc md, mutex* mu) 142 : mm_(mm), a_(a), md_(md), mu_(mu) {} 143 144 string Name() override { return a_->Name(); } 145 void* AllocateRaw(size_t alignment, size_t num_bytes) override { 146 void* p = a_->AllocateRaw(alignment, num_bytes); 147 mutex_lock l(*mu_); 148 (*mm_)[p] = md_; 149 return p; 150 } 151 void DeallocateRaw(void* p) override { 152 mutex_lock l(*mu_); 153 auto iter = mm_->find(p); 154 mm_->erase(iter); 155 a_->DeallocateRaw(p); 156 } 157 bool TracksAllocationSizes() override { return a_->TracksAllocationSizes(); } 158 size_t RequestedSize(const void* p) override { return a_->RequestedSize(p); } 159 size_t AllocatedSize(const void* p) override { return a_->AllocatedSize(p); } 160 void GetStats(AllocatorStats* stats) override { a_->GetStats(stats); } 161 void ClearStats() override { a_->ClearStats(); } 162 ProcessState::MDMap* mm_; // not owned 163 Allocator* a_; // not owned 164 ProcessState::MemDesc md_; 165 mutex* mu_; 166 }; 167 } // namespace internal 168 } // namespace tensorflow 169 #endif // TENSORFLOW_COMMON_RUNTIME_GPU_PROCESS_STATE_H_ 170