/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA

#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"

#include <atomic>

#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/protobuf/config.pb.h"

namespace gpu = ::perftools::gputools;

namespace tensorflow {

// White-box test helper: exposes EventMgr's internal queue/free-list sizes
// and lets tests drive enqueueing and polling synchronously, without the
// background polling thread racing against the measurements.
class TEST_EventMgrHelper {
 public:
  explicit TEST_EventMgrHelper(EventMgr* em) : em_(em) {
    // The polling loop can interfere with the measurements made here, and
    // isn't needed since the member PollEvents() always clears the queue.
    // The tested behavior is slightly different from what may occur in
    // ordinary execution.
    StopPollingLoop();
  }

  // Number of events currently pending (enqueued, not yet harvested).
  size_t queue_size() {
    mutex_lock l(em_->mu_);
    return em_->used_events_.size();
  }

  // Number of events sitting in the reuse pool.
  size_t free_size() {
    mutex_lock l(em_->mu_);
    return em_->free_events_.size();
  }

  // Enqueue `tensors` to be freed once all work currently pending on
  // `stream` has completed.
  void QueueTensors(perftools::gputools::Stream* stream,
                    TensorReferenceVector* tensors) {
    mutex_lock l(em_->mu_);
    em_->QueueTensors(stream, tensors);
  }

  // Synchronously drain the pending-event queue, executing the deferred
  // memory frees as events complete.
  void PollEvents(bool is_dedicated_poller) {
    while (queue_size() > 0) {
      // For ordinary tensor frees, this function
      // should synchronously harvest all complete
      // events and execute the corresponding memory frees.
      EventMgr::ToFreeVector to_free;
      {
        mutex_lock l(em_->mu_);
        em_->PollEvents(is_dedicated_poller, &to_free);
      }
      em_->FreeMemory(to_free);
    }
  }

  void StopPollingLoop() { em_->StopPollingLoop(); }

  void StartPollingLoop() { em_->StartPollingLoop(); }

 private:
  EventMgr* em_;  // Not owned.
};

// Global count of bytes held by live TestTensorBuffer instances; lets the
// tests observe exactly when deferred frees actually happen.
static std::atomic_int_fast64_t live_tensor_bytes(0);

// A TensorBuffer that counts live memory usage for testing
class TestTensorBuffer : public TensorBuffer {
 public:
  explicit TestTensorBuffer(size_t bytes) : bytes_(bytes) {
    live_tensor_bytes += bytes_;
  }
  ~TestTensorBuffer() override { live_tensor_bytes -= bytes_; }

  size_t size() const override { return bytes_; }

  // Not used in this test
  void* data() const override { return nullptr; }
  TensorBuffer* root_buffer() override { return nullptr; }
  void FillAllocationDescription(AllocationDescription* arg) const override {}

 private:
  size_t bytes_;
};

namespace {

// A freshly constructed EventMgr has nothing pending and nothing pooled.
TEST(EventMgr, Empty) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, th.queue_size());
  EXPECT_EQ(0, th.free_size());
}

// Appends a reference to a new counting buffer of `size` bytes to `v`.
// The vector's TensorReference keeps the buffer alive; our local ref is
// dropped immediately.
static void AddTensorReference(TensorReferenceVector* v, int64 size) {
  TestTensorBuffer* buf = new TestTensorBuffer(size);
  v->push_back(TensorReference(buf));
  buf->Unref();
}

// Delaying polling until after several enqueings should grow the
// total number of allocated events. Once we have enough events for
// the max simultaneously pending, we should not allocate any more.
TEST(EventMgr, DelayedPolling) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, th.queue_size());
  TensorReferenceVector* v = nullptr;
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  for (int i = 0; i < 5; ++i) {
    v = new TensorReferenceVector;
    AddTensorReference(v, 100 * 1048576);
    th.QueueTensors(stream.get(), v);
    EXPECT_EQ(i + 1, th.queue_size());
    EXPECT_EQ(0, th.free_size());
  }
  th.PollEvents(false);
  EXPECT_EQ(0, th.queue_size());
  EXPECT_EQ(5, th.free_size());
  // Subsequent rounds should recycle the 5 pooled events rather than
  // allocating new ones.
  for (int j = 0; j < 2; ++j) {
    for (int i = 0; i < 5; ++i) {
      v = new TensorReferenceVector;
      AddTensorReference(v, 100 * 1048576);
      th.QueueTensors(stream.get(), v);
      EXPECT_EQ(i + 1, th.queue_size());
      EXPECT_EQ(4 - i, th.free_size());
    }
    th.PollEvents(false);
    EXPECT_EQ(0, th.queue_size());
    EXPECT_EQ(5, th.free_size());
  }
}

// A single large tensor should be scheduled for release immediately by
// ThenDeleteTensors, so after one synchronous poll nothing remains live.
TEST(EventMgr, FlushLargeTensorImmediately) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, live_tensor_bytes);
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  for (int i = 0; i < 5; ++i) {
    TensorReferenceVector v;
    AddTensorReference(&v, 100 * 1048576);
    em.ThenDeleteTensors(stream.get(), v);
    th.PollEvents(false);  // Ensure things get registered to be freed by Poll
    EXPECT_EQ(0, live_tensor_bytes);
  }
}

// Many small tensors accumulated into one ThenDeleteTensors call should
// all be released after a synchronous poll.
TEST(EventMgr, ManySmallTensorsFlushedImmediately) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, live_tensor_bytes);
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  for (int i = 0; i < 5; ++i) {
    TensorReferenceVector v;
    // NOTE: inner counter renamed from `i` to `j` to avoid shadowing the
    // outer loop variable.
    for (int j = 0; j < 1000; ++j) {
      AddTensorReference(&v, 100 * 1024);
    }
    em.ThenDeleteTensors(stream.get(), v);
    th.PollEvents(false);  // Harvest the tensors ready to be freed.
    EXPECT_EQ(0, live_tensor_bytes);
  }
}

// Enqueueing on a second stream should flush what was pending on the first.
TEST(EventMgr, StreamSwitchingFlushesImmediately) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, live_tensor_bytes);
  std::unique_ptr<gpu::Stream> stream1(new gpu::Stream(stream_exec));
  std::unique_ptr<gpu::Stream> stream2(new gpu::Stream(stream_exec));
  CHECK(stream1.get());
  CHECK(stream2.get());
  stream1->Init();
  stream2->Init();
  TensorReferenceVector v1;
  AddTensorReference(&v1, 1024);
  em.ThenDeleteTensors(stream1.get(), v1);

  TensorReferenceVector v2;
  AddTensorReference(&v2, 1024);
  int64 initial_live_bytes = live_tensor_bytes;
  em.ThenDeleteTensors(stream2.get(), v2);
  th.PollEvents(false);  // Ensure things get registered to be freed by Poll
  // Different stream should cause first tensor to get deleted
  EXPECT_GT(initial_live_bytes, live_tensor_bytes);
}

// Many separate small ThenDeleteTensors calls: at least some of the
// accumulated tensors should have been flushed by the time we poll.
TEST(EventMgr, ManySmallTensorsSeparateCallsFlushed) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, live_tensor_bytes);
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  for (int i = 0; i < 5; ++i) {
    // NOTE: inner counter renamed from `i` to `j` to avoid shadowing the
    // outer loop variable.
    for (int j = 0; j < 1000; ++j) {
      TensorReferenceVector v;
      AddTensorReference(&v, 100 * 1024);
      em.ThenDeleteTensors(stream.get(), v);
    }
    th.PollEvents(false);  // Ensure things get registered to be freed by Poll
    // Some of the tensors at least should be flushed
    EXPECT_GT(1000 * 100 * 1024, live_tensor_bytes);
  }
}

// Deleting the EventMgr when events are still pending should shut
// down gracefully.
TEST(EventMgr, NonEmptyShutdown) {
  auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
  EventMgr em(stream_exec, GPUOptions());
  TEST_EventMgrHelper th(&em);
  EXPECT_EQ(0, th.queue_size());
  EXPECT_EQ(0, th.free_size());
  std::unique_ptr<gpu::Stream> stream(new gpu::Stream(stream_exec));
  CHECK(stream.get());
  stream->Init();
  for (int i = 0; i < 5; ++i) {
    TensorReferenceVector* v = new TensorReferenceVector;
    AddTensorReference(v, 100 * 1048576);
    th.QueueTensors(stream.get(), v);
    EXPECT_EQ(1 + i, th.queue_size());
    EXPECT_EQ(0, th.free_size());
  }
}

}  // namespace
}  // namespace tensorflow

#endif  // GOOGLE_CUDA