/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA

#include "tensorflow/core/common_runtime/gpu/gpu_device.h"

#include <memory>
#include <string>
#include <vector>

#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"

namespace tensorflow {
namespace {

// Name prefix handed to the device factory by every test in this file.
constexpr char kDeviceNamePrefix[] = "/job:localhost/replica:0/task:0";

// Builds the SessionOptions used to drive the GPU device factory.
//
//   visible_device_list:             stored verbatim in
//                                    GPUOptions.visible_device_list.
//   per_process_gpu_memory_fraction: stored verbatim in GPUOptions.
//   gpu_device_count:                value of the "GPU" entry in
//                                    ConfigProto.device_count.
//   memory_limit_mb:                 one experimental VirtualDevices message is
//                                    added per outer element; each inner value
//                                    becomes one memory_limit_mb entry of that
//                                    message. An empty inner vector therefore
//                                    yields an empty VirtualDevices message.
SessionOptions MakeSessionOptions(
    const string& visible_device_list = "",
    double per_process_gpu_memory_fraction = 0, int gpu_device_count = 1,
    const std::vector<std::vector<float>>& memory_limit_mb = {}) {
  SessionOptions options;
  ConfigProto* config = &options.config;
  (*config->mutable_device_count())["GPU"] = gpu_device_count;
  GPUOptions* gpu_options = config->mutable_gpu_options();
  gpu_options->set_visible_device_list(visible_device_list);
  gpu_options->set_per_process_gpu_memory_fraction(
      per_process_gpu_memory_fraction);
  for (const auto& limits : memory_limit_mb) {
    auto* virtual_devices =
        gpu_options->mutable_experimental()->add_virtual_devices();
    for (const float limit_mb : limits) {
      virtual_devices->add_memory_limit_mb(limit_mb);
    }
  }
  return options;
}

// Returns true iff `lhs` begins with `rhs`. An empty `rhs` matches any `lhs`.
bool StartsWith(const string& lhs, const string& rhs) {
  // compare() checks the prefix in place; no temporary substring is built.
  return rhs.length() <= lhs.length() &&
         lhs.compare(0, rhs.length(), rhs) == 0;
}

// Wraps the raw pointers filled in by DeviceFactory::CreateDevices() in
// unique_ptrs so the devices are deleted when the test body exits, including
// early exits caused by a fatal ASSERT_* failure.
std::vector<std::unique_ptr<Device>> TakeOwnership(
    const std::vector<Device*>& raw_devices) {
  std::vector<std::unique_ptr<Device>> owned;
  owned.reserve(raw_devices.size());
  for (Device* device : raw_devices) owned.emplace_back(device);
  return owned;
}

TEST(GPUDeviceTest, FailedToParseVisibleDeviceList) {
  SessionOptions opts = MakeSessionOptions("0,abc");
  std::vector<tensorflow::Device*> devices;
  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &devices);
  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
  EXPECT_TRUE(StartsWith(status.error_message(), "Could not parse entry"))
      << status;
}

TEST(GPUDeviceTest, InvalidGpuId) {
  SessionOptions opts = MakeSessionOptions("100");
  std::vector<tensorflow::Device*> devices;
  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &devices);
  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
  EXPECT_TRUE(StartsWith(status.error_message(),
                         "'visible_device_list' listed an invalid GPU id"))
      << status;
}

TEST(GPUDeviceTest, DuplicateEntryInVisibleDeviceList) {
  SessionOptions opts = MakeSessionOptions("0,0");
  std::vector<tensorflow::Device*> devices;
  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &devices);
  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
  EXPECT_TRUE(StartsWith(status.error_message(),
                         "visible_device_list contained a duplicate entry"))
      << status;
}

TEST(GPUDeviceTest, VirtualDeviceConfigConflictsWithMemoryFractionSettings) {
  // A non-zero memory fraction combined with one (empty) VirtualDevices
  // message.
  SessionOptions opts = MakeSessionOptions("0", 0.1, 1, {{}});
  std::vector<tensorflow::Device*> devices;
  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &devices);
  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
  EXPECT_TRUE(StartsWith(status.error_message(),
                         "It's invalid to set per_process_gpu_memory_fraction"))
      << status;
}

TEST(GPUDeviceTest, GpuDeviceCountTooSmall) {
  // device_count is 0, but with one entry in visible_device_list and one
  // (empty) VirtualDevices messages.
  SessionOptions opts = MakeSessionOptions("0", 0, 0, {{}});
  std::vector<tensorflow::Device*> devices;
  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &devices);
  EXPECT_EQ(status.code(), error::UNKNOWN);
  EXPECT_TRUE(StartsWith(status.error_message(),
                         "Not enough GPUs to create virtual devices."))
      << status;
}

TEST(GPUDeviceTest, NotEnoughGpuInVisibleDeviceList) {
  // Single entry in visible_device_list with two (empty) VirtualDevices
  // messages.
  SessionOptions opts = MakeSessionOptions("0", 0, 8, {{}, {}});
  std::vector<tensorflow::Device*> devices;
  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &devices);
  EXPECT_EQ(status.code(), error::UNKNOWN);
  EXPECT_TRUE(StartsWith(status.error_message(),
                         "Not enough GPUs to create virtual devices."))
      << status;
}

TEST(GPUDeviceTest, VirtualDeviceConfigConflictsWithVisibleDeviceList) {
  // This test requires at least two visible GPU hardware.
  if (GPUMachineManager()->VisibleDeviceCount() < 2) return;
  // Two entries in visible_device_list with only one (empty) VirtualDevices
  // message.
  SessionOptions opts = MakeSessionOptions("0,1", 0, 8, {{}});
  std::vector<tensorflow::Device*> devices;
  Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &devices);
  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
  EXPECT_TRUE(StartsWith(status.error_message(),
                         "The number of GPUs in visible_device_list doesn't "
                         "match the number of elements in the virtual_devices "
                         "list."))
      << status;
}

TEST(GPUDeviceTest, EmptyVirtualDeviceConfig) {
  // It'll create single virtual device when the virtual device config is empty.
  SessionOptions opts = MakeSessionOptions("0");
  std::vector<tensorflow::Device*> raw_devices;
  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &raw_devices));
  const auto devices = TakeOwnership(raw_devices);
  // Fatal on mismatch: the element access below must not run on a short list.
  ASSERT_EQ(1u, devices.size());
  EXPECT_GE(devices[0]->attributes().memory_limit(), 0);
}

TEST(GPUDeviceTest, SingleVirtualDeviceWithNoMemoryLimit) {
  // It'll create single virtual device for the gpu in question when
  // memory_limit_mb is unset.
  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{}});
  std::vector<tensorflow::Device*> raw_devices;
  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &raw_devices));
  const auto devices = TakeOwnership(raw_devices);
  // Fatal on mismatch: the element access below must not run on a short list.
  ASSERT_EQ(1u, devices.size());
  EXPECT_GE(devices[0]->attributes().memory_limit(), 0);
}

TEST(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimit) {
  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123}});
  std::vector<tensorflow::Device*> raw_devices;
  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &raw_devices));
  const auto devices = TakeOwnership(raw_devices);
  // Fatal on mismatch: the element access below must not run on a short list.
  ASSERT_EQ(1u, devices.size());
  // The configured limit of 123 (memory_limit_mb) is expected back as
  // 123 * 2^20 from memory_limit().
  EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
}

TEST(GPUDeviceTest, MultipleVirtualDevices) {
  // One VirtualDevices message carrying two memory limits is expected to
  // produce two devices, each linked to the other.
  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}});
  std::vector<tensorflow::Device*> raw_devices;
  TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
      opts, kDeviceNamePrefix, &raw_devices));
  const auto devices = TakeOwnership(raw_devices);
  // Fatal on mismatch: the element accesses below must not run on a short
  // list.
  ASSERT_EQ(2u, devices.size());
  EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
  EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
  ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size());
  ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size());
  EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id());
  EXPECT_EQ("SAME_DEVICE",
            devices[0]->attributes().locality().links().link(0).type());
  EXPECT_EQ(BaseGPUDeviceFactory::InterconnectMap::kSameDeviceStrength,
            devices[0]->attributes().locality().links().link(0).strength());
  EXPECT_EQ(0, devices[1]->attributes().locality().links().link(0).device_id());
  EXPECT_EQ("SAME_DEVICE",
            devices[1]->attributes().locality().links().link(0).type());
  EXPECT_EQ(BaseGPUDeviceFactory::InterconnectMap::kSameDeviceStrength,
            devices[1]->attributes().locality().links().link(0).strength());
}

}  // namespace
}  // namespace tensorflow

#endif