/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA

#include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"

#include <algorithm>
#include <cmath>    // std::isfinite
#include <cstring>  // memset
#include <vector>

#include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {
namespace {

// Arbitrary value copied over a guard word to simulate an out-of-bounds
// write.  Its exact value is irrelevant; it only needs to differ from the
// allocator's guard pattern so CheckHeader/CheckFooter fail.
constexpr float kClobberValue = 3.1417f;

// Allocation whose contents are written strictly within bounds must pass the
// header/footer checks and free without error.
TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
  const CudaGpuId cuda_gpu_id(0);
  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                      cuda_gpu_id);
  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();

  for (int s : {8}) {
    std::vector<int64> cpu_array(s);
    memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
    int64* gpu_array = a.Allocate<int64>(cpu_array.size());
    gpu::DeviceMemory<int64> gpu_array_ptr{gpu::DeviceMemoryBase{gpu_array}};
    ASSERT_TRUE(stream_exec->SynchronousMemcpy(
        &gpu_array_ptr, &cpu_array[0], cpu_array.size() * sizeof(int64)));
    EXPECT_TRUE(a.CheckHeader(gpu_array));
    EXPECT_TRUE(a.CheckFooter(gpu_array));

    // Confirm no error on free.
    a.DeallocateRaw(gpu_array);
  }
}

// Clobbering the word immediately *before* the user region (the header
// guard) must abort the process when the pointer is freed.
TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
  for (int s : {8, 211}) {
    EXPECT_DEATH(
        {
          const CudaGpuId cuda_gpu_id(0);
          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                              cuda_gpu_id);
          auto stream_exec =
              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();

          std::vector<int64> cpu_array(s);
          memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
          int64* gpu_array = a.Allocate<int64>(cpu_array.size());

          gpu::DeviceMemory<int64> gpu_array_ptr{
              gpu::DeviceMemoryBase{gpu_array}};
          ASSERT_TRUE(stream_exec->SynchronousMemcpy(
              &gpu_array_ptr, &cpu_array[0], cpu_array.size() * sizeof(int64)));

          // Clobber the first word of the header.
          gpu::DeviceMemory<int64> gpu_hdr_ptr{
              gpu::DeviceMemoryBase{gpu_array - 1}};
          float overwrite = kClobberValue;
          ASSERT_TRUE(stream_exec->SynchronousMemcpy(&gpu_hdr_ptr, &overwrite,
                                                     sizeof(float)));

          // Expect error on free.
          a.DeallocateRaw(gpu_array);
        },
        "");
  }
}

// Clobbering the word immediately *after* the user region (the footer
// guard) must abort the process when the pointer is freed.
TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
  for (int s : {8, 22}) {
    EXPECT_DEATH(
        {
          const CudaGpuId cuda_gpu_id(0);
          GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                              cuda_gpu_id);
          auto stream_exec =
              GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();

          std::vector<int64> cpu_array(s);
          memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
          int64* gpu_array = a.Allocate<int64>(cpu_array.size());

          gpu::DeviceMemory<int64> gpu_array_ptr{
              gpu::DeviceMemoryBase{gpu_array}};
          ASSERT_TRUE(stream_exec->SynchronousMemcpy(
              &gpu_array_ptr, &cpu_array[0], cpu_array.size() * sizeof(int64)));

          // Clobber a word of the footer.
          gpu::DeviceMemory<int64> gpu_ftr_ptr{
              gpu::DeviceMemoryBase{gpu_array + s}};
          float overwrite = kClobberValue;
          ASSERT_TRUE(stream_exec->SynchronousMemcpy(&gpu_ftr_ptr, &overwrite,
                                                     sizeof(float)));

          // Expect error on free.
          a.DeallocateRaw(gpu_array);
        },
        "");
  }
}

// Shared body for the NaN-reset tests: allocates `n` floats from `a`,
// verifies they arrive non-finite (NaN-filled), round-trips a written value,
// frees the buffer, and verifies the freed memory is non-finite again.
//
// NOTE: the final copy deliberately reads freed GPU memory to observe the
// NaN fill performed on deallocation; this mirrors the original test's
// intent and must not be "fixed" by removing the post-free read.
template <typename AllocatorType>
void ExpectResetToNan(AllocatorType* a, gpu::StreamExecutor* stream_exec,
                      size_t n) {
  std::vector<float> cpu_array(n);
  std::vector<float> cpu_array_result(n);

  // Allocate n floats.
  float* gpu_array = a->template Allocate<float>(cpu_array.size());
  gpu::DeviceMemory<float> gpu_array_ptr{gpu::DeviceMemoryBase{gpu_array}};
  ASSERT_TRUE(stream_exec->SynchronousMemcpy(&cpu_array[0], gpu_array_ptr,
                                             cpu_array.size() * sizeof(float)));
  for (float f : cpu_array) {
    ASSERT_FALSE(std::isfinite(f));
  }

  // Set one of the fields to 1.0.
  cpu_array[0] = 1.0;
  ASSERT_TRUE(stream_exec->SynchronousMemcpy(&gpu_array_ptr, &cpu_array[0],
                                             cpu_array.size() * sizeof(float)));
  // Copy the data back and verify.
  ASSERT_TRUE(
      stream_exec->SynchronousMemcpy(&cpu_array_result[0], gpu_array_ptr,
                                     cpu_array_result.size() * sizeof(float)));
  ASSERT_EQ(1.0, cpu_array_result[0]);

  // Free the array.
  a->DeallocateRaw(gpu_array);

  // All values should be reset to NaN.
  ASSERT_TRUE(
      stream_exec->SynchronousMemcpy(&cpu_array_result[0], gpu_array_ptr,
                                     cpu_array_result.size() * sizeof(float)));
  for (float f : cpu_array_result) {
    ASSERT_FALSE(std::isfinite(f));
  }
}

TEST(GPUDebugAllocatorTest, ResetToNan) {
  const CudaGpuId cuda_gpu_id(0);
  GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                         cuda_gpu_id);
  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
  ExpectResetToNan(&a, stream_exec, 1024);
}

TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
  const CudaGpuId cuda_gpu_id(0);
  // NaN reset must be the outer-most allocator.
  GPUNanResetAllocator a(
      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                            cuda_gpu_id),
      cuda_gpu_id);
  auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
  ExpectResetToNan(&a, stream_exec, 1024);
}

TEST(GPUDebugAllocatorTest, TracksSizes) {
  const CudaGpuId cuda_gpu_id(0);
  GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                      cuda_gpu_id);
  EXPECT_TRUE(a.TracksAllocationSizes());
}

// The debug allocator pads each allocation with guard words, so the
// allocated size exceeds the requested size.
TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
  const CudaGpuId cuda_gpu_id(0);
  GPUNanResetAllocator a(
      new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
                            cuda_gpu_id),
      cuda_gpu_id);
  float* t1 = a.Allocate<float>(1);
  EXPECT_EQ(4, a.RequestedSize(t1));
  EXPECT_EQ(256, a.AllocatedSize(t1));
  a.DeallocateRaw(t1);
}

}  // namespace
}  // namespace tensorflow

#endif  // GOOGLE_CUDA