Home | History | Annotate | Download | only in gpu
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #if GOOGLE_CUDA
     17 
     18 #include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"
     19 
#include <algorithm>
#include <cmath>
#include <cstring>
#include <vector>
     22 
     23 #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
     24 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
     25 #include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
     26 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
     27 #include "tensorflow/core/lib/gtl/inlined_vector.h"
     28 #include "tensorflow/core/platform/logging.h"
     29 #include "tensorflow/core/platform/stream_executor.h"
     30 #include "tensorflow/core/platform/test.h"
     31 #include "tensorflow/core/platform/types.h"
     32 
     33 namespace tensorflow {
     34 namespace {
     35 
     36 TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
     37   const CudaGpuId cuda_gpu_id(0);
     38   GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
     39                       cuda_gpu_id);
     40   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
     41 
     42   for (int s : {8}) {
     43     std::vector<int64> cpu_array(s);
     44     memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
     45     int64* gpu_array = a.Allocate<int64>(cpu_array.size());
     46     gpu::DeviceMemory<int64> gpu_array_ptr{gpu::DeviceMemoryBase{gpu_array}};
     47     ASSERT_TRUE(stream_exec->SynchronousMemcpy(&gpu_array_ptr, &cpu_array[0],
     48                                                s * sizeof(int64)));
     49     EXPECT_TRUE(a.CheckHeader(gpu_array));
     50     EXPECT_TRUE(a.CheckFooter(gpu_array));
     51 
     52     // Confirm no error on free.
     53     a.DeallocateRaw(gpu_array);
     54   }
     55 }
     56 
     57 TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
     58   for (int s : {8, 211}) {
     59     EXPECT_DEATH(
     60         {
     61           const CudaGpuId cuda_gpu_id(0);
     62           GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
     63                               cuda_gpu_id);
     64           auto stream_exec =
     65               GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
     66 
     67           std::vector<int64> cpu_array(s);
     68           memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
     69           int64* gpu_array = a.Allocate<int64>(cpu_array.size());
     70 
     71           gpu::DeviceMemory<int64> gpu_array_ptr{
     72               gpu::DeviceMemoryBase{gpu_array}};
     73           ASSERT_TRUE(stream_exec->SynchronousMemcpy(
     74               &gpu_array_ptr, &cpu_array[0], cpu_array.size() * sizeof(int64)));
     75 
     76           gpu::DeviceMemory<int64> gpu_hdr_ptr{
     77               gpu::DeviceMemoryBase{gpu_array - 1}};
     78           // Clobber first word of the header.
     79           float pi = 3.1417;
     80           ASSERT_TRUE(
     81               stream_exec->SynchronousMemcpy(&gpu_hdr_ptr, &pi, sizeof(float)));
     82 
     83           // Expect error on free.
     84           a.DeallocateRaw(gpu_array);
     85         },
     86         "");
     87   }
     88 }
     89 
     90 TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
     91   for (int s : {8, 22}) {
     92     EXPECT_DEATH(
     93         {
     94           const CudaGpuId cuda_gpu_id(0);
     95           GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
     96                               cuda_gpu_id);
     97           auto stream_exec =
     98               GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
     99 
    100           std::vector<int64> cpu_array(s);
    101           memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
    102           int64* gpu_array = a.Allocate<int64>(cpu_array.size());
    103 
    104           gpu::DeviceMemory<int64> gpu_array_ptr{
    105               gpu::DeviceMemoryBase{gpu_array}};
    106           ASSERT_TRUE(stream_exec->SynchronousMemcpy(
    107               &gpu_array_ptr, &cpu_array[0], cpu_array.size() * sizeof(int64)));
    108 
    109           // Clobber word of the footer.
    110           gpu::DeviceMemory<int64> gpu_ftr_ptr{
    111               gpu::DeviceMemoryBase{gpu_array + s}};
    112           float pi = 3.1417;
    113           ASSERT_TRUE(
    114               stream_exec->SynchronousMemcpy(&gpu_ftr_ptr, &pi, sizeof(float)));
    115 
    116           // Expect error on free.
    117           a.DeallocateRaw(gpu_array);
    118         },
    119         "");
    120   }
    121 }
    122 
    123 TEST(GPUDebugAllocatorTest, ResetToNan) {
    124   const CudaGpuId cuda_gpu_id(0);
    125   GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
    126                          cuda_gpu_id);
    127   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
    128 
    129   std::vector<float> cpu_array(1024);
    130   std::vector<float> cpu_array_result(1024);
    131 
    132   // Allocate 1024 floats
    133   float* gpu_array = a.Allocate<float>(cpu_array.size());
    134   gpu::DeviceMemory<float> gpu_array_ptr{gpu::DeviceMemoryBase{gpu_array}};
    135   ASSERT_TRUE(stream_exec->SynchronousMemcpy(&cpu_array[0], gpu_array_ptr,
    136                                              cpu_array.size() * sizeof(float)));
    137   for (float f : cpu_array) {
    138     ASSERT_FALSE(std::isfinite(f));
    139   }
    140 
    141   // Set one of the fields to 1.0.
    142   cpu_array[0] = 1.0;
    143   ASSERT_TRUE(stream_exec->SynchronousMemcpy(&gpu_array_ptr, &cpu_array[0],
    144                                              cpu_array.size() * sizeof(float)));
    145   // Copy the data back and verify.
    146   ASSERT_TRUE(
    147       stream_exec->SynchronousMemcpy(&cpu_array_result[0], gpu_array_ptr,
    148                                      cpu_array_result.size() * sizeof(float)));
    149   ASSERT_EQ(1.0, cpu_array_result[0]);
    150 
    151   // Free the array
    152   a.DeallocateRaw(gpu_array);
    153 
    154   // All values should be reset to nan.
    155   ASSERT_TRUE(
    156       stream_exec->SynchronousMemcpy(&cpu_array_result[0], gpu_array_ptr,
    157                                      cpu_array_result.size() * sizeof(float)));
    158   for (float f : cpu_array_result) {
    159     ASSERT_FALSE(std::isfinite(f));
    160   }
    161 }
    162 
    163 TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
    164   const CudaGpuId cuda_gpu_id(0);
    165   // NaN reset must be the outer-most allocator.
    166   GPUNanResetAllocator a(
    167       new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
    168                             cuda_gpu_id),
    169       cuda_gpu_id);
    170   auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie();
    171 
    172   std::vector<float> cpu_array(1024);
    173   std::vector<float> cpu_array_result(1024);
    174 
    175   // Allocate 1024 floats
    176   float* gpu_array = a.Allocate<float>(cpu_array.size());
    177   gpu::DeviceMemory<float> gpu_array_ptr{gpu::DeviceMemoryBase{gpu_array}};
    178   ASSERT_TRUE(stream_exec->SynchronousMemcpy(&cpu_array[0], gpu_array_ptr,
    179                                              cpu_array.size() * sizeof(float)));
    180   for (float f : cpu_array) {
    181     ASSERT_FALSE(std::isfinite(f));
    182   }
    183 
    184   // Set one of the fields to 1.0.
    185   cpu_array[0] = 1.0;
    186   ASSERT_TRUE(stream_exec->SynchronousMemcpy(&gpu_array_ptr, &cpu_array[0],
    187                                              cpu_array.size() * sizeof(float)));
    188   // Copy the data back and verify.
    189   ASSERT_TRUE(
    190       stream_exec->SynchronousMemcpy(&cpu_array_result[0], gpu_array_ptr,
    191                                      cpu_array_result.size() * sizeof(float)));
    192   ASSERT_EQ(1.0, cpu_array_result[0]);
    193 
    194   // Free the array
    195   a.DeallocateRaw(gpu_array);
    196 
    197   // All values should be reset to nan.
    198   ASSERT_TRUE(
    199       stream_exec->SynchronousMemcpy(&cpu_array_result[0], gpu_array_ptr,
    200                                      cpu_array_result.size() * sizeof(float)));
    201   for (float f : cpu_array_result) {
    202     ASSERT_FALSE(std::isfinite(f));
    203   }
    204 }
    205 
    206 TEST(GPUDebugAllocatorTest, TracksSizes) {
    207   const CudaGpuId cuda_gpu_id(0);
    208   GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
    209                       cuda_gpu_id);
    210   EXPECT_EQ(true, a.TracksAllocationSizes());
    211 }
    212 
    213 TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
    214   const CudaGpuId cuda_gpu_id(0);
    215   GPUNanResetAllocator a(
    216       new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""),
    217                             cuda_gpu_id),
    218       cuda_gpu_id);
    219   float* t1 = a.Allocate<float>(1);
    220   EXPECT_EQ(4, a.RequestedSize(t1));
    221   EXPECT_EQ(256, a.AllocatedSize(t1));
    222   a.DeallocateRaw(t1);
    223 }
    224 
    225 }  // namespace
    226 }  // namespace tensorflow
    227 
    228 #endif  // GOOGLE_CUDA
    229