Home | History | Annotate | Download | only in vk
      1 /*
      2  * Copyright 2018 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "GrVkAMDMemoryAllocator.h"
      9 
     10 #include "GrVkInterface.h"
     11 #include "GrVkMemory.h"
     12 #include "GrVkUtil.h"
     13 
     14 GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice,
     15                                                VkDevice device,
     16                                                sk_sp<const GrVkInterface> interface)
     17         : fAllocator(VK_NULL_HANDLE)
     18         , fInterface(std::move(interface))
     19         , fDevice(device) {
     20 #define GR_COPY_FUNCTION(NAME) functions.vk##NAME = fInterface->fFunctions.f##NAME
     21 
     22     VmaVulkanFunctions functions;
     23     GR_COPY_FUNCTION(GetPhysicalDeviceProperties);
     24     GR_COPY_FUNCTION(GetPhysicalDeviceMemoryProperties);
     25     GR_COPY_FUNCTION(AllocateMemory);
     26     GR_COPY_FUNCTION(FreeMemory);
     27     GR_COPY_FUNCTION(MapMemory);
     28     GR_COPY_FUNCTION(UnmapMemory);
     29     GR_COPY_FUNCTION(BindBufferMemory);
     30     GR_COPY_FUNCTION(BindImageMemory);
     31     GR_COPY_FUNCTION(GetBufferMemoryRequirements);
     32     GR_COPY_FUNCTION(GetImageMemoryRequirements);
     33     GR_COPY_FUNCTION(CreateBuffer);
     34     GR_COPY_FUNCTION(DestroyBuffer);
     35     GR_COPY_FUNCTION(CreateImage);
     36     GR_COPY_FUNCTION(DestroyImage);
     37 
     38     // Skia current doesn't support VK_KHR_dedicated_allocation
     39     functions.vkGetBufferMemoryRequirements2KHR = nullptr;
     40     functions.vkGetImageMemoryRequirements2KHR = nullptr;
     41 
     42     VmaAllocatorCreateInfo info;
     43     info.flags = 0;
     44     info.physicalDevice = physicalDevice;
     45     info.device = device;
     46     // Manually testing runs of dm using 64 here instead of the default 256 shows less memory usage
     47     // on average. Also dm seems to run faster using 64 so it doesn't seem to be trading off speed
     48     // for memory.
     49     info.preferredLargeHeapBlockSize = 4*1024*1024;
     50     info.pAllocationCallbacks = nullptr;
     51     info.pDeviceMemoryCallbacks = nullptr;
     52     info.frameInUseCount = 0;
     53     info.pHeapSizeLimit = nullptr;
     54     info.pVulkanFunctions = &functions;
     55 
     56     vmaCreateAllocator(&info, &fAllocator);
     57 }
     58 
     59 GrVkAMDMemoryAllocator::~GrVkAMDMemoryAllocator() {
     60     vmaDestroyAllocator(fAllocator);
     61     fAllocator = VK_NULL_HANDLE;
     62 }
     63 
     64 bool GrVkAMDMemoryAllocator::allocateMemoryForImage(VkImage image, AllocationPropertyFlags flags,
     65                                                     GrVkBackendMemory* backendMemory) {
     66     VmaAllocationCreateInfo info;
     67     info.flags = 0;
     68     info.usage = VMA_MEMORY_USAGE_UNKNOWN;
     69     info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
     70     info.preferredFlags = 0;
     71     info.memoryTypeBits = 0;
     72     info.pool = VK_NULL_HANDLE;
     73     info.pUserData = nullptr;
     74 
     75     if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
     76         info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
     77     }
     78 
     79     if (AllocationPropertyFlags::kLazyAllocation & flags) {
     80         info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
     81     }
     82 
     83     VmaAllocation allocation;
     84     VkResult result = vmaAllocateMemoryForImage(fAllocator, image, &info, &allocation, nullptr);
     85     if (VK_SUCCESS != result) {
     86         return false;
     87     }
     88     *backendMemory = (GrVkBackendMemory)allocation;
     89     return true;
     90 }
     91 
     92 bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsage usage,
     93                                                      AllocationPropertyFlags flags,
     94                                                      GrVkBackendMemory* backendMemory) {
     95     VmaAllocationCreateInfo info;
     96     info.flags = 0;
     97     info.usage = VMA_MEMORY_USAGE_UNKNOWN;
     98     info.memoryTypeBits = 0;
     99     info.pool = VK_NULL_HANDLE;
    100     info.pUserData = nullptr;
    101 
    102     switch (usage) {
    103         case BufferUsage::kGpuOnly:
    104             info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    105             info.preferredFlags = 0;
    106             break;
    107         case BufferUsage::kCpuOnly:
    108             info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
    109                                  VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    110             info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
    111             break;
    112         case BufferUsage::kCpuWritesGpuReads:
    113             // First attempt to try memory is also cached
    114             info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
    115                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
    116             info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    117             break;
    118         case BufferUsage::kGpuWritesCpuReads:
    119             info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
    120             info.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
    121                                   VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
    122             break;
    123     }
    124 
    125     if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
    126         info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    127     }
    128 
    129     if ((AllocationPropertyFlags::kLazyAllocation & flags) && BufferUsage::kGpuOnly == usage) {
    130         info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    131     }
    132 
    133     if (AllocationPropertyFlags::kPersistentlyMapped & flags) {
    134         SkASSERT(BufferUsage::kGpuOnly != usage);
    135         info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
    136     }
    137 
    138     VmaAllocation allocation;
    139     VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
    140     if (VK_SUCCESS != result) {
    141         if (usage == BufferUsage::kCpuWritesGpuReads) {
    142             // We try again but this time drop the requirement for cached
    143             info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
    144             result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
    145         }
    146     }
    147     if (VK_SUCCESS != result) {
    148         return false;
    149     }
    150 
    151     *backendMemory = (GrVkBackendMemory)allocation;
    152     return true;
    153 }
    154 
    155 void GrVkAMDMemoryAllocator::freeMemory(const GrVkBackendMemory& memoryHandle) {
    156     const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    157     vmaFreeMemory(fAllocator, allocation);
    158 }
    159 
    160 void GrVkAMDMemoryAllocator::getAllocInfo(const GrVkBackendMemory& memoryHandle,
    161                                           GrVkAlloc* alloc) const {
    162     const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    163     VmaAllocationInfo vmaInfo;
    164     vmaGetAllocationInfo(fAllocator, allocation, &vmaInfo);
    165 
    166     VkMemoryPropertyFlags memFlags;
    167     vmaGetMemoryTypeProperties(fAllocator, vmaInfo.memoryType, &memFlags);
    168 
    169     uint32_t flags = 0;
    170     if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & memFlags) {
    171         flags |= GrVkAlloc::kMappable_Flag;
    172     }
    173     if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
    174         flags |= GrVkAlloc::kNoncoherent_Flag;
    175     }
    176 
    177     alloc->fMemory        = vmaInfo.deviceMemory;
    178     alloc->fOffset        = vmaInfo.offset;
    179     alloc->fSize          = vmaInfo.size;
    180     alloc->fFlags         = flags;
    181     alloc->fBackendMemory = memoryHandle;
    182 
    183     // TODO: Remove this hack once the AMD allocator is able to handle the alignment of noncoherent
    184     // memory itself.
    185     if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
    186         // This is a hack to say that the allocation size is actually larger than it is. This is to
    187         // make sure when we are flushing and invalidating noncoherent memory we have a size that is
    188         // aligned to the nonCoherentAtomSize. This is safe for three reasons. First the total size
    189         // of the VkDeviceMemory we allocate will always be a multple of the max possible alignment
    190         // (currently 256). Second all sub allocations are alignmed with an offset of 256. And
    191         // finally the allocator we are using always maps the entire VkDeviceMemory so the range
    192         // we'll be flushing/invalidating will be mapped. So our new fake allocation size will
    193         // always fit into the VkDeviceMemory, will never push it into another suballocation, and
    194         // will always be mapped when map is called.
    195         const VkPhysicalDeviceProperties* devProps;
    196         vmaGetPhysicalDeviceProperties(fAllocator, &devProps);
    197         VkDeviceSize alignment = devProps->limits.nonCoherentAtomSize;
    198 
    199         alloc->fSize = (alloc->fSize + alignment - 1) & ~(alignment -1);
    200     }
    201 }
    202 
    203 void* GrVkAMDMemoryAllocator::mapMemory(const GrVkBackendMemory& memoryHandle) {
    204     const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    205     void* mapPtr;
    206     vmaMapMemory(fAllocator, allocation, &mapPtr);
    207     return mapPtr;
    208 }
    209 
    210 void GrVkAMDMemoryAllocator::unmapMemory(const GrVkBackendMemory& memoryHandle) {
    211     const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    212     vmaUnmapMemory(fAllocator, allocation);
    213 }
    214 
    215 void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHandle,
    216                                                VkDeviceSize offset, VkDeviceSize size) {
    217     GrVkAlloc info;
    218     this->getAllocInfo(memoryHandle, &info);
    219 
    220     if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
    221         // We need to store the nonCoherentAtomSize for non-coherent flush/invalidate alignment.
    222         const VkPhysicalDeviceProperties* physDevProps;
    223         vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
    224         VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;
    225 
    226         VkMappedMemoryRange mappedMemoryRange;
    227         GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
    228                                                     &mappedMemoryRange);
    229         GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
    230     }
    231 }
    232 
    233 void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& memoryHandle,
    234                                                     VkDeviceSize offset, VkDeviceSize size) {
    235     GrVkAlloc info;
    236     this->getAllocInfo(memoryHandle, &info);
    237 
    238     if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
    239         // We need to store the nonCoherentAtomSize for non-coherent flush/invalidate alignment.
    240         const VkPhysicalDeviceProperties* physDevProps;
    241         vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
    242         VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;
    243 
    244         VkMappedMemoryRange mappedMemoryRange;
    245         GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
    246                                                     &mappedMemoryRange);
    247         GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
    248     }
    249 }
    250 
    251 uint64_t GrVkAMDMemoryAllocator::totalUsedMemory() const {
    252     VmaStats stats;
    253     vmaCalculateStats(fAllocator, &stats);
    254     return stats.total.usedBytes;
    255 }
    256 
    257 uint64_t GrVkAMDMemoryAllocator::totalAllocatedMemory() const {
    258     VmaStats stats;
    259     vmaCalculateStats(fAllocator, &stats);
    260     return stats.total.usedBytes + stats.total.unusedBytes;
    261 }
    262 
    263