/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "GrVkAMDMemoryAllocator.h"

#include "GrVkInterface.h"
#include "GrVkMemory.h"
#include "GrVkUtil.h"

GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice,
                                               VkDevice device,
                                               sk_sp<const GrVkInterface> interface)
        : fAllocator(VK_NULL_HANDLE)
        , fInterface(std::move(interface))
        , fDevice(device) {
#define GR_COPY_FUNCTION(NAME) functions.vk##NAME = fInterface->fFunctions.f##NAME

    VmaVulkanFunctions functions;
    GR_COPY_FUNCTION(GetPhysicalDeviceProperties);
    GR_COPY_FUNCTION(GetPhysicalDeviceMemoryProperties);
    GR_COPY_FUNCTION(AllocateMemory);
    GR_COPY_FUNCTION(FreeMemory);
    GR_COPY_FUNCTION(MapMemory);
    GR_COPY_FUNCTION(UnmapMemory);
    GR_COPY_FUNCTION(BindBufferMemory);
    GR_COPY_FUNCTION(BindImageMemory);
    GR_COPY_FUNCTION(GetBufferMemoryRequirements);
    GR_COPY_FUNCTION(GetImageMemoryRequirements);
    GR_COPY_FUNCTION(CreateBuffer);
    GR_COPY_FUNCTION(DestroyBuffer);
    GR_COPY_FUNCTION(CreateImage);
    GR_COPY_FUNCTION(DestroyImage);

    // Skia currently doesn't support VK_KHR_dedicated_allocation.
    functions.vkGetBufferMemoryRequirements2KHR = nullptr;
    functions.vkGetImageMemoryRequirements2KHR = nullptr;

    VmaAllocatorCreateInfo info;
    info.flags = 0;
    info.physicalDevice = physicalDevice;
    info.device = device;
    // Manually testing runs of dm using 4MB here instead of the default 256MB shows less memory
    // usage on average. dm also seems to run faster with the smaller block size, so it doesn't
    // appear to be trading off speed for memory.
    info.preferredLargeHeapBlockSize = 4*1024*1024;
    info.pAllocationCallbacks = nullptr;
    info.pDeviceMemoryCallbacks = nullptr;
    info.frameInUseCount = 0;
    info.pHeapSizeLimit = nullptr;
    info.pVulkanFunctions = &functions;

    vmaCreateAllocator(&info, &fAllocator);
}

GrVkAMDMemoryAllocator::~GrVkAMDMemoryAllocator() {
    vmaDestroyAllocator(fAllocator);
    fAllocator = VK_NULL_HANDLE;
}

bool GrVkAMDMemoryAllocator::allocateMemoryForImage(VkImage image, AllocationPropertyFlags flags,
                                                    GrVkBackendMemory* backendMemory) {
    VmaAllocationCreateInfo info;
    info.flags = 0;
    info.usage = VMA_MEMORY_USAGE_UNKNOWN;
    info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    info.preferredFlags = 0;
    info.memoryTypeBits = 0;
    info.pool = VK_NULL_HANDLE;
    info.pUserData = nullptr;

    if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
        info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    }

    if (AllocationPropertyFlags::kLazyAllocation & flags) {
        info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    }

    VmaAllocation allocation;
    VkResult result = vmaAllocateMemoryForImage(fAllocator, image, &info, &allocation, nullptr);
    if (VK_SUCCESS != result) {
        return false;
    }
    *backendMemory = (GrVkBackendMemory)allocation;
    return true;
}
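
// Illustrative caller-side sketch (not code from this file; the names "allocator" and "image"
// are assumed): allocate image memory, then query the resulting GrVkAlloc description.
//
//     GrVkBackendMemory memory;
//     if (allocator->allocateMemoryForImage(image,
//                                           AllocationPropertyFlags::kDedicatedAllocation,
//                                           &memory)) {
//         GrVkAlloc alloc;
//         allocator->getAllocInfo(memory, &alloc);
//         // alloc.fMemory and alloc.fOffset identify the backing VkDeviceMemory range.
//     }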

bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsage usage,
                                                     AllocationPropertyFlags flags,
                                                     GrVkBackendMemory* backendMemory) {
    VmaAllocationCreateInfo info;
    info.flags = 0;
    info.usage = VMA_MEMORY_USAGE_UNKNOWN;
    info.memoryTypeBits = 0;
    info.pool = VK_NULL_HANDLE;
    info.pUserData = nullptr;

    switch (usage) {
        case BufferUsage::kGpuOnly:
            info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            info.preferredFlags = 0;
            break;
        case BufferUsage::kCpuOnly:
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            break;
        case BufferUsage::kCpuWritesGpuReads:
            // On the first attempt, require memory that is also host cached.
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            break;
        case BufferUsage::kGpuWritesCpuReads:
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            break;
    }

    if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
        info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    }

    if ((AllocationPropertyFlags::kLazyAllocation & flags) && BufferUsage::kGpuOnly == usage) {
        info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    }

    if (AllocationPropertyFlags::kPersistentlyMapped & flags) {
        SkASSERT(BufferUsage::kGpuOnly != usage);
        info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
    }

    VmaAllocation allocation;
    VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
    if (VK_SUCCESS != result) {
        if (usage == BufferUsage::kCpuWritesGpuReads) {
            // Try again, but this time drop the requirement that the memory be host cached.
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
            result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
        }
    }
    if (VK_SUCCESS != result) {
        return false;
    }

    *backendMemory = (GrVkBackendMemory)allocation;
    return true;
}

void GrVkAMDMemoryAllocator::freeMemory(const GrVkBackendMemory& memoryHandle) {
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    vmaFreeMemory(fAllocator, allocation);
}

void GrVkAMDMemoryAllocator::getAllocInfo(const GrVkBackendMemory& memoryHandle,
                                          GrVkAlloc* alloc) const {
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    VmaAllocationInfo vmaInfo;
    vmaGetAllocationInfo(fAllocator, allocation, &vmaInfo);

    VkMemoryPropertyFlags memFlags;
    vmaGetMemoryTypeProperties(fAllocator, vmaInfo.memoryType, &memFlags);

    uint32_t flags = 0;
    if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & memFlags) {
        flags |= GrVkAlloc::kMappable_Flag;
    }
    if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
        flags |= GrVkAlloc::kNoncoherent_Flag;
    }

    alloc->fMemory = vmaInfo.deviceMemory;
    alloc->fOffset = vmaInfo.offset;
    alloc->fSize = vmaInfo.size;
    alloc->fFlags = flags;
    alloc->fBackendMemory = memoryHandle;

    // TODO: Remove this hack once the AMD allocator is able to handle the alignment of
    // noncoherent memory itself.
    if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
        // This is a hack that reports the allocation size as larger than it actually is, so that
        // when we flush and invalidate noncoherent memory we have a size that is aligned to the
        // nonCoherentAtomSize. This is safe for three reasons. First, the total size of the
        // VkDeviceMemory we allocate will always be a multiple of the maximum possible alignment
        // (currently 256). Second, all suballocations are aligned to 256-byte offsets. And
        // finally, the allocator we are using always maps the entire VkDeviceMemory, so the range
        // we'll be flushing/invalidating will be mapped. So our new fake allocation size will
        // always fit into the VkDeviceMemory, will never push it into another suballocation, and
        // will always be mapped when map is called.
        const VkPhysicalDeviceProperties* devProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &devProps);
        VkDeviceSize alignment = devProps->limits.nonCoherentAtomSize;

        alloc->fSize = (alloc->fSize + alignment - 1) & ~(alignment - 1);
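        // For illustration (assumed numbers): with nonCoherentAtomSize = 64 and a suballocation
        // of size 100, the reported size is rounded up to (100 + 63) & ~63 = 128.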
    }
}

void* GrVkAMDMemoryAllocator::mapMemory(const GrVkBackendMemory& memoryHandle) {
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    void* mapPtr;
    vmaMapMemory(fAllocator, allocation, &mapPtr);
    return mapPtr;
}

void GrVkAMDMemoryAllocator::unmapMemory(const GrVkBackendMemory& memoryHandle) {
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    vmaUnmapMemory(fAllocator, allocation);
}

void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHandle,
                                               VkDeviceSize offset, VkDeviceSize size) {
    GrVkAlloc info;
    this->getAllocInfo(memoryHandle, &info);

    if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
        // We need the nonCoherentAtomSize to align the flush range for non-coherent memory.
        const VkPhysicalDeviceProperties* physDevProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
        VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;

        VkMappedMemoryRange mappedMemoryRange;
        GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
                                                    &mappedMemoryRange);
        GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
    }
}

void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& memoryHandle,
                                                    VkDeviceSize offset, VkDeviceSize size) {
    GrVkAlloc info;
    this->getAllocInfo(memoryHandle, &info);

    if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
        // We need the nonCoherentAtomSize to align the invalidate range for non-coherent memory.
        const VkPhysicalDeviceProperties* physDevProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
        VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;

        VkMappedMemoryRange mappedMemoryRange;
        GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
                                                    &mappedMemoryRange);
        GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
    }
}

uint64_t GrVkAMDMemoryAllocator::totalUsedMemory() const {
    VmaStats stats;
    vmaCalculateStats(fAllocator, &stats);
    return stats.total.usedBytes;
}

uint64_t GrVkAMDMemoryAllocator::totalAllocatedMemory() const {
    VmaStats stats;
    vmaCalculateStats(fAllocator, &stats);
    return stats.total.usedBytes + stats.total.unusedBytes;
}
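
// Illustrative relationship between the two stats above (assumed numbers): if VMA currently owns
// three 4MB blocks with 5MB of suballocations in total, totalUsedMemory() reports 5MB while
// totalAllocatedMemory() reports 12MB (used bytes plus unused bytes still held in blocks).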