1 /* 2 * Copyright 2016 Red Hat. 3 * Copyright 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 26 */ 27 28 #include "radv_debug.h" 29 #include "radv_private.h" 30 #include "vk_format.h" 31 #include "vk_util.h" 32 #include "radv_radeon_winsys.h" 33 #include "sid.h" 34 #include "gfx9d.h" 35 #include "util/debug.h" 36 #include "util/u_atomic.h" 37 static unsigned 38 radv_choose_tiling(struct radv_device *device, 39 const struct radv_image_create_info *create_info) 40 { 41 const VkImageCreateInfo *pCreateInfo = create_info->vk_info; 42 43 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) { 44 assert(pCreateInfo->samples <= 1); 45 return RADEON_SURF_MODE_LINEAR_ALIGNED; 46 } 47 48 if (!vk_format_is_compressed(pCreateInfo->format) && 49 !vk_format_is_depth_or_stencil(pCreateInfo->format) 50 && device->physical_device->rad_info.chip_class <= VI) { 51 /* this causes hangs in some VK CTS tests on GFX9. */ 52 /* Textures with a very small height are recommended to be linear. */ 53 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D || 54 /* Only very thin and long 2D textures should benefit from 55 * linear_aligned. */ 56 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2)) 57 return RADEON_SURF_MODE_LINEAR_ALIGNED; 58 } 59 60 /* MSAA resources must be 2D tiled. */ 61 if (pCreateInfo->samples > 1) 62 return RADEON_SURF_MODE_2D; 63 64 return RADEON_SURF_MODE_2D; 65 } 66 static int 67 radv_init_surface(struct radv_device *device, 68 struct radeon_surf *surface, 69 const struct radv_image_create_info *create_info) 70 { 71 const VkImageCreateInfo *pCreateInfo = create_info->vk_info; 72 unsigned array_mode = radv_choose_tiling(device, create_info); 73 const struct vk_format_description *desc = 74 vk_format_description(pCreateInfo->format); 75 bool is_depth, is_stencil, blendable; 76 77 is_depth = vk_format_has_depth(desc); 78 is_stencil = vk_format_has_stencil(desc); 79 80 surface->blk_w = vk_format_get_blockwidth(pCreateInfo->format); 81 surface->blk_h = vk_format_get_blockheight(pCreateInfo->format); 82 83 surface->bpe = vk_format_get_blocksize(vk_format_depth_only(pCreateInfo->format)); 84 /* align byte per element on dword */ 85 if (surface->bpe == 3) { 86 surface->bpe = 4; 87 } 88 surface->flags = RADEON_SURF_SET(array_mode, MODE); 89 90 switch (pCreateInfo->imageType){ 91 case VK_IMAGE_TYPE_1D: 92 if (pCreateInfo->arrayLayers > 1) 93 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE); 94 else 95 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE); 96 break; 97 case VK_IMAGE_TYPE_2D: 98 if (pCreateInfo->arrayLayers > 1) 99 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE); 100 else 101 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE); 102 break; 103 case VK_IMAGE_TYPE_3D: 104 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE); 105 break; 106 default: 107 unreachable("unhandled image type"); 108 } 109 110 if (is_depth) { 111 surface->flags |= RADEON_SURF_ZBUFFER; 112 if (!(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) && 113 !(pCreateInfo->flags & (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | 114 VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR)) && 115 pCreateInfo->tiling != VK_IMAGE_TILING_LINEAR && 116 pCreateInfo->mipLevels <= 1 && 117 device->physical_device->rad_info.chip_class >= VI && 118 ((pCreateInfo->format == VK_FORMAT_D32_SFLOAT || 119 /* for some reason TC compat with 2/4/8 samples breaks some cts tests - disable for now */ 120 (pCreateInfo->samples < 2 && pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT)) || 121 (device->physical_device->rad_info.chip_class >= GFX9 && 122 pCreateInfo->format == VK_FORMAT_D16_UNORM))) 123 surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; 124 } 125 126 if (is_stencil) 127 surface->flags |= RADEON_SURF_SBUFFER; 128 129 surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; 130 131 bool dcc_compatible_formats = radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable); 132 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) { 133 const struct VkImageFormatListCreateInfoKHR *format_list = 134 (const struct VkImageFormatListCreateInfoKHR *) 135 vk_find_struct_const(pCreateInfo->pNext, 136 IMAGE_FORMAT_LIST_CREATE_INFO_KHR); 137 138 /* We have to ignore the existence of the list if viewFormatCount = 0 */ 139 if (format_list && format_list->viewFormatCount) { 140 /* compatibility is transitive, so we only need to check 141 * one format with everything else. */ 142 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) { 143 if (!radv_dcc_formats_compatible(pCreateInfo->format, 144 format_list->pViewFormats[i])) 145 dcc_compatible_formats = false; 146 } 147 } else { 148 dcc_compatible_formats = false; 149 } 150 } 151 152 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) || 153 (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR) || 154 !dcc_compatible_formats || 155 (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) || 156 pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 || 157 device->physical_device->rad_info.chip_class < VI || 158 create_info->scanout || (device->instance->debug_flags & RADV_DEBUG_NO_DCC) || 159 pCreateInfo->samples >= 2) 160 surface->flags |= RADEON_SURF_DISABLE_DCC; 161 if (create_info->scanout) 162 surface->flags |= RADEON_SURF_SCANOUT; 163 return 0; 164 } 165 166 static uint32_t si_get_bo_metadata_word1(struct radv_device *device) 167 { 168 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id; 169 } 170 171 static inline unsigned 172 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil) 173 { 174 if (stencil) 175 return image->surface.u.legacy.stencil_tiling_index[level]; 176 else 177 return image->surface.u.legacy.tiling_index[level]; 178 } 179 180 static unsigned radv_map_swizzle(unsigned swizzle) 181 { 182 switch (swizzle) { 183 case VK_SWIZZLE_Y: 184 return V_008F0C_SQ_SEL_Y; 185 case VK_SWIZZLE_Z: 186 return V_008F0C_SQ_SEL_Z; 187 case VK_SWIZZLE_W: 188 return V_008F0C_SQ_SEL_W; 189 case VK_SWIZZLE_0: 190 return V_008F0C_SQ_SEL_0; 191 case VK_SWIZZLE_1: 192 return V_008F0C_SQ_SEL_1; 193 default: /* VK_SWIZZLE_X */ 194 return V_008F0C_SQ_SEL_X; 195 } 196 } 197 198 static void 199 radv_make_buffer_descriptor(struct radv_device *device, 200 struct radv_buffer *buffer, 201 VkFormat vk_format, 202 unsigned offset, 203 unsigned range, 204 uint32_t *state) 205 { 206 const struct vk_format_description *desc; 207 unsigned stride; 208 uint64_t gpu_address = radv_buffer_get_va(buffer->bo); 209 uint64_t va = gpu_address + buffer->offset; 210 unsigned num_format, data_format; 211 int first_non_void; 212 desc = vk_format_description(vk_format); 213 first_non_void = vk_format_get_first_non_void_channel(vk_format); 214 stride = desc->block.bits / 8; 215 216 num_format = radv_translate_buffer_numformat(desc, first_non_void); 217 data_format = radv_translate_buffer_dataformat(desc, first_non_void); 218 219 va += offset; 220 state[0] = va; 221 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 222 S_008F04_STRIDE(stride); 223 224 if (device->physical_device->rad_info.chip_class != VI && stride) { 225 range /= stride; 226 } 227 228 state[2] = range; 229 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) | 230 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) | 231 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) | 232 S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3])) | 233 S_008F0C_NUM_FORMAT(num_format) | 234 S_008F0C_DATA_FORMAT(data_format); 235 } 236 237 static void 238 si_set_mutable_tex_desc_fields(struct radv_device *device, 239 struct radv_image *image, 240 const struct legacy_surf_level *base_level_info, 241 unsigned base_level, unsigned first_level, 242 unsigned block_width, bool is_stencil, 243 bool is_storage_image, uint32_t *state) 244 { 245 uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0; 246 uint64_t va = gpu_address; 247 enum chip_class chip_class = device->physical_device->rad_info.chip_class; 248 uint64_t meta_va = 0; 249 if (chip_class >= GFX9) { 250 if (is_stencil) 251 va += image->surface.u.gfx9.stencil_offset; 252 else 253 va += image->surface.u.gfx9.surf_offset; 254 } else 255 va += base_level_info->offset; 256 257 state[0] = va >> 8; 258 if (chip_class >= GFX9 || 259 base_level_info->mode == RADEON_SURF_MODE_2D) 260 state[0] |= image->surface.tile_swizzle; 261 state[1] &= C_008F14_BASE_ADDRESS_HI; 262 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); 263 264 if (chip_class >= VI) { 265 state[6] &= C_008F28_COMPRESSION_EN; 266 state[7] = 0; 267 if (!is_storage_image && radv_vi_dcc_enabled(image, first_level)) { 268 meta_va = gpu_address + image->dcc_offset; 269 if (chip_class <= VI) 270 meta_va += base_level_info->dcc_offset; 271 } else if(!is_storage_image && image->tc_compatible_htile && 272 image->surface.htile_size) { 273 meta_va = gpu_address + image->htile_offset; 274 } 275 276 if (meta_va) { 277 state[6] |= S_008F28_COMPRESSION_EN(1); 278 state[7] = meta_va >> 8; 279 state[7] |= image->surface.tile_swizzle; 280 } 281 } 282 283 if (chip_class >= GFX9) { 284 state[3] &= C_008F1C_SW_MODE; 285 state[4] &= C_008F20_PITCH_GFX9; 286 287 if (is_stencil) { 288 state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.stencil.swizzle_mode); 289 state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.stencil.epitch); 290 } else { 291 state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.surf.swizzle_mode); 292 state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.surf.epitch); 293 } 294 295 state[5] &= C_008F24_META_DATA_ADDRESS & 296 C_008F24_META_PIPE_ALIGNED & 297 C_008F24_META_RB_ALIGNED; 298 if (meta_va) { 299 struct gfx9_surf_meta_flags meta; 300 301 if (image->dcc_offset) 302 meta = image->surface.u.gfx9.dcc; 303 else 304 meta = image->surface.u.gfx9.htile; 305 306 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) | 307 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) | 308 S_008F24_META_RB_ALIGNED(meta.rb_aligned); 309 } 310 } else { 311 /* SI-CI-VI */ 312 unsigned pitch = base_level_info->nblk_x * block_width; 313 unsigned index = si_tile_mode_index(image, base_level, is_stencil); 314 315 state[3] &= C_008F1C_TILING_INDEX; 316 state[3] |= S_008F1C_TILING_INDEX(index); 317 state[4] &= C_008F20_PITCH_GFX6; 318 state[4] |= S_008F20_PITCH_GFX6(pitch - 1); 319 } 320 } 321 322 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type, 323 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9) 324 { 325 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) 326 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE; 327 328 /* GFX9 allocates 1D textures as 2D. */ 329 if (gfx9 && image_type == VK_IMAGE_TYPE_1D) 330 image_type = VK_IMAGE_TYPE_2D; 331 switch (image_type) { 332 case VK_IMAGE_TYPE_1D: 333 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D; 334 case VK_IMAGE_TYPE_2D: 335 if (nr_samples > 1) 336 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA; 337 else 338 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D; 339 case VK_IMAGE_TYPE_3D: 340 if (view_type == VK_IMAGE_VIEW_TYPE_3D) 341 return V_008F1C_SQ_RSRC_IMG_3D; 342 else 343 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 344 default: 345 unreachable("illegale image type"); 346 } 347 } 348 349 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4]) 350 { 351 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 352 353 if (swizzle[3] == VK_SWIZZLE_X) { 354 /* For the pre-defined border color values (white, opaque 355 * black, transparent black), the only thing that matters is 356 * that the alpha channel winds up in the correct place 357 * (because the RGB channels are all the same) so either of 358 * these enumerations will work. 359 */ 360 if (swizzle[2] == VK_SWIZZLE_Y) 361 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX; 362 else 363 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ; 364 } else if (swizzle[0] == VK_SWIZZLE_X) { 365 if (swizzle[1] == VK_SWIZZLE_Y) 366 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 367 else 368 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ; 369 } else if (swizzle[1] == VK_SWIZZLE_X) { 370 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ; 371 } else if (swizzle[2] == VK_SWIZZLE_X) { 372 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW; 373 } 374 375 return bc_swizzle; 376 } 377 378 /** 379 * Build the sampler view descriptor for a texture. 380 */ 381 static void 382 si_make_texture_descriptor(struct radv_device *device, 383 struct radv_image *image, 384 bool is_storage_image, 385 VkImageViewType view_type, 386 VkFormat vk_format, 387 const VkComponentMapping *mapping, 388 unsigned first_level, unsigned last_level, 389 unsigned first_layer, unsigned last_layer, 390 unsigned width, unsigned height, unsigned depth, 391 uint32_t *state, 392 uint32_t *fmask_state) 393 { 394 const struct vk_format_description *desc; 395 enum vk_swizzle swizzle[4]; 396 int first_non_void; 397 unsigned num_format, data_format, type; 398 399 desc = vk_format_description(vk_format); 400 401 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) { 402 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 403 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle); 404 } else { 405 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle); 406 } 407 408 first_non_void = vk_format_get_first_non_void_channel(vk_format); 409 410 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void); 411 if (num_format == ~0) { 412 num_format = 0; 413 } 414 415 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void); 416 if (data_format == ~0) { 417 data_format = 0; 418 } 419 420 /* S8 with either Z16 or Z32 HTILE need a special format. */ 421 if (device->physical_device->rad_info.chip_class >= GFX9 && 422 vk_format == VK_FORMAT_S8_UINT && 423 image->tc_compatible_htile) { 424 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) 425 data_format = V_008F14_IMG_DATA_FORMAT_S8_32; 426 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT) 427 data_format = V_008F14_IMG_DATA_FORMAT_S8_16; 428 } 429 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples, 430 is_storage_image, device->physical_device->rad_info.chip_class >= GFX9); 431 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 432 height = 1; 433 depth = image->info.array_size; 434 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || 435 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 436 if (view_type != VK_IMAGE_VIEW_TYPE_3D) 437 depth = image->info.array_size; 438 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 439 depth = image->info.array_size / 6; 440 441 state[0] = 0; 442 state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) | 443 S_008F14_NUM_FORMAT_GFX6(num_format)); 444 state[2] = (S_008F18_WIDTH(width - 1) | 445 S_008F18_HEIGHT(height - 1) | 446 S_008F18_PERF_MOD(4)); 447 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | 448 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | 449 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | 450 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | 451 S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 452 0 : first_level) | 453 S_008F1C_LAST_LEVEL(image->info.samples > 1 ? 454 util_logbase2(image->info.samples) : 455 last_level) | 456 S_008F1C_TYPE(type)); 457 state[4] = 0; 458 state[5] = S_008F24_BASE_ARRAY(first_layer); 459 state[6] = 0; 460 state[7] = 0; 461 462 if (device->physical_device->rad_info.chip_class >= GFX9) { 463 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle); 464 465 /* Depth is the the last accessible layer on Gfx9. 466 * The hw doesn't need to know the total number of layers. 467 */ 468 if (type == V_008F1C_SQ_RSRC_IMG_3D) 469 state[4] |= S_008F20_DEPTH(depth - 1); 470 else 471 state[4] |= S_008F20_DEPTH(last_layer); 472 473 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle); 474 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? 475 util_logbase2(image->info.samples) : 476 image->info.levels - 1); 477 } else { 478 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1); 479 state[4] |= S_008F20_DEPTH(depth - 1); 480 state[5] |= S_008F24_LAST_ARRAY(last_layer); 481 } 482 if (image->dcc_offset) { 483 unsigned swap = radv_translate_colorswap(vk_format, FALSE); 484 485 state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1); 486 } else { 487 /* The last dword is unused by hw. The shader uses it to clear 488 * bits in the first dword of sampler state. 489 */ 490 if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) { 491 if (first_level == last_level) 492 state[7] = C_008F30_MAX_ANISO_RATIO; 493 else 494 state[7] = 0xffffffff; 495 } 496 } 497 498 /* Initialize the sampler view for FMASK. */ 499 if (image->fmask.size) { 500 uint32_t fmask_format, num_format; 501 uint64_t gpu_address = radv_buffer_get_va(image->bo); 502 uint64_t va; 503 504 va = gpu_address + image->offset + image->fmask.offset; 505 506 if (device->physical_device->rad_info.chip_class >= GFX9) { 507 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK; 508 switch (image->info.samples) { 509 case 2: 510 num_format = V_008F14_IMG_FMASK_8_2_2; 511 break; 512 case 4: 513 num_format = V_008F14_IMG_FMASK_8_4_4; 514 break; 515 case 8: 516 num_format = V_008F14_IMG_FMASK_32_8_8; 517 break; 518 default: 519 unreachable("invalid nr_samples"); 520 } 521 } else { 522 switch (image->info.samples) { 523 case 2: 524 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 525 break; 526 case 4: 527 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 528 break; 529 case 8: 530 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 531 break; 532 default: 533 assert(0); 534 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 535 } 536 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 537 } 538 539 fmask_state[0] = va >> 8; 540 fmask_state[0] |= image->fmask.tile_swizzle; 541 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 542 S_008F14_DATA_FORMAT_GFX6(fmask_format) | 543 S_008F14_NUM_FORMAT_GFX6(num_format); 544 fmask_state[2] = S_008F18_WIDTH(width - 1) | 545 S_008F18_HEIGHT(height - 1); 546 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 547 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 548 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 549 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 550 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false, false)); 551 fmask_state[4] = 0; 552 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); 553 fmask_state[6] = 0; 554 fmask_state[7] = 0; 555 556 if (device->physical_device->rad_info.chip_class >= GFX9) { 557 fmask_state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.fmask.swizzle_mode); 558 fmask_state[4] |= S_008F20_DEPTH(last_layer) | 559 S_008F20_PITCH_GFX9(image->surface.u.gfx9.fmask.epitch); 560 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->surface.u.gfx9.cmask.pipe_aligned) | 561 S_008F24_META_RB_ALIGNED(image->surface.u.gfx9.cmask.rb_aligned); 562 } else { 563 fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index); 564 fmask_state[4] |= S_008F20_DEPTH(depth - 1) | 565 S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1); 566 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); 567 } 568 } else if (fmask_state) 569 memset(fmask_state, 0, 8 * 4); 570 } 571 572 static void 573 radv_query_opaque_metadata(struct radv_device *device, 574 struct radv_image *image, 575 struct radeon_bo_metadata *md) 576 { 577 static const VkComponentMapping fixedmapping; 578 uint32_t desc[8], i; 579 580 /* Metadata image format format version 1: 581 * [0] = 1 (metadata format identifier) 582 * [1] = (VENDOR_ID << 16) | PCI_ID 583 * [2:9] = image descriptor for the whole resource 584 * [2] is always 0, because the base address is cleared 585 * [9] is the DCC offset bits [39:8] from the beginning of 586 * the buffer 587 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level 588 */ 589 md->metadata[0] = 1; /* metadata image format version 1 */ 590 591 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ 592 md->metadata[1] = si_get_bo_metadata_word1(device); 593 594 595 si_make_texture_descriptor(device, image, false, 596 (VkImageViewType)image->type, image->vk_format, 597 &fixedmapping, 0, image->info.levels - 1, 0, 598 image->info.array_size, 599 image->info.width, image->info.height, 600 image->info.depth, 601 desc, NULL); 602 603 si_set_mutable_tex_desc_fields(device, image, &image->surface.u.legacy.level[0], 0, 0, 604 image->surface.blk_w, false, false, desc); 605 606 /* Clear the base address and set the relative DCC offset. */ 607 desc[0] = 0; 608 desc[1] &= C_008F14_BASE_ADDRESS_HI; 609 desc[7] = image->dcc_offset >> 8; 610 611 /* Dwords [2:9] contain the image descriptor. */ 612 memcpy(&md->metadata[2], desc, sizeof(desc)); 613 614 /* Dwords [10:..] contain the mipmap level offsets. */ 615 if (device->physical_device->rad_info.chip_class <= VI) { 616 for (i = 0; i <= image->info.levels - 1; i++) 617 md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8; 618 md->size_metadata = (11 + image->info.levels - 1) * 4; 619 } 620 } 621 622 void 623 radv_init_metadata(struct radv_device *device, 624 struct radv_image *image, 625 struct radeon_bo_metadata *metadata) 626 { 627 struct radeon_surf *surface = &image->surface; 628 629 memset(metadata, 0, sizeof(*metadata)); 630 631 if (device->physical_device->rad_info.chip_class >= GFX9) { 632 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode; 633 } else { 634 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? 635 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; 636 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? 637 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; 638 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config; 639 metadata->u.legacy.bankw = surface->u.legacy.bankw; 640 metadata->u.legacy.bankh = surface->u.legacy.bankh; 641 metadata->u.legacy.tile_split = surface->u.legacy.tile_split; 642 metadata->u.legacy.mtilea = surface->u.legacy.mtilea; 643 metadata->u.legacy.num_banks = surface->u.legacy.num_banks; 644 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe; 645 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; 646 } 647 radv_query_opaque_metadata(device, image, metadata); 648 } 649 650 /* The number of samples can be specified independently of the texture. */ 651 static void 652 radv_image_get_fmask_info(struct radv_device *device, 653 struct radv_image *image, 654 unsigned nr_samples, 655 struct radv_fmask_info *out) 656 { 657 /* FMASK is allocated like an ordinary texture. */ 658 struct radeon_surf fmask = {}; 659 struct ac_surf_info info = image->info; 660 memset(out, 0, sizeof(*out)); 661 662 if (device->physical_device->rad_info.chip_class >= GFX9) { 663 out->alignment = image->surface.u.gfx9.fmask_alignment; 664 out->size = image->surface.u.gfx9.fmask_size; 665 return; 666 } 667 668 fmask.blk_w = image->surface.blk_w; 669 fmask.blk_h = image->surface.blk_h; 670 info.samples = 1; 671 fmask.flags = image->surface.flags | RADEON_SURF_FMASK; 672 673 if (!image->shareable) 674 info.surf_index = &device->fmask_mrt_offset_counter; 675 676 /* Force 2D tiling if it wasn't set. This may occur when creating 677 * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample 678 * destination buffer must have an FMASK too. */ 679 fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE); 680 fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE); 681 682 switch (nr_samples) { 683 case 2: 684 case 4: 685 fmask.bpe = 1; 686 break; 687 case 8: 688 fmask.bpe = 4; 689 break; 690 default: 691 return; 692 } 693 694 device->ws->surface_init(device->ws, &info, &fmask); 695 assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); 696 697 out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64; 698 if (out->slice_tile_max) 699 out->slice_tile_max -= 1; 700 701 out->tile_mode_index = fmask.u.legacy.tiling_index[0]; 702 out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x; 703 out->bank_height = fmask.u.legacy.bankh; 704 out->tile_swizzle = fmask.tile_swizzle; 705 out->alignment = MAX2(256, fmask.surf_alignment); 706 out->size = fmask.surf_size; 707 708 assert(!out->tile_swizzle || !image->shareable); 709 } 710 711 static void 712 radv_image_alloc_fmask(struct radv_device *device, 713 struct radv_image *image) 714 { 715 radv_image_get_fmask_info(device, image, image->info.samples, &image->fmask); 716 717 image->fmask.offset = align64(image->size, image->fmask.alignment); 718 image->size = image->fmask.offset + image->fmask.size; 719 image->alignment = MAX2(image->alignment, image->fmask.alignment); 720 } 721 722 static void 723 radv_image_get_cmask_info(struct radv_device *device, 724 struct radv_image *image, 725 struct radv_cmask_info *out) 726 { 727 unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes; 728 unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes; 729 unsigned cl_width, cl_height; 730 731 if (device->physical_device->rad_info.chip_class >= GFX9) { 732 out->alignment = image->surface.u.gfx9.cmask_alignment; 733 out->size = image->surface.u.gfx9.cmask_size; 734 return; 735 } 736 737 switch (num_pipes) { 738 case 2: 739 cl_width = 32; 740 cl_height = 16; 741 break; 742 case 4: 743 cl_width = 32; 744 cl_height = 32; 745 break; 746 case 8: 747 cl_width = 64; 748 cl_height = 32; 749 break; 750 case 16: /* Hawaii */ 751 cl_width = 64; 752 cl_height = 64; 753 break; 754 default: 755 assert(0); 756 return; 757 } 758 759 unsigned base_align = num_pipes * pipe_interleave_bytes; 760 761 unsigned width = align(image->info.width, cl_width*8); 762 unsigned height = align(image->info.height, cl_height*8); 763 unsigned slice_elements = (width * height) / (8*8); 764 765 /* Each element of CMASK is a nibble. */ 766 unsigned slice_bytes = slice_elements / 2; 767 768 out->slice_tile_max = (width * height) / (128*128); 769 if (out->slice_tile_max) 770 out->slice_tile_max -= 1; 771 772 out->alignment = MAX2(256, base_align); 773 out->size = (image->type == VK_IMAGE_TYPE_3D ? image->info.depth : image->info.array_size) * 774 align(slice_bytes, base_align); 775 } 776 777 static void 778 radv_image_alloc_cmask(struct radv_device *device, 779 struct radv_image *image) 780 { 781 uint32_t clear_value_size = 0; 782 radv_image_get_cmask_info(device, image, &image->cmask); 783 784 image->cmask.offset = align64(image->size, image->cmask.alignment); 785 /* + 8 for storing the clear values */ 786 if (!image->clear_value_offset) { 787 image->clear_value_offset = image->cmask.offset + image->cmask.size; 788 clear_value_size = 8; 789 } 790 image->size = image->cmask.offset + image->cmask.size + clear_value_size; 791 image->alignment = MAX2(image->alignment, image->cmask.alignment); 792 } 793 794 static void 795 radv_image_alloc_dcc(struct radv_image *image) 796 { 797 image->dcc_offset = align64(image->size, image->surface.dcc_alignment); 798 /* + 16 for storing the clear values + dcc pred */ 799 image->clear_value_offset = image->dcc_offset + image->surface.dcc_size; 800 image->dcc_pred_offset = image->clear_value_offset + 8; 801 image->size = image->dcc_offset + image->surface.dcc_size + 16; 802 image->alignment = MAX2(image->alignment, image->surface.dcc_alignment); 803 } 804 805 static void 806 radv_image_alloc_htile(struct radv_image *image) 807 { 808 image->htile_offset = align64(image->size, image->surface.htile_alignment); 809 810 /* + 8 for storing the clear values */ 811 image->clear_value_offset = image->htile_offset + image->surface.htile_size; 812 image->size = image->clear_value_offset + 8; 813 image->alignment = align64(image->alignment, image->surface.htile_alignment); 814 } 815 816 static inline bool 817 radv_image_can_enable_dcc_or_cmask(struct radv_image *image) 818 { 819 if (image->info.samples <= 1 && 820 image->info.width * image->info.height <= 512 * 512) { 821 /* Do not enable CMASK or DCC for small surfaces where the cost 822 * of the eliminate pass can be higher than the benefit of fast 823 * clear. RadeonSI does this, but the image threshold is 824 * different. 825 */ 826 return false; 827 } 828 829 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT && 830 (image->exclusive || image->queue_family_mask == 1); 831 } 832 833 static inline bool 834 radv_image_can_enable_dcc(struct radv_image *image) 835 { 836 return radv_image_can_enable_dcc_or_cmask(image) && 837 image->surface.dcc_size; 838 } 839 840 static inline bool 841 radv_image_can_enable_cmask(struct radv_image *image) 842 { 843 if (image->surface.bpe > 8 && image->info.samples == 1) { 844 /* Do not enable CMASK for non-MSAA images (fast color clear) 845 * because 128 bit formats are not supported, but FMASK might 846 * still be used. 847 */ 848 return false; 849 } 850 851 return radv_image_can_enable_dcc_or_cmask(image) && 852 image->info.levels == 1 && 853 image->info.depth == 1 && 854 !image->surface.is_linear; 855 } 856 857 static inline bool 858 radv_image_can_enable_fmask(struct radv_image *image) 859 { 860 return image->info.samples > 1 && vk_format_is_color(image->vk_format); 861 } 862 863 static inline bool 864 radv_image_can_enable_htile(struct radv_image *image) 865 { 866 return image->info.levels == 1 && vk_format_is_depth(image->vk_format); 867 } 868 869 VkResult 870 radv_image_create(VkDevice _device, 871 const struct radv_image_create_info *create_info, 872 const VkAllocationCallbacks* alloc, 873 VkImage *pImage) 874 { 875 RADV_FROM_HANDLE(radv_device, device, _device); 876 const VkImageCreateInfo *pCreateInfo = create_info->vk_info; 877 struct radv_image *image = NULL; 878 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); 879 880 radv_assert(pCreateInfo->mipLevels > 0); 881 radv_assert(pCreateInfo->arrayLayers > 0); 882 radv_assert(pCreateInfo->samples > 0); 883 radv_assert(pCreateInfo->extent.width > 0); 884 radv_assert(pCreateInfo->extent.height > 0); 885 radv_assert(pCreateInfo->extent.depth > 0); 886 887 image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8, 888 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 889 if (!image) 890 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 891 892 image->type = pCreateInfo->imageType; 893 image->info.width = pCreateInfo->extent.width; 894 image->info.height = pCreateInfo->extent.height; 895 image->info.depth = pCreateInfo->extent.depth; 896 image->info.samples = pCreateInfo->samples; 897 image->info.array_size = pCreateInfo->arrayLayers; 898 image->info.levels = pCreateInfo->mipLevels; 899 900 image->vk_format = pCreateInfo->format; 901 image->tiling = pCreateInfo->tiling; 902 image->usage = pCreateInfo->usage; 903 image->flags = pCreateInfo->flags; 904 905 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE; 906 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) { 907 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i) 908 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL_KHR) 909 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u; 910 else 911 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i]; 912 } 913 914 image->shareable = vk_find_struct_const(pCreateInfo->pNext, 915 EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL; 916 if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) { 917 image->info.surf_index = &device->image_mrt_offset_counter; 918 } 919 920 radv_init_surface(device, &image->surface, create_info); 921 922 device->ws->surface_init(device->ws, &image->info, &image->surface); 923 924 image->size = image->surface.surf_size; 925 image->alignment = image->surface.surf_alignment; 926 927 if (!create_info->no_metadata_planes) { 928 /* Try to enable DCC first. */ 929 if (radv_image_can_enable_dcc(image)) { 930 radv_image_alloc_dcc(image); 931 } else { 932 /* When DCC cannot be enabled, try CMASK. */ 933 image->surface.dcc_size = 0; 934 if (radv_image_can_enable_cmask(image)) { 935 radv_image_alloc_cmask(device, image); 936 } 937 } 938 939 /* Try to enable FMASK for multisampled images. */ 940 if (radv_image_can_enable_fmask(image)) { 941 radv_image_alloc_fmask(device, image); 942 } else { 943 /* Otherwise, try to enable HTILE for depth surfaces. */ 944 if (radv_image_can_enable_htile(image) && 945 !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) { 946 radv_image_alloc_htile(image); 947 image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE; 948 } else { 949 image->surface.htile_size = 0; 950 } 951 } 952 } else { 953 image->surface.dcc_size = 0; 954 image->surface.htile_size = 0; 955 } 956 957 if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) { 958 image->alignment = MAX2(image->alignment, 4096); 959 image->size = align64(image->size, image->alignment); 960 image->offset = 0; 961 962 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment, 963 0, RADEON_FLAG_VIRTUAL); 964 if (!image->bo) { 965 vk_free2(&device->alloc, alloc, image); 966 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); 967 } 968 } 969 970 *pImage = radv_image_to_handle(image); 971 972 return VK_SUCCESS; 973 } 974 975 static void 976 radv_image_view_make_descriptor(struct radv_image_view *iview, 977 struct radv_device *device, 978 const VkComponentMapping *components, 979 bool is_storage_image) 980 { 981 struct radv_image *image = iview->image; 982 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT; 983 uint32_t blk_w; 984 uint32_t *descriptor; 985 uint32_t hw_level = 0; 986 987 if (is_storage_image) { 988 descriptor = iview->storage_descriptor; 989 } else { 990 descriptor = iview->descriptor; 991 } 992 993 assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0); 994 blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format); 995 996 if (device->physical_device->rad_info.chip_class >= GFX9) 997 hw_level = iview->base_mip; 998 si_make_texture_descriptor(device, image, is_storage_image, 999 iview->type, 1000 iview->vk_format, 1001 components, 1002 hw_level, hw_level + iview->level_count - 1, 1003 iview->base_layer, 1004 iview->base_layer + iview->layer_count - 1, 1005 iview->extent.width, 1006 iview->extent.height, 1007 iview->extent.depth, 1008 descriptor, 1009 descriptor + 8); 1010 1011 const struct legacy_surf_level *base_level_info = NULL; 1012 if (device->physical_device->rad_info.chip_class <= GFX9) { 1013 if (is_stencil) 1014 base_level_info = &image->surface.u.legacy.stencil_level[iview->base_mip]; 1015 else 1016 base_level_info = &image->surface.u.legacy.level[iview->base_mip]; 1017 } 1018 si_set_mutable_tex_desc_fields(device, image, 1019 base_level_info, 1020 iview->base_mip, 1021 iview->base_mip, 1022 blk_w, is_stencil, is_storage_image, descriptor); 1023 } 1024 1025 void 1026 radv_image_view_init(struct radv_image_view *iview, 1027 struct radv_device *device, 1028 const VkImageViewCreateInfo* pCreateInfo) 1029 { 1030 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image); 1031 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; 1032 1033 switch (image->type) { 1034 case VK_IMAGE_TYPE_1D: 1035 case VK_IMAGE_TYPE_2D: 1036 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size); 1037 break; 1038 case VK_IMAGE_TYPE_3D: 1039 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 1040 <= radv_minify(image->info.depth, range->baseMipLevel)); 1041 break; 1042 default: 1043 unreachable("bad VkImageType"); 1044 } 1045 iview->image = image; 1046 iview->bo = image->bo; 1047 iview->type = pCreateInfo->viewType; 1048 iview->vk_format = pCreateInfo->format; 1049 iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; 1050 1051 if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { 1052 iview->vk_format = vk_format_stencil_only(iview->vk_format); 1053 } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) { 1054 iview->vk_format = vk_format_depth_only(iview->vk_format); 1055 } 1056 1057 if (device->physical_device->rad_info.chip_class >= GFX9) { 1058 iview->extent = (VkExtent3D) { 1059 .width = image->info.width, 1060 .height = image->info.height, 1061 .depth = image->info.depth, 1062 }; 1063 } else { 1064 iview->extent = (VkExtent3D) { 1065 .width = radv_minify(image->info.width , range->baseMipLevel), 1066 .height = radv_minify(image->info.height, range->baseMipLevel), 1067 .depth = radv_minify(image->info.depth , range->baseMipLevel), 1068 }; 1069 } 1070 1071 if (iview->vk_format != image->vk_format) { 1072 unsigned view_bw = vk_format_get_blockwidth(iview->vk_format); 1073 unsigned view_bh = vk_format_get_blockheight(iview->vk_format); 1074 unsigned img_bw = vk_format_get_blockwidth(image->vk_format); 1075 unsigned img_bh = vk_format_get_blockheight(image->vk_format); 1076 1077 iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw); 1078 iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh); 1079 1080 /* Comment ported from amdvlk - 1081 * If we have the following image: 1082 * Uncompressed pixels Compressed block sizes (4x4) 1083 * mip0: 22 x 22 6 x 6 1084 * mip1: 11 x 11 3 x 3 1085 * mip2: 5 x 5 2 x 2 1086 * mip3: 2 x 2 1 x 1 1087 * mip4: 1 x 1 1 x 1 1088 * 1089 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is 1090 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up 1091 * divide-by-two integer math): 1092 * mip0: 6x6 1093 * mip1: 3x3 1094 * mip2: 1x1 1095 * mip3: 1x1 1096 * 1097 * This means that mip2 will be missing texels. 1098 * 1099 * Fix this by calculating the base mip's width and height, then convert that, and round it 1100 * back up to get the level 0 size. 1101 * Clamp the converted size between the original values, and next power of two, which 1102 * means we don't oversize the image. 1103 */ 1104 if (device->physical_device->rad_info.chip_class >= GFX9 && 1105 vk_format_is_compressed(image->vk_format) && 1106 !vk_format_is_compressed(iview->vk_format)) { 1107 unsigned rounded_img_w = util_next_power_of_two(iview->extent.width); 1108 unsigned rounded_img_h = util_next_power_of_two(iview->extent.height); 1109 unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel); 1110 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel); 1111 1112 lvl_width = round_up_u32(lvl_width * view_bw, img_bw); 1113 lvl_height = round_up_u32(lvl_height * view_bh, img_bh); 1114 1115 lvl_width <<= range->baseMipLevel; 1116 lvl_height <<= range->baseMipLevel; 1117 1118 iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w); 1119 iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h); 1120 } 1121 } 1122 1123 iview->base_layer = range->baseArrayLayer; 1124 iview->layer_count = radv_get_layerCount(image, range); 1125 iview->base_mip = range->baseMipLevel; 1126 iview->level_count = radv_get_levelCount(image, range); 1127 1128 radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, false); 1129 radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, true); 1130 } 1131 1132 bool radv_layout_has_htile(const struct radv_image *image, 1133 VkImageLayout layout, 1134 unsigned queue_mask) 1135 { 1136 if (image->surface.htile_size && image->tc_compatible_htile) 1137 return layout != VK_IMAGE_LAYOUT_GENERAL; 1138 1139 return image->surface.htile_size && 1140 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || 1141 layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) && 1142 queue_mask == (1u << RADV_QUEUE_GENERAL); 1143 } 1144 1145 bool radv_layout_is_htile_compressed(const struct radv_image *image, 1146 VkImageLayout layout, 1147 unsigned queue_mask) 1148 { 1149 if (image->surface.htile_size && image->tc_compatible_htile) 1150 return layout != VK_IMAGE_LAYOUT_GENERAL; 1151 1152 return image->surface.htile_size && 1153 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || 1154 layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) && 1155 queue_mask == (1u << RADV_QUEUE_GENERAL); 1156 } 1157 1158 bool radv_layout_can_fast_clear(const struct radv_image *image, 1159 VkImageLayout layout, 1160 unsigned queue_mask) 1161 { 1162 return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && 1163 queue_mask == (1u << RADV_QUEUE_GENERAL); 1164 } 1165 1166 bool radv_layout_dcc_compressed(const struct radv_image *image, 1167 VkImageLayout layout, 1168 unsigned queue_mask) 1169 { 1170 /* Don't compress compute transfer dst, as image stores are not supported. */ 1171 if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && 1172 (queue_mask & (1u << RADV_QUEUE_COMPUTE))) 1173 return false; 1174 1175 return image->surface.dcc_size && layout != VK_IMAGE_LAYOUT_GENERAL; 1176 } 1177 1178 1179 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family) 1180 { 1181 if (!image->exclusive) 1182 return image->queue_family_mask; 1183 if (family == VK_QUEUE_FAMILY_EXTERNAL_KHR) 1184 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u; 1185 if (family == VK_QUEUE_FAMILY_IGNORED) 1186 return 1u << queue_family; 1187 return 1u << family; 1188 } 1189 1190 VkResult 1191 radv_CreateImage(VkDevice device, 1192 const VkImageCreateInfo *pCreateInfo, 1193 const VkAllocationCallbacks *pAllocator, 1194 VkImage *pImage) 1195 { 1196 #ifdef ANDROID 1197 const VkNativeBufferANDROID *gralloc_info = 1198 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID); 1199 1200 if (gralloc_info) 1201 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, 1202 pAllocator, pImage); 1203 #endif 1204 1205 const struct wsi_image_create_info *wsi_info = 1206 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); 1207 bool scanout = wsi_info && wsi_info->scanout; 1208 1209 return radv_image_create(device, 1210 &(struct radv_image_create_info) { 1211 .vk_info = pCreateInfo, 1212 .scanout = scanout, 1213 }, 1214 pAllocator, 1215 pImage); 1216 } 1217 1218 void 1219 radv_DestroyImage(VkDevice _device, VkImage _image, 1220 const VkAllocationCallbacks *pAllocator) 1221 { 1222 RADV_FROM_HANDLE(radv_device, device, _device); 1223 RADV_FROM_HANDLE(radv_image, image, _image); 1224 1225 if (!image) 1226 return; 1227 1228 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) 1229 device->ws->buffer_destroy(image->bo); 1230 1231 if (image->owned_memory != VK_NULL_HANDLE) 1232 radv_FreeMemory(_device, image->owned_memory, pAllocator); 1233 1234 vk_free2(&device->alloc, pAllocator, image); 1235 } 1236 1237 void radv_GetImageSubresourceLayout( 1238 VkDevice _device, 1239 VkImage _image, 1240 const VkImageSubresource* pSubresource, 1241 VkSubresourceLayout* pLayout) 1242 { 1243 RADV_FROM_HANDLE(radv_image, image, _image); 1244 RADV_FROM_HANDLE(radv_device, device, _device); 1245 int level = pSubresource->mipLevel; 1246 int layer = pSubresource->arrayLayer; 1247 struct radeon_surf *surface = &image->surface; 1248 1249 if (device->physical_device->rad_info.chip_class >= GFX9) { 1250 pLayout->offset = surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer; 1251 pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe; 1252 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size; 1253 pLayout->depthPitch = surface->u.gfx9.surf_slice_size; 1254 pLayout->size = surface->u.gfx9.surf_slice_size; 1255 if (image->type == VK_IMAGE_TYPE_3D) 1256 pLayout->size *= u_minify(image->info.depth, level); 1257 } else { 1258 pLayout->offset = surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer; 1259 pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe; 1260 pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4; 1261 pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4; 1262 pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4; 1263 if (image->type == VK_IMAGE_TYPE_3D) 1264 pLayout->size *= u_minify(image->info.depth, level); 1265 } 1266 } 1267 1268 1269 VkResult 1270 radv_CreateImageView(VkDevice _device, 1271 const VkImageViewCreateInfo *pCreateInfo, 1272 const VkAllocationCallbacks *pAllocator, 1273 VkImageView *pView) 1274 { 1275 RADV_FROM_HANDLE(radv_device, device, _device); 1276 struct radv_image_view *view; 1277 1278 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, 1279 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1280 if (view == NULL) 1281 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1282 1283 radv_image_view_init(view, device, pCreateInfo); 1284 1285 *pView = radv_image_view_to_handle(view); 1286 1287 return VK_SUCCESS; 1288 } 1289 1290 void 1291 radv_DestroyImageView(VkDevice _device, VkImageView _iview, 1292 const VkAllocationCallbacks *pAllocator) 1293 { 1294 RADV_FROM_HANDLE(radv_device, device, _device); 1295 RADV_FROM_HANDLE(radv_image_view, iview, _iview); 1296 1297 if (!iview) 1298 return; 1299 vk_free2(&device->alloc, pAllocator, iview); 1300 } 1301 1302 void radv_buffer_view_init(struct radv_buffer_view *view, 1303 struct radv_device *device, 1304 const VkBufferViewCreateInfo* pCreateInfo) 1305 { 1306 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer); 1307 1308 view->bo = buffer->bo; 1309 view->range = pCreateInfo->range == VK_WHOLE_SIZE ? 1310 buffer->size - pCreateInfo->offset : pCreateInfo->range; 1311 view->vk_format = pCreateInfo->format; 1312 1313 radv_make_buffer_descriptor(device, buffer, view->vk_format, 1314 pCreateInfo->offset, view->range, view->state); 1315 } 1316 1317 VkResult 1318 radv_CreateBufferView(VkDevice _device, 1319 const VkBufferViewCreateInfo *pCreateInfo, 1320 const VkAllocationCallbacks *pAllocator, 1321 VkBufferView *pView) 1322 { 1323 RADV_FROM_HANDLE(radv_device, device, _device); 1324 struct radv_buffer_view *view; 1325 1326 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, 1327 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1328 if (!view) 1329 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1330 1331 radv_buffer_view_init(view, device, pCreateInfo); 1332 1333 *pView = radv_buffer_view_to_handle(view); 1334 1335 return VK_SUCCESS; 1336 } 1337 1338 void 1339 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, 1340 const VkAllocationCallbacks *pAllocator) 1341 { 1342 RADV_FROM_HANDLE(radv_device, device, _device); 1343 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView); 1344 1345 if (!view) 1346 return; 1347 1348 vk_free2(&device->alloc, pAllocator, view); 1349 } 1350