1 /* 2 * Copyright 2016 Red Hat. 3 * Copyright 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 26 */ 27 28 #include "radv_private.h" 29 #include "vk_format.h" 30 #include "radv_radeon_winsys.h" 31 #include "sid.h" 32 #include "util/debug.h" 33 static unsigned 34 radv_choose_tiling(struct radv_device *Device, 35 const struct radv_image_create_info *create_info) 36 { 37 const VkImageCreateInfo *pCreateInfo = create_info->vk_info; 38 39 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) { 40 assert(pCreateInfo->samples <= 1); 41 return RADEON_SURF_MODE_LINEAR_ALIGNED; 42 } 43 44 /* MSAA resources must be 2D tiled. */ 45 if (pCreateInfo->samples > 1) 46 return RADEON_SURF_MODE_2D; 47 48 return RADEON_SURF_MODE_2D; 49 } 50 static int 51 radv_init_surface(struct radv_device *device, 52 struct radeon_surf *surface, 53 const struct radv_image_create_info *create_info) 54 { 55 const VkImageCreateInfo *pCreateInfo = create_info->vk_info; 56 unsigned array_mode = radv_choose_tiling(device, create_info); 57 const struct vk_format_description *desc = 58 vk_format_description(pCreateInfo->format); 59 bool is_depth, is_stencil, blendable; 60 61 is_depth = vk_format_has_depth(desc); 62 is_stencil = vk_format_has_stencil(desc); 63 surface->npix_x = pCreateInfo->extent.width; 64 surface->npix_y = pCreateInfo->extent.height; 65 surface->npix_z = pCreateInfo->extent.depth; 66 67 surface->blk_w = vk_format_get_blockwidth(pCreateInfo->format); 68 surface->blk_h = vk_format_get_blockheight(pCreateInfo->format); 69 surface->blk_d = 1; 70 surface->array_size = pCreateInfo->arrayLayers; 71 surface->last_level = pCreateInfo->mipLevels - 1; 72 73 surface->bpe = vk_format_get_blocksize(pCreateInfo->format); 74 /* align byte per element on dword */ 75 if (surface->bpe == 3) { 76 surface->bpe = 4; 77 } 78 surface->nsamples = pCreateInfo->samples ? pCreateInfo->samples : 1; 79 surface->flags = RADEON_SURF_SET(array_mode, MODE); 80 81 switch (pCreateInfo->imageType){ 82 case VK_IMAGE_TYPE_1D: 83 if (pCreateInfo->arrayLayers > 1) 84 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE); 85 else 86 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE); 87 break; 88 case VK_IMAGE_TYPE_2D: 89 if (pCreateInfo->arrayLayers > 1) 90 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE); 91 else 92 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE); 93 break; 94 case VK_IMAGE_TYPE_3D: 95 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE); 96 break; 97 default: 98 unreachable("unhandled image type"); 99 } 100 101 if (is_depth) { 102 surface->flags |= RADEON_SURF_ZBUFFER; 103 } 104 105 if (is_stencil) 106 surface->flags |= RADEON_SURF_SBUFFER | 107 RADEON_SURF_HAS_SBUFFER_MIPTREE; 108 109 surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX; 110 111 if ((pCreateInfo->usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | 112 VK_IMAGE_USAGE_STORAGE_BIT)) || 113 (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) || 114 (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) || 115 device->physical_device->rad_info.chip_class < VI || 116 create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) || 117 !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable)) 118 surface->flags |= RADEON_SURF_DISABLE_DCC; 119 if (create_info->scanout) 120 surface->flags |= RADEON_SURF_SCANOUT; 121 return 0; 122 } 123 #define ATI_VENDOR_ID 0x1002 124 static uint32_t si_get_bo_metadata_word1(struct radv_device *device) 125 { 126 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id; 127 } 128 129 static inline unsigned 130 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil) 131 { 132 if (stencil) 133 return image->surface.stencil_tiling_index[level]; 134 else 135 return image->surface.tiling_index[level]; 136 } 137 138 static unsigned radv_map_swizzle(unsigned swizzle) 139 { 140 switch (swizzle) { 141 case VK_SWIZZLE_Y: 142 return V_008F0C_SQ_SEL_Y; 143 case VK_SWIZZLE_Z: 144 return V_008F0C_SQ_SEL_Z; 145 case VK_SWIZZLE_W: 146 return V_008F0C_SQ_SEL_W; 147 case VK_SWIZZLE_0: 148 return V_008F0C_SQ_SEL_0; 149 case VK_SWIZZLE_1: 150 return V_008F0C_SQ_SEL_1; 151 default: /* VK_SWIZZLE_X */ 152 return V_008F0C_SQ_SEL_X; 153 } 154 } 155 156 static void 157 radv_make_buffer_descriptor(struct radv_device *device, 158 struct radv_buffer *buffer, 159 VkFormat vk_format, 160 unsigned offset, 161 unsigned range, 162 uint32_t *state) 163 { 164 const struct vk_format_description *desc; 165 unsigned stride; 166 uint64_t gpu_address = device->ws->buffer_get_va(buffer->bo); 167 uint64_t va = gpu_address + buffer->offset; 168 unsigned num_format, data_format; 169 int first_non_void; 170 desc = vk_format_description(vk_format); 171 first_non_void = vk_format_get_first_non_void_channel(vk_format); 172 stride = desc->block.bits / 8; 173 174 num_format = radv_translate_buffer_numformat(desc, first_non_void); 175 data_format = radv_translate_buffer_dataformat(desc, first_non_void); 176 177 va += offset; 178 state[0] = va; 179 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | 180 S_008F04_STRIDE(stride); 181 state[2] = range; 182 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) | 183 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) | 184 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) | 185 S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3])) | 186 S_008F0C_NUM_FORMAT(num_format) | 187 S_008F0C_DATA_FORMAT(data_format); 188 } 189 190 static void 191 si_set_mutable_tex_desc_fields(struct radv_device *device, 192 struct radv_image *image, 193 const struct radeon_surf_level *base_level_info, 194 unsigned base_level, unsigned first_level, 195 unsigned block_width, bool is_stencil, 196 uint32_t *state) 197 { 198 uint64_t gpu_address = device->ws->buffer_get_va(image->bo) + image->offset; 199 uint64_t va = gpu_address + base_level_info->offset; 200 unsigned pitch = base_level_info->nblk_x * block_width; 201 202 state[1] &= C_008F14_BASE_ADDRESS_HI; 203 state[3] &= C_008F1C_TILING_INDEX; 204 state[4] &= C_008F20_PITCH; 205 state[6] &= C_008F28_COMPRESSION_EN; 206 207 assert(!(va & 255)); 208 209 state[0] = va >> 8; 210 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); 211 state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level, 212 is_stencil)); 213 state[4] |= S_008F20_PITCH(pitch - 1); 214 215 if (image->surface.dcc_size && image->surface.level[first_level].dcc_enabled) { 216 state[6] |= S_008F28_COMPRESSION_EN(1); 217 state[7] = (gpu_address + 218 image->dcc_offset + 219 base_level_info->dcc_offset) >> 8; 220 } 221 } 222 223 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type, 224 unsigned nr_layers, unsigned nr_samples, bool is_storage_image) 225 { 226 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) 227 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE; 228 switch (image_type) { 229 case VK_IMAGE_TYPE_1D: 230 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D; 231 case VK_IMAGE_TYPE_2D: 232 if (nr_samples > 1) 233 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA; 234 else 235 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D; 236 case VK_IMAGE_TYPE_3D: 237 if (view_type == VK_IMAGE_VIEW_TYPE_3D) 238 return V_008F1C_SQ_RSRC_IMG_3D; 239 else 240 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 241 default: 242 unreachable("illegale image type"); 243 } 244 } 245 /** 246 * Build the sampler view descriptor for a texture. 247 */ 248 static void 249 si_make_texture_descriptor(struct radv_device *device, 250 struct radv_image *image, 251 bool sampler, 252 VkImageViewType view_type, 253 VkFormat vk_format, 254 const VkComponentMapping *mapping, 255 unsigned first_level, unsigned last_level, 256 unsigned first_layer, unsigned last_layer, 257 unsigned width, unsigned height, unsigned depth, 258 uint32_t *state, 259 uint32_t *fmask_state) 260 { 261 const struct vk_format_description *desc; 262 enum vk_swizzle swizzle[4]; 263 int first_non_void; 264 unsigned num_format, data_format, type; 265 266 desc = vk_format_description(vk_format); 267 268 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) { 269 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 270 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle); 271 } else { 272 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle); 273 } 274 275 first_non_void = vk_format_get_first_non_void_channel(vk_format); 276 277 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void); 278 if (num_format == ~0) { 279 num_format = 0; 280 } 281 282 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void); 283 if (data_format == ~0) { 284 data_format = 0; 285 } 286 287 type = radv_tex_dim(image->type, view_type, image->array_size, image->samples, 288 (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)); 289 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 290 height = 1; 291 depth = image->array_size; 292 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || 293 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 294 if (view_type != VK_IMAGE_VIEW_TYPE_3D) 295 depth = image->array_size; 296 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 297 depth = image->array_size / 6; 298 299 state[0] = 0; 300 state[1] = (S_008F14_DATA_FORMAT(data_format) | 301 S_008F14_NUM_FORMAT(num_format)); 302 state[2] = (S_008F18_WIDTH(width - 1) | 303 S_008F18_HEIGHT(height - 1)); 304 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | 305 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | 306 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | 307 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | 308 S_008F1C_BASE_LEVEL(image->samples > 1 ? 309 0 : first_level) | 310 S_008F1C_LAST_LEVEL(image->samples > 1 ? 311 util_logbase2(image->samples) : 312 last_level) | 313 S_008F1C_POW2_PAD(image->levels > 1) | 314 S_008F1C_TYPE(type)); 315 state[4] = S_008F20_DEPTH(depth - 1); 316 state[5] = (S_008F24_BASE_ARRAY(first_layer) | 317 S_008F24_LAST_ARRAY(last_layer)); 318 state[6] = 0; 319 state[7] = 0; 320 321 if (image->dcc_offset) { 322 unsigned swap = radv_translate_colorswap(vk_format, FALSE); 323 324 state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1); 325 } else { 326 /* The last dword is unused by hw. The shader uses it to clear 327 * bits in the first dword of sampler state. 328 */ 329 if (device->physical_device->rad_info.chip_class <= CIK && image->samples <= 1) { 330 if (first_level == last_level) 331 state[7] = C_008F30_MAX_ANISO_RATIO; 332 else 333 state[7] = 0xffffffff; 334 } 335 } 336 337 /* Initialize the sampler view for FMASK. */ 338 if (image->fmask.size) { 339 uint32_t fmask_format; 340 uint64_t gpu_address = device->ws->buffer_get_va(image->bo); 341 uint64_t va; 342 343 va = gpu_address + image->offset + image->fmask.offset; 344 345 switch (image->samples) { 346 case 2: 347 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 348 break; 349 case 4: 350 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 351 break; 352 case 8: 353 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 354 break; 355 default: 356 assert(0); 357 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 358 } 359 360 fmask_state[0] = va >> 8; 361 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 362 S_008F14_DATA_FORMAT(fmask_format) | 363 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 364 fmask_state[2] = S_008F18_WIDTH(width - 1) | 365 S_008F18_HEIGHT(height - 1); 366 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 367 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 368 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 369 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 370 S_008F1C_TILING_INDEX(image->fmask.tile_mode_index) | 371 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false)); 372 fmask_state[4] = S_008F20_DEPTH(depth - 1) | 373 S_008F20_PITCH(image->fmask.pitch_in_pixels - 1); 374 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) | 375 S_008F24_LAST_ARRAY(last_layer); 376 fmask_state[6] = 0; 377 fmask_state[7] = 0; 378 } 379 } 380 381 static void 382 radv_query_opaque_metadata(struct radv_device *device, 383 struct radv_image *image, 384 struct radeon_bo_metadata *md) 385 { 386 static const VkComponentMapping fixedmapping; 387 uint32_t desc[8], i; 388 389 /* Metadata image format format version 1: 390 * [0] = 1 (metadata format identifier) 391 * [1] = (VENDOR_ID << 16) | PCI_ID 392 * [2:9] = image descriptor for the whole resource 393 * [2] is always 0, because the base address is cleared 394 * [9] is the DCC offset bits [39:8] from the beginning of 395 * the buffer 396 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level 397 */ 398 md->metadata[0] = 1; /* metadata image format version 1 */ 399 400 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ 401 md->metadata[1] = si_get_bo_metadata_word1(device); 402 403 404 si_make_texture_descriptor(device, image, true, 405 (VkImageViewType)image->type, image->vk_format, 406 &fixedmapping, 0, image->levels - 1, 0, 407 image->array_size, 408 image->extent.width, image->extent.height, 409 image->extent.depth, 410 desc, NULL); 411 412 si_set_mutable_tex_desc_fields(device, image, &image->surface.level[0], 0, 0, 413 image->surface.blk_w, false, desc); 414 415 /* Clear the base address and set the relative DCC offset. */ 416 desc[0] = 0; 417 desc[1] &= C_008F14_BASE_ADDRESS_HI; 418 desc[7] = image->dcc_offset >> 8; 419 420 /* Dwords [2:9] contain the image descriptor. */ 421 memcpy(&md->metadata[2], desc, sizeof(desc)); 422 423 /* Dwords [10:..] contain the mipmap level offsets. */ 424 for (i = 0; i <= image->levels - 1; i++) 425 md->metadata[10+i] = image->surface.level[i].offset >> 8; 426 427 md->size_metadata = (11 + image->levels - 1) * 4; 428 } 429 430 void 431 radv_init_metadata(struct radv_device *device, 432 struct radv_image *image, 433 struct radeon_bo_metadata *metadata) 434 { 435 struct radeon_surf *surface = &image->surface; 436 437 memset(metadata, 0, sizeof(*metadata)); 438 metadata->microtile = surface->level[0].mode >= RADEON_SURF_MODE_1D ? 439 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; 440 metadata->macrotile = surface->level[0].mode >= RADEON_SURF_MODE_2D ? 441 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; 442 metadata->pipe_config = surface->pipe_config; 443 metadata->bankw = surface->bankw; 444 metadata->bankh = surface->bankh; 445 metadata->tile_split = surface->tile_split; 446 metadata->mtilea = surface->mtilea; 447 metadata->num_banks = surface->num_banks; 448 metadata->stride = surface->level[0].pitch_bytes; 449 metadata->scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; 450 451 radv_query_opaque_metadata(device, image, metadata); 452 } 453 454 /* The number of samples can be specified independently of the texture. */ 455 static void 456 radv_image_get_fmask_info(struct radv_device *device, 457 struct radv_image *image, 458 unsigned nr_samples, 459 struct radv_fmask_info *out) 460 { 461 /* FMASK is allocated like an ordinary texture. */ 462 struct radeon_surf fmask = image->surface; 463 464 memset(out, 0, sizeof(*out)); 465 466 fmask.bo_alignment = 0; 467 fmask.bo_size = 0; 468 fmask.nsamples = 1; 469 fmask.flags |= RADEON_SURF_FMASK; 470 471 /* Force 2D tiling if it wasn't set. This may occur when creating 472 * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample 473 * destination buffer must have an FMASK too. */ 474 fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE); 475 fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE); 476 477 fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX; 478 479 switch (nr_samples) { 480 case 2: 481 case 4: 482 fmask.bpe = 1; 483 break; 484 case 8: 485 fmask.bpe = 4; 486 break; 487 default: 488 return; 489 } 490 491 device->ws->surface_init(device->ws, &fmask); 492 assert(fmask.level[0].mode == RADEON_SURF_MODE_2D); 493 494 out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64; 495 if (out->slice_tile_max) 496 out->slice_tile_max -= 1; 497 498 out->tile_mode_index = fmask.tiling_index[0]; 499 out->pitch_in_pixels = fmask.level[0].nblk_x; 500 out->bank_height = fmask.bankh; 501 out->alignment = MAX2(256, fmask.bo_alignment); 502 out->size = fmask.bo_size; 503 } 504 505 static void 506 radv_image_alloc_fmask(struct radv_device *device, 507 struct radv_image *image) 508 { 509 radv_image_get_fmask_info(device, image, image->samples, &image->fmask); 510 511 image->fmask.offset = align64(image->size, image->fmask.alignment); 512 image->size = image->fmask.offset + image->fmask.size; 513 image->alignment = MAX2(image->alignment, image->fmask.alignment); 514 } 515 516 static void 517 radv_image_get_cmask_info(struct radv_device *device, 518 struct radv_image *image, 519 struct radv_cmask_info *out) 520 { 521 unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes; 522 unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes; 523 unsigned cl_width, cl_height; 524 525 switch (num_pipes) { 526 case 2: 527 cl_width = 32; 528 cl_height = 16; 529 break; 530 case 4: 531 cl_width = 32; 532 cl_height = 32; 533 break; 534 case 8: 535 cl_width = 64; 536 cl_height = 32; 537 break; 538 case 16: /* Hawaii */ 539 cl_width = 64; 540 cl_height = 64; 541 break; 542 default: 543 assert(0); 544 return; 545 } 546 547 unsigned base_align = num_pipes * pipe_interleave_bytes; 548 549 unsigned width = align(image->surface.npix_x, cl_width*8); 550 unsigned height = align(image->surface.npix_y, cl_height*8); 551 unsigned slice_elements = (width * height) / (8*8); 552 553 /* Each element of CMASK is a nibble. */ 554 unsigned slice_bytes = slice_elements / 2; 555 556 out->slice_tile_max = (width * height) / (128*128); 557 if (out->slice_tile_max) 558 out->slice_tile_max -= 1; 559 560 out->alignment = MAX2(256, base_align); 561 out->size = (image->type == VK_IMAGE_TYPE_3D ? image->extent.depth : image->array_size) * 562 align(slice_bytes, base_align); 563 } 564 565 static void 566 radv_image_alloc_cmask(struct radv_device *device, 567 struct radv_image *image) 568 { 569 radv_image_get_cmask_info(device, image, &image->cmask); 570 571 image->cmask.offset = align64(image->size, image->cmask.alignment); 572 /* + 8 for storing the clear values */ 573 image->clear_value_offset = image->cmask.offset + image->cmask.size; 574 image->size = image->cmask.offset + image->cmask.size + 8; 575 image->alignment = MAX2(image->alignment, image->cmask.alignment); 576 } 577 578 static void 579 radv_image_alloc_dcc(struct radv_device *device, 580 struct radv_image *image) 581 { 582 image->dcc_offset = align64(image->size, image->surface.dcc_alignment); 583 /* + 8 for storing the clear values */ 584 image->clear_value_offset = image->dcc_offset + image->surface.dcc_size; 585 image->size = image->dcc_offset + image->surface.dcc_size + 8; 586 image->alignment = MAX2(image->alignment, image->surface.dcc_alignment); 587 } 588 589 static unsigned 590 radv_image_get_htile_size(struct radv_device *device, 591 struct radv_image *image) 592 { 593 unsigned cl_width, cl_height, width, height; 594 unsigned slice_elements, slice_bytes, base_align; 595 unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes; 596 unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes; 597 598 /* Overalign HTILE on P2 configs to work around GPU hangs in 599 * piglit/depthstencil-render-miplevels 585. 600 * 601 * This has been confirmed to help Kabini & Stoney, where the hangs 602 * are always reproducible. I think I have seen the test hang 603 * on Carrizo too, though it was very rare there. 604 */ 605 if (device->physical_device->rad_info.chip_class >= CIK && num_pipes < 4) 606 num_pipes = 4; 607 608 switch (num_pipes) { 609 case 1: 610 cl_width = 32; 611 cl_height = 16; 612 break; 613 case 2: 614 cl_width = 32; 615 cl_height = 32; 616 break; 617 case 4: 618 cl_width = 64; 619 cl_height = 32; 620 break; 621 case 8: 622 cl_width = 64; 623 cl_height = 64; 624 break; 625 case 16: 626 cl_width = 128; 627 cl_height = 64; 628 break; 629 default: 630 assert(0); 631 return 0; 632 } 633 634 width = align(image->surface.npix_x, cl_width * 8); 635 height = align(image->surface.npix_y, cl_height * 8); 636 637 slice_elements = (width * height) / (8 * 8); 638 slice_bytes = slice_elements * 4; 639 640 base_align = num_pipes * pipe_interleave_bytes; 641 642 image->htile.pitch = width; 643 image->htile.height = height; 644 image->htile.xalign = cl_width * 8; 645 image->htile.yalign = cl_height * 8; 646 647 return image->array_size * 648 align(slice_bytes, base_align); 649 } 650 651 static void 652 radv_image_alloc_htile(struct radv_device *device, 653 struct radv_image *image) 654 { 655 if (device->debug_flags & RADV_DEBUG_NO_HIZ) 656 return; 657 658 if (image->array_size > 1 || image->levels > 1) 659 return; 660 661 image->htile.size = radv_image_get_htile_size(device, image); 662 663 if (!image->htile.size) 664 return; 665 666 image->htile.offset = align64(image->size, 32768); 667 668 /* + 8 for storing the clear values */ 669 image->clear_value_offset = image->htile.offset + image->htile.size; 670 image->size = image->htile.offset + image->htile.size + 8; 671 image->alignment = align64(image->alignment, 32768); 672 } 673 674 VkResult 675 radv_image_create(VkDevice _device, 676 const struct radv_image_create_info *create_info, 677 const VkAllocationCallbacks* alloc, 678 VkImage *pImage) 679 { 680 RADV_FROM_HANDLE(radv_device, device, _device); 681 const VkImageCreateInfo *pCreateInfo = create_info->vk_info; 682 struct radv_image *image = NULL; 683 bool can_cmask_dcc = false; 684 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); 685 686 radv_assert(pCreateInfo->mipLevels > 0); 687 radv_assert(pCreateInfo->arrayLayers > 0); 688 radv_assert(pCreateInfo->samples > 0); 689 radv_assert(pCreateInfo->extent.width > 0); 690 radv_assert(pCreateInfo->extent.height > 0); 691 radv_assert(pCreateInfo->extent.depth > 0); 692 693 image = vk_alloc2(&device->alloc, alloc, sizeof(*image), 8, 694 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 695 if (!image) 696 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 697 698 memset(image, 0, sizeof(*image)); 699 image->type = pCreateInfo->imageType; 700 image->extent = pCreateInfo->extent; 701 image->vk_format = pCreateInfo->format; 702 image->levels = pCreateInfo->mipLevels; 703 image->array_size = pCreateInfo->arrayLayers; 704 image->samples = pCreateInfo->samples; 705 image->tiling = pCreateInfo->tiling; 706 image->usage = pCreateInfo->usage; 707 708 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE; 709 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) { 710 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i) 711 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i]; 712 } 713 714 radv_init_surface(device, &image->surface, create_info); 715 716 device->ws->surface_init(device->ws, &image->surface); 717 718 image->size = image->surface.bo_size; 719 image->alignment = image->surface.bo_alignment; 720 721 if (image->exclusive || image->queue_family_mask == 1) 722 can_cmask_dcc = true; 723 724 if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && 725 image->surface.dcc_size && can_cmask_dcc) 726 radv_image_alloc_dcc(device, image); 727 else 728 image->surface.dcc_size = 0; 729 730 if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && 731 pCreateInfo->mipLevels == 1 && 732 !image->surface.dcc_size && image->extent.depth == 1 && can_cmask_dcc) 733 radv_image_alloc_cmask(device, image); 734 if (image->samples > 1 && vk_format_is_color(pCreateInfo->format)) { 735 radv_image_alloc_fmask(device, image); 736 } else if (vk_format_is_depth(pCreateInfo->format)) { 737 738 radv_image_alloc_htile(device, image); 739 } 740 741 742 if (create_info->stride && create_info->stride != image->surface.level[0].pitch_bytes) { 743 image->surface.level[0].nblk_x = create_info->stride / image->surface.bpe; 744 image->surface.level[0].pitch_bytes = create_info->stride; 745 image->surface.level[0].slice_size = create_info->stride * image->surface.level[0].nblk_y; 746 } 747 *pImage = radv_image_to_handle(image); 748 749 return VK_SUCCESS; 750 } 751 752 void 753 radv_image_view_init(struct radv_image_view *iview, 754 struct radv_device *device, 755 const VkImageViewCreateInfo* pCreateInfo, 756 struct radv_cmd_buffer *cmd_buffer, 757 VkImageUsageFlags usage_mask) 758 { 759 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image); 760 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; 761 uint32_t blk_w; 762 bool is_stencil = false; 763 switch (image->type) { 764 case VK_IMAGE_TYPE_1D: 765 case VK_IMAGE_TYPE_2D: 766 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->array_size); 767 break; 768 case VK_IMAGE_TYPE_3D: 769 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 770 <= radv_minify(image->extent.depth, range->baseMipLevel)); 771 break; 772 default: 773 unreachable("bad VkImageType"); 774 } 775 iview->image = image; 776 iview->bo = image->bo; 777 iview->type = pCreateInfo->viewType; 778 iview->vk_format = pCreateInfo->format; 779 iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; 780 781 if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { 782 is_stencil = true; 783 iview->vk_format = vk_format_stencil_only(iview->vk_format); 784 } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) { 785 iview->vk_format = vk_format_depth_only(iview->vk_format); 786 } 787 788 iview->extent = (VkExtent3D) { 789 .width = radv_minify(image->extent.width , range->baseMipLevel), 790 .height = radv_minify(image->extent.height, range->baseMipLevel), 791 .depth = radv_minify(image->extent.depth , range->baseMipLevel), 792 }; 793 794 iview->extent.width = round_up_u32(iview->extent.width * vk_format_get_blockwidth(iview->vk_format), 795 vk_format_get_blockwidth(image->vk_format)); 796 iview->extent.height = round_up_u32(iview->extent.height * vk_format_get_blockheight(iview->vk_format), 797 vk_format_get_blockheight(image->vk_format)); 798 799 assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0); 800 blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format); 801 iview->base_layer = range->baseArrayLayer; 802 iview->layer_count = radv_get_layerCount(image, range); 803 iview->base_mip = range->baseMipLevel; 804 805 si_make_texture_descriptor(device, image, false, 806 iview->type, 807 iview->vk_format, 808 &pCreateInfo->components, 809 0, radv_get_levelCount(image, range) - 1, 810 range->baseArrayLayer, 811 range->baseArrayLayer + radv_get_layerCount(image, range) - 1, 812 iview->extent.width, 813 iview->extent.height, 814 iview->extent.depth, 815 iview->descriptor, 816 iview->fmask_descriptor); 817 si_set_mutable_tex_desc_fields(device, image, 818 is_stencil ? &image->surface.stencil_level[range->baseMipLevel] : &image->surface.level[range->baseMipLevel], range->baseMipLevel, 819 range->baseMipLevel, 820 blk_w, is_stencil, iview->descriptor); 821 } 822 823 void radv_image_set_optimal_micro_tile_mode(struct radv_device *device, 824 struct radv_image *image, uint32_t micro_tile_mode) 825 { 826 /* These magic numbers were copied from addrlib. It doesn't use any 827 * definitions for them either. They are all 2D_TILED_THIN1 modes with 828 * different bpp and micro tile mode. 829 */ 830 if (device->physical_device->rad_info.chip_class >= CIK) { 831 switch (micro_tile_mode) { 832 case 0: /* displayable */ 833 image->surface.tiling_index[0] = 10; 834 break; 835 case 1: /* thin */ 836 image->surface.tiling_index[0] = 14; 837 break; 838 case 3: /* rotated */ 839 image->surface.tiling_index[0] = 28; 840 break; 841 default: /* depth, thick */ 842 assert(!"unexpected micro mode"); 843 return; 844 } 845 } else { /* SI */ 846 switch (micro_tile_mode) { 847 case 0: /* displayable */ 848 switch (image->surface.bpe) { 849 case 1: 850 image->surface.tiling_index[0] = 10; 851 break; 852 case 2: 853 image->surface.tiling_index[0] = 11; 854 break; 855 default: /* 4, 8 */ 856 image->surface.tiling_index[0] = 12; 857 break; 858 } 859 break; 860 case 1: /* thin */ 861 switch (image->surface.bpe) { 862 case 1: 863 image->surface.tiling_index[0] = 14; 864 break; 865 case 2: 866 image->surface.tiling_index[0] = 15; 867 break; 868 case 4: 869 image->surface.tiling_index[0] = 16; 870 break; 871 default: /* 8, 16 */ 872 image->surface.tiling_index[0] = 17; 873 break; 874 } 875 break; 876 default: /* depth, thick */ 877 assert(!"unexpected micro mode"); 878 return; 879 } 880 } 881 882 image->surface.micro_tile_mode = micro_tile_mode; 883 } 884 885 bool radv_layout_has_htile(const struct radv_image *image, 886 VkImageLayout layout) 887 { 888 return (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || 889 layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); 890 } 891 892 bool radv_layout_is_htile_compressed(const struct radv_image *image, 893 VkImageLayout layout) 894 { 895 return layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; 896 } 897 898 bool radv_layout_can_expclear(const struct radv_image *image, 899 VkImageLayout layout) 900 { 901 return (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || 902 layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); 903 } 904 905 bool radv_layout_can_fast_clear(const struct radv_image *image, 906 VkImageLayout layout, 907 unsigned queue_mask) 908 { 909 return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && 910 queue_mask == (1u << RADV_QUEUE_GENERAL); 911 } 912 913 914 unsigned radv_image_queue_family_mask(const struct radv_image *image, int family) { 915 if (image->exclusive) 916 return 1u <<family; 917 return image->queue_family_mask; 918 } 919 920 VkResult 921 radv_CreateImage(VkDevice device, 922 const VkImageCreateInfo *pCreateInfo, 923 const VkAllocationCallbacks *pAllocator, 924 VkImage *pImage) 925 { 926 return radv_image_create(device, 927 &(struct radv_image_create_info) { 928 .vk_info = pCreateInfo, 929 .scanout = false, 930 }, 931 pAllocator, 932 pImage); 933 } 934 935 void 936 radv_DestroyImage(VkDevice _device, VkImage _image, 937 const VkAllocationCallbacks *pAllocator) 938 { 939 RADV_FROM_HANDLE(radv_device, device, _device); 940 941 if (!_image) 942 return; 943 944 vk_free2(&device->alloc, pAllocator, radv_image_from_handle(_image)); 945 } 946 947 void radv_GetImageSubresourceLayout( 948 VkDevice device, 949 VkImage _image, 950 const VkImageSubresource* pSubresource, 951 VkSubresourceLayout* pLayout) 952 { 953 RADV_FROM_HANDLE(radv_image, image, _image); 954 int level = pSubresource->mipLevel; 955 int layer = pSubresource->arrayLayer; 956 957 pLayout->offset = image->surface.level[level].offset + image->surface.level[level].slice_size * layer; 958 pLayout->rowPitch = image->surface.level[level].pitch_bytes; 959 pLayout->arrayPitch = image->surface.level[level].slice_size; 960 pLayout->depthPitch = image->surface.level[level].slice_size; 961 pLayout->size = image->surface.level[level].slice_size; 962 if (image->type == VK_IMAGE_TYPE_3D) 963 pLayout->size *= image->surface.level[level].nblk_z; 964 } 965 966 967 VkResult 968 radv_CreateImageView(VkDevice _device, 969 const VkImageViewCreateInfo *pCreateInfo, 970 const VkAllocationCallbacks *pAllocator, 971 VkImageView *pView) 972 { 973 RADV_FROM_HANDLE(radv_device, device, _device); 974 struct radv_image_view *view; 975 976 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, 977 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 978 if (view == NULL) 979 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 980 981 radv_image_view_init(view, device, pCreateInfo, NULL, ~0); 982 983 *pView = radv_image_view_to_handle(view); 984 985 return VK_SUCCESS; 986 } 987 988 void 989 radv_DestroyImageView(VkDevice _device, VkImageView _iview, 990 const VkAllocationCallbacks *pAllocator) 991 { 992 RADV_FROM_HANDLE(radv_device, device, _device); 993 RADV_FROM_HANDLE(radv_image_view, iview, _iview); 994 995 if (!iview) 996 return; 997 vk_free2(&device->alloc, pAllocator, iview); 998 } 999 1000 void radv_buffer_view_init(struct radv_buffer_view *view, 1001 struct radv_device *device, 1002 const VkBufferViewCreateInfo* pCreateInfo, 1003 struct radv_cmd_buffer *cmd_buffer) 1004 { 1005 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer); 1006 1007 view->bo = buffer->bo; 1008 view->range = pCreateInfo->range == VK_WHOLE_SIZE ? 1009 buffer->size - pCreateInfo->offset : pCreateInfo->range; 1010 view->vk_format = pCreateInfo->format; 1011 1012 radv_make_buffer_descriptor(device, buffer, view->vk_format, 1013 pCreateInfo->offset, view->range, view->state); 1014 } 1015 1016 VkResult 1017 radv_CreateBufferView(VkDevice _device, 1018 const VkBufferViewCreateInfo *pCreateInfo, 1019 const VkAllocationCallbacks *pAllocator, 1020 VkBufferView *pView) 1021 { 1022 RADV_FROM_HANDLE(radv_device, device, _device); 1023 struct radv_buffer_view *view; 1024 1025 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, 1026 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1027 if (!view) 1028 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1029 1030 radv_buffer_view_init(view, device, pCreateInfo, NULL); 1031 1032 *pView = radv_buffer_view_to_handle(view); 1033 1034 return VK_SUCCESS; 1035 } 1036 1037 void 1038 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, 1039 const VkAllocationCallbacks *pAllocator) 1040 { 1041 RADV_FROM_HANDLE(radv_device, device, _device); 1042 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView); 1043 1044 if (!view) 1045 return; 1046 1047 vk_free2(&device->alloc, pAllocator, view); 1048 } 1049