/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_debug.h"
#include "radv_private.h"
#include "vk_format.h"
#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"
#include "util/u_atomic.h"
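
/* Select the surface tiling mode for a new image: linear when explicitly
 * requested (or when profitable for very thin 2D textures on VI and older),
 * 2D tiled otherwise. MSAA images must always be 2D tiled.
 */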
static unsigned
radv_choose_tiling(struct radv_device *device,
		   const struct radv_image_create_info *create_info)
{
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
		assert(pCreateInfo->samples <= 1);
		return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	if (!vk_format_is_compressed(pCreateInfo->format) &&
	    !vk_format_is_depth_or_stencil(pCreateInfo->format) &&
	    device->physical_device->rad_info.chip_class <= VI) {
		/* Restricted to VI and older because this causes hangs in
		 * some VK CTS tests on GFX9. */
		/* Textures with a very small height are recommended to be linear. */
		if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
		    /* Only very thin and long 2D textures should benefit from
		     * linear_aligned. */
		    (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	/* MSAA resources must be 2D tiled. */
	if (pCreateInfo->samples > 1)
		return RADEON_SURF_MODE_2D;

	return RADEON_SURF_MODE_2D;
}
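
/* Translate the Vulkan image create info into the radeon_surf description
 * consumed by the winsys: block dimensions, surface type, Z/S flags, and
 * whether TC-compatible HTILE and DCC may be used for this image.
 */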
static int
radv_init_surface(struct radv_device *device,
		  struct radeon_surf *surface,
		  const struct radv_image_create_info *create_info)
{
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	unsigned array_mode = radv_choose_tiling(device, create_info);
	const struct vk_format_description *desc =
		vk_format_description(pCreateInfo->format);
	bool is_depth, is_stencil, blendable;

	is_depth = vk_format_has_depth(desc);
	is_stencil = vk_format_has_stencil(desc);

	surface->blk_w = vk_format_get_blockwidth(pCreateInfo->format);
	surface->blk_h = vk_format_get_blockheight(pCreateInfo->format);

	surface->bpe = vk_format_get_blocksize(vk_format_depth_only(pCreateInfo->format));
	/* Align bytes-per-element to a dword. */
	if (surface->bpe == 3) {
		surface->bpe = 4;
	}
	surface->flags = RADEON_SURF_SET(array_mode, MODE);

	switch (pCreateInfo->imageType) {
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case VK_IMAGE_TYPE_3D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	default:
		unreachable("unhandled image type");
	}

	if (is_depth) {
		surface->flags |= RADEON_SURF_ZBUFFER;
		if (!(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
		    !(pCreateInfo->flags & (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
		                            VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR)) &&
		    pCreateInfo->tiling != VK_IMAGE_TILING_LINEAR &&
		    pCreateInfo->mipLevels <= 1 &&
		    device->physical_device->rad_info.chip_class >= VI &&
		    ((pCreateInfo->format == VK_FORMAT_D32_SFLOAT ||
		      /* For some reason TC-compat with 2/4/8 samples breaks
		       * some CTS tests - disable for now. */
		      (pCreateInfo->samples < 2 && pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT)) ||
		     (device->physical_device->rad_info.chip_class >= GFX9 &&
		      pCreateInfo->format == VK_FORMAT_D16_UNORM)))
			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	if (is_stencil)
		surface->flags |= RADEON_SURF_SBUFFER;

	surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;

	bool dcc_compatible_formats = radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable);
	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfoKHR *format_list =
			(const struct VkImageFormatListCreateInfoKHR *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO_KHR);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* Compatibility is transitive, so we only need to check
			 * one format with everything else. */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (!radv_dcc_formats_compatible(pCreateInfo->format,
				                                 format_list->pViewFormats[i]))
					dcc_compatible_formats = false;
			}
		} else {
			dcc_compatible_formats = false;
		}
	}

	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
	    (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR) ||
	    !dcc_compatible_formats ||
	    (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
	    pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
	    device->physical_device->rad_info.chip_class < VI ||
	    create_info->scanout || (device->instance->debug_flags & RADV_DEBUG_NO_DCC) ||
	    pCreateInfo->samples >= 2)
		surface->flags |= RADEON_SURF_DISABLE_DCC;
	if (create_info->scanout)
		surface->flags |= RADEON_SURF_SCANOUT;
	return 0;
}

static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
{
	return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

static inline unsigned
si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
{
	if (stencil)
		return image->surface.u.legacy.stencil_tiling_index[level];
	else
		return image->surface.u.legacy.tiling_index[level];
}

static unsigned radv_map_swizzle(unsigned swizzle)
{
	switch (swizzle) {
	case VK_SWIZZLE_Y:
		return V_008F0C_SQ_SEL_Y;
	case VK_SWIZZLE_Z:
		return V_008F0C_SQ_SEL_Z;
	case VK_SWIZZLE_W:
		return V_008F0C_SQ_SEL_W;
	case VK_SWIZZLE_0:
		return V_008F0C_SQ_SEL_0;
	case VK_SWIZZLE_1:
		return V_008F0C_SQ_SEL_1;
	default: /* VK_SWIZZLE_X */
		return V_008F0C_SQ_SEL_X;
	}
}

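/* Build the 4-dword typed buffer descriptor, as programmed below:
 *   state[0]: base address bits [31:0]
 *   state[1]: base address bits [47:32] and the element stride in bytes
 *   state[2]: the record count (in bytes on VI, in elements elsewhere,
 *             hence the divide by stride below)
 *   state[3]: destination swizzle plus num/data format
 */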
static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;

	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	stride = desc->block.bits / 8;

	num_format = radv_translate_buffer_numformat(desc, first_non_void);
	data_format = radv_translate_buffer_dataformat(desc, first_non_void);

	va += offset;
	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		S_008F04_STRIDE(stride);

	if (device->physical_device->rad_info.chip_class != VI && stride) {
		range /= stride;
	}

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3])) |
		   S_008F0C_NUM_FORMAT(num_format) |
		   S_008F0C_DATA_FORMAT(data_format);
}

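/* Patch the descriptor fields that depend on where and how the image is
 * bound right now: base address and tile swizzle, tiling index or swizzle
 * mode and pitch, and the DCC/HTILE metadata address for TC compression.
 */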
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, uint32_t *state)
{
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;

	if (chip_class >= GFX9) {
		if (is_stencil)
			va += image->surface.u.gfx9.stencil_offset;
		else
			va += image->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	state[0] = va >> 8;
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= image->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= VI) {
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
		if (!is_storage_image && radv_vi_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + image->dcc_offset;
			if (chip_class <= VI)
				meta_va += base_level_info->dcc_offset;
		} else if (!is_storage_image && image->tc_compatible_htile &&
		           image->surface.htile_size) {
			meta_va = gpu_address + image->htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			state[7] = meta_va >> 8;
			state[7] |= image->surface.tile_swizzle;
		}
	}

	if (chip_class >= GFX9) {
		state[3] &= C_008F1C_SW_MODE;
		state[4] &= C_008F20_PITCH_GFX9;

		if (is_stencil) {
			state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.stencil.swizzle_mode);
			state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.stencil.epitch);
		} else {
			state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.surf.swizzle_mode);
			state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.surf.epitch);
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
			struct gfx9_surf_meta_flags meta;

			if (image->dcc_offset)
				meta = image->surface.u.gfx9.dcc;
			else
				meta = image->surface.u.gfx9.htile;

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
		/* SI-CI-VI */
		unsigned pitch = base_level_info->nblk_x * block_width;
		unsigned index = si_tile_mode_index(image, base_level, is_stencil);

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
		state[4] &= C_008F20_PITCH_GFX6;
		state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
	}
}

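/* Map a Vulkan image/view type to a SQ_RSRC_IMG_* resource dimension.
 * Cube views used as storage images are demoted to 2D arrays, since
 * shader image operations address cube maps as 2D array layers.
 */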
static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
{
	if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
		return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

	/* GFX9 allocates 1D textures as 2D. */
	if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
		image_type = VK_IMAGE_TYPE_2D;

	switch (image_type) {
	case VK_IMAGE_TYPE_1D:
		return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
	case VK_IMAGE_TYPE_2D:
		if (nr_samples > 1)
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
		else
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
	case VK_IMAGE_TYPE_3D:
		if (view_type == VK_IMAGE_VIEW_TYPE_3D)
			return V_008F1C_SQ_RSRC_IMG_3D;
		else
			return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
	default:
		unreachable("illegal image type");
	}
}

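/* Derive the GFX9 BC_SWIZZLE setting from the composed view swizzle, so the
 * fixed border-color values end up in the channels the app expects.
 */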
static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

	if (swizzle[3] == VK_SWIZZLE_X) {
		/* For the pre-defined border color values (white, opaque
		 * black, transparent black), the only thing that matters is
		 * that the alpha channel winds up in the correct place
		 * (because the RGB channels are all the same) so either of
		 * these enumerations will work.
		 */
		if (swizzle[2] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
	} else if (swizzle[0] == VK_SWIZZLE_X) {
		if (swizzle[1] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
	} else if (swizzle[1] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
	} else if (swizzle[2] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
	}

	return bc_swizzle;
}

/**
 * Build the sampler view descriptor for a texture.
 */
static void
si_make_texture_descriptor(struct radv_device *device,
			   struct radv_image *image,
			   bool is_storage_image,
			   VkImageViewType view_type,
			   VkFormat vk_format,
			   const VkComponentMapping *mapping,
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	int first_non_void;
	unsigned num_format, data_format, type;

	desc = vk_format_description(vk_format);

	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	first_non_void = vk_format_get_first_non_void_channel(vk_format);

	num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
	if (num_format == ~0) {
		num_format = 0;
	}

	data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
	if (data_format == ~0) {
		data_format = 0;
	}

	/* S8 with either Z16 or Z32 HTILE needs a special format. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    vk_format == VK_FORMAT_S8_UINT &&
	    image->tc_compatible_htile) {
		if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
		else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
	}

	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class >= GFX9);
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
		    S_008F14_NUM_FORMAT_GFX6(num_format));
	state[2] = (S_008F18_WIDTH(width - 1) |
		    S_008F18_HEIGHT(height - 1) |
		    S_008F18_PERF_MOD(4));
	state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		    S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		    S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		    S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		    S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
					0 : first_level) |
		    S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
					last_level) |
		    S_008F1C_TYPE(type));
	state[4] = 0;
	state[5] = S_008F24_BASE_ARRAY(first_layer);
	state[6] = 0;
	state[7] = 0;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);

		/* Depth is the last accessible layer on GFX9.
		 * The hw doesn't need to know the total number of layers.
		 */
		if (type == V_008F1C_SQ_RSRC_IMG_3D)
			state[4] |= S_008F20_DEPTH(depth - 1);
		else
			state[4] |= S_008F20_DEPTH(last_layer);

		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
		state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
					     util_logbase2(image->info.samples) :
					     image->info.levels - 1);
	} else {
		state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
		state[4] |= S_008F20_DEPTH(depth - 1);
		state[5] |= S_008F24_LAST_ARRAY(last_layer);
	}

	if (image->dcc_offset) {
		unsigned swap = radv_translate_colorswap(vk_format, FALSE);

		state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
	} else {
		/* The last dword is unused by hw. The shader uses it to clear
		 * bits in the first dword of sampler state.
		 */
		if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) {
			if (first_level == last_level)
				state[7] = C_008F30_MAX_ANISO_RATIO;
			else
				state[7] = 0xffffffff;
		}
	}

	/* Initialize the sampler view for FMASK. */
	if (image->fmask.size) {
		uint32_t fmask_format, num_format;
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint64_t va;

		va = gpu_address + image->offset + image->fmask.offset;

		if (device->physical_device->rad_info.chip_class >= GFX9) {
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
			switch (image->info.samples) {
			case 2:
				num_format = V_008F14_IMG_FMASK_8_2_2;
				break;
			case 4:
				num_format = V_008F14_IMG_FMASK_8_4_4;
				break;
			case 8:
				num_format = V_008F14_IMG_FMASK_32_8_8;
				break;
			default:
				unreachable("invalid nr_samples");
			}
		} else {
			switch (image->info.samples) {
			case 2:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
				break;
			case 4:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
				break;
			case 8:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
				break;
			default:
				assert(0);
				fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
			}
			num_format = V_008F14_IMG_NUM_FORMAT_UINT;
		}

		fmask_state[0] = va >> 8;
		fmask_state[0] |= image->fmask.tile_swizzle;
		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
			S_008F14_DATA_FORMAT_GFX6(fmask_format) |
			S_008F14_NUM_FORMAT_GFX6(num_format);
		fmask_state[2] = S_008F18_WIDTH(width - 1) |
			S_008F18_HEIGHT(height - 1);
		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
			S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
			S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
			S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
			S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false, false));
		fmask_state[4] = 0;
		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
		fmask_state[6] = 0;
		fmask_state[7] = 0;

		if (device->physical_device->rad_info.chip_class >= GFX9) {
			fmask_state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.fmask.swizzle_mode);
			fmask_state[4] |= S_008F20_DEPTH(last_layer) |
					  S_008F20_PITCH_GFX9(image->surface.u.gfx9.fmask.epitch);
			fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->surface.u.gfx9.cmask.pipe_aligned) |
					  S_008F24_META_RB_ALIGNED(image->surface.u.gfx9.cmask.rb_aligned);
		} else {
			fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
			fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
				S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
			fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
		}
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}

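/* Fill the opaque driver metadata shared with other processes for exported
 * images; the dword layout is documented at the top of the function body.
 */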
static void
radv_query_opaque_metadata(struct radv_device *device,
			   struct radv_image *image,
			   struct radeon_bo_metadata *md)
{
	static const VkComponentMapping fixedmapping;
	uint32_t desc[8], i;

	/* Metadata image format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */
	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(device);

	si_make_texture_descriptor(device, image, false,
				   (VkImageViewType)image->type, image->vk_format,
				   &fixedmapping, 0, image->info.levels - 1, 0,
				   image->info.array_size,
				   image->info.width, image->info.height,
				   image->info.depth,
				   desc, NULL);

	si_set_mutable_tex_desc_fields(device, image, &image->surface.u.legacy.level[0], 0, 0,
				       image->surface.blk_w, false, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = image->dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (device->physical_device->rad_info.chip_class <= VI) {
		for (i = 0; i < image->info.levels; i++)
			md->metadata[10 + i] = image->surface.u.legacy.level[i].offset >> 8;
		md->size_metadata = (11 + image->info.levels - 1) * 4;
	}
}

void
radv_init_metadata(struct radv_device *device,
		   struct radv_image *image,
		   struct radeon_bo_metadata *metadata)
{
	struct radeon_surf *surface = &image->surface;

	memset(metadata, 0, sizeof(*metadata));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
	} else {
		metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
		metadata->u.legacy.bankw = surface->u.legacy.bankw;
		metadata->u.legacy.bankh = surface->u.legacy.bankh;
		metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
		metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
		metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
		metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
		metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	}

	radv_query_opaque_metadata(device, image, metadata);
}

/* The number of samples can be specified independently of the texture. */
static void
radv_image_get_fmask_info(struct radv_device *device,
			  struct radv_image *image,
			  unsigned nr_samples,
			  struct radv_fmask_info *out)
{
	/* FMASK is allocated like an ordinary texture. */
	struct radeon_surf fmask = {};
	struct ac_surf_info info = image->info;

	memset(out, 0, sizeof(*out));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		out->alignment = image->surface.u.gfx9.fmask_alignment;
		out->size = image->surface.u.gfx9.fmask_size;
		return;
	}

	fmask.blk_w = image->surface.blk_w;
	fmask.blk_h = image->surface.blk_h;
	info.samples = 1;
	fmask.flags = image->surface.flags | RADEON_SURF_FMASK;

	if (!image->shareable)
		info.surf_index = &device->fmask_mrt_offset_counter;

	/* Force 2D tiling if it wasn't set. This may occur when creating
	 * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
	 * destination buffer must have an FMASK too. */
	fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
	fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);

	switch (nr_samples) {
	case 2:
	case 4:
		fmask.bpe = 1;
		break;
	case 8:
		fmask.bpe = 4;
		break;
	default:
		return;
	}

	device->ws->surface_init(device->ws, &info, &fmask);
	assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);

	out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
	if (out->slice_tile_max)
		out->slice_tile_max -= 1;

	out->tile_mode_index = fmask.u.legacy.tiling_index[0];
	out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
	out->bank_height = fmask.u.legacy.bankh;
	out->tile_swizzle = fmask.tile_swizzle;
	out->alignment = MAX2(256, fmask.surf_alignment);
	out->size = fmask.surf_size;

	assert(!out->tile_swizzle || !image->shareable);
}

static void
radv_image_alloc_fmask(struct radv_device *device,
		       struct radv_image *image)
{
	radv_image_get_fmask_info(device, image, image->info.samples, &image->fmask);

	image->fmask.offset = align64(image->size, image->fmask.alignment);
	image->size = image->fmask.offset + image->fmask.size;
	image->alignment = MAX2(image->alignment, image->fmask.alignment);
}

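/* Compute the CMASK layout for GFX6-8. Each CMASK nibble covers an 8x8
 * pixel block, and the surface is padded to macro-tile clusters whose size
 * depends on the pipe count. A worked example, assuming num_pipes = 4
 * (cl_width x cl_height = 32x32) and a 1024x1024 single-layer 2D image:
 *   width = height = align(1024, 32 * 8) = 1024
 *   slice_elements = (1024 * 1024) / 64 = 16384 nibbles
 *   slice_bytes = 8192, rounded up to num_pipes * pipe_interleave_bytes.
 */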
static void
radv_image_get_cmask_info(struct radv_device *device,
			  struct radv_image *image,
			  struct radv_cmask_info *out)
{
	unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes;
	unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
	unsigned cl_width, cl_height;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		out->alignment = image->surface.u.gfx9.cmask_alignment;
		out->size = image->surface.u.gfx9.cmask_size;
		return;
	}

	switch (num_pipes) {
	case 2:
		cl_width = 32;
		cl_height = 16;
		break;
	case 4:
		cl_width = 32;
		cl_height = 32;
		break;
	case 8:
		cl_width = 64;
		cl_height = 32;
		break;
	case 16: /* Hawaii */
		cl_width = 64;
		cl_height = 64;
		break;
	default:
		assert(0);
		return;
	}

	unsigned base_align = num_pipes * pipe_interleave_bytes;

	unsigned width = align(image->info.width, cl_width * 8);
	unsigned height = align(image->info.height, cl_height * 8);
	unsigned slice_elements = (width * height) / (8 * 8);

	/* Each element of CMASK is a nibble. */
	unsigned slice_bytes = slice_elements / 2;

	out->slice_tile_max = (width * height) / (128 * 128);
	if (out->slice_tile_max)
		out->slice_tile_max -= 1;

	out->alignment = MAX2(256, base_align);
	out->size = (image->type == VK_IMAGE_TYPE_3D ? image->info.depth : image->info.array_size) *
		    align(slice_bytes, base_align);
}

static void
radv_image_alloc_cmask(struct radv_device *device,
		       struct radv_image *image)
{
	uint32_t clear_value_size = 0;

	radv_image_get_cmask_info(device, image, &image->cmask);

	image->cmask.offset = align64(image->size, image->cmask.alignment);
	/* Reserve 8 bytes for storing the clear values, unless an earlier
	 * metadata surface already did. */
	if (!image->clear_value_offset) {
		image->clear_value_offset = image->cmask.offset + image->cmask.size;
		clear_value_size = 8;
	}
	image->size = image->cmask.offset + image->cmask.size + clear_value_size;
	image->alignment = MAX2(image->alignment, image->cmask.alignment);
}

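/* DCC is appended right after the main surface. The extra 16 bytes hold
 * the fast-clear value (8 bytes) followed by the DCC predicate (8 bytes),
 * matching the offsets recorded below.
 */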
static void
radv_image_alloc_dcc(struct radv_image *image)
{
	image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
	/* + 16 for storing the clear values + dcc pred */
	image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
	image->dcc_pred_offset = image->clear_value_offset + 8;
	image->size = image->dcc_offset + image->surface.dcc_size + 16;
	image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
}

static void
radv_image_alloc_htile(struct radv_image *image)
{
	image->htile_offset = align64(image->size, image->surface.htile_alignment);

	/* + 8 for storing the clear values */
	image->clear_value_offset = image->htile_offset + image->surface.htile_size;
	image->size = image->clear_value_offset + 8;
	image->alignment = align64(image->alignment, image->surface.htile_alignment);
}

static inline bool
radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
{
	if (image->info.samples <= 1 &&
	    image->info.width * image->info.height <= 512 * 512) {
		/* Do not enable CMASK or DCC for small surfaces where the cost
		 * of the eliminate pass can be higher than the benefit of fast
		 * clear. RadeonSI does this, but the image threshold is
		 * different.
		 */
		return false;
	}

	return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
	       (image->exclusive || image->queue_family_mask == 1);
}

static inline bool
radv_image_can_enable_dcc(struct radv_image *image)
{
	return radv_image_can_enable_dcc_or_cmask(image) &&
	       image->surface.dcc_size;
}

static inline bool
radv_image_can_enable_cmask(struct radv_image *image)
{
	if (image->surface.bpe > 8 && image->info.samples == 1) {
		/* Do not enable CMASK for non-MSAA images (fast color clear)
		 * because 128-bit formats are not supported, but FMASK might
		 * still be used.
		 */
		return false;
	}

	return radv_image_can_enable_dcc_or_cmask(image) &&
	       image->info.levels == 1 &&
	       image->info.depth == 1 &&
	       !image->surface.is_linear;
}

static inline bool
radv_image_can_enable_fmask(struct radv_image *image)
{
	return image->info.samples > 1 && vk_format_is_color(image->vk_format);
}

static inline bool
radv_image_can_enable_htile(struct radv_image *image)
{
	return image->info.levels == 1 && vk_format_is_depth(image->vk_format);
}

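/* Common image creation path: compute the main surface layout through the
 * winsys, then append the optional metadata surfaces (DCC or CMASK, and
 * FMASK or HTILE), and give sparse images a virtual backing buffer.
 */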
VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;

	image->vk_format = pCreateInfo->format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;

	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL_KHR)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}

	image->shareable = vk_find_struct_const(pCreateInfo->pNext,
	                                        EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
	if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
		image->info.surf_index = &device->image_mrt_offset_counter;
	}

	radv_init_surface(device, &image->surface, create_info);

	device->ws->surface_init(device->ws, &image->info, &image->surface);

	image->size = image->surface.surf_size;
	image->alignment = image->surface.surf_alignment;

	if (!create_info->no_metadata_planes) {
		/* Try to enable DCC first. */
		if (radv_image_can_enable_dcc(image)) {
			radv_image_alloc_dcc(image);
		} else {
			/* When DCC cannot be enabled, try CMASK. */
			image->surface.dcc_size = 0;
			if (radv_image_can_enable_cmask(image)) {
				radv_image_alloc_cmask(device, image);
			}
		}

		/* Try to enable FMASK for multisampled images. */
		if (radv_image_can_enable_fmask(image)) {
			radv_image_alloc_fmask(device, image);
		} else {
			/* Otherwise, try to enable HTILE for depth surfaces. */
			if (radv_image_can_enable_htile(image) &&
			    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
				radv_image_alloc_htile(image);
				image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
			} else {
				image->surface.htile_size = 0;
			}
		}
	} else {
		image->surface.dcc_size = 0;
		image->surface.htile_size = 0;
	}

	if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
		image->alignment = MAX2(image->alignment, 4096);
		image->size = align64(image->size, image->alignment);
		image->offset = 0;

		image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
		                                      0, RADEON_FLAG_VIRTUAL);
		if (!image->bo) {
			vk_free2(&device->alloc, alloc, image);
			return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pImage = radv_image_to_handle(image);

	return VK_SUCCESS;
}

static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				const VkComponentMapping *components,
				bool is_storage_image)
{
	struct radv_image *image = iview->image;
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	uint32_t *descriptor;
	uint32_t hw_level = 0;

	if (is_storage_image) {
		descriptor = iview->storage_descriptor;
	} else {
		descriptor = iview->descriptor;
	}

	assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
	blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);

	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;
	si_make_texture_descriptor(device, image, is_storage_image,
				   iview->type,
				   iview->vk_format,
				   components,
				   hw_level, hw_level + iview->level_count - 1,
				   iview->base_layer,
				   iview->base_layer + iview->layer_count - 1,
				   iview->extent.width,
				   iview->extent.height,
				   iview->extent.depth,
				   descriptor,
				   descriptor + 8);

	const struct legacy_surf_level *base_level_info = NULL;
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &image->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &image->surface.u.legacy.level[iview->base_mip];
	}
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image, descriptor);
}

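/* Initialize an image view: pick the per-aspect format, derive the view
 * extent (base-level extent on GFX9, minified extent otherwise) and build
 * both the sampled-image and the storage-image descriptors.
 */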
void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo)
{
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}

	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->vk_format = pCreateInfo->format;
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;

	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width  = radv_minify(image->info.width, range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth  = radv_minify(image->info.depth, range->baseMipLevel),
		};
	}

	if (iview->vk_format != image->vk_format) {
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 *              Uncompressed pixels   Compressed block sizes (4x4)
		 *      mip0:       22 x 22                   6 x 6
		 *      mip1:       11 x 11                   3 x 3
		 *      mip2:        5 x  5                   2 x 2
		 *      mip3:        2 x  2                   1 x 1
		 *      mip4:        1 x  1                   1 x 1
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base
		 * level, and the HW derives the block sizes down the mip chain with straight-up
		 * divide-by-two integer math:
		 *      mip0:  6x6
		 *      mip1:  3x3
		 *      mip2:  1x1
		 *      mip3:  1x1
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by calculating the base mip's width and height, then convert that, and round it
		 * back up to get the level 0 size.
		 * Clamp the converted size between the original values, and the next power of two, which
		 * means we don't oversize the image.
		 */
		if (device->physical_device->rad_info.chip_class >= GFX9 &&
		    vk_format_is_compressed(image->vk_format) &&
		    !vk_format_is_compressed(iview->vk_format)) {
			unsigned rounded_img_w = util_next_power_of_two(iview->extent.width);
			unsigned rounded_img_h = util_next_power_of_two(iview->extent.height);
			unsigned lvl_width  = radv_minify(image->info.width, range->baseMipLevel);
			unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			lvl_width <<= range->baseMipLevel;
			lvl_height <<= range->baseMipLevel;

			iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w);
			iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h);
		}
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

	radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, false);
	radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, true);
}

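/* Note: radv_layout_has_htile() and radv_layout_is_htile_compressed()
 * currently share the same logic; they remain separate entry points for
 * the two distinct questions callers ask.
 */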
bool radv_layout_has_htile(const struct radv_image *image,
                           VkImageLayout layout,
                           unsigned queue_mask)
{
	if (image->surface.htile_size && image->tc_compatible_htile)
		return layout != VK_IMAGE_LAYOUT_GENERAL;

	return image->surface.htile_size &&
	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
	        layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
	       queue_mask == (1u << RADV_QUEUE_GENERAL);
}

bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout,
                                     unsigned queue_mask)
{
	if (image->surface.htile_size && image->tc_compatible_htile)
		return layout != VK_IMAGE_LAYOUT_GENERAL;

	return image->surface.htile_size &&
	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
	        layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
	       queue_mask == (1u << RADV_QUEUE_GENERAL);
}

bool radv_layout_can_fast_clear(const struct radv_image *image,
			        VkImageLayout layout,
			        unsigned queue_mask)
{
	return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
		queue_mask == (1u << RADV_QUEUE_GENERAL);
}

bool radv_layout_dcc_compressed(const struct radv_image *image,
			        VkImageLayout layout,
			        unsigned queue_mask)
{
	/* Don't compress compute transfer dst, as image stores are not supported. */
	if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
	    (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
		return false;

	return image->surface.dcc_size && layout != VK_IMAGE_LAYOUT_GENERAL;
}

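/* Return the mask of queue families that may access the image. |family| is
 * the explicitly requested family (VK_QUEUE_FAMILY_IGNORED meaning the
 * current family, |queue_family|).
 */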
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
	if (!image->exclusive)
		return image->queue_family_mask;
	if (family == VK_QUEUE_FAMILY_EXTERNAL_KHR)
		return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
	if (family == VK_QUEUE_FAMILY_IGNORED)
		return 1u << queue_family;
	return 1u << family;
}

VkResult
radv_CreateImage(VkDevice device,
		 const VkImageCreateInfo *pCreateInfo,
		 const VkAllocationCallbacks *pAllocator,
		 VkImage *pImage)
{
#ifdef ANDROID
	const VkNativeBufferANDROID *gralloc_info =
		vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

	if (gralloc_info)
		return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
		                               pAllocator, pImage);
#endif

	const struct wsi_image_create_info *wsi_info =
		vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
	bool scanout = wsi_info && wsi_info->scanout;

	return radv_image_create(device,
				 &(struct radv_image_create_info) {
					 .vk_info = pCreateInfo,
					 .scanout = scanout,
				 },
				 pAllocator,
				 pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image,
		  const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (!image)
		return;

	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
		device->ws->buffer_destroy(image->bo);

	if (image->owned_memory != VK_NULL_HANDLE)
		radv_FreeMemory(_device, image->owned_memory, pAllocator);

	vk_free2(&device->alloc, pAllocator, image);
}

void radv_GetImageSubresourceLayout(
	VkDevice                                    _device,
	VkImage                                     _image,
	const VkImageSubresource*                   pSubresource,
	VkSubresourceLayout*                        pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	int level = pSubresource->mipLevel;
	int layer = pSubresource->arrayLayer;
	struct radeon_surf *surface = &image->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		pLayout->offset = surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
		pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		pLayout->offset = surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}

VkResult
radv_CreateImageView(VkDevice _device,
		     const VkImageViewCreateInfo *pCreateInfo,
		     const VkAllocationCallbacks *pAllocator,
		     VkImageView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_image_view *view;

	view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (view == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_image_view_init(view, device, pCreateInfo);

	*pView = radv_image_view_to_handle(view);

	return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
		      const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_image_view, iview, _iview);

	if (!iview)
		return;

	vk_free2(&device->alloc, pAllocator, iview);
}

void radv_buffer_view_init(struct radv_buffer_view *view,
			   struct radv_device *device,
			   const VkBufferViewCreateInfo* pCreateInfo)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

	view->bo = buffer->bo;
	view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
		buffer->size - pCreateInfo->offset : pCreateInfo->range;
	view->vk_format = pCreateInfo->format;

	radv_make_buffer_descriptor(device, buffer, view->vk_format,
				    pCreateInfo->offset, view->range, view->state);
}

VkResult
radv_CreateBufferView(VkDevice _device,
		      const VkBufferViewCreateInfo *pCreateInfo,
		      const VkAllocationCallbacks *pAllocator,
		      VkBufferView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer_view *view;

	view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!view)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_buffer_view_init(view, device, pCreateInfo);

	*pView = radv_buffer_view_to_handle(view);

	return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
		       const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

	if (!view)
		return;

	vk_free2(&device->alloc, pAllocator, view);
}