Home | History | Annotate | Download | only in vulkan
      1 /*
      2  * Copyright  2016 Red Hat.
      3  * Copyright  2016 Bas Nieuwenhuizen
      4  *
      5  * based in part on anv driver which is:
      6  * Copyright  2015 Intel Corporation
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the "Software"),
     10  * to deal in the Software without restriction, including without limitation
     11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     12  * and/or sell copies of the Software, and to permit persons to whom the
     13  * Software is furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the next
     16  * paragraph) shall be included in all copies or substantial portions of the
     17  * Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     25  * IN THE SOFTWARE.
     26  */
     27 
     28 #include "util/mesa-sha1.h"
     29 #include "util/u_atomic.h"
     30 #include "radv_debug.h"
     31 #include "radv_private.h"
     32 #include "radv_shader.h"
     33 #include "nir/nir.h"
     34 #include "nir/nir_builder.h"
     35 #include "spirv/nir_spirv.h"
     36 #include "vk_util.h"
     37 
     38 #include <llvm-c/Core.h>
     39 #include <llvm-c/TargetMachine.h>
     40 
     41 #include "sid.h"
     42 #include "gfx9d.h"
     43 #include "ac_binary.h"
     44 #include "ac_llvm_util.h"
     45 #include "ac_nir_to_llvm.h"
     46 #include "vk_format.h"
     47 #include "util/debug.h"
     48 #include "ac_exp_param.h"
     49 #include "ac_shader_util.h"
     50 
     51 static void
     52 radv_pipeline_destroy(struct radv_device *device,
     53                       struct radv_pipeline *pipeline,
     54                       const VkAllocationCallbacks* allocator)
     55 {
     56 	for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i)
     57 		if (pipeline->shaders[i])
     58 			radv_shader_variant_destroy(device, pipeline->shaders[i]);
     59 
     60 	if (pipeline->gs_copy_shader)
     61 		radv_shader_variant_destroy(device, pipeline->gs_copy_shader);
     62 
     63 	vk_free2(&device->alloc, allocator, pipeline);
     64 }
     65 
     66 void radv_DestroyPipeline(
     67 	VkDevice                                    _device,
     68 	VkPipeline                                  _pipeline,
     69 	const VkAllocationCallbacks*                pAllocator)
     70 {
     71 	RADV_FROM_HANDLE(radv_device, device, _device);
     72 	RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
     73 
     74 	if (!_pipeline)
     75 		return;
     76 
     77 	radv_pipeline_destroy(device, pipeline, pAllocator);
     78 }
     79 
     80 static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
     81 {
     82 	int i;
     83 
     84 	for (i = 0; i < MESA_SHADER_STAGES; i++) {
     85 		if (!pipeline->shaders[i])
     86 			continue;
     87 
     88 		radv_shader_dump_stats(device, pipeline->shaders[i], i, stderr);
     89 	}
     90 }
     91 
     92 static uint32_t get_hash_flags(struct radv_device *device)
     93 {
     94 	uint32_t hash_flags = 0;
     95 
     96 	if (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH)
     97 		hash_flags |= RADV_HASH_SHADER_UNSAFE_MATH;
     98 	if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
     99 		hash_flags |= RADV_HASH_SHADER_SISCHED;
    100 	return hash_flags;
    101 }
    102 
    103 static VkResult
    104 radv_pipeline_scratch_init(struct radv_device *device,
    105                            struct radv_pipeline *pipeline)
    106 {
    107 	unsigned scratch_bytes_per_wave = 0;
    108 	unsigned max_waves = 0;
    109 	unsigned min_waves = 1;
    110 
    111 	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
    112 		if (pipeline->shaders[i]) {
    113 			unsigned max_stage_waves = device->scratch_waves;
    114 
    115 			scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave,
    116 			                              pipeline->shaders[i]->config.scratch_bytes_per_wave);
    117 
    118 			max_stage_waves = MIN2(max_stage_waves,
    119 			          4 * device->physical_device->rad_info.num_good_compute_units *
    120 			          (256 / pipeline->shaders[i]->config.num_vgprs));
    121 			max_waves = MAX2(max_waves, max_stage_waves);
    122 		}
    123 	}
    124 
    125 	if (pipeline->shaders[MESA_SHADER_COMPUTE]) {
    126 		unsigned group_size = pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] *
    127 		                      pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] *
    128 		                      pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2];
    129 		min_waves = MAX2(min_waves, round_up_u32(group_size, 64));
    130 	}
    131 
    132 	if (scratch_bytes_per_wave)
    133 		max_waves = MIN2(max_waves, 0xffffffffu / scratch_bytes_per_wave);
    134 
    135 	if (scratch_bytes_per_wave && max_waves < min_waves) {
    136 		/* Not really true at this moment, but will be true on first
    137 		 * execution. Avoid having hanging shaders. */
    138 		return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    139 	}
    140 	pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
    141 	pipeline->max_waves = max_waves;
    142 	return VK_SUCCESS;
    143 }
    144 
    145 static uint32_t si_translate_blend_function(VkBlendOp op)
    146 {
    147 	switch (op) {
    148 	case VK_BLEND_OP_ADD:
    149 		return V_028780_COMB_DST_PLUS_SRC;
    150 	case VK_BLEND_OP_SUBTRACT:
    151 		return V_028780_COMB_SRC_MINUS_DST;
    152 	case VK_BLEND_OP_REVERSE_SUBTRACT:
    153 		return V_028780_COMB_DST_MINUS_SRC;
    154 	case VK_BLEND_OP_MIN:
    155 		return V_028780_COMB_MIN_DST_SRC;
    156 	case VK_BLEND_OP_MAX:
    157 		return V_028780_COMB_MAX_DST_SRC;
    158 	default:
    159 		return 0;
    160 	}
    161 }
    162 
    163 static uint32_t si_translate_blend_factor(VkBlendFactor factor)
    164 {
    165 	switch (factor) {
    166 	case VK_BLEND_FACTOR_ZERO:
    167 		return V_028780_BLEND_ZERO;
    168 	case VK_BLEND_FACTOR_ONE:
    169 		return V_028780_BLEND_ONE;
    170 	case VK_BLEND_FACTOR_SRC_COLOR:
    171 		return V_028780_BLEND_SRC_COLOR;
    172 	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
    173 		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
    174 	case VK_BLEND_FACTOR_DST_COLOR:
    175 		return V_028780_BLEND_DST_COLOR;
    176 	case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
    177 		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
    178 	case VK_BLEND_FACTOR_SRC_ALPHA:
    179 		return V_028780_BLEND_SRC_ALPHA;
    180 	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
    181 		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
    182 	case VK_BLEND_FACTOR_DST_ALPHA:
    183 		return V_028780_BLEND_DST_ALPHA;
    184 	case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
    185 		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
    186 	case VK_BLEND_FACTOR_CONSTANT_COLOR:
    187 		return V_028780_BLEND_CONSTANT_COLOR;
    188 	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
    189 		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
    190 	case VK_BLEND_FACTOR_CONSTANT_ALPHA:
    191 		return V_028780_BLEND_CONSTANT_ALPHA;
    192 	case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
    193 		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
    194 	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
    195 		return V_028780_BLEND_SRC_ALPHA_SATURATE;
    196 	case VK_BLEND_FACTOR_SRC1_COLOR:
    197 		return V_028780_BLEND_SRC1_COLOR;
    198 	case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
    199 		return V_028780_BLEND_INV_SRC1_COLOR;
    200 	case VK_BLEND_FACTOR_SRC1_ALPHA:
    201 		return V_028780_BLEND_SRC1_ALPHA;
    202 	case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
    203 		return V_028780_BLEND_INV_SRC1_ALPHA;
    204 	default:
    205 		return 0;
    206 	}
    207 }
    208 
    209 static uint32_t si_translate_blend_opt_function(VkBlendOp op)
    210 {
    211 	switch (op) {
    212 	case VK_BLEND_OP_ADD:
    213 		return V_028760_OPT_COMB_ADD;
    214 	case VK_BLEND_OP_SUBTRACT:
    215 		return V_028760_OPT_COMB_SUBTRACT;
    216 	case VK_BLEND_OP_REVERSE_SUBTRACT:
    217 		return V_028760_OPT_COMB_REVSUBTRACT;
    218 	case VK_BLEND_OP_MIN:
    219 		return V_028760_OPT_COMB_MIN;
    220 	case VK_BLEND_OP_MAX:
    221 		return V_028760_OPT_COMB_MAX;
    222 	default:
    223 		return V_028760_OPT_COMB_BLEND_DISABLED;
    224 	}
    225 }
    226 
    227 static uint32_t si_translate_blend_opt_factor(VkBlendFactor factor, bool is_alpha)
    228 {
    229 	switch (factor) {
    230 	case VK_BLEND_FACTOR_ZERO:
    231 		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
    232 	case VK_BLEND_FACTOR_ONE:
    233 		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
    234 	case VK_BLEND_FACTOR_SRC_COLOR:
    235 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
    236 				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
    237 	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
    238 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
    239 				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
    240 	case VK_BLEND_FACTOR_SRC_ALPHA:
    241 		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
    242 	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
    243 		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
    244 	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
    245 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
    246 				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
    247 	default:
    248 		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
    249 	}
    250 }
    251 
    252 /**
    253  * Get rid of DST in the blend factors by commuting the operands:
    254  *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
    255  */
    256 static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
    257 				unsigned *dst_factor, unsigned expected_dst,
    258 				unsigned replacement_src)
    259 {
    260 	if (*src_factor == expected_dst &&
    261 	    *dst_factor == VK_BLEND_FACTOR_ZERO) {
    262 		*src_factor = VK_BLEND_FACTOR_ZERO;
    263 		*dst_factor = replacement_src;
    264 
    265 		/* Commuting the operands requires reversing subtractions. */
    266 		if (*func == VK_BLEND_OP_SUBTRACT)
    267 			*func = VK_BLEND_OP_REVERSE_SUBTRACT;
    268 		else if (*func == VK_BLEND_OP_REVERSE_SUBTRACT)
    269 			*func = VK_BLEND_OP_SUBTRACT;
    270 	}
    271 }
    272 
    273 static bool si_blend_factor_uses_dst(unsigned factor)
    274 {
    275 	return factor == VK_BLEND_FACTOR_DST_COLOR ||
    276 		factor == VK_BLEND_FACTOR_DST_ALPHA ||
    277 		factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
    278 		factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ||
    279 		factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
    280 }
    281 
    282 static bool is_dual_src(VkBlendFactor factor)
    283 {
    284 	switch (factor) {
    285 	case VK_BLEND_FACTOR_SRC1_COLOR:
    286 	case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
    287 	case VK_BLEND_FACTOR_SRC1_ALPHA:
    288 	case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
    289 		return true;
    290 	default:
    291 		return false;
    292 	}
    293 }
    294 
/* Select the SPI shader color export format for a color attachment.
 *
 * Four candidate hardware export formats are computed and one is
 * returned depending on whether blending is enabled and whether the
 * blend equation (or alpha-to-coverage) needs the alpha channel.
 */
static unsigned si_choose_spi_color_format(VkFormat vk_format,
					    bool blend_enable,
					    bool blend_need_alpha)
{
	const struct vk_format_description *desc = vk_format_description(vk_format);
	unsigned format, ntype, swap;

	/* Alpha is needed for alpha-to-coverage.
	 * Blending may be with or without alpha.
	 */
	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
	unsigned alpha = 0; /* exports alpha, but may not support blending */
	unsigned blend = 0; /* supports blending, but may not export alpha */
	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */

	format = radv_translate_colorformat(vk_format);
	ntype = radv_translate_color_numformat(vk_format, desc,
					       vk_format_get_first_non_void_channel(vk_format));
	swap = radv_translate_colorswap(vk_format, false);

	/* Choose the SPI color formats. These are required values for Stoney/RB+.
	 * Other chips have multiple choices, though they are not necessarily better.
	 */
	switch (format) {
	/* Formats with at most 11 bits per channel: one 16-bit export
	 * covers all four cases. */
	case V_028C70_COLOR_5_6_5:
	case V_028C70_COLOR_1_5_5_5:
	case V_028C70_COLOR_5_5_5_1:
	case V_028C70_COLOR_4_4_4_4:
	case V_028C70_COLOR_10_11_11:
	case V_028C70_COLOR_11_11_10:
	case V_028C70_COLOR_8:
	case V_028C70_COLOR_8_8:
	case V_028C70_COLOR_8_8_8_8:
	case V_028C70_COLOR_10_10_10_2:
	case V_028C70_COLOR_2_10_10_10:
		if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		break;

	case V_028C70_COLOR_16:
	case V_028C70_COLOR_16_16:
	case V_028C70_COLOR_16_16_16_16:
		if (ntype == V_028C70_NUMBER_UNORM ||
		    ntype == V_028C70_NUMBER_SNORM) {
			/* UNORM16 and SNORM16 don't support blending */
			if (ntype == V_028C70_NUMBER_UNORM)
				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
			else
				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;

			/* Use 32 bits per channel for blending. */
			if (format == V_028C70_COLOR_16) {
				if (swap == V_028C70_SWAP_STD) { /* R */
					blend = V_028714_SPI_SHADER_32_R;
					blend_alpha = V_028714_SPI_SHADER_32_AR;
				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else if (format == V_028C70_COLOR_16_16) {
				if (swap == V_028C70_SWAP_STD) { /* RG */
					blend = V_028714_SPI_SHADER_32_GR;
					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
				} else if (swap == V_028C70_SWAP_ALT) /* RA */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else /* 16_16_16_16 */
				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_FLOAT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32:
		if (swap == V_028C70_SWAP_STD) { /* R */
			blend = normal = V_028714_SPI_SHADER_32_R;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32:
		if (swap == V_028C70_SWAP_STD) { /* RG */
			blend = normal = V_028714_SPI_SHADER_32_GR;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (swap == V_028C70_SWAP_ALT) /* RA */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32_32_32:
	case V_028C70_COLOR_8_24:
	case V_028C70_COLOR_24_8:
	case V_028C70_COLOR_X24_8_32_FLOAT:
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
		break;

	default:
		unreachable("unhandled blend format");
	}

	/* Pick the candidate matching the requested capabilities. */
	if (blend_enable && blend_need_alpha)
		return blend_alpha;
	else if(blend_need_alpha)
		return alpha;
	else if(blend_enable)
		return blend;
	else
		return normal;
}
    418 
static void
radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
					const VkGraphicsPipelineCreateInfo *pCreateInfo,
					uint32_t blend_enable,
					uint32_t blend_need_alpha,
					bool single_cb_enable,
					bool blend_mrt0_is_dual_src)
{
	/* Derive the SPI color export format word and CB shader mask for the
	 * subpass color attachments.  blend_enable / blend_need_alpha are
	 * per-MRT bitmasks; each MRT occupies 4 bits of col_format. */
	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
	struct radv_blend_state *blend = &pipeline->graphics.blend;
	unsigned col_format = 0;

	/* Meta (single_cb_enable) pipelines only export to MRT0. */
	for (unsigned i = 0; i < (single_cb_enable ? 1 : subpass->color_count); ++i) {
		unsigned cf;

		if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
			/* Unused attachments export nothing. */
			cf = V_028714_SPI_SHADER_ZERO;
		} else {
			struct radv_render_pass_attachment *attachment = pass->attachments + subpass->color_attachments[i].attachment;

			cf = si_choose_spi_color_format(attachment->format,
			                                blend_enable & (1 << i),
			                                blend_need_alpha & (1 << i));
		}

		col_format |= cf << (4 * i);
	}

	/* The shader mask is computed before the dual-source duplication
	 * below, so it only covers the real attachments. */
	blend->cb_shader_mask = ac_get_cb_shader_mask(col_format);

	/* Dual-source blending replicates MRT0's format into the MRT1
	 * slot. */
	if (blend_mrt0_is_dual_src)
		col_format |= (col_format & 0xf) << 4;
	blend->spi_shader_col_format = col_format;
}
    454 
    455 static bool
    456 format_is_int8(VkFormat format)
    457 {
    458 	const struct vk_format_description *desc = vk_format_description(format);
    459 	int channel =  vk_format_get_first_non_void_channel(format);
    460 
    461 	return channel >= 0 && desc->channel[channel].pure_integer &&
    462 	       desc->channel[channel].size == 8;
    463 }
    464 
    465 static bool
    466 format_is_int10(VkFormat format)
    467 {
    468 	const struct vk_format_description *desc = vk_format_description(format);
    469 
    470 	if (desc->nr_channels != 4)
    471 		return false;
    472 	for (unsigned i = 0; i < 4; i++) {
    473 		if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
    474 			return true;
    475 	}
    476 	return false;
    477 }
    478 
    479 unsigned radv_format_meta_fs_key(VkFormat format)
    480 {
    481 	unsigned col_format = si_choose_spi_color_format(format, false, false) - 1;
    482 	bool is_int8 = format_is_int8(format);
    483 	bool is_int10 = format_is_int10(format);
    484 
    485 	return col_format + (is_int8 ? 3 : is_int10 ? 5 : 0);
    486 }
    487 
    488 static void
    489 radv_pipeline_compute_get_int_clamp(const VkGraphicsPipelineCreateInfo *pCreateInfo,
    490 				    unsigned *is_int8, unsigned *is_int10)
    491 {
    492 	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
    493 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
    494 	*is_int8 = 0;
    495 	*is_int10 = 0;
    496 
    497 	for (unsigned i = 0; i < subpass->color_count; ++i) {
    498 		struct radv_render_pass_attachment *attachment;
    499 
    500 		if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
    501 			continue;
    502 
    503 		attachment = pass->attachments + subpass->color_attachments[i].attachment;
    504 
    505 		if (format_is_int8(attachment->format))
    506 			*is_int8 |= 1 << i;
    507 		if (format_is_int10(attachment->format))
    508 			*is_int10 |= 1 << i;
    509 	}
    510 }
    511 
static void
radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
			       const VkGraphicsPipelineCreateInfo *pCreateInfo,
			       const struct radv_graphics_pipeline_create_info *extra)
{
	/* Translate the Vulkan color-blend state into the CB_COLOR_CONTROL,
	 * CB_BLEND*_CONTROL, SX_MRT*_BLEND_OPT and DB_ALPHA_TO_MASK values
	 * cached in pipeline->graphics.blend, then derive the SPI color
	 * export formats for the subpass. */
	const VkPipelineColorBlendStateCreateInfo *vkblend = pCreateInfo->pColorBlendState;
	const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
	struct radv_blend_state *blend = &pipeline->graphics.blend;
	unsigned mode = V_028808_CB_NORMAL;
	uint32_t blend_enable = 0, blend_need_alpha = 0;
	bool blend_mrt0_is_dual_src = false;
	int i;
	bool single_cb_enable = false;

	if (!vkblend)
		return;

	/* Internal (meta) pipelines may override the CB mode and render to
	 * a single color buffer only. */
	if (extra && extra->custom_blend_mode) {
		single_cb_enable = true;
		mode = extra->custom_blend_mode;
	}
	blend->cb_color_control = 0;
	if (vkblend->logicOpEnable)
		blend->cb_color_control |= S_028808_ROP3(vkblend->logicOp | (vkblend->logicOp << 4));
	else
		blend->cb_color_control |= S_028808_ROP3(0xcc); /* ROP3 0xcc = SRC copy */

	blend->db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
		S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
		S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
		S_028B70_ALPHA_TO_MASK_OFFSET3(2);

	if (vkms && vkms->alphaToCoverageEnable) {
		blend->db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
	}

	blend->cb_target_mask = 0;
	for (i = 0; i < vkblend->attachmentCount; i++) {
		const VkPipelineColorBlendAttachmentState *att = &vkblend->pAttachments[i];
		unsigned blend_cntl = 0;
		unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
		VkBlendOp eqRGB = att->colorBlendOp;
		VkBlendFactor srcRGB = att->srcColorBlendFactor;
		VkBlendFactor dstRGB = att->dstColorBlendFactor;
		VkBlendOp eqA = att->alphaBlendOp;
		VkBlendFactor srcA = att->srcAlphaBlendFactor;
		VkBlendFactor dstA = att->dstAlphaBlendFactor;

		/* Start with RB+ blending disabled for this MRT. */
		blend->sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);

		if (!att->colorWriteMask)
			continue;

		blend->cb_target_mask |= (unsigned)att->colorWriteMask << (4 * i);
		if (!att->blendEnable) {
			blend->cb_blend_control[i] = blend_cntl;
			continue;
		}

		/* Dual-source factors are only meaningful on MRT0. */
		if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA))
			if (i == 0)
				blend_mrt0_is_dual_src = true;

		/* MIN/MAX equations ignore the blend factors; normalize them
		 * to ONE so the RB+ lookups below stay simple. */
		if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
			srcRGB = VK_BLEND_FACTOR_ONE;
			dstRGB = VK_BLEND_FACTOR_ONE;
		}
		if (eqA == VK_BLEND_OP_MIN || eqA == VK_BLEND_OP_MAX) {
			srcA = VK_BLEND_FACTOR_ONE;
			dstA = VK_BLEND_FACTOR_ONE;
		}

		/* Blending optimizations for RB+.
		 * These transformations don't change the behavior.
		 *
		 * First, get rid of DST in the blend factors:
		 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
		 */
		si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
				    VK_BLEND_FACTOR_DST_COLOR,
				    VK_BLEND_FACTOR_SRC_COLOR);

		si_blend_remove_dst(&eqA, &srcA, &dstA,
				    VK_BLEND_FACTOR_DST_COLOR,
				    VK_BLEND_FACTOR_SRC_COLOR);

		si_blend_remove_dst(&eqA, &srcA, &dstA,
				    VK_BLEND_FACTOR_DST_ALPHA,
				    VK_BLEND_FACTOR_SRC_ALPHA);

		/* Look up the ideal settings from tables. */
		srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
		dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
		srcA_opt = si_translate_blend_opt_factor(srcA, true);
		dstA_opt = si_translate_blend_opt_factor(dstA, true);

		/* Handle interdependencies. */
		if (si_blend_factor_uses_dst(srcRGB))
			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
		if (si_blend_factor_uses_dst(srcA))
			dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;

		if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE &&
		    (dstRGB == VK_BLEND_FACTOR_ZERO ||
		     dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
		     dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE))
			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;

		/* Set the final value. */
		blend->sx_mrt_blend_opt[i] =
			S_028760_COLOR_SRC_OPT(srcRGB_opt) |
			S_028760_COLOR_DST_OPT(dstRGB_opt) |
			S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
			S_028760_ALPHA_SRC_OPT(srcA_opt) |
			S_028760_ALPHA_DST_OPT(dstA_opt) |
			S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
		blend_cntl |= S_028780_ENABLE(1);

		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
		blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
		blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
			blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
			blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
			blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
		}
		blend->cb_blend_control[i] = blend_cntl;

		blend_enable |= 1 << i;

		/* Track which MRTs need the shader to export alpha for the
		 * blend equation. */
		if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
		    dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
		    srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
		    dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
		    srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA ||
		    dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
			blend_need_alpha |= 1 << i;
	}
	/* Disable blending on the MRTs beyond attachmentCount. */
	for (i = vkblend->attachmentCount; i < 8; i++) {
		blend->cb_blend_control[i] = 0;
		blend->sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
	}

	/* disable RB+ for now */
	if (pipeline->device->physical_device->has_rbplus)
		blend->cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1);

	/* Disable the CB entirely when nothing is written. */
	if (blend->cb_target_mask)
		blend->cb_color_control |= S_028808_MODE(mode);
	else
		blend->cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);

	radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo,
						blend_enable, blend_need_alpha, single_cb_enable, blend_mrt0_is_dual_src);
}
    668 
    669 static uint32_t si_translate_stencil_op(enum VkStencilOp op)
    670 {
    671 	switch (op) {
    672 	case VK_STENCIL_OP_KEEP:
    673 		return V_02842C_STENCIL_KEEP;
    674 	case VK_STENCIL_OP_ZERO:
    675 		return V_02842C_STENCIL_ZERO;
    676 	case VK_STENCIL_OP_REPLACE:
    677 		return V_02842C_STENCIL_REPLACE_TEST;
    678 	case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
    679 		return V_02842C_STENCIL_ADD_CLAMP;
    680 	case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
    681 		return V_02842C_STENCIL_SUB_CLAMP;
    682 	case VK_STENCIL_OP_INVERT:
    683 		return V_02842C_STENCIL_INVERT;
    684 	case VK_STENCIL_OP_INCREMENT_AND_WRAP:
    685 		return V_02842C_STENCIL_ADD_WRAP;
    686 	case VK_STENCIL_OP_DECREMENT_AND_WRAP:
    687 		return V_02842C_STENCIL_SUB_WRAP;
    688 	default:
    689 		return 0;
    690 	}
    691 }
static void
radv_pipeline_init_depth_stencil_state(struct radv_pipeline *pipeline,
				       const VkGraphicsPipelineCreateInfo *pCreateInfo,
				       const struct radv_graphics_pipeline_create_info *extra)
{
	/* Translate the Vulkan depth/stencil state into the DB_* register
	 * values cached in pipeline->graphics.ds.  Nothing is programmed
	 * when the pipeline has no depth/stencil state or the subpass has
	 * no depth/stencil attachment. */
	const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState;
	struct radv_depth_stencil_state *ds = &pipeline->graphics.ds;

	if (!vkds)
		return;

	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
	if (subpass->depth_stencil_attachment.attachment == VK_ATTACHMENT_UNUSED)
		return;

	struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment.attachment;
	bool has_depth_attachment = vk_format_is_depth(attachment->format);
	bool has_stencil_attachment = vk_format_is_stencil(attachment->format);

	if (has_depth_attachment) {
		/* NOTE(review): VkCompareOp is passed straight through to
		 * ZFUNC — relies on the two encodings matching. */
		ds->db_depth_control = S_028800_Z_ENABLE(vkds->depthTestEnable ? 1 : 0) |
		                       S_028800_Z_WRITE_ENABLE(vkds->depthWriteEnable ? 1 : 0) |
		                       S_028800_ZFUNC(vkds->depthCompareOp) |
		                       S_028800_DEPTH_BOUNDS_ENABLE(vkds->depthBoundsTestEnable ? 1 : 0);
	}

	if (has_stencil_attachment && vkds->stencilTestEnable) {
		/* Front-face stencil state. */
		ds->db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
		ds->db_depth_control |= S_028800_STENCILFUNC(vkds->front.compareOp);
		ds->db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(vkds->front.failOp));
		ds->db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(vkds->front.passOp));
		ds->db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(vkds->front.depthFailOp));

		/* Back-face stencil state (the *_BF fields). */
		ds->db_depth_control |= S_028800_STENCILFUNC_BF(vkds->back.compareOp);
		ds->db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(vkds->back.failOp));
		ds->db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(vkds->back.passOp));
		ds->db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(vkds->back.depthFailOp));
	}

	if (extra) {
		/* Internal meta pipelines (clears, decompress/resummarize
		 * passes) drive DB_RENDER_CONTROL/OVERRIDE2 directly. */
		ds->db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
		ds->db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);

		ds->db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->db_resummarize);
		ds->db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->db_flush_depth_inplace);
		ds->db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->db_flush_stencil_inplace);
		ds->db_render_override2 |= S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(extra->db_depth_disable_expclear);
		ds->db_render_override2 |= S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(extra->db_stencil_disable_expclear);
	}
}
    744 
    745 static uint32_t si_translate_fill(VkPolygonMode func)
    746 {
    747 	switch(func) {
    748 	case VK_POLYGON_MODE_FILL:
    749 		return V_028814_X_DRAW_TRIANGLES;
    750 	case VK_POLYGON_MODE_LINE:
    751 		return V_028814_X_DRAW_LINES;
    752 	case VK_POLYGON_MODE_POINT:
    753 		return V_028814_X_DRAW_POINTS;
    754 	default:
    755 		assert(0);
    756 		return V_028814_X_DRAW_POINTS;
    757 	}
    758 }
    759 static void
    760 radv_pipeline_init_raster_state(struct radv_pipeline *pipeline,
    761 				const VkGraphicsPipelineCreateInfo *pCreateInfo)
    762 {
    763 	const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState;
    764 	struct radv_raster_state *raster = &pipeline->graphics.raster;
    765 
    766 	raster->spi_interp_control =
    767 		S_0286D4_FLAT_SHADE_ENA(1) |
    768 		S_0286D4_PNT_SPRITE_ENA(1) |
    769 		S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
    770 		S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
    771 		S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
    772 		S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
    773 		S_0286D4_PNT_SPRITE_TOP_1(0); // vulkan is top to bottom - 1.0 at bottom
    774 
    775 
    776 	raster->pa_cl_clip_cntl = S_028810_PS_UCP_MODE(3) |
    777 		S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
    778 		S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
    779 		S_028810_ZCLIP_FAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
    780 		S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
    781 		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
    782 
    783 	raster->pa_su_vtx_cntl =
    784 		S_028BE4_PIX_CENTER(1) | // TODO verify
    785 		S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
    786 		S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH);
    787 
    788 	raster->pa_su_sc_mode_cntl =
    789 		S_028814_FACE(vkraster->frontFace) |
    790 		S_028814_CULL_FRONT(!!(vkraster->cullMode & VK_CULL_MODE_FRONT_BIT)) |
    791 		S_028814_CULL_BACK(!!(vkraster->cullMode & VK_CULL_MODE_BACK_BIT)) |
    792 		S_028814_POLY_MODE(vkraster->polygonMode != VK_POLYGON_MODE_FILL) |
    793 		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(vkraster->polygonMode)) |
    794 		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(vkraster->polygonMode)) |
    795 		S_028814_POLY_OFFSET_FRONT_ENABLE(vkraster->depthBiasEnable ? 1 : 0) |
    796 		S_028814_POLY_OFFSET_BACK_ENABLE(vkraster->depthBiasEnable ? 1 : 0) |
    797 		S_028814_POLY_OFFSET_PARA_ENABLE(vkraster->depthBiasEnable ? 1 : 0);
    798 
    799 }
    800 
    801 static uint8_t radv_pipeline_get_ps_iter_samples(const VkPipelineMultisampleStateCreateInfo *vkms)
    802 {
    803 	uint32_t num_samples = vkms->rasterizationSamples;
    804 	uint32_t ps_iter_samples = 1;
    805 
    806 	if (vkms->sampleShadingEnable) {
    807 		ps_iter_samples = ceil(vkms->minSampleShading * num_samples);
    808 		ps_iter_samples = util_next_power_of_two(ps_iter_samples);
    809 	}
    810 	return ps_iter_samples;
    811 }
    812 
static void
radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
				     const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
	/* Fill the multisample register state (sample count, EQAA fields,
	 * sample masks, out-of-order rasterization) from the MSAA create
	 * info. vkms may be NULL (NULL-checked below). */
	const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
	struct radv_multisample_state *ms = &pipeline->graphics.ms;
	unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
	int ps_iter_samples = 1;
	uint32_t mask = 0xffff;

	if (vkms)
		ms->num_samples = vkms->rasterizationSamples;
	else
		ms->num_samples = 1;

	if (vkms)
		ps_iter_samples = radv_pipeline_get_ps_iter_samples(vkms);
	/* The fragment shader can force per-sample execution even when the
	 * API did not enable sample shading. */
	if (vkms && !vkms->sampleShadingEnable && pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.force_persample) {
		ps_iter_samples = ms->num_samples;
	}

	ms->pa_sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
	ms->pa_sc_aa_config = 0;
	ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
		S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
	ms->pa_sc_mode_cntl_1 =
		S_028A4C_WALK_FENCE_ENABLE(1) | //TODO linear dst fixes
		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
		/* always 1: */
		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
		S_028A4C_FORCE_EOV_REZ_ENABLE(1);
	ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(pipeline->device->physical_device->rad_info.chip_class >= GFX9) |
	                        S_028A48_VPORT_SCISSOR_ENABLE(1);

	/* MSAA-only fields: sample counts are programmed as log2 values. */
	if (ms->num_samples > 1) {
		unsigned log_samples = util_logbase2(ms->num_samples);
		unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);
		ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1);
		ms->pa_sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); /* CM_R_028BDC_PA_SC_LINE_CNTL */
		ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
			S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
			S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
			S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
		ms->pa_sc_aa_config |= S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
			S_028BE0_MAX_SAMPLE_DIST(radv_cayman_get_maxdist(log_samples)) |
			S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples); /* CM_R_028BE0_PA_SC_AA_CONFIG */
		ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
		if (ps_iter_samples > 1)
			pipeline->graphics.spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
	}

	/* VK_AMD_rasterization_order: relaxed order enables out-of-order
	 * primitive rasterization. */
	const struct VkPipelineRasterizationStateRasterizationOrderAMD *raster_order =
		vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD);
	if (raster_order && raster_order->rasterizationOrder == VK_RASTERIZATION_ORDER_RELAXED_AMD) {
		ms->pa_sc_mode_cntl_1 |= S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(1) |
					S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7);
	}

	/* Only the low 16 bits of the first sample-mask word are consumed;
	 * the mask is replicated into both halves of each AA mask register. */
	if (vkms && vkms->pSampleMask) {
		mask = vkms->pSampleMask[0] & 0xffff;
	}

	ms->pa_sc_aa_mask[0] = mask | (mask << 16);
	ms->pa_sc_aa_mask[1] = mask | (mask << 16);
}
    882 
    883 static bool
    884 radv_prim_can_use_guardband(enum VkPrimitiveTopology topology)
    885 {
    886 	switch (topology) {
    887 	case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
    888 	case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
    889 	case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
    890 	case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
    891 	case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
    892 		return false;
    893 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
    894 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
    895 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
    896 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
    897 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
    898 	case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
    899 		return true;
    900 	default:
    901 		unreachable("unhandled primitive type");
    902 	}
    903 }
    904 
    905 static uint32_t
    906 si_translate_prim(enum VkPrimitiveTopology topology)
    907 {
    908 	switch (topology) {
    909 	case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
    910 		return V_008958_DI_PT_POINTLIST;
    911 	case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
    912 		return V_008958_DI_PT_LINELIST;
    913 	case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
    914 		return V_008958_DI_PT_LINESTRIP;
    915 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
    916 		return V_008958_DI_PT_TRILIST;
    917 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
    918 		return V_008958_DI_PT_TRISTRIP;
    919 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
    920 		return V_008958_DI_PT_TRIFAN;
    921 	case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
    922 		return V_008958_DI_PT_LINELIST_ADJ;
    923 	case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
    924 		return V_008958_DI_PT_LINESTRIP_ADJ;
    925 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
    926 		return V_008958_DI_PT_TRILIST_ADJ;
    927 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
    928 		return V_008958_DI_PT_TRISTRIP_ADJ;
    929 	case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
    930 		return V_008958_DI_PT_PATCH;
    931 	default:
    932 		assert(0);
    933 		return 0;
    934 	}
    935 }
    936 
    937 static uint32_t
    938 si_conv_gl_prim_to_gs_out(unsigned gl_prim)
    939 {
    940 	switch (gl_prim) {
    941 	case 0: /* GL_POINTS */
    942 		return V_028A6C_OUTPRIM_TYPE_POINTLIST;
    943 	case 1: /* GL_LINES */
    944 	case 3: /* GL_LINE_STRIP */
    945 	case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */
    946 	case 0x8E7A: /* GL_ISOLINES */
    947 		return V_028A6C_OUTPRIM_TYPE_LINESTRIP;
    948 
    949 	case 4: /* GL_TRIANGLES */
    950 	case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */
    951 	case 5: /* GL_TRIANGLE_STRIP */
    952 	case 7: /* GL_QUADS */
    953 		return V_028A6C_OUTPRIM_TYPE_TRISTRIP;
    954 	default:
    955 		assert(0);
    956 		return 0;
    957 	}
    958 }
    959 
    960 static uint32_t
    961 si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
    962 {
    963 	switch (topology) {
    964 	case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
    965 	case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
    966 		return V_028A6C_OUTPRIM_TYPE_POINTLIST;
    967 	case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
    968 	case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
    969 	case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
    970 	case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
    971 		return V_028A6C_OUTPRIM_TYPE_LINESTRIP;
    972 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
    973 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
    974 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
    975 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
    976 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
    977 		return V_028A6C_OUTPRIM_TYPE_TRISTRIP;
    978 	default:
    979 		assert(0);
    980 		return 0;
    981 	}
    982 }
    983 
    984 static unsigned si_map_swizzle(unsigned swizzle)
    985 {
    986 	switch (swizzle) {
    987 	case VK_SWIZZLE_Y:
    988 		return V_008F0C_SQ_SEL_Y;
    989 	case VK_SWIZZLE_Z:
    990 		return V_008F0C_SQ_SEL_Z;
    991 	case VK_SWIZZLE_W:
    992 		return V_008F0C_SQ_SEL_W;
    993 	case VK_SWIZZLE_0:
    994 		return V_008F0C_SQ_SEL_0;
    995 	case VK_SWIZZLE_1:
    996 		return V_008F0C_SQ_SEL_1;
    997 	default: /* VK_SWIZZLE_X */
    998 		return V_008F0C_SQ_SEL_X;
    999 	}
   1000 }
   1001 
   1002 
   1003 static unsigned radv_dynamic_state_mask(VkDynamicState state)
   1004 {
   1005 	switch(state) {
   1006 	case VK_DYNAMIC_STATE_VIEWPORT:
   1007 		return RADV_DYNAMIC_VIEWPORT;
   1008 	case VK_DYNAMIC_STATE_SCISSOR:
   1009 		return RADV_DYNAMIC_SCISSOR;
   1010 	case VK_DYNAMIC_STATE_LINE_WIDTH:
   1011 		return RADV_DYNAMIC_LINE_WIDTH;
   1012 	case VK_DYNAMIC_STATE_DEPTH_BIAS:
   1013 		return RADV_DYNAMIC_DEPTH_BIAS;
   1014 	case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
   1015 		return RADV_DYNAMIC_BLEND_CONSTANTS;
   1016 	case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
   1017 		return RADV_DYNAMIC_DEPTH_BOUNDS;
   1018 	case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
   1019 		return RADV_DYNAMIC_STENCIL_COMPARE_MASK;
   1020 	case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
   1021 		return RADV_DYNAMIC_STENCIL_WRITE_MASK;
   1022 	case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
   1023 		return RADV_DYNAMIC_STENCIL_REFERENCE;
   1024 	case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
   1025 		return RADV_DYNAMIC_DISCARD_RECTANGLE;
   1026 	default:
   1027 		unreachable("Unhandled dynamic state");
   1028 	}
   1029 }
   1030 
static void
radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
				 const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
	/* Initialize pipeline->dynamic_state: start from defaults, remove the
	 * states the application declared dynamic, and bake the remaining
	 * static values from pCreateInfo. */
	uint32_t states = RADV_DYNAMIC_ALL;
	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
	struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];

	pipeline->dynamic_state = default_dynamic_state;

	if (pCreateInfo->pDynamicState) {
		/* Remove all of the states that are marked as dynamic */
		uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
		for (uint32_t s = 0; s < count; s++)
			states &= ~radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
	}

	struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;

	/* Section 9.2 of the Vulkan 1.0.15 spec says:
	 *
	 *    pViewportState is [...] NULL if the pipeline
	 *    has rasterization disabled.
	 */
	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
		assert(pCreateInfo->pViewportState);

		/* The counts are always baked, even when the viewports or
		 * scissors themselves are dynamic. */
		dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
		if (states & RADV_DYNAMIC_VIEWPORT) {
			typed_memcpy(dynamic->viewport.viewports,
				     pCreateInfo->pViewportState->pViewports,
				     pCreateInfo->pViewportState->viewportCount);
		}

		dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
		if (states & RADV_DYNAMIC_SCISSOR) {
			typed_memcpy(dynamic->scissor.scissors,
				     pCreateInfo->pViewportState->pScissors,
				     pCreateInfo->pViewportState->scissorCount);
		}
	}

	if (states & RADV_DYNAMIC_LINE_WIDTH) {
		assert(pCreateInfo->pRasterizationState);
		dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
	}

	if (states & RADV_DYNAMIC_DEPTH_BIAS) {
		assert(pCreateInfo->pRasterizationState);
		dynamic->depth_bias.bias =
			pCreateInfo->pRasterizationState->depthBiasConstantFactor;
		dynamic->depth_bias.clamp =
			pCreateInfo->pRasterizationState->depthBiasClamp;
		dynamic->depth_bias.slope =
			pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
	}

	/* Section 9.2 of the Vulkan 1.0.15 spec says:
	 *
	 *    pColorBlendState is [...] NULL if the pipeline has rasterization
	 *    disabled or if the subpass of the render pass the pipeline is
	 *    created against does not use any color attachments.
	 */
	bool uses_color_att = false;
	for (unsigned i = 0; i < subpass->color_count; ++i) {
		if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
			uses_color_att = true;
			break;
		}
	}

	if (uses_color_att && states & RADV_DYNAMIC_BLEND_CONSTANTS) {
		assert(pCreateInfo->pColorBlendState);
		typed_memcpy(dynamic->blend_constants,
			     pCreateInfo->pColorBlendState->blendConstants, 4);
	}

	/* If there is no depthstencil attachment, then don't read
	 * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
	 * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
	 * no need to override the depthstencil defaults in
	 * radv_pipeline::dynamic_state when there is no depthstencil attachment.
	 *
	 * Section 9.2 of the Vulkan 1.0.15 spec says:
	 *
	 *    pDepthStencilState is [...] NULL if the pipeline has rasterization
	 *    disabled or if the subpass of the render pass the pipeline is created
	 *    against does not use a depth/stencil attachment.
	 */
	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
	    subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
		assert(pCreateInfo->pDepthStencilState);

		if (states & RADV_DYNAMIC_DEPTH_BOUNDS) {
			dynamic->depth_bounds.min =
				pCreateInfo->pDepthStencilState->minDepthBounds;
			dynamic->depth_bounds.max =
				pCreateInfo->pDepthStencilState->maxDepthBounds;
		}

		if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
			dynamic->stencil_compare_mask.front =
				pCreateInfo->pDepthStencilState->front.compareMask;
			dynamic->stencil_compare_mask.back =
				pCreateInfo->pDepthStencilState->back.compareMask;
		}

		if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
			dynamic->stencil_write_mask.front =
				pCreateInfo->pDepthStencilState->front.writeMask;
			dynamic->stencil_write_mask.back =
				pCreateInfo->pDepthStencilState->back.writeMask;
		}

		if (states & RADV_DYNAMIC_STENCIL_REFERENCE) {
			dynamic->stencil_reference.front =
				pCreateInfo->pDepthStencilState->front.reference;
			dynamic->stencil_reference.back =
				pCreateInfo->pDepthStencilState->back.reference;
		}
	}

	/* VK_EXT_discard_rectangles: bake the rectangles and precompute the
	 * PA_SC_CLIPRECT_RULE mask over all 2^MAX_DISCARD_RECTANGLES
	 * containment combinations. */
	const  VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
			vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
	if (discard_rectangle_info) {
		dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount;
		typed_memcpy(dynamic->discard_rectangle.rectangles,
		             discard_rectangle_info->pDiscardRectangles,
		             discard_rectangle_info->discardRectangleCount);

		unsigned mask = 0;

		for (unsigned i = 0; i < (1u << MAX_DISCARD_RECTANGLES); ++i) {
			/* Interpret i as a bitmask, and then set the bit in the mask if
			 * that combination of rectangles in which the pixel is contained
			 * should pass the cliprect test. */
			unsigned relevant_subset = i & ((1u << discard_rectangle_info->discardRectangleCount) - 1);

			if (discard_rectangle_info->discardRectangleMode == VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT &&
			    !relevant_subset)
				continue;

			if (discard_rectangle_info->discardRectangleMode == VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT &&
			    relevant_subset)
				continue;

			mask |= 1u << i;
		}
		pipeline->graphics.pa_sc_cliprect_rule = mask;
	} else {
		states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;

		/* Allow from all rectangle combinations */
		pipeline->graphics.pa_sc_cliprect_rule = 0xffff;
	}
	/* NOTE(review): mask records which states hold baked (non-dynamic)
	 * values — confirm against the dirty-state tracking in the command
	 * buffer code. */
	pipeline->dynamic_state.mask = states;
}
   1188 
/* Compute the GFX9 on-chip GS subgroup sizing: how many ES vertices and GS
 * primitives fit in one subgroup given the LDS budget for the ESGS ring,
 * and fill the VGT_GS_ONCHIP_CNTL / VGT_GS_MAX_PRIMS_PER_SUBGROUP state. */
static void calculate_gfx9_gs_info(const VkGraphicsPipelineCreateInfo *pCreateInfo,
                                   struct radv_pipeline *pipeline)
{
	struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
	/* On GFX9 the ES stage is merged into the GS shader, so the ES output
	 * info comes from the GS variant (TES- or VS-fed). */
	struct ac_es_output_info *es_info = radv_pipeline_has_tess(pipeline) ?
		&gs_info->tes.es_info : &gs_info->vs.es_info;
	unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
	bool uses_adjacency;
	switch(pCreateInfo->pInputAssemblyState->topology) {
	case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
	case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
		uses_adjacency = true;
		break;
	default:
		uses_adjacency = false;
		break;
	}

	/* All these are in dwords: */
	/* We can't allow using the whole LDS, because GS waves compete with
	 * other shader stages for LDS space. */
	const unsigned max_lds_size = 8 * 1024;
	const unsigned esgs_itemsize = es_info->esgs_itemsize / 4;
	unsigned esgs_lds_size;

	/* All these are per subgroup: */
	const unsigned max_out_prims = 32 * 1024;
	const unsigned max_es_verts = 255;
	const unsigned ideal_gs_prims = 64;
	unsigned max_gs_prims, gs_prims;
	unsigned min_es_verts, es_verts, worst_case_es_verts;

	if (uses_adjacency || gs_num_invocations > 1)
		max_gs_prims = 127 / gs_num_invocations;
	else
		max_gs_prims = 255;

	/* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations.
	 * Make sure we don't go over the maximum value.
	 */
	if (gs_info->gs.vertices_out > 0) {
		max_gs_prims = MIN2(max_gs_prims,
				    max_out_prims /
				    (gs_info->gs.vertices_out * gs_num_invocations));
	}
	assert(max_gs_prims > 0);

	/* If the primitive has adjacency, halve the number of vertices
	 * that will be reused in multiple primitives.
	 */
	min_es_verts = gs_info->gs.vertices_in / (uses_adjacency ? 2 : 1);

	gs_prims = MIN2(ideal_gs_prims, max_gs_prims);
	worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);

	/* Compute ESGS LDS size based on the worst case number of ES vertices
	 * needed to create the target number of GS prims per subgroup.
	 */
	esgs_lds_size = esgs_itemsize * worst_case_es_verts;

	/* If total LDS usage is too big, refactor partitions based on ratio
	 * of ESGS item sizes.
	 */
	if (esgs_lds_size > max_lds_size) {
		/* Our target GS Prims Per Subgroup was too large. Calculate
		 * the maximum number of GS Prims Per Subgroup that will fit
		 * into LDS, capped by the maximum that the hardware can support.
		 */
		gs_prims = MIN2((max_lds_size / (esgs_itemsize * min_es_verts)),
				max_gs_prims);
		assert(gs_prims > 0);
		worst_case_es_verts = MIN2(min_es_verts * gs_prims,
					   max_es_verts);

		esgs_lds_size = esgs_itemsize * worst_case_es_verts;
		assert(esgs_lds_size <= max_lds_size);
	}

	/* Now calculate remaining ESGS information. */
	if (esgs_lds_size)
		es_verts = MIN2(esgs_lds_size / esgs_itemsize, max_es_verts);
	else
		es_verts = max_es_verts;

	/* Vertices for adjacency primitives are not always reused, so restore
	 * it for ES_VERTS_PER_SUBGRP.
	 */
	min_es_verts = gs_info->gs.vertices_in;

	/* For normal primitives, the VGT only checks if they are past the ES
	 * verts per subgroup after allocating a full GS primitive and if they
	 * are, kick off a new subgroup.  But if those additional ES verts are
	 * unique (e.g. not reused) we need to make sure there is enough LDS
	 * space to account for those ES verts beyond ES_VERTS_PER_SUBGRP.
	 */
	es_verts -= min_es_verts - 1;

	uint32_t es_verts_per_subgroup = es_verts;
	uint32_t gs_prims_per_subgroup = gs_prims;
	uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
	uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
	/* lds_size is stored in units of 128 dwords (the HW allocation
	 * granularity used here). */
	pipeline->graphics.gs.lds_size = align(esgs_lds_size, 128) / 128;
	pipeline->graphics.gs.vgt_gs_onchip_cntl =
	                       S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
	                       S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
	                       S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
	pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup =
	                       S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
	pipeline->graphics.gs.vgt_esgs_ring_itemsize  = esgs_itemsize;
	assert(max_prims_per_subgroup <= max_out_prims);
}
   1302 
   1303 static void
   1304 calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
   1305 {
   1306 	struct radv_device *device = pipeline->device;
   1307 	unsigned num_se = device->physical_device->rad_info.max_se;
   1308 	unsigned wave_size = 64;
   1309 	unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
   1310 	unsigned gs_vertex_reuse = 16 * num_se; /* GS_VERTEX_REUSE register (per SE) */
   1311 	unsigned alignment = 256 * num_se;
   1312 	/* The maximum size is 63.999 MB per SE. */
   1313 	unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
   1314 	struct ac_shader_variant_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
   1315 	struct ac_es_output_info *es_info;
   1316 	if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
   1317 		es_info = radv_pipeline_has_tess(pipeline) ? &gs_info->tes.es_info : &gs_info->vs.es_info;
   1318 	else
   1319 		es_info = radv_pipeline_has_tess(pipeline) ?
   1320 			&pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.es_info :
   1321 			&pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.es_info;
   1322 
   1323 	/* Calculate the minimum size. */
   1324 	unsigned min_esgs_ring_size = align(es_info->esgs_itemsize * gs_vertex_reuse *
   1325 					    wave_size, alignment);
   1326 	/* These are recommended sizes, not minimum sizes. */
   1327 	unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
   1328 		es_info->esgs_itemsize * gs_info->gs.vertices_in;
   1329 	unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
   1330 		gs_info->gs.max_gsvs_emit_size * 1; // no streams in VK (gs->max_gs_stream + 1);
   1331 
   1332 	min_esgs_ring_size = align(min_esgs_ring_size, alignment);
   1333 	esgs_ring_size = align(esgs_ring_size, alignment);
   1334 	gsvs_ring_size = align(gsvs_ring_size, alignment);
   1335 
   1336 	if (pipeline->device->physical_device->rad_info.chip_class <= VI)
   1337 		pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
   1338 
   1339 	pipeline->graphics.gs.vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;
   1340 	pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
   1341 }
   1342 
   1343 static void si_multiwave_lds_size_workaround(struct radv_device *device,
   1344 					     unsigned *lds_size)
   1345 {
   1346 	/* SPI barrier management bug:
   1347 	 *   Make sure we have at least 4k of LDS in use to avoid the bug.
   1348 	 *   It applies to workgroup sizes of more than one wavefront.
   1349 	 */
   1350 	if (device->physical_device->rad_info.family == CHIP_BONAIRE ||
   1351 	    device->physical_device->rad_info.family == CHIP_KABINI ||
   1352 	    device->physical_device->rad_info.family == CHIP_MULLINS)
   1353 		*lds_size = MAX2(*lds_size, 8);
   1354 }
   1355 
   1356 struct radv_shader_variant *
   1357 radv_get_vertex_shader(struct radv_pipeline *pipeline)
   1358 {
   1359 	if (pipeline->shaders[MESA_SHADER_VERTEX])
   1360 		return pipeline->shaders[MESA_SHADER_VERTEX];
   1361 	if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
   1362 		return pipeline->shaders[MESA_SHADER_TESS_CTRL];
   1363 	return pipeline->shaders[MESA_SHADER_GEOMETRY];
   1364 }
   1365 
   1366 static struct radv_shader_variant *
   1367 radv_get_tess_eval_shader(struct radv_pipeline *pipeline)
   1368 {
   1369 	if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
   1370 		return pipeline->shaders[MESA_SHADER_TESS_EVAL];
   1371 	return pipeline->shaders[MESA_SHADER_GEOMETRY];
   1372 }
   1373 
   1374 static void
   1375 calculate_tess_state(struct radv_pipeline *pipeline,
   1376 		     const VkGraphicsPipelineCreateInfo *pCreateInfo)
   1377 {
   1378 	unsigned num_tcs_input_cp = pCreateInfo->pTessellationState->patchControlPoints;
   1379 	unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
   1380 	unsigned num_tcs_patch_outputs;
   1381 	unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
   1382 	unsigned input_patch_size, output_patch_size, output_patch0_offset;
   1383 	unsigned lds_size, hardware_lds_size;
   1384 	unsigned perpatch_output_offset;
   1385 	unsigned num_patches;
   1386 	struct radv_tessellation_state *tess = &pipeline->graphics.tess;
   1387 
   1388 	/* This calculates how shader inputs and outputs among VS, TCS, and TES
   1389 	 * are laid out in LDS. */
   1390 	num_tcs_inputs = util_last_bit64(radv_get_vertex_shader(pipeline)->info.vs.outputs_written);
   1391 
   1392 	num_tcs_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.outputs_written); //tcs->outputs_written
   1393 	num_tcs_output_cp = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; //TCS VERTICES OUT
   1394 	num_tcs_patch_outputs = util_last_bit64(pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.patch_outputs_written);
   1395 
   1396 	/* Ensure that we only need one wave per SIMD so we don't need to check
   1397 	 * resource usage. Also ensures that the number of tcs in and out
   1398 	 * vertices per threadgroup are at most 256.
   1399 	 */
   1400 	input_vertex_size = num_tcs_inputs * 16;
   1401 	output_vertex_size = num_tcs_outputs * 16;
   1402 
   1403 	input_patch_size = num_tcs_input_cp * input_vertex_size;
   1404 
   1405 	pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
   1406 	output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
   1407 	/* Ensure that we only need one wave per SIMD so we don't need to check
   1408 	 * resource usage. Also ensures that the number of tcs in and out
   1409 	 * vertices per threadgroup are at most 256.
   1410 	 */
   1411 	num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4;
   1412 
   1413 	/* Make sure that the data fits in LDS. This assumes the shaders only
   1414 	 * use LDS for the inputs and outputs.
   1415 	 */
   1416 	hardware_lds_size = pipeline->device->physical_device->rad_info.chip_class >= CIK ? 65536 : 32768;
   1417 	num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
   1418 
   1419 	/* Make sure the output data fits in the offchip buffer */
   1420 	num_patches = MIN2(num_patches,
   1421 			    (pipeline->device->tess_offchip_block_dw_size * 4) /
   1422 			    output_patch_size);
   1423 
   1424 	/* Not necessary for correctness, but improves performance. The
   1425 	 * specific value is taken from the proprietary driver.
   1426 	 */
   1427 	num_patches = MIN2(num_patches, 40);
   1428 
   1429 	/* SI bug workaround - limit LS-HS threadgroups to only one wave. */
   1430 	if (pipeline->device->physical_device->rad_info.chip_class == SI) {
   1431 		unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
   1432 		num_patches = MIN2(num_patches, one_wave);
   1433 	}
   1434 
   1435 	output_patch0_offset = input_patch_size * num_patches;
   1436 	perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
   1437 
   1438 	lds_size = output_patch0_offset + output_patch_size * num_patches;
   1439 
   1440 	if (pipeline->device->physical_device->rad_info.chip_class >= CIK) {
   1441 		assert(lds_size <= 65536);
   1442 		lds_size = align(lds_size, 512) / 512;
   1443 	} else {
   1444 		assert(lds_size <= 32768);
   1445 		lds_size = align(lds_size, 256) / 256;
   1446 	}
   1447 	si_multiwave_lds_size_workaround(pipeline->device, &lds_size);
   1448 
   1449 	tess->lds_size = lds_size;
   1450 
   1451 	tess->tcs_in_layout = (input_patch_size / 4) |
   1452 		((input_vertex_size / 4) << 13);
   1453 	tess->tcs_out_layout = (output_patch_size / 4) |
   1454 		((output_vertex_size / 4) << 13);
   1455 	tess->tcs_out_offsets = (output_patch0_offset / 16) |
   1456 		((perpatch_output_offset / 16) << 16);
   1457 	tess->offchip_layout = (pervertex_output_patch_size * num_patches << 16) |
   1458 		(num_tcs_output_cp << 9) | num_patches;
   1459 
   1460 	tess->ls_hs_config = S_028B58_NUM_PATCHES(num_patches) |
   1461 		S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
   1462 		S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
   1463 	tess->num_patches = num_patches;
   1464 	tess->num_tcs_input_cp = num_tcs_input_cp;
   1465 
   1466 	struct radv_shader_variant *tes = radv_get_tess_eval_shader(pipeline);
   1467 	unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0;
   1468 
   1469 	switch (tes->info.tes.primitive_mode) {
   1470 	case GL_TRIANGLES:
   1471 		type = V_028B6C_TESS_TRIANGLE;
   1472 		break;
   1473 	case GL_QUADS:
   1474 		type = V_028B6C_TESS_QUAD;
   1475 		break;
   1476 	case GL_ISOLINES:
   1477 		type = V_028B6C_TESS_ISOLINE;
   1478 		break;
   1479 	}
   1480 
   1481 	switch (tes->info.tes.spacing) {
   1482 	case TESS_SPACING_EQUAL:
   1483 		partitioning = V_028B6C_PART_INTEGER;
   1484 		break;
   1485 	case TESS_SPACING_FRACTIONAL_ODD:
   1486 		partitioning = V_028B6C_PART_FRAC_ODD;
   1487 		break;
   1488 	case TESS_SPACING_FRACTIONAL_EVEN:
   1489 		partitioning = V_028B6C_PART_FRAC_EVEN;
   1490 		break;
   1491 	default:
   1492 		break;
   1493 	}
   1494 
   1495 	bool ccw = tes->info.tes.ccw;
   1496 	const VkPipelineTessellationDomainOriginStateCreateInfoKHR *domain_origin_state =
   1497 	              vk_find_struct_const(pCreateInfo->pTessellationState,
   1498 	                                   PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR);
   1499 
   1500 	if (domain_origin_state && domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR)
   1501 		ccw = !ccw;
   1502 
   1503 	if (tes->info.tes.point_mode)
   1504 		topology = V_028B6C_OUTPUT_POINT;
   1505 	else if (tes->info.tes.primitive_mode == GL_ISOLINES)
   1506 		topology = V_028B6C_OUTPUT_LINE;
   1507 	else if (ccw)
   1508 		topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
   1509 	else
   1510 		topology = V_028B6C_OUTPUT_TRIANGLE_CW;
   1511 
   1512 	if (pipeline->device->has_distributed_tess) {
   1513 		if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI ||
   1514 		    pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10)
   1515 			distribution_mode = V_028B6C_DISTRIBUTION_MODE_TRAPEZOIDS;
   1516 		else
   1517 			distribution_mode = V_028B6C_DISTRIBUTION_MODE_DONUTS;
   1518 	} else
   1519 		distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST;
   1520 
   1521 	tess->tf_param = S_028B6C_TYPE(type) |
   1522 		S_028B6C_PARTITIONING(partitioning) |
   1523 		S_028B6C_TOPOLOGY(topology) |
   1524 		S_028B6C_DISTRIBUTION_MODE(distribution_mode);
   1525 }
   1526 
/* Vertex-count properties per VGT DI primitive type.
 * NOTE(review): the two fields appear to be {vertices required for the
 * first primitive, additional vertices per extra primitive} — e.g.
 * TRILIST {3, 3} vs TRISTRIP {3, 1} — confirm against the definition of
 * struct radv_prim_vertex_count.
 */
static const struct radv_prim_vertex_count prim_size_table[] = {
	[V_008958_DI_PT_NONE] = {0, 0},
	[V_008958_DI_PT_POINTLIST] = {1, 1},
	[V_008958_DI_PT_LINELIST] = {2, 2},
	[V_008958_DI_PT_LINESTRIP] = {2, 1},
	[V_008958_DI_PT_TRILIST] = {3, 3},
	[V_008958_DI_PT_TRIFAN] = {3, 1},
	[V_008958_DI_PT_TRISTRIP] = {3, 1},
	[V_008958_DI_PT_LINELIST_ADJ] = {4, 4},
	[V_008958_DI_PT_LINESTRIP_ADJ] = {4, 1},
	[V_008958_DI_PT_TRILIST_ADJ] = {6, 6},
	[V_008958_DI_PT_TRISTRIP_ADJ] = {6, 2},
	[V_008958_DI_PT_RECTLIST] = {3, 3},
	[V_008958_DI_PT_LINELOOP] = {2, 1},
	[V_008958_DI_PT_POLYGON] = {3, 1},
	[V_008958_DI_PT_2D_TRI_STRIP] = {0, 0},
};
   1544 
   1545 static struct ac_vs_output_info *get_vs_output_info(struct radv_pipeline *pipeline)
   1546 {
   1547 	if (radv_pipeline_has_gs(pipeline))
   1548 		return &pipeline->gs_copy_shader->info.vs.outinfo;
   1549 	else if (radv_pipeline_has_tess(pipeline))
   1550 		return &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.outinfo;
   1551 	else
   1552 		return &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.outinfo;
   1553 }
   1554 
   1555 static void calculate_vgt_gs_mode(struct radv_pipeline *pipeline)
   1556 {
   1557 	struct ac_vs_output_info *outinfo = get_vs_output_info(pipeline);
   1558 
   1559 	pipeline->graphics.vgt_primitiveid_en = false;
   1560 	pipeline->graphics.vgt_gs_mode = 0;
   1561 
   1562 	if (radv_pipeline_has_gs(pipeline)) {
   1563 		struct radv_shader_variant *gs =
   1564 			pipeline->shaders[MESA_SHADER_GEOMETRY];
   1565 
   1566 		pipeline->graphics.vgt_gs_mode =
   1567 			ac_vgt_gs_mode(gs->info.gs.vertices_out,
   1568 				       pipeline->device->physical_device->rad_info.chip_class);
   1569 	} else if (outinfo->export_prim_id) {
   1570 		pipeline->graphics.vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
   1571 		pipeline->graphics.vgt_primitiveid_en = true;
   1572 	}
   1573 }
   1574 
/* Fill the hardware-VS output registers (PA_CL_VS_OUT_CNTL,
 * SPI_SHADER_POS_FORMAT, SPI_VS_OUT_CONFIG, VGT_REUSE_OFF) from the output
 * info of the last pre-rasterization stage.
 */
static void calculate_vs_outinfo(struct radv_pipeline *pipeline)
{
	struct ac_vs_output_info *outinfo = get_vs_output_info(pipeline);

	unsigned clip_dist_mask, cull_dist_mask, total_mask;
	clip_dist_mask = outinfo->clip_dist_mask;
	cull_dist_mask = outinfo->cull_dist_mask;
	total_mask = clip_dist_mask | cull_dist_mask;

	/* The "misc" output vector carries point size, layer and viewport
	 * index; enable it if any of those is written. */
	bool misc_vec_ena = outinfo->writes_pointsize ||
		outinfo->writes_layer ||
		outinfo->writes_viewport_index;
	pipeline->graphics.vs.pa_cl_vs_out_cntl =
		S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
		S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
		S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
		S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
		/* Enable the clip/cull distance vectors (distances 0-3 and
		 * 4-7) only when the shader writes distances in that range. */
		S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
		S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
		cull_dist_mask << 8 |
		clip_dist_mask;

	/* Position export 0 is always present; enable further position
	 * exports only up to the count the shader actually produces. */
	pipeline->graphics.vs.spi_shader_pos_format =
		S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
		S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
					    V_02870C_SPI_SHADER_4COMP :
					    V_02870C_SPI_SHADER_NONE) |
		S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
					    V_02870C_SPI_SHADER_4COMP :
					    V_02870C_SPI_SHADER_NONE) |
		S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
					    V_02870C_SPI_SHADER_4COMP :
					    V_02870C_SPI_SHADER_NONE);

	/* VS_EXPORT_COUNT is the parameter export count minus one; clamp to
	 * at least one export. */
	pipeline->graphics.vs.spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(MAX2(1, outinfo->param_exports) - 1);
	/* only emitted on pre-VI */
	pipeline->graphics.vs.vgt_reuse_off = S_028AB4_REUSE_OFF(outinfo->writes_viewport_index);
}
   1614 
   1615 static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
   1616 {
   1617 	uint32_t ps_input_cntl;
   1618 	if (offset <= AC_EXP_PARAM_OFFSET_31) {
   1619 		ps_input_cntl = S_028644_OFFSET(offset);
   1620 		if (flat_shade)
   1621 			ps_input_cntl |= S_028644_FLAT_SHADE(1);
   1622 	} else {
   1623 		/* The input is a DEFAULT_VAL constant. */
   1624 		assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
   1625 		       offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
   1626 		offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
   1627 		ps_input_cntl = S_028644_OFFSET(0x20) |
   1628 			S_028644_DEFAULT_VAL(offset);
   1629 	}
   1630 	return ps_input_cntl;
   1631 }
   1632 
/* Build the SPI_PS_INPUT_CNTL_* table that routes the previous stage's
 * parameter exports to the fragment shader inputs, in the order the
 * fragment shader consumes them, and record how many entries were filled.
 */
static void calculate_ps_inputs(struct radv_pipeline *pipeline)
{
	struct radv_shader_variant *ps;
	struct ac_vs_output_info *outinfo = get_vs_output_info(pipeline);

	ps = pipeline->shaders[MESA_SHADER_FRAGMENT];

	/* Index of the next PS input slot to fill. */
	unsigned ps_offset = 0;

	/* Primitive ID input: only routed when the previous stage actually
	 * exports it; always flat shaded. */
	if (ps->info.fs.prim_id_input) {
		unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
		if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
			pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
			++ps_offset;
		}
	}

	/* Layer input: fall back to the 0000 default constant when the
	 * previous stage does not export a layer. */
	if (ps->info.fs.layer_input) {
		unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
		if (vs_offset != AC_EXP_PARAM_UNDEFINED)
			pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
		else
			pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
		++ps_offset;
	}

	/* Point sprite coordinate: generated by the hardware
	 * (PT_SPRITE_TEX), not exported by the previous stage. */
	if (ps->info.fs.has_pcoord) {
		unsigned val;
		val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
		pipeline->graphics.ps_input_cntl[ps_offset] = val;
		ps_offset++;
	}

	/* Generic varyings VARYING_SLOT_VAR0..31, in input_mask order. */
	for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
		unsigned vs_offset;
		bool flat_shade;
		if (!(ps->info.fs.input_mask & (1u << i)))
			continue;

		vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
		if (vs_offset == AC_EXP_PARAM_UNDEFINED) {
			/* Not exported: point the input at the default-value
			 * slot (OFFSET 0x20). */
			pipeline->graphics.ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
			++ps_offset;
			continue;
		}

		/* NOTE(review): flat_shaded_mask is indexed by the PS input
		 * slot (ps_offset), not by the varying index i — confirm
		 * against how the mask is built at compile time. */
		flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));

		pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
		++ps_offset;
	}

	pipeline->graphics.ps_input_cntl_num = ps_offset;
}
   1687 
   1688 static void
   1689 radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
   1690 {
   1691 	nir_shader* ordered_shaders[MESA_SHADER_STAGES];
   1692 	int shader_count = 0;
   1693 
   1694 	if(shaders[MESA_SHADER_FRAGMENT]) {
   1695 		ordered_shaders[shader_count++] = shaders[MESA_SHADER_FRAGMENT];
   1696 	}
   1697 	if(shaders[MESA_SHADER_GEOMETRY]) {
   1698 		ordered_shaders[shader_count++] = shaders[MESA_SHADER_GEOMETRY];
   1699 	}
   1700 	if(shaders[MESA_SHADER_TESS_EVAL]) {
   1701 		ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_EVAL];
   1702 	}
   1703 	if(shaders[MESA_SHADER_TESS_CTRL]) {
   1704 		ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_CTRL];
   1705 	}
   1706 	if(shaders[MESA_SHADER_VERTEX]) {
   1707 		ordered_shaders[shader_count++] = shaders[MESA_SHADER_VERTEX];
   1708 	}
   1709 
   1710 	for (int i = 1; i < shader_count; ++i)  {
   1711 		nir_lower_io_arrays_to_elements(ordered_shaders[i],
   1712 						ordered_shaders[i - 1]);
   1713 
   1714 		nir_remove_dead_variables(ordered_shaders[i],
   1715 					  nir_var_shader_out);
   1716 		nir_remove_dead_variables(ordered_shaders[i - 1],
   1717 					  nir_var_shader_in);
   1718 
   1719 		bool progress = nir_remove_unused_varyings(ordered_shaders[i],
   1720 							   ordered_shaders[i - 1]);
   1721 
   1722 		if (progress) {
   1723 			if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
   1724 				radv_lower_indirect_derefs(ordered_shaders[i],
   1725 				                           pipeline->device->physical_device);
   1726 			}
   1727 			radv_optimize_nir(ordered_shaders[i]);
   1728 
   1729 			if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
   1730 				radv_lower_indirect_derefs(ordered_shaders[i - 1],
   1731 				                           pipeline->device->physical_device);
   1732 			}
   1733 			radv_optimize_nir(ordered_shaders[i - 1]);
   1734 		}
   1735 	}
   1736 }
   1737 
   1738 
   1739 static struct radv_pipeline_key
   1740 radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
   1741                                     const VkGraphicsPipelineCreateInfo *pCreateInfo,
   1742                                     bool has_view_index)
   1743 {
   1744 	const VkPipelineVertexInputStateCreateInfo *input_state =
   1745 	                                         pCreateInfo->pVertexInputState;
   1746 	struct radv_pipeline_key key;
   1747 	memset(&key, 0, sizeof(key));
   1748 
   1749 	key.has_multiview_view_index = has_view_index;
   1750 
   1751 	uint32_t binding_input_rate = 0;
   1752 	for (unsigned i = 0; i < input_state->vertexBindingDescriptionCount; ++i) {
   1753 		if (input_state->pVertexBindingDescriptions[i].inputRate)
   1754 			binding_input_rate |= 1u << input_state->pVertexBindingDescriptions[i].binding;
   1755 	}
   1756 
   1757 	for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
   1758 		unsigned binding;
   1759 		binding = input_state->pVertexAttributeDescriptions[i].binding;
   1760 		if (binding_input_rate & (1u << binding))
   1761 			key.instance_rate_inputs |= 1u << input_state->pVertexAttributeDescriptions[i].location;
   1762 	}
   1763 
   1764 	if (pCreateInfo->pTessellationState)
   1765 		key.tess_input_vertices = pCreateInfo->pTessellationState->patchControlPoints;
   1766 
   1767 
   1768 	if (pCreateInfo->pMultisampleState &&
   1769 	    pCreateInfo->pMultisampleState->rasterizationSamples > 1) {
   1770 		uint32_t num_samples = pCreateInfo->pMultisampleState->rasterizationSamples;
   1771 		uint32_t ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo->pMultisampleState);
   1772 		key.multisample = true;
   1773 		key.log2_num_samples = util_logbase2(num_samples);
   1774 		key.log2_ps_iter_samples = util_logbase2(ps_iter_samples);
   1775 	}
   1776 
   1777 	key.col_format = pipeline->graphics.blend.spi_shader_col_format;
   1778 	if (pipeline->device->physical_device->rad_info.chip_class < VI)
   1779 		radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.is_int8, &key.is_int10);
   1780 
   1781 	return key;
   1782 }
   1783 
/* Derive the per-stage shader variant keys from the pipeline key and the
 * set of NIR stages present (which determines which stages run as LS/ES).
 */
static void
radv_fill_shader_keys(struct ac_shader_variant_key *keys,
                      const struct radv_pipeline_key *key,
                      nir_shader **nir)
{
	keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;

	if (nir[MESA_SHADER_TESS_CTRL]) {
		/* With tessellation the VS runs as LS (feeding the HS). */
		keys[MESA_SHADER_VERTEX].vs.as_ls = true;
		keys[MESA_SHADER_TESS_CTRL].tcs.input_vertices = key->tess_input_vertices;
		keys[MESA_SHADER_TESS_CTRL].tcs.primitive_mode = nir[MESA_SHADER_TESS_EVAL]->info.tess.primitive_mode;

		/* Whether the TES reads back the inner/outer tess factors. */
		keys[MESA_SHADER_TESS_CTRL].tcs.tes_reads_tess_factors = !!(nir[MESA_SHADER_TESS_EVAL]->info.inputs_read & (VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER));
	}

	if (nir[MESA_SHADER_GEOMETRY]) {
		/* With a GS, the immediately preceding stage runs as ES. */
		if (nir[MESA_SHADER_TESS_CTRL])
			keys[MESA_SHADER_TESS_EVAL].tes.as_es = true;
		else
			keys[MESA_SHADER_VERTEX].vs.as_es = true;
	}

	for(int i = 0; i < MESA_SHADER_STAGES; ++i)
		keys[i].has_multiview_view_index = key->has_multiview_view_index;

	keys[MESA_SHADER_FRAGMENT].fs.multisample = key->multisample;
	keys[MESA_SHADER_FRAGMENT].fs.col_format = key->col_format;
	keys[MESA_SHADER_FRAGMENT].fs.is_int8 = key->is_int8;
	keys[MESA_SHADER_FRAGMENT].fs.is_int10 = key->is_int10;
	keys[MESA_SHADER_FRAGMENT].fs.log2_ps_iter_samples = key->log2_ps_iter_samples;
	keys[MESA_SHADER_FRAGMENT].fs.log2_num_samples = key->log2_num_samples;
}
   1816 
/* Merge the TCS execution modes into the TES shader_info; the backend only
 * reads the TES copy of these fields (see the spec quote below).
 */
static void
merge_tess_info(struct shader_info *tes_info,
                const struct shader_info *tcs_info)
{
	/* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
	 *
	 *    "PointMode. Controls generation of points rather than triangles
	 *     or lines. This functionality defaults to disabled, and is
	 *     enabled if either shader stage includes the execution mode.
	 *
	 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
	 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
	 * and OutputVertices, it says:
	 *
	 *    "One mode must be set in at least one of the tessellation
	 *     shader stages."
	 *
	 * So, the fields can be set in either the TCS or TES, but they must
	 * agree if set in both.  Our backend looks at TES, so bitwise-or in
	 * the values from the TCS.
	 */
	assert(tcs_info->tess.tcs_vertices_out == 0 ||
	       tes_info->tess.tcs_vertices_out == 0 ||
	       tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
	tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

	assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
	       tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
	       tcs_info->tess.spacing == tes_info->tess.spacing);
	tes_info->tess.spacing |= tcs_info->tess.spacing;

	assert(tcs_info->tess.primitive_mode == 0 ||
	       tes_info->tess.primitive_mode == 0 ||
	       tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
	tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
	tes_info->tess.ccw |= tcs_info->tess.ccw;
	tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}
   1855 
/* Compile (or fetch from the pipeline cache) all shader variants for a
 * pipeline: hashes the stages, handles the noop fragment shader, runs the
 * NIR linking passes, merges stages on GFX9+, builds the GS copy shader,
 * and inserts everything back into the cache.
 */
static
void radv_create_shaders(struct radv_pipeline *pipeline,
                         struct radv_device *device,
                         struct radv_pipeline_cache *cache,
                         struct radv_pipeline_key key,
                         const VkPipelineShaderStageCreateInfo **pStages)
{
	struct radv_shader_module fs_m = {0};
	struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
	nir_shader *nir[MESA_SHADER_STAGES] = {0};
	void *codes[MESA_SHADER_STAGES] = {0};
	unsigned code_sizes[MESA_SHADER_STAGES] = {0};
	struct ac_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{0}}}};
	unsigned char hash[20], gs_copy_hash[20];

	/* For modules that already carry NIR (internal/meta shaders), derive
	 * the module sha1 from the shader name. */
	for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (pStages[i]) {
			modules[i] = radv_shader_module_from_handle(pStages[i]->module);
			if (modules[i]->nir)
				_mesa_sha1_compute(modules[i]->nir->info.name,
				                   strlen(modules[i]->nir->info.name),
				                   modules[i]->sha1);
		}
	}

	radv_hash_shaders(hash, pStages, pipeline->layout, &key, get_hash_flags(device));
	memcpy(gs_copy_hash, hash, 20);
	/* Flip one bit so the GS copy shader gets a distinct cache key. */
	gs_copy_hash[0] ^= 1;

	if (modules[MESA_SHADER_GEOMETRY]) {
		struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
		radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants);
		pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
	}

	/* Cache hit for all stages (including the GS copy shader if one is
	 * needed): just record the active stages and return. */
	if (radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders) &&
	    (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
		for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
			if (pipeline->shaders[i])
				pipeline->active_stages |= mesa_to_vk_shader_stage(i);
		}
		return;
	}

	/* Graphics pipelines without a fragment shader get a no-op one. */
	if (!modules[MESA_SHADER_FRAGMENT] && !modules[MESA_SHADER_COMPUTE]) {
		nir_builder fs_b;
		nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
		fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
		fs_m.nir = fs_b.shader;
		modules[MESA_SHADER_FRAGMENT] = &fs_m;
	}

	/* Determine first and last stage. */
	unsigned first = MESA_SHADER_STAGES;
	unsigned last = 0;
	for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
		if (!pStages[i])
			continue;
		if (first == MESA_SHADER_STAGES)
			first = i;
		last = i;
	}

	/* Compile each stage to NIR and run early per-stage lowering. */
	int prev = -1;
	for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
		const VkPipelineShaderStageCreateInfo *stage = pStages[i];

		if (!modules[i])
			continue;

		nir[i] = radv_shader_compile_to_nir(device, modules[i],
						    stage ? stage->pName : "main", i,
						    stage ? stage->pSpecializationInfo : NULL);
		pipeline->active_stages |= mesa_to_vk_shader_stage(i);

		/* We don't want to alter meta shaders IR directly so clone it
		 * first.
		 */
		if (nir[i]->info.name) {
			nir[i] = nir_shader_clone(NULL, nir[i]);
		}

		if (first != last) {
			nir_variable_mode mask = 0;

			/* Scalarize only the cross-stage I/O: inputs unless
			 * this is the first stage, outputs unless the last. */
			if (i != first)
				mask = mask | nir_var_shader_in;

			if (i != last)
				mask = mask | nir_var_shader_out;

			nir_lower_io_to_scalar_early(nir[i], mask);
			radv_optimize_nir(nir[i]);
		}

		if (prev != -1) {
			nir_compact_varyings(nir[prev], nir[i], true);
		}
		prev = i;
	}

	if (nir[MESA_SHADER_TESS_CTRL]) {
		nir_lower_tes_patch_vertices(nir[MESA_SHADER_TESS_EVAL], nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out);
		merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
	}

	radv_link_shaders(pipeline, nir);

	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if (modules[i] && radv_can_dump_shader(device, modules[i]))
			nir_print_shader(nir[i], stderr);
	}

	radv_fill_shader_keys(keys, &key, nir);

	/* The fragment shader is compiled first: its prim_id usage feeds
	 * into the VS/TES keys below. */
	if (nir[MESA_SHADER_FRAGMENT]) {
		if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
			pipeline->shaders[MESA_SHADER_FRAGMENT] =
			       radv_shader_variant_create(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
			                                  pipeline->layout, keys + MESA_SHADER_FRAGMENT,
			                                  &codes[MESA_SHADER_FRAGMENT], &code_sizes[MESA_SHADER_FRAGMENT]);
		}

		/* TODO: These are no longer used as keys we should refactor this */
		keys[MESA_SHADER_VERTEX].vs.export_prim_id =
		        pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input;
		keys[MESA_SHADER_TESS_EVAL].tes.export_prim_id =
		        pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input;
	}

	/* On GFX9+ the VS and TCS are compiled together as one merged
	 * variant; the standalone VS module is consumed here. */
	if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_TESS_CTRL]) {
		if (!pipeline->shaders[MESA_SHADER_TESS_CTRL]) {
			struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
			struct ac_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
			key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
			pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_create(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
			                                                                      pipeline->layout,
			                                                                      &key, &codes[MESA_SHADER_TESS_CTRL],
			                                                                      &code_sizes[MESA_SHADER_TESS_CTRL]);
		}
		modules[MESA_SHADER_VERTEX] = NULL;
	}

	/* Likewise on GFX9+ the GS merges with the preceding stage (TES if
	 * tessellation is used, else VS). */
	if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
		gl_shader_stage pre_stage = modules[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
		if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
			struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
			pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_create(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
			                                                                     pipeline->layout,
			                                                                     &keys[pre_stage] , &codes[MESA_SHADER_GEOMETRY],
			                                                                     &code_sizes[MESA_SHADER_GEOMETRY]);
		}
		modules[pre_stage] = NULL;
	}

	/* Compile the remaining stand-alone stages. */
	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		if(modules[i] && !pipeline->shaders[i]) {
			pipeline->shaders[i] = radv_shader_variant_create(device, modules[i], &nir[i], 1,
									  pipeline->layout,
									  keys + i, &codes[i],
									  &code_sizes[i]);
		}
	}

	/* Build the GS copy shader (if not already cached) and insert it
	 * under its separate cache key. */
	if(modules[MESA_SHADER_GEOMETRY]) {
		void *gs_copy_code = NULL;
		unsigned gs_copy_code_size = 0;
		if (!pipeline->gs_copy_shader) {
			pipeline->gs_copy_shader = radv_create_gs_copy_shader(
					device, nir[MESA_SHADER_GEOMETRY], &gs_copy_code,
					&gs_copy_code_size,
					keys[MESA_SHADER_GEOMETRY].has_multiview_view_index);
		}

		if (pipeline->gs_copy_shader) {
			void *code[MESA_SHADER_STAGES] = {0};
			unsigned code_size[MESA_SHADER_STAGES] = {0};
			struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};

			code[MESA_SHADER_GEOMETRY] = gs_copy_code;
			code_size[MESA_SHADER_GEOMETRY] = gs_copy_code_size;
			variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;

			radv_pipeline_cache_insert_shaders(device, cache,
							   gs_copy_hash,
							   variants,
							   (const void**)code,
							   code_size);
		}
		free(gs_copy_code);
	}

	radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders,
					   (const void**)codes, code_sizes);

	/* The cache made its own copies of the binaries; free ours, and the
	 * NIR unless the device wants to keep it for debugging. */
	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
		free(codes[i]);
		if (modules[i] && !pipeline->device->keep_shader_info)
			ralloc_free(nir[i]);
	}

	if (fs_m.nir)
		ralloc_free(fs_m.nir);
}
   2060 
   2061 static uint32_t
   2062 radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline,
   2063 				   gl_shader_stage stage, enum chip_class chip_class)
   2064 {
   2065 	bool has_gs = radv_pipeline_has_gs(pipeline);
   2066 	bool has_tess = radv_pipeline_has_tess(pipeline);
   2067 	switch (stage) {
   2068 	case MESA_SHADER_FRAGMENT:
   2069 		return R_00B030_SPI_SHADER_USER_DATA_PS_0;
   2070 	case MESA_SHADER_VERTEX:
   2071 		if (chip_class >= GFX9) {
   2072 			return has_tess ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
   2073 			       has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
   2074 			       R_00B130_SPI_SHADER_USER_DATA_VS_0;
   2075 		}
   2076 		if (has_tess)
   2077 			return R_00B530_SPI_SHADER_USER_DATA_LS_0;
   2078 		else
   2079 			return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0;
   2080 	case MESA_SHADER_GEOMETRY:
   2081 		return chip_class >= GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
   2082 		                            R_00B230_SPI_SHADER_USER_DATA_GS_0;
   2083 	case MESA_SHADER_COMPUTE:
   2084 		return R_00B900_COMPUTE_USER_DATA_0;
   2085 	case MESA_SHADER_TESS_CTRL:
   2086 		return chip_class >= GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
   2087 		                            R_00B430_SPI_SHADER_USER_DATA_HS_0;
   2088 	case MESA_SHADER_TESS_EVAL:
   2089 		if (chip_class >= GFX9) {
   2090 			return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
   2091 			       R_00B130_SPI_SHADER_USER_DATA_VS_0;
   2092 		}
   2093 		if (has_gs)
   2094 			return R_00B330_SPI_SHADER_USER_DATA_ES_0;
   2095 		else
   2096 			return R_00B130_SPI_SHADER_USER_DATA_VS_0;
   2097 	default:
   2098 		unreachable("unknown shader");
   2099 	}
   2100 }
   2101 
/* One row of a binning-size lookup table.
 * NOTE(review): from the tables below, "extent" appears to be the bin size
 * (in pixels) used while the per-pixel cost is below the next row's "bpp"
 * threshold, with a {0, 0} extent disabling binning — confirm against the
 * lookup logic in radv_compute_bin_size().
 */
struct radv_bin_size_entry {
	unsigned bpp;
	VkExtent2D extent;
};
   2106 
   2107 static VkExtent2D
   2108 radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
   2109 {
   2110 	static const struct radv_bin_size_entry color_size_table[][3][9] = {
   2111 		{
   2112 			/* One RB / SE */
   2113 			{
   2114 				/* One shader engine */
   2115 				{        0, {128,  128}},
   2116 				{        1, { 64,  128}},
   2117 				{        2, { 32,  128}},
   2118 				{        3, { 16,  128}},
   2119 				{       17, {  0,    0}},
   2120 				{ UINT_MAX, {  0,    0}},
   2121 			},
   2122 			{
   2123 				/* Two shader engines */
   2124 				{        0, {128,  128}},
   2125 				{        2, { 64,  128}},
   2126 				{        3, { 32,  128}},
   2127 				{        5, { 16,  128}},
   2128 				{       17, {  0,    0}},
   2129 				{ UINT_MAX, {  0,    0}},
   2130 			},
   2131 			{
   2132 				/* Four shader engines */
   2133 				{        0, {128,  128}},
   2134 				{        3, { 64,  128}},
   2135 				{        5, { 16,  128}},
   2136 				{       17, {  0,    0}},
   2137 				{ UINT_MAX, {  0,    0}},
   2138 			},
   2139 		},
   2140 		{
   2141 			/* Two RB / SE */
   2142 			{
   2143 				/* One shader engine */
   2144 				{        0, {128,  128}},
   2145 				{        2, { 64,  128}},
   2146 				{        3, { 32,  128}},
   2147 				{        5, { 16,  128}},
   2148 				{       33, {  0,    0}},
   2149 				{ UINT_MAX, {  0,    0}},
   2150 			},
   2151 			{
   2152 				/* Two shader engines */
   2153 				{        0, {128,  128}},
   2154 				{        3, { 64,  128}},
   2155 				{        5, { 32,  128}},
   2156 				{        9, { 16,  128}},
   2157 				{       33, {  0,    0}},
   2158 				{ UINT_MAX, {  0,    0}},
   2159 			},
   2160 			{
   2161 				/* Four shader engines */
   2162 				{        0, {256,  256}},
   2163 				{        2, {128,  256}},
   2164 				{        3, {128,  128}},
   2165 				{        5, { 64,  128}},
   2166 				{        9, { 16,  128}},
   2167 				{       33, {  0,    0}},
   2168 				{ UINT_MAX, {  0,    0}},
   2169 			},
   2170 		},
   2171 		{
   2172 			/* Four RB / SE */
   2173 			{
   2174 				/* One shader engine */
   2175 				{        0, {128,  256}},
   2176 				{        2, {128,  128}},
   2177 				{        3, { 64,  128}},
   2178 				{        5, { 32,  128}},
   2179 				{        9, { 16,  128}},
   2180 				{       33, {  0,    0}},
   2181 				{ UINT_MAX, {  0,    0}},
   2182 			},
   2183 			{
   2184 				/* Two shader engines */
   2185 				{        0, {256,  256}},
   2186 				{        2, {128,  256}},
   2187 				{        3, {128,  128}},
   2188 				{        5, { 64,  128}},
   2189 				{        9, { 32,  128}},
   2190 				{       17, { 16,  128}},
   2191 				{       33, {  0,    0}},
   2192 				{ UINT_MAX, {  0,    0}},
   2193 			},
   2194 			{
   2195 				/* Four shader engines */
   2196 				{        0, {256,  512}},
   2197 				{        2, {256,  256}},
   2198 				{        3, {128,  256}},
   2199 				{        5, {128,  128}},
   2200 				{        9, { 64,  128}},
   2201 				{       17, { 16,  128}},
   2202 				{       33, {  0,    0}},
   2203 				{ UINT_MAX, {  0,    0}},
   2204 			},
   2205 		},
   2206 	};
   2207 	static const struct radv_bin_size_entry ds_size_table[][3][9] = {
   2208 		{
   2209 			// One RB / SE
   2210 			{
   2211 				// One shader engine
   2212 				{        0, {128,  256}},
   2213 				{        2, {128,  128}},
   2214 				{        4, { 64,  128}},
   2215 				{        7, { 32,  128}},
   2216 				{       13, { 16,  128}},
   2217 				{       49, {  0,    0}},
   2218 				{ UINT_MAX, {  0,    0}},
   2219 			},
   2220 			{
   2221 				// Two shader engines
   2222 				{        0, {256,  256}},
   2223 				{        2, {128,  256}},
   2224 				{        4, {128,  128}},
   2225 				{        7, { 64,  128}},
   2226 				{       13, { 32,  128}},
   2227 				{       25, { 16,  128}},
   2228 				{       49, {  0,    0}},
   2229 				{ UINT_MAX, {  0,    0}},
   2230 			},
   2231 			{
   2232 				// Four shader engines
   2233 				{        0, {256,  512}},
   2234 				{        2, {256,  256}},
   2235 				{        4, {128,  256}},
   2236 				{        7, {128,  128}},
   2237 				{       13, { 64,  128}},
   2238 				{       25, { 16,  128}},
   2239 				{       49, {  0,    0}},
   2240 				{ UINT_MAX, {  0,    0}},
   2241 			},
   2242 		},
   2243 		{
   2244 			// Two RB / SE
   2245 			{
   2246 				// One shader engine
   2247 				{        0, {256,  256}},
   2248 				{        2, {128,  256}},
   2249 				{        4, {128,  128}},
   2250 				{        7, { 64,  128}},
   2251 				{       13, { 32,  128}},
   2252 				{       25, { 16,  128}},
   2253 				{       97, {  0,    0}},
   2254 				{ UINT_MAX, {  0,    0}},
   2255 			},
   2256 			{
   2257 				// Two shader engines
   2258 				{        0, {256,  512}},
   2259 				{        2, {256,  256}},
   2260 				{        4, {128,  256}},
   2261 				{        7, {128,  128}},
   2262 				{       13, { 64,  128}},
   2263 				{       25, { 32,  128}},
   2264 				{       49, { 16,  128}},
   2265 				{       97, {  0,    0}},
   2266 				{ UINT_MAX, {  0,    0}},
   2267 			},
   2268 			{
   2269 				// Four shader engines
   2270 				{        0, {512,  512}},
   2271 				{        2, {256,  512}},
   2272 				{        4, {256,  256}},
   2273 				{        7, {128,  256}},
   2274 				{       13, {128,  128}},
   2275 				{       25, { 64,  128}},
   2276 				{       49, { 16,  128}},
   2277 				{       97, {  0,    0}},
   2278 				{ UINT_MAX, {  0,    0}},
   2279 			},
   2280 		},
   2281 		{
   2282 			// Four RB / SE
   2283 			{
   2284 				// One shader engine
   2285 				{        0, {256,  512}},
   2286 				{        2, {256,  256}},
   2287 				{        4, {128,  256}},
   2288 				{        7, {128,  128}},
   2289 				{       13, { 64,  128}},
   2290 				{       25, { 32,  128}},
   2291 				{       49, { 16,  128}},
   2292 				{ UINT_MAX, {  0,    0}},
   2293 			},
   2294 			{
   2295 				// Two shader engines
   2296 				{        0, {512,  512}},
   2297 				{        2, {256,  512}},
   2298 				{        4, {256,  256}},
   2299 				{        7, {128,  256}},
   2300 				{       13, {128,  128}},
   2301 				{       25, { 64,  128}},
   2302 				{       49, { 32,  128}},
   2303 				{       97, { 16,  128}},
   2304 				{ UINT_MAX, {  0,    0}},
   2305 			},
   2306 			{
   2307 				// Four shader engines
   2308 				{        0, {512,  512}},
   2309 				{        4, {256,  512}},
   2310 				{        7, {256,  256}},
   2311 				{       13, {128,  256}},
   2312 				{       25, {128,  128}},
   2313 				{       49, { 64,  128}},
   2314 				{       97, { 16,  128}},
   2315 				{ UINT_MAX, {  0,    0}},
   2316 			},
   2317 		},
   2318 	};
   2319 
   2320 	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
   2321 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
   2322 	VkExtent2D extent = {512, 512};
   2323 
   2324 	unsigned log_num_rb_per_se =
   2325 	    util_logbase2_ceil(pipeline->device->physical_device->rad_info.num_render_backends /
   2326 	                       pipeline->device->physical_device->rad_info.max_se);
   2327 	unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se);
   2328 
   2329 	unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_mode_cntl_1);
   2330 	unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa);
   2331 	unsigned effective_samples = total_samples;
   2332 	unsigned cb_target_mask = pipeline->graphics.blend.cb_target_mask;
   2333 	unsigned color_bytes_per_pixel = 0;
   2334 
   2335 	for (unsigned i = 0; i < subpass->color_count; i++) {
   2336 		if (!(cb_target_mask & (0xf << (i * 4))))
   2337 			continue;
   2338 
   2339 		if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
   2340 			continue;
   2341 
   2342 		VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
   2343 		color_bytes_per_pixel += vk_format_get_blocksize(format);
   2344 	}
   2345 
   2346 	/* MSAA images typically don't use all samples all the time. */
   2347 	if (effective_samples >= 2 && ps_iter_samples <= 1)
   2348 		effective_samples = 2;
   2349 	color_bytes_per_pixel *= effective_samples;
   2350 
   2351 	const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se];
   2352 	while(color_entry->bpp <= color_bytes_per_pixel)
   2353 		++color_entry;
   2354 
   2355 	extent = color_entry->extent;
   2356 
   2357 	if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
   2358 		struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment.attachment;
   2359 
   2360 		/* Coefficients taken from AMDVLK */
   2361 		unsigned depth_coeff = vk_format_is_depth(attachment->format) ? 5 : 0;
   2362 		unsigned stencil_coeff = vk_format_is_stencil(attachment->format) ? 1 : 0;
   2363 		unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples;
   2364 
   2365 		const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se];
   2366 		while(ds_entry->bpp <= ds_bytes_per_pixel)
   2367 			++ds_entry;
   2368 
   2369 		extent.width = MIN2(extent.width, ds_entry->extent.width);
   2370 		extent.height = MIN2(extent.height, ds_entry->extent.height);
   2371 	}
   2372 
   2373 	return extent;
   2374 }
   2375 
   2376 static void
   2377 radv_compute_binning_state(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
   2378 {
   2379 	pipeline->graphics.bin.pa_sc_binner_cntl_0 =
   2380 	                S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
   2381 	                S_028C44_DISABLE_START_OF_PRIM(1);
   2382 	pipeline->graphics.bin.db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
   2383 
   2384 	if (!pipeline->device->pbb_allowed)
   2385 		return;
   2386 
   2387 	VkExtent2D bin_size = radv_compute_bin_size(pipeline, pCreateInfo);
   2388 	if (!bin_size.width || !bin_size.height)
   2389 		return;
   2390 
   2391 	unsigned context_states_per_bin; /* allowed range: [1, 6] */
   2392 	unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
   2393 	unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
   2394 
   2395 	switch (pipeline->device->physical_device->rad_info.family) {
   2396 	case CHIP_VEGA10:
   2397 		context_states_per_bin = 1;
   2398 		persistent_states_per_bin = 1;
   2399 		fpovs_per_batch = 63;
   2400 		break;
   2401 	case CHIP_RAVEN:
   2402 		context_states_per_bin = 6;
   2403 		persistent_states_per_bin = 32;
   2404 		fpovs_per_batch = 63;
   2405 		break;
   2406 	default:
   2407 		unreachable("unhandled family while determining binning state.");
   2408 	}
   2409 
   2410 	pipeline->graphics.bin.pa_sc_binner_cntl_0 =
   2411 	                S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
   2412 	                S_028C44_BIN_SIZE_X(bin_size.width == 16) |
   2413 	                S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
   2414 	                S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
   2415 	                S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
   2416 	                S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin - 1) |
   2417 	                S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin - 1) |
   2418 	                S_028C44_DISABLE_START_OF_PRIM(1) |
   2419 	                S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
   2420 	                S_028C44_OPTIMAL_BIN_SELECTION(1);
   2421 
   2422 	/* DFSM is not implemented yet */
   2423 	assert(!pipeline->device->dfsm_allowed);
   2424 }
   2425 
/*
 * Initialize a graphics pipeline object from its create info.
 *
 * Compiles (or fetches from the pipeline cache) all requested shader
 * stages, then derives the static hardware state: blend/depth/raster/
 * multisample state, primitive and GS output types, DB_SHADER_CONTROL,
 * VGT stage enables, IA_MULTI_VGT_PARAM inputs, vertex fetch layout and
 * binning state.
 *
 * Returns the result of radv_pipeline_scratch_init(); the caller is
 * responsible for destroying the pipeline on failure.
 */
static VkResult
radv_pipeline_init(struct radv_pipeline *pipeline,
		   struct radv_device *device,
		   struct radv_pipeline_cache *cache,
		   const VkGraphicsPipelineCreateInfo *pCreateInfo,
		   const struct radv_graphics_pipeline_create_info *extra,
		   const VkAllocationCallbacks *alloc)
{
	VkResult result;
	bool has_view_index = false;

	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
	/* A non-zero view mask means multiview is active for this subpass. */
	if (subpass->view_mask)
		has_view_index = true;
	if (alloc == NULL)
		alloc = &device->alloc;

	pipeline->device = device;
	pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
	assert(pipeline->layout);

	radv_pipeline_init_dynamic_state(pipeline, pCreateInfo);
	radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);

	/* Index the supplied stage infos by gl_shader_stage; each Vulkan
	 * stage flag is a single bit, so ffs() maps it to the stage enum. */
	const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
	for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
		gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
		pStages[stage] = &pCreateInfo->pStages[i];
	}

	radv_create_shaders(pipeline, device, cache,
	                    radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, has_view_index),
	                    pStages);

	pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
	radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo, extra);
	radv_pipeline_init_raster_state(pipeline, pCreateInfo);
	radv_pipeline_init_multisample_state(pipeline, pCreateInfo);
	pipeline->graphics.prim = si_translate_prim(pCreateInfo->pInputAssemblyState->topology);
	pipeline->graphics.can_use_guardband = radv_prim_can_use_guardband(pCreateInfo->pInputAssemblyState->topology);

	/* With a GS, the output primitive type comes from the GS rather than
	 * from the input-assembly topology. */
	if (radv_pipeline_has_gs(pipeline)) {
		pipeline->graphics.gs_out = si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
		pipeline->graphics.can_use_guardband = pipeline->graphics.gs_out == V_028A6C_OUTPRIM_TYPE_TRISTRIP;
	} else {
		pipeline->graphics.gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology);
	}
	/* Internal meta pipelines may force rectangle-list primitives. */
	if (extra && extra->use_rectlist) {
		pipeline->graphics.prim = V_008958_DI_PT_RECTLIST;
		pipeline->graphics.gs_out = V_028A6C_OUTPRIM_TYPE_TRISTRIP;
		pipeline->graphics.can_use_guardband = true;
	}
	pipeline->graphics.prim_restart_enable = !!pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
	/* prim vertex count will need TESS changes */
	pipeline->graphics.prim_vertex_count = prim_size_table[pipeline->graphics.prim];

	/* Ensure that some export memory is always allocated, for two reasons:
	 *
	 * 1) Correctness: The hardware ignores the EXEC mask if no export
	 *    memory is allocated, so KILL and alpha test do not work correctly
	 *    without this.
	 * 2) Performance: Every shader needs at least a NULL export, even when
	 *    it writes no color/depth output. The NULL export instruction
	 *    stalls without this setting.
	 *
	 * Don't add this to CB_SHADER_MASK.
	 */
	struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
	if (!pipeline->graphics.blend.spi_shader_col_format) {
		if (!ps->info.fs.writes_z &&
		    !ps->info.fs.writes_stencil &&
		    !ps->info.fs.writes_sample_mask)
			pipeline->graphics.blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
	}

	/* Early Z is selected when the FS either explicitly requests early
	 * fragment tests or performs no memory writes; otherwise late Z. */
	unsigned z_order;
	pipeline->graphics.db_shader_control = 0;
	if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory)
		z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
	else
		z_order = V_02880C_LATE_Z;

	pipeline->graphics.db_shader_control =
		S_02880C_Z_EXPORT_ENABLE(ps->info.fs.writes_z) |
		S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.fs.writes_stencil) |
		S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
		S_02880C_MASK_EXPORT_ENABLE(ps->info.fs.writes_sample_mask) |
		S_02880C_Z_ORDER(z_order) |
		S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
		S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) |
		S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory);

	if (pipeline->device->physical_device->has_rbplus)
		pipeline->graphics.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);

	unsigned shader_z_format =
		ac_get_spi_shader_z_format(ps->info.fs.writes_z,
					   ps->info.fs.writes_stencil,
					   ps->info.fs.writes_sample_mask);
	pipeline->graphics.shader_z_format = shader_z_format;

	calculate_vgt_gs_mode(pipeline);
	calculate_vs_outinfo(pipeline);
	calculate_ps_inputs(pipeline);

	/* A pipeline needs indirect descriptor sets if any of its shaders
	 * does. */
	for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
		if (pipeline->shaders[i]) {
			pipeline->need_indirect_descriptor_sets |= pipeline->shaders[i]->info.need_indirect_descriptor_sets;
		}
	}

	/* Select which hardware stages are enabled (VGT_SHADER_STAGES_EN)
	 * based on the tess/GS combination present in the pipeline. */
	uint32_t stages = 0;
	if (radv_pipeline_has_tess(pipeline)) {
		stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
			S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);

		if (radv_pipeline_has_gs(pipeline))
			stages |=  S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
				S_028B54_GS_EN(1) |
				S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
		else
			stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);

	} else if (radv_pipeline_has_gs(pipeline))
		stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
			S_028B54_GS_EN(1) |
			S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);

	if (device->physical_device->rad_info.chip_class >= GFX9)
		stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);

	pipeline->graphics.vgt_shader_stages_en = stages;

	if (radv_pipeline_has_gs(pipeline)) {
		calculate_gs_ring_sizes(pipeline);
		if (device->physical_device->rad_info.chip_class >= GFX9)
			calculate_gfx9_gs_info(pCreateInfo, pipeline);
	}

	if (radv_pipeline_has_tess(pipeline)) {
		/* For patch primitives the vertex count comes from the
		 * tessellation state, not the primitive-size table. */
		if (pipeline->graphics.prim == V_008958_DI_PT_PATCH) {
			pipeline->graphics.prim_vertex_count.min = pCreateInfo->pTessellationState->patchControlPoints;
			pipeline->graphics.prim_vertex_count.incr = 1;
		}
		calculate_tess_state(pipeline, pCreateInfo);
	}

	if (radv_pipeline_has_tess(pipeline))
		pipeline->graphics.primgroup_size = pipeline->graphics.tess.num_patches;
	else if (radv_pipeline_has_gs(pipeline))
		pipeline->graphics.primgroup_size = 64;
	else
		pipeline->graphics.primgroup_size = 128; /* recommended without a GS */

	pipeline->graphics.partial_es_wave = false;
	if (pipeline->device->has_distributed_tess) {
		if (radv_pipeline_has_gs(pipeline)) {
			if (device->physical_device->rad_info.chip_class <= VI)
				pipeline->graphics.partial_es_wave = true;
		}
	}
	/* GS requirement. */
	if (SI_GS_PER_ES / pipeline->graphics.primgroup_size >= pipeline->device->gs_table_depth - 3)
		pipeline->graphics.partial_es_wave = true;

	pipeline->graphics.wd_switch_on_eop = false;
	if (device->physical_device->rad_info.chip_class >= CIK) {
		unsigned prim = pipeline->graphics.prim;
		/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
		 * 4 shader engines. Set 1 to pass the assertion below.
		 * The other cases are hardware requirements. */
		if (device->physical_device->rad_info.max_se < 4 ||
		    prim == V_008958_DI_PT_POLYGON ||
		    prim == V_008958_DI_PT_LINELOOP ||
		    prim == V_008958_DI_PT_TRIFAN ||
		    prim == V_008958_DI_PT_TRISTRIP_ADJ ||
		    (pipeline->graphics.prim_restart_enable &&
		     (device->physical_device->rad_info.family < CHIP_POLARIS10 ||
		      (prim != V_008958_DI_PT_POINTLIST &&
		       prim != V_008958_DI_PT_LINESTRIP &&
		       prim != V_008958_DI_PT_TRISTRIP))))
			pipeline->graphics.wd_switch_on_eop = true;
	}

	/* SWITCH_ON_EOI is needed whenever any stage consumes the primitive
	 * ID. */
	pipeline->graphics.ia_switch_on_eoi = false;
	if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input)
		pipeline->graphics.ia_switch_on_eoi = true;
	if (radv_pipeline_has_gs(pipeline) &&
	    pipeline->shaders[MESA_SHADER_GEOMETRY]->info.info.uses_prim_id)
		pipeline->graphics.ia_switch_on_eoi = true;
	if (radv_pipeline_has_tess(pipeline)) {
		/* SWITCH_ON_EOI must be set if PrimID is used. */
		if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.info.uses_prim_id ||
		    radv_get_tess_eval_shader(pipeline)->info.info.uses_prim_id)
			pipeline->graphics.ia_switch_on_eoi = true;
	}

	pipeline->graphics.partial_vs_wave = false;
	if (radv_pipeline_has_tess(pipeline)) {
		/* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
		if ((device->physical_device->rad_info.family == CHIP_TAHITI ||
		     device->physical_device->rad_info.family == CHIP_PITCAIRN ||
		     device->physical_device->rad_info.family == CHIP_BONAIRE) &&
		    radv_pipeline_has_gs(pipeline))
			pipeline->graphics.partial_vs_wave = true;
		/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
		if (device->has_distributed_tess) {
			if (radv_pipeline_has_gs(pipeline)) {
				if (device->physical_device->rad_info.family == CHIP_TONGA ||
				    device->physical_device->rad_info.family == CHIP_FIJI ||
				    device->physical_device->rad_info.family == CHIP_POLARIS10 ||
				    device->physical_device->rad_info.family == CHIP_POLARIS11 ||
				    device->physical_device->rad_info.family == CHIP_POLARIS12)
					pipeline->graphics.partial_vs_wave = true;
			} else {
				pipeline->graphics.partial_vs_wave = true;
			}
		}
	}

	pipeline->graphics.base_ia_multi_vgt_param =
		S_028AA8_PRIMGROUP_SIZE(pipeline->graphics.primgroup_size - 1) |
		/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
		S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == VI ? 2 : 0) |
		S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
		S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);

	/* Translate the Vulkan vertex input layout into the per-attribute
	 * hardware fetch state (resource word 3, format size, offset and
	 * binding index). */
	const VkPipelineVertexInputStateCreateInfo *vi_info =
		pCreateInfo->pVertexInputState;
	struct radv_vertex_elements_info *velems = &pipeline->vertex_elements;

	for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
		const VkVertexInputAttributeDescription *desc =
			&vi_info->pVertexAttributeDescriptions[i];
		unsigned loc = desc->location;
		const struct vk_format_description *format_desc;
		int first_non_void;
		uint32_t num_format, data_format;
		format_desc = vk_format_description(desc->format);
		first_non_void = vk_format_get_first_non_void_channel(desc->format);

		num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
		data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);

		velems->rsrc_word3[loc] = S_008F0C_DST_SEL_X(si_map_swizzle(format_desc->swizzle[0])) |
			S_008F0C_DST_SEL_Y(si_map_swizzle(format_desc->swizzle[1])) |
			S_008F0C_DST_SEL_Z(si_map_swizzle(format_desc->swizzle[2])) |
			S_008F0C_DST_SEL_W(si_map_swizzle(format_desc->swizzle[3])) |
			S_008F0C_NUM_FORMAT(num_format) |
			S_008F0C_DATA_FORMAT(data_format);
		velems->format_size[loc] = format_desc->block.bits / 8;
		velems->offset[loc] = desc->offset;
		velems->binding[loc] = desc->binding;
		velems->count = MAX2(velems->count, loc + 1);
	}

	for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
		const VkVertexInputBindingDescription *desc =
			&vi_info->pVertexBindingDescriptions[i];

		pipeline->binding_stride[desc->binding] = desc->stride;
	}

	for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++)
		pipeline->user_data_0[i] = radv_pipeline_stage_to_user_data_0(pipeline, i, device->physical_device->rad_info.chip_class);

	/* Cache where the base-vertex/start-instance user SGPRs live so the
	 * draw path can emit them directly (3 values when draw id is also
	 * needed, otherwise 2). */
	struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
							     AC_UD_VS_BASE_VERTEX_START_INSTANCE);
	if (loc->sgpr_idx != -1) {
		pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
		pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
		if (radv_get_vertex_shader(pipeline)->info.info.vs.needs_draw_id)
			pipeline->graphics.vtx_emit_num = 3;
		else
			pipeline->graphics.vtx_emit_num = 2;
	}

	/* Vertex reuse depth is lowered for fractional-odd spacing TES. */
	pipeline->graphics.vtx_reuse_depth = 30;
	if (radv_pipeline_has_tess(pipeline) &&
	    radv_get_tess_eval_shader(pipeline)->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) {
		pipeline->graphics.vtx_reuse_depth = 14;
	}

	if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
		radv_dump_pipeline_stats(device, pipeline);
	}

	radv_compute_binning_state(pipeline, pCreateInfo);

	result = radv_pipeline_scratch_init(device, pipeline);
	return result;
}
   2719 
   2720 VkResult
   2721 radv_graphics_pipeline_create(
   2722 	VkDevice _device,
   2723 	VkPipelineCache _cache,
   2724 	const VkGraphicsPipelineCreateInfo *pCreateInfo,
   2725 	const struct radv_graphics_pipeline_create_info *extra,
   2726 	const VkAllocationCallbacks *pAllocator,
   2727 	VkPipeline *pPipeline)
   2728 {
   2729 	RADV_FROM_HANDLE(radv_device, device, _device);
   2730 	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
   2731 	struct radv_pipeline *pipeline;
   2732 	VkResult result;
   2733 
   2734 	pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
   2735 			      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   2736 	if (pipeline == NULL)
   2737 		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   2738 
   2739 	result = radv_pipeline_init(pipeline, device, cache,
   2740 				    pCreateInfo, extra, pAllocator);
   2741 	if (result != VK_SUCCESS) {
   2742 		radv_pipeline_destroy(device, pipeline, pAllocator);
   2743 		return result;
   2744 	}
   2745 
   2746 	*pPipeline = radv_pipeline_to_handle(pipeline);
   2747 
   2748 	return VK_SUCCESS;
   2749 }
   2750 
   2751 VkResult radv_CreateGraphicsPipelines(
   2752 	VkDevice                                    _device,
   2753 	VkPipelineCache                             pipelineCache,
   2754 	uint32_t                                    count,
   2755 	const VkGraphicsPipelineCreateInfo*         pCreateInfos,
   2756 	const VkAllocationCallbacks*                pAllocator,
   2757 	VkPipeline*                                 pPipelines)
   2758 {
   2759 	VkResult result = VK_SUCCESS;
   2760 	unsigned i = 0;
   2761 
   2762 	for (; i < count; i++) {
   2763 		VkResult r;
   2764 		r = radv_graphics_pipeline_create(_device,
   2765 						  pipelineCache,
   2766 						  &pCreateInfos[i],
   2767 						  NULL, pAllocator, &pPipelines[i]);
   2768 		if (r != VK_SUCCESS) {
   2769 			result = r;
   2770 			pPipelines[i] = VK_NULL_HANDLE;
   2771 		}
   2772 	}
   2773 
   2774 	return result;
   2775 }
   2776 
   2777 static VkResult radv_compute_pipeline_create(
   2778 	VkDevice                                    _device,
   2779 	VkPipelineCache                             _cache,
   2780 	const VkComputePipelineCreateInfo*          pCreateInfo,
   2781 	const VkAllocationCallbacks*                pAllocator,
   2782 	VkPipeline*                                 pPipeline)
   2783 {
   2784 	RADV_FROM_HANDLE(radv_device, device, _device);
   2785 	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
   2786 	const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
   2787 	struct radv_pipeline *pipeline;
   2788 	VkResult result;
   2789 
   2790 	pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
   2791 			      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   2792 	if (pipeline == NULL)
   2793 		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   2794 
   2795 	pipeline->device = device;
   2796 	pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
   2797 	assert(pipeline->layout);
   2798 
   2799 	pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
   2800 	radv_create_shaders(pipeline, device, cache, (struct radv_pipeline_key) {0}, pStages);
   2801 
   2802 	pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
   2803 	pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
   2804 	result = radv_pipeline_scratch_init(device, pipeline);
   2805 	if (result != VK_SUCCESS) {
   2806 		radv_pipeline_destroy(device, pipeline, pAllocator);
   2807 		return result;
   2808 	}
   2809 
   2810 	*pPipeline = radv_pipeline_to_handle(pipeline);
   2811 
   2812 	if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
   2813 		radv_dump_pipeline_stats(device, pipeline);
   2814 	}
   2815 	return VK_SUCCESS;
   2816 }
   2817 VkResult radv_CreateComputePipelines(
   2818 	VkDevice                                    _device,
   2819 	VkPipelineCache                             pipelineCache,
   2820 	uint32_t                                    count,
   2821 	const VkComputePipelineCreateInfo*          pCreateInfos,
   2822 	const VkAllocationCallbacks*                pAllocator,
   2823 	VkPipeline*                                 pPipelines)
   2824 {
   2825 	VkResult result = VK_SUCCESS;
   2826 
   2827 	unsigned i = 0;
   2828 	for (; i < count; i++) {
   2829 		VkResult r;
   2830 		r = radv_compute_pipeline_create(_device, pipelineCache,
   2831 						 &pCreateInfos[i],
   2832 						 pAllocator, &pPipelines[i]);
   2833 		if (r != VK_SUCCESS) {
   2834 			result = r;
   2835 			pPipelines[i] = VK_NULL_HANDLE;
   2836 		}
   2837 	}
   2838 
   2839 	return result;
   2840 }
   2841