Home | History | Annotate | Download | only in radeonsi
      1 /*
      2  * Copyright 2012 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "si_pipe.h"
     25 #include "sid.h"
     26 #include "gfx9d.h"
     27 #include "radeon/r600_cs.h"
     28 #include "radeon/r600_query.h"
     29 
     30 #include "util/u_dual_blend.h"
     31 #include "util/u_format.h"
     32 #include "util/u_format_s3tc.h"
     33 #include "util/u_memory.h"
     34 #include "util/u_resource.h"
     35 #include "util/u_upload_mgr.h"
     36 
     37 /* Initialize an external atom (owned by ../radeon). */
     38 static void
     39 si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
     40 		      struct r600_atom **list_elem)
     41 {
     42 	atom->id = list_elem - sctx->atoms.array;
     43 	*list_elem = atom;
     44 }
     45 
     46 /* Initialize an atom owned by radeonsi.  */
     47 void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
     48 		  struct r600_atom **list_elem,
     49 		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
     50 {
     51 	atom->emit = (void*)emit_func;
     52 	atom->id = list_elem - sctx->atoms.array;
     53 	*list_elem = atom;
     54 }
     55 
     56 static unsigned si_map_swizzle(unsigned swizzle)
     57 {
     58 	switch (swizzle) {
     59 	case PIPE_SWIZZLE_Y:
     60 		return V_008F0C_SQ_SEL_Y;
     61 	case PIPE_SWIZZLE_Z:
     62 		return V_008F0C_SQ_SEL_Z;
     63 	case PIPE_SWIZZLE_W:
     64 		return V_008F0C_SQ_SEL_W;
     65 	case PIPE_SWIZZLE_0:
     66 		return V_008F0C_SQ_SEL_0;
     67 	case PIPE_SWIZZLE_1:
     68 		return V_008F0C_SQ_SEL_1;
     69 	default: /* PIPE_SWIZZLE_X */
     70 		return V_008F0C_SQ_SEL_X;
     71 	}
     72 }
     73 
     74 /* 12.4 fixed-point */
     75 static unsigned si_pack_float_12p4(float x)
     76 {
     77 	return x <= 0    ? 0 :
     78 	       x >= 4096 ? 0xffff : x * 16;
     79 }
     80 
/*
 * Inferred framebuffer and blender state.
 *
 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
 * if there is not enough PS outputs.
 */
static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_state_blend *blend = sctx->queued.named.blend;
	/* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
	 * but you never know. */
	uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit;
	unsigned i;

	/* Restrict writes to the channels enabled by the bound blend state. */
	if (blend)
		cb_target_mask &= blend->cb_target_mask;

	/* Avoid a hang that happens when dual source blending is enabled
	 * but there is not enough color outputs. This is undefined behavior,
	 * so disable color writes completely.
	 *
	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
	 */
	if (blend && blend->dual_src_blend &&
	    sctx->ps_shader.cso &&
	    (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
		cb_target_mask = 0;

	radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);

	/* GFX9: Flush DFSM when CB_TARGET_MASK changes.
	 * I think we don't have to do anything between IBs.
	 */
	if (sctx->screen->dfsm_allowed &&
	    sctx->last_cb_target_mask != cb_target_mask) {
		sctx->last_cb_target_mask = cb_target_mask;

		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
	}

	if (sctx->b.chip_class >= VI) {
		/* DCC MSAA workaround for blending.
		 * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-
		 * COMBINER_DISABLE, but that would be more complicated.
		 */
		bool oc_disable = (sctx->b.chip_class == VI ||
				   sctx->b.chip_class == GFX9) &&
				  blend &&
				  blend->blend_enable_4bit & cb_target_mask &&
				  sctx->framebuffer.nr_samples >= 2;

		radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL,
				       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
				       S_028424_OVERWRITE_COMBINER_WATERMARK(4) |
				       S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable));
	}

	/* RB+ register settings.
	 * Derive per-MRT downconvert/epsilon/control words from each
	 * colorbuffer's format, component swap, the PS export format,
	 * and the effective colormask. */
	if (sctx->screen->rbplus_allowed) {
		unsigned spi_shader_col_format =
			sctx->ps_shader.cso ?
			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
		unsigned sx_ps_downconvert = 0;
		unsigned sx_blend_opt_epsilon = 0;
		unsigned sx_blend_opt_control = 0;

		for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
			struct r600_surface *surf =
				(struct r600_surface*)sctx->framebuffer.state.cbufs[i];
			unsigned format, swap, spi_format, colormask;
			bool has_alpha, has_rgb;

			if (!surf)
				continue;

			format = G_028C70_FORMAT(surf->cb_color_info);
			swap = G_028C70_COMP_SWAP(surf->cb_color_info);
			/* Each MRT has a 4-bit field in both masks. */
			spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
			colormask = (cb_target_mask >> (i * 4)) & 0xf;

			/* Set if RGB and A are present. */
			has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);

			if (format == V_028C70_COLOR_8 ||
			    format == V_028C70_COLOR_16 ||
			    format == V_028C70_COLOR_32)
				has_rgb = !has_alpha;
			else
				has_rgb = true;

			/* Check the colormask and export format. */
			if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
				has_rgb = false;
			if (!(colormask & PIPE_MASK_A))
				has_alpha = false;

			if (spi_format == V_028714_SPI_SHADER_ZERO) {
				has_rgb = false;
				has_alpha = false;
			}

			/* Disable value checking for disabled channels. */
			if (!has_rgb)
				sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
			if (!has_alpha)
				sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);

			/* Enable down-conversion for 32bpp and smaller formats. */
			switch (format) {
			case V_028C70_COLOR_8:
			case V_028C70_COLOR_8_8:
			case V_028C70_COLOR_8_8_8_8:
				/* For 1 and 2-channel formats, use the superset thereof. */
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_5_6_5:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_1_5_5_5:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_4_4_4_4:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_32:
				if (swap == V_028C70_SWAP_STD &&
				    spi_format == V_028714_SPI_SHADER_32_R)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
				else if (swap == V_028C70_SWAP_ALT_REV &&
					 spi_format == V_028714_SPI_SHADER_32_AR)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
				break;

			case V_028C70_COLOR_16:
			case V_028C70_COLOR_16_16:
				/* For 1-channel formats, use the superset thereof. */
				if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
					if (swap == V_028C70_SWAP_STD ||
					    swap == V_028C70_SWAP_STD_REV)
						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
					else
						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
				}
				break;

			case V_028C70_COLOR_10_11_11:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_2_10_10_10:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
				}
				break;
			}
		}

		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
		radeon_emit(cs, sx_ps_downconvert);	/* R_028754_SX_PS_DOWNCONVERT */
		radeon_emit(cs, sx_blend_opt_epsilon);	/* R_028758_SX_BLEND_OPT_EPSILON */
		radeon_emit(cs, sx_blend_opt_control);	/* R_02875C_SX_BLEND_OPT_CONTROL */
	} else if (sctx->screen->has_rbplus) {
		/* RB+ exists but is not enabled (rbplus_allowed is false):
		 * keep the registers programmed to neutral (zero) values. */
		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
		radeon_emit(cs, 0);	/* R_028754_SX_PS_DOWNCONVERT */
		radeon_emit(cs, 0);	/* R_028758_SX_BLEND_OPT_EPSILON */
		radeon_emit(cs, 0);	/* R_02875C_SX_BLEND_OPT_CONTROL */
	}
}
    276 
    277 /*
    278  * Blender functions
    279  */
    280 
    281 static uint32_t si_translate_blend_function(int blend_func)
    282 {
    283 	switch (blend_func) {
    284 	case PIPE_BLEND_ADD:
    285 		return V_028780_COMB_DST_PLUS_SRC;
    286 	case PIPE_BLEND_SUBTRACT:
    287 		return V_028780_COMB_SRC_MINUS_DST;
    288 	case PIPE_BLEND_REVERSE_SUBTRACT:
    289 		return V_028780_COMB_DST_MINUS_SRC;
    290 	case PIPE_BLEND_MIN:
    291 		return V_028780_COMB_MIN_DST_SRC;
    292 	case PIPE_BLEND_MAX:
    293 		return V_028780_COMB_MAX_DST_SRC;
    294 	default:
    295 		R600_ERR("Unknown blend function %d\n", blend_func);
    296 		assert(0);
    297 		break;
    298 	}
    299 	return 0;
    300 }
    301 
    302 static uint32_t si_translate_blend_factor(int blend_fact)
    303 {
    304 	switch (blend_fact) {
    305 	case PIPE_BLENDFACTOR_ONE:
    306 		return V_028780_BLEND_ONE;
    307 	case PIPE_BLENDFACTOR_SRC_COLOR:
    308 		return V_028780_BLEND_SRC_COLOR;
    309 	case PIPE_BLENDFACTOR_SRC_ALPHA:
    310 		return V_028780_BLEND_SRC_ALPHA;
    311 	case PIPE_BLENDFACTOR_DST_ALPHA:
    312 		return V_028780_BLEND_DST_ALPHA;
    313 	case PIPE_BLENDFACTOR_DST_COLOR:
    314 		return V_028780_BLEND_DST_COLOR;
    315 	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    316 		return V_028780_BLEND_SRC_ALPHA_SATURATE;
    317 	case PIPE_BLENDFACTOR_CONST_COLOR:
    318 		return V_028780_BLEND_CONSTANT_COLOR;
    319 	case PIPE_BLENDFACTOR_CONST_ALPHA:
    320 		return V_028780_BLEND_CONSTANT_ALPHA;
    321 	case PIPE_BLENDFACTOR_ZERO:
    322 		return V_028780_BLEND_ZERO;
    323 	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    324 		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
    325 	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    326 		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
    327 	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
    328 		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
    329 	case PIPE_BLENDFACTOR_INV_DST_COLOR:
    330 		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
    331 	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
    332 		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
    333 	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    334 		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
    335 	case PIPE_BLENDFACTOR_SRC1_COLOR:
    336 		return V_028780_BLEND_SRC1_COLOR;
    337 	case PIPE_BLENDFACTOR_SRC1_ALPHA:
    338 		return V_028780_BLEND_SRC1_ALPHA;
    339 	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
    340 		return V_028780_BLEND_INV_SRC1_COLOR;
    341 	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
    342 		return V_028780_BLEND_INV_SRC1_ALPHA;
    343 	default:
    344 		R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
    345 		assert(0);
    346 		break;
    347 	}
    348 	return 0;
    349 }
    350 
    351 static uint32_t si_translate_blend_opt_function(int blend_func)
    352 {
    353 	switch (blend_func) {
    354 	case PIPE_BLEND_ADD:
    355 		return V_028760_OPT_COMB_ADD;
    356 	case PIPE_BLEND_SUBTRACT:
    357 		return V_028760_OPT_COMB_SUBTRACT;
    358 	case PIPE_BLEND_REVERSE_SUBTRACT:
    359 		return V_028760_OPT_COMB_REVSUBTRACT;
    360 	case PIPE_BLEND_MIN:
    361 		return V_028760_OPT_COMB_MIN;
    362 	case PIPE_BLEND_MAX:
    363 		return V_028760_OPT_COMB_MAX;
    364 	default:
    365 		return V_028760_OPT_COMB_BLEND_DISABLED;
    366 	}
    367 }
    368 
    369 static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
    370 {
    371 	switch (blend_fact) {
    372 	case PIPE_BLENDFACTOR_ZERO:
    373 		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
    374 	case PIPE_BLENDFACTOR_ONE:
    375 		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
    376 	case PIPE_BLENDFACTOR_SRC_COLOR:
    377 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
    378 				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
    379 	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    380 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
    381 				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
    382 	case PIPE_BLENDFACTOR_SRC_ALPHA:
    383 		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
    384 	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    385 		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
    386 	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    387 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
    388 				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
    389 	default:
    390 		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
    391 	}
    392 }
    393 
    394 static void si_blend_check_commutativity(struct si_screen *sscreen,
    395 					 struct si_state_blend *blend,
    396 					 enum pipe_blend_func func,
    397 					 enum pipe_blendfactor src,
    398 					 enum pipe_blendfactor dst,
    399 					 unsigned chanmask)
    400 {
    401 	/* Src factor is allowed when it does not depend on Dst */
    402 	static const uint32_t src_allowed =
    403 		(1u << PIPE_BLENDFACTOR_ONE) |
    404 		(1u << PIPE_BLENDFACTOR_SRC_COLOR) |
    405 		(1u << PIPE_BLENDFACTOR_SRC_ALPHA) |
    406 		(1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) |
    407 		(1u << PIPE_BLENDFACTOR_CONST_COLOR) |
    408 		(1u << PIPE_BLENDFACTOR_CONST_ALPHA) |
    409 		(1u << PIPE_BLENDFACTOR_SRC1_COLOR) |
    410 		(1u << PIPE_BLENDFACTOR_SRC1_ALPHA) |
    411 		(1u << PIPE_BLENDFACTOR_ZERO) |
    412 		(1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) |
    413 		(1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) |
    414 		(1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) |
    415 		(1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) |
    416 		(1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) |
    417 		(1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA);
    418 
    419 	if (dst == PIPE_BLENDFACTOR_ONE &&
    420 	    (src_allowed & (1u << src))) {
    421 		/* Addition is commutative, but floating point addition isn't
    422 		 * associative: subtle changes can be introduced via different
    423 		 * rounding.
    424 		 *
    425 		 * Out-of-order is also non-deterministic, which means that
    426 		 * this breaks OpenGL invariance requirements. So only enable
    427 		 * out-of-order additive blending if explicitly allowed by a
    428 		 * setting.
    429 		 */
    430 		if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN ||
    431 		    (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add))
    432 			blend->commutative_4bit |= chanmask;
    433 	}
    434 }
    435 
    436 /**
    437  * Get rid of DST in the blend factors by commuting the operands:
    438  *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
    439  */
    440 static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
    441 				unsigned *dst_factor, unsigned expected_dst,
    442 				unsigned replacement_src)
    443 {
    444 	if (*src_factor == expected_dst &&
    445 	    *dst_factor == PIPE_BLENDFACTOR_ZERO) {
    446 		*src_factor = PIPE_BLENDFACTOR_ZERO;
    447 		*dst_factor = replacement_src;
    448 
    449 		/* Commuting the operands requires reversing subtractions. */
    450 		if (*func == PIPE_BLEND_SUBTRACT)
    451 			*func = PIPE_BLEND_REVERSE_SUBTRACT;
    452 		else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
    453 			*func = PIPE_BLEND_SUBTRACT;
    454 	}
    455 }
    456 
    457 static bool si_blend_factor_uses_dst(unsigned factor)
    458 {
    459 	return factor == PIPE_BLENDFACTOR_DST_COLOR ||
    460 		factor == PIPE_BLENDFACTOR_DST_ALPHA ||
    461 		factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
    462 		factor == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
    463 		factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
    464 }
    465 
    466 static void *si_create_blend_state_mode(struct pipe_context *ctx,
    467 					const struct pipe_blend_state *state,
    468 					unsigned mode)
    469 {
    470 	struct si_context *sctx = (struct si_context*)ctx;
    471 	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
    472 	struct si_pm4_state *pm4 = &blend->pm4;
    473 	uint32_t sx_mrt_blend_opt[8] = {0};
    474 	uint32_t color_control = 0;
    475 
    476 	if (!blend)
    477 		return NULL;
    478 
    479 	blend->alpha_to_coverage = state->alpha_to_coverage;
    480 	blend->alpha_to_one = state->alpha_to_one;
    481 	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
    482 	blend->logicop_enable = state->logicop_enable;
    483 
    484 	if (state->logicop_enable) {
    485 		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
    486 	} else {
    487 		color_control |= S_028808_ROP3(0xcc);
    488 	}
    489 
    490 	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
    491 		       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
    492 		       S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
    493 		       S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
    494 		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
    495 		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
    496 
    497 	if (state->alpha_to_coverage)
    498 		blend->need_src_alpha_4bit |= 0xf;
    499 
    500 	blend->cb_target_mask = 0;
    501 	blend->cb_target_enabled_4bit = 0;
    502 
    503 	for (int i = 0; i < 8; i++) {
    504 		/* state->rt entries > 0 only written if independent blending */
    505 		const int j = state->independent_blend_enable ? i : 0;
    506 
    507 		unsigned eqRGB = state->rt[j].rgb_func;
    508 		unsigned srcRGB = state->rt[j].rgb_src_factor;
    509 		unsigned dstRGB = state->rt[j].rgb_dst_factor;
    510 		unsigned eqA = state->rt[j].alpha_func;
    511 		unsigned srcA = state->rt[j].alpha_src_factor;
    512 		unsigned dstA = state->rt[j].alpha_dst_factor;
    513 
    514 		unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
    515 		unsigned blend_cntl = 0;
    516 
    517 		sx_mrt_blend_opt[i] =
    518 			S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
    519 			S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
    520 
    521 		/* Only set dual source blending for MRT0 to avoid a hang. */
    522 		if (i >= 1 && blend->dual_src_blend) {
    523 			/* Vulkan does this for dual source blending. */
    524 			if (i == 1)
    525 				blend_cntl |= S_028780_ENABLE(1);
    526 
    527 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
    528 			continue;
    529 		}
    530 
    531 		/* Only addition and subtraction equations are supported with
    532 		 * dual source blending.
    533 		 */
    534 		if (blend->dual_src_blend &&
    535 		    (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
    536 		     eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
    537 			assert(!"Unsupported equation for dual source blending");
    538 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
    539 			continue;
    540 		}
    541 
    542 		/* cb_render_state will disable unused ones */
    543 		blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
    544 		if (state->rt[j].colormask)
    545 			blend->cb_target_enabled_4bit |= 0xf << (4 * i);
    546 
    547 		if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
    548 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
    549 			continue;
    550 		}
    551 
    552 		si_blend_check_commutativity(sctx->screen, blend,
    553 					     eqRGB, srcRGB, dstRGB, 0x7 << (4 * i));
    554 		si_blend_check_commutativity(sctx->screen, blend,
    555 					     eqA, srcA, dstA, 0x8 << (4 * i));
    556 
    557 		/* Blending optimizations for RB+.
    558 		 * These transformations don't change the behavior.
    559 		 *
    560 		 * First, get rid of DST in the blend factors:
    561 		 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
    562 		 */
    563 		si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
    564 				    PIPE_BLENDFACTOR_DST_COLOR,
    565 				    PIPE_BLENDFACTOR_SRC_COLOR);
    566 		si_blend_remove_dst(&eqA, &srcA, &dstA,
    567 				    PIPE_BLENDFACTOR_DST_COLOR,
    568 				    PIPE_BLENDFACTOR_SRC_COLOR);
    569 		si_blend_remove_dst(&eqA, &srcA, &dstA,
    570 				    PIPE_BLENDFACTOR_DST_ALPHA,
    571 				    PIPE_BLENDFACTOR_SRC_ALPHA);
    572 
    573 		/* Look up the ideal settings from tables. */
    574 		srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
    575 		dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
    576 		srcA_opt = si_translate_blend_opt_factor(srcA, true);
    577 		dstA_opt = si_translate_blend_opt_factor(dstA, true);
    578 
    579 		/* Handle interdependencies. */
    580 		if (si_blend_factor_uses_dst(srcRGB))
    581 			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
    582 		if (si_blend_factor_uses_dst(srcA))
    583 			dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
    584 
    585 		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
    586 		    (dstRGB == PIPE_BLENDFACTOR_ZERO ||
    587 		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
    588 		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
    589 			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
    590 
    591 		/* Set the final value. */
    592 		sx_mrt_blend_opt[i] =
    593 			S_028760_COLOR_SRC_OPT(srcRGB_opt) |
    594 			S_028760_COLOR_DST_OPT(dstRGB_opt) |
    595 			S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
    596 			S_028760_ALPHA_SRC_OPT(srcA_opt) |
    597 			S_028760_ALPHA_DST_OPT(dstA_opt) |
    598 			S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
    599 
    600 		/* Set blend state. */
    601 		blend_cntl |= S_028780_ENABLE(1);
    602 		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
    603 		blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
    604 		blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
    605 
    606 		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
    607 			blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
    608 			blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
    609 			blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
    610 			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
    611 		}
    612 		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
    613 
    614 		blend->blend_enable_4bit |= 0xfu << (i * 4);
    615 
    616 		/* This is only important for formats without alpha. */
    617 		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
    618 		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
    619 		    srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
    620 		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
    621 		    srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
    622 		    dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
    623 			blend->need_src_alpha_4bit |= 0xfu << (i * 4);
    624 	}
    625 
    626 	if (blend->cb_target_mask) {
    627 		color_control |= S_028808_MODE(mode);
    628 	} else {
    629 		color_control |= S_028808_MODE(V_028808_CB_DISABLE);
    630 	}
    631 
    632 	if (sctx->screen->has_rbplus) {
    633 		/* Disable RB+ blend optimizations for dual source blending.
    634 		 * Vulkan does this.
    635 		 */
    636 		if (blend->dual_src_blend) {
    637 			for (int i = 0; i < 8; i++) {
    638 				sx_mrt_blend_opt[i] =
    639 					S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
    640 					S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
    641 			}
    642 		}
    643 
    644 		for (int i = 0; i < 8; i++)
    645 			si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
    646 				       sx_mrt_blend_opt[i]);
    647 
    648 		/* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
    649 		if (blend->dual_src_blend || state->logicop_enable ||
    650 		    mode == V_028808_CB_RESOLVE)
    651 			color_control |= S_028808_DISABLE_DUAL_QUAD(1);
    652 	}
    653 
    654 	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
    655 	return blend;
    656 }
    657 
/* pipe_context::create_blend_state: create a blend CSO using the
 * default (normal) color-control mode. */
static void *si_create_blend_state(struct pipe_context *ctx,
				   const struct pipe_blend_state *state)
{
	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
}
    663 
/* pipe_context::bind_blend_state: bind a blend CSO and mark every piece
 * of derived state that depends on it as dirty. */
static void si_bind_blend_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_blend *old_blend = sctx->queued.named.blend;
	struct si_state_blend *blend = (struct si_state_blend *)state;

	if (!state)
		return;

	si_pm4_bind_state(sctx, blend, state);

	/* cb_render_state emits CB_TARGET_MASK and the DCC MSAA workaround,
	 * which depend on these fields. */
	if (!old_blend ||
	    old_blend->cb_target_mask != blend->cb_target_mask ||
	    old_blend->dual_src_blend != blend->dual_src_blend ||
	    (old_blend->blend_enable_4bit != blend->blend_enable_4bit &&
	     sctx->framebuffer.nr_samples >= 2 &&
	     sctx->screen->dcc_msaa_allowed))
		si_mark_atom_dirty(sctx, &sctx->cb_render_state);

	/* These fields presumably feed the shader keys (PS epilog) —
	 * request a shader update when any of them changes. */
	if (!old_blend ||
	    old_blend->cb_target_mask != blend->cb_target_mask ||
	    old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
	    old_blend->alpha_to_one != blend->alpha_to_one ||
	    old_blend->dual_src_blend != blend->dual_src_blend ||
	    old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
	    old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
		sctx->do_update_shaders = true;

	/* Primitive binning (DPBB) state depends on these blend fields. */
	if (sctx->screen->dpbb_allowed &&
	    (!old_blend ||
	     old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
	     old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
	     old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit))
		si_mark_atom_dirty(sctx, &sctx->dpbb_state);

	/* Out-of-order rasterization eligibility depends on blend
	 * commutativity and logic op, tracked in msaa_config. */
	if (sctx->screen->has_out_of_order_rast &&
	    (!old_blend ||
	     (old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
	      old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
	      old_blend->commutative_4bit != blend->commutative_4bit ||
	      old_blend->logicop_enable != blend->logicop_enable)))
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
    707 
/* pipe_context::delete_blend_state: destroy a blend CSO created by
 * si_create_blend_state().  NOTE(review): si_pm4_delete_state appears to
 * also handle the currently-bound case — verify against the pm4 helper. */
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
}
    713 
    714 static void si_set_blend_color(struct pipe_context *ctx,
    715 			       const struct pipe_blend_color *state)
    716 {
    717 	struct si_context *sctx = (struct si_context *)ctx;
    718 	static const struct pipe_blend_color zeros;
    719 
    720 	sctx->blend_color.state = *state;
    721 	sctx->blend_color.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
    722 	si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
    723 }
    724 
/* Emit the blend constant color: 4 consecutive registers starting at
 * CB_BLEND_RED (R, G, B, A as raw 32-bit float values). */
static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;

	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
}
    732 
    733 /*
    734  * Clipping
    735  */
    736 
/* pipe_context::set_clip_state: store the user clip planes, mark the
 * clip-state atom dirty, and upload the planes as a constant buffer
 * so shaders can read them. */
static void si_set_clip_state(struct pipe_context *ctx,
			      const struct pipe_clip_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer cb;
	static const struct pipe_clip_state zeros;

	/* Skip redundant updates. */
	if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
		return;

	sctx->clip_state.state = *state;
	sctx->clip_state.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0;
	si_mark_atom_dirty(sctx, &sctx->clip_state.atom);

	/* Upload all 8 planes (8 vec4s = 4*4*8 bytes) as a user buffer. */
	cb.buffer = NULL;
	cb.user_buffer = state->ucp;
	cb.buffer_offset = 0;
	cb.buffer_size = 4*4*8;
	si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
	pipe_resource_reference(&cb.buffer, NULL);
}
    758 
/* Emit the user clip planes to the PA_CL_UCP registers:
 * 6 planes x 4 floats each, starting at PA_CL_UCP_0_X. */
static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;

	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
}
    766 
/* Emit PA_CL_VS_OUT_CNTL and PA_CL_CLIP_CNTL, derived from the current
 * vertex-stage shader (via si_get_vs_state) and the bound rasterizer. */
static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_shader *vs = si_get_vs_state(sctx);
	struct si_shader_selector *vs_sel = vs->selector;
	struct tgsi_shader_info *info = &vs_sel->info;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned window_space =
	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
	unsigned clipdist_mask = vs_sel->clipdist_mask;
	/* Fixed-function user clip planes are only enabled when the shader
	 * writes no clip distances of its own. */
	unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
	unsigned culldist_mask = vs_sel->culldist_mask;
	unsigned total_mask;

	/* The shader variant may have clipping optimized away entirely. */
	if (vs->key.opt.clip_disable) {
		assert(!info->culldist_writemask);
		clipdist_mask = 0;
		culldist_mask = 0;
	}
	total_mask = clipdist_mask | culldist_mask;

	/* Clip distances on points have no effect, so need to be implemented
	 * as cull distances. This applies for the clipvertex case as well.
	 *
	 * Setting this for primitives other than points should have no adverse
	 * effects.
	 */
	clipdist_mask &= rs->clip_plane_enable;
	culldist_mask |= clipdist_mask;

	/* Low 8 bits: clip-distance enables; next 8 bits: cull-distance
	 * enables; the *_VEC_ENA bits enable the two 4-component vectors. */
	radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
		vs_sel->pa_cl_vs_out_cntl |
		S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
		S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
		clipdist_mask | (culldist_mask << 8));
	/* Window-space positions bypass clipping entirely. */
	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
		rs->pa_cl_clip_cntl |
		ucp_mask |
		S_028810_CLIP_DISABLE(window_space));
}
    807 
    808 /*
    809  * inferred state between framebuffer and rasterizer
    810  */
    811 static void si_update_poly_offset_state(struct si_context *sctx)
    812 {
    813 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
    814 
    815 	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
    816 		si_pm4_bind_state(sctx, poly_offset, NULL);
    817 		return;
    818 	}
    819 
    820 	/* Use the user format, not db_render_format, so that the polygon
    821 	 * offset behaves as expected by applications.
    822 	 */
    823 	switch (sctx->framebuffer.state.zsbuf->texture->format) {
    824 	case PIPE_FORMAT_Z16_UNORM:
    825 		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
    826 		break;
    827 	default: /* 24-bit */
    828 		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
    829 		break;
    830 	case PIPE_FORMAT_Z32_FLOAT:
    831 	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
    832 		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
    833 		break;
    834 	}
    835 }
    836 
    837 /*
    838  * Rasterizer
    839  */
    840 
    841 static uint32_t si_translate_fill(uint32_t func)
    842 {
    843 	switch(func) {
    844 	case PIPE_POLYGON_MODE_FILL:
    845 		return V_028814_X_DRAW_TRIANGLES;
    846 	case PIPE_POLYGON_MODE_LINE:
    847 		return V_028814_X_DRAW_LINES;
    848 	case PIPE_POLYGON_MODE_POINT:
    849 		return V_028814_X_DRAW_POINTS;
    850 	default:
    851 		assert(0);
    852 		return V_028814_X_DRAW_POINTS;
    853 	}
    854 }
    855 
    856 static void *si_create_rs_state(struct pipe_context *ctx,
    857 				const struct pipe_rasterizer_state *state)
    858 {
    859 	struct si_screen *sscreen = ((struct si_context *)ctx)->screen;
    860 	struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
    861 	struct si_pm4_state *pm4 = &rs->pm4;
    862 	unsigned tmp, i;
    863 	float psize_min, psize_max;
    864 
    865 	if (!rs) {
    866 		return NULL;
    867 	}
    868 
    869 	rs->scissor_enable = state->scissor;
    870 	rs->clip_halfz = state->clip_halfz;
    871 	rs->two_side = state->light_twoside;
    872 	rs->multisample_enable = state->multisample;
    873 	rs->force_persample_interp = state->force_persample_interp;
    874 	rs->clip_plane_enable = state->clip_plane_enable;
    875 	rs->line_stipple_enable = state->line_stipple_enable;
    876 	rs->poly_stipple_enable = state->poly_stipple_enable;
    877 	rs->line_smooth = state->line_smooth;
    878 	rs->line_width = state->line_width;
    879 	rs->poly_smooth = state->poly_smooth;
    880 	rs->uses_poly_offset = state->offset_point || state->offset_line ||
    881 			       state->offset_tri;
    882 	rs->clamp_fragment_color = state->clamp_fragment_color;
    883 	rs->clamp_vertex_color = state->clamp_vertex_color;
    884 	rs->flatshade = state->flatshade;
    885 	rs->sprite_coord_enable = state->sprite_coord_enable;
    886 	rs->rasterizer_discard = state->rasterizer_discard;
    887 	rs->pa_sc_line_stipple = state->line_stipple_enable ?
    888 				S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
    889 				S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
    890 	rs->pa_cl_clip_cntl =
    891 		S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
    892 		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
    893 		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
    894 		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
    895 		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
    896 
    897 	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
    898 		S_0286D4_FLAT_SHADE_ENA(1) |
    899 		S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) |
    900 		S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
    901 		S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
    902 		S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
    903 		S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
    904 		S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
    905 
    906 	/* point size 12.4 fixed point */
    907 	tmp = (unsigned)(state->point_size * 8.0);
    908 	si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
    909 
    910 	if (state->point_size_per_vertex) {
    911 		psize_min = util_get_min_point_size(state);
    912 		psize_max = 8192;
    913 	} else {
    914 		/* Force the point size to be as if the vertex output was disabled. */
    915 		psize_min = state->point_size;
    916 		psize_max = state->point_size;
    917 	}
    918 	rs->max_point_size = psize_max;
    919 
    920 	/* Divide by two, because 0.5 = 1 pixel. */
    921 	si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
    922 			S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
    923 			S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
    924 
    925 	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL,
    926 		       S_028A08_WIDTH(si_pack_float_12p4(state->line_width/2)));
    927 	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
    928 		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
    929 		       S_028A48_MSAA_ENABLE(state->multisample ||
    930 					    state->poly_smooth ||
    931 					    state->line_smooth) |
    932 		       S_028A48_VPORT_SCISSOR_ENABLE(1) |
    933 		       S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.chip_class >= GFX9));
    934 
    935 	si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
    936 		       S_028BE4_PIX_CENTER(state->half_pixel_center) |
    937 		       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
    938 
    939 	si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
    940 	si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
    941 		S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
    942 		S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
    943 		S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
    944 		S_028814_FACE(!state->front_ccw) |
    945 		S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
    946 		S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
    947 		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
    948 		S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
    949 				   state->fill_back != PIPE_POLYGON_MODE_FILL) |
    950 		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
    951 		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
    952 
    953 	if (!rs->uses_poly_offset)
    954 		return rs;
    955 
    956 	rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state));
    957 	if (!rs->pm4_poly_offset) {
    958 		FREE(rs);
    959 		return NULL;
    960 	}
    961 
    962 	/* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
    963 	for (i = 0; i < 3; i++) {
    964 		struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
    965 		float offset_units = state->offset_units;
    966 		float offset_scale = state->offset_scale * 16.0f;
    967 		uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
    968 
    969 		if (!state->offset_units_unscaled) {
    970 			switch (i) {
    971 			case 0: /* 16-bit zbuffer */
    972 				offset_units *= 4.0f;
    973 				pa_su_poly_offset_db_fmt_cntl =
    974 					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
    975 				break;
    976 			case 1: /* 24-bit zbuffer */
    977 				offset_units *= 2.0f;
    978 				pa_su_poly_offset_db_fmt_cntl =
    979 					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
    980 				break;
    981 			case 2: /* 32-bit zbuffer */
    982 				offset_units *= 1.0f;
    983 				pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
    984 								S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
    985 				break;
    986 			}
    987 		}
    988 
    989 		si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
    990 			       fui(offset_scale));
    991 		si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
    992 			       fui(offset_units));
    993 		si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
    994 			       fui(offset_scale));
    995 		si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
    996 			       fui(offset_units));
    997 		si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
    998 			       pa_su_poly_offset_db_fmt_cntl);
    999 	}
   1000 
   1001 	return rs;
   1002 }
   1003 
/* Bind a rasterizer state object and dirty every piece of derived state
 * that depends on it. Ignores NULL (keeps the previous state bound). */
static void si_bind_rs_state(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_state_rasterizer *old_rs =
		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;

	if (!state)
		return;

	if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) {
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

		/* Update the small primitive filter workaround if necessary. */
		if (sctx->screen->has_msaa_sample_loc_bug &&
		    sctx->framebuffer.nr_samples > 1)
			si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
	}

	/* Vertex color clamping is passed to shaders via the VS state bits. */
	sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
	sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);

	si_pm4_bind_state(sctx, rasterizer, rs);
	si_update_poly_offset_state(sctx);

	/* Scissor state depends on the scissor enable, line width, and
	 * maximum point size. */
	if (!old_rs ||
	    (old_rs->scissor_enable != rs->scissor_enable ||
	     old_rs->line_width != rs->line_width ||
	     old_rs->max_point_size != rs->max_point_size)) {
		sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
		si_mark_atom_dirty(sctx, &sctx->scissors.atom);
	}

	/* The viewport depth-range transform depends on the halfz convention. */
	if (!old_rs ||
	    old_rs->clip_halfz != rs->clip_halfz) {
		sctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
		si_mark_atom_dirty(sctx, &sctx->viewports.atom);
	}

	/* si_emit_clip_regs reads clip_plane_enable and pa_cl_clip_cntl. */
	if (!old_rs ||
	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
	    old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
		si_mark_atom_dirty(sctx, &sctx->clip_regs);

	sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
		rs->line_stipple_enable;

	/* Any change to a field that shader variants are keyed on forces a
	 * shader-variant update. */
	if (!old_rs ||
	    old_rs->clip_plane_enable != rs->clip_plane_enable ||
	    old_rs->rasterizer_discard != rs->rasterizer_discard ||
	    old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
	    old_rs->flatshade != rs->flatshade ||
	    old_rs->two_side != rs->two_side ||
	    old_rs->multisample_enable != rs->multisample_enable ||
	    old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
	    old_rs->poly_smooth != rs->poly_smooth ||
	    old_rs->line_smooth != rs->line_smooth ||
	    old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
	    old_rs->force_persample_interp != rs->force_persample_interp)
		sctx->do_update_shaders = true;
}
   1065 
   1066 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
   1067 {
   1068 	struct si_context *sctx = (struct si_context *)ctx;
   1069 	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
   1070 
   1071 	if (sctx->queued.named.rasterizer == state)
   1072 		si_pm4_bind_state(sctx, poly_offset, NULL);
   1073 
   1074 	FREE(rs->pm4_poly_offset);
   1075 	si_pm4_delete_state(sctx, rasterizer, rs);
   1076 }
   1077 
   1078 /*
   1079  * infeered state between dsa and stencil ref
   1080  */
/* Emit DB_STENCILREFMASK and DB_STENCILREFMASK_BF: the per-face stencil
 * reference values combined with the compare/write masks taken from the
 * bound DSA state. */
static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;

	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
	/* Front face. */
	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
			S_028430_STENCILMASK(dsa->valuemask[0]) |
			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
			S_028430_STENCILOPVAL(1));
	/* Back face. */
	radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
			S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
			S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
			S_028434_STENCILOPVAL_BF(1));
}
   1097 
   1098 static void si_set_stencil_ref(struct pipe_context *ctx,
   1099 			       const struct pipe_stencil_ref *state)
   1100 {
   1101         struct si_context *sctx = (struct si_context *)ctx;
   1102 
   1103 	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
   1104 		return;
   1105 
   1106 	sctx->stencil_ref.state = *state;
   1107 	si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
   1108 }
   1109 
   1110 
   1111 /*
   1112  * DSA
   1113  */
   1114 
   1115 static uint32_t si_translate_stencil_op(int s_op)
   1116 {
   1117 	switch (s_op) {
   1118 	case PIPE_STENCIL_OP_KEEP:
   1119 		return V_02842C_STENCIL_KEEP;
   1120 	case PIPE_STENCIL_OP_ZERO:
   1121 		return V_02842C_STENCIL_ZERO;
   1122 	case PIPE_STENCIL_OP_REPLACE:
   1123 		return V_02842C_STENCIL_REPLACE_TEST;
   1124 	case PIPE_STENCIL_OP_INCR:
   1125 		return V_02842C_STENCIL_ADD_CLAMP;
   1126 	case PIPE_STENCIL_OP_DECR:
   1127 		return V_02842C_STENCIL_SUB_CLAMP;
   1128 	case PIPE_STENCIL_OP_INCR_WRAP:
   1129 		return V_02842C_STENCIL_ADD_WRAP;
   1130 	case PIPE_STENCIL_OP_DECR_WRAP:
   1131 		return V_02842C_STENCIL_SUB_WRAP;
   1132 	case PIPE_STENCIL_OP_INVERT:
   1133 		return V_02842C_STENCIL_INVERT;
   1134 	default:
   1135 		R600_ERR("Unknown stencil op %d", s_op);
   1136 		assert(0);
   1137 		break;
   1138 	}
   1139 	return 0;
   1140 }
   1141 
   1142 static bool si_dsa_writes_stencil(const struct pipe_stencil_state *s)
   1143 {
   1144 	return s->enabled && s->writemask &&
   1145 	       (s->fail_op  != PIPE_STENCIL_OP_KEEP ||
   1146 		s->zfail_op != PIPE_STENCIL_OP_KEEP ||
   1147 		s->zpass_op != PIPE_STENCIL_OP_KEEP);
   1148 }
   1149 
   1150 static bool si_order_invariant_stencil_op(enum pipe_stencil_op op)
   1151 {
   1152 	/* REPLACE is normally order invariant, except when the stencil
   1153 	 * reference value is written by the fragment shader. Tracking this
   1154 	 * interaction does not seem worth the effort, so be conservative. */
   1155 	return op != PIPE_STENCIL_OP_INCR &&
   1156 	       op != PIPE_STENCIL_OP_DECR &&
   1157 	       op != PIPE_STENCIL_OP_REPLACE;
   1158 }
   1159 
   1160 /* Compute whether, assuming Z writes are disabled, this stencil state is order
   1161  * invariant in the sense that the set of passing fragments as well as the
   1162  * final stencil buffer result does not depend on the order of fragments. */
   1163 static bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state)
   1164 {
   1165 	return !state->enabled || !state->writemask ||
   1166 	       /* The following assumes that Z writes are disabled. */
   1167 	       (state->func == PIPE_FUNC_ALWAYS &&
   1168 	        si_order_invariant_stencil_op(state->zpass_op) &&
   1169 	        si_order_invariant_stencil_op(state->zfail_op)) ||
   1170 	       (state->func == PIPE_FUNC_NEVER &&
   1171 	        si_order_invariant_stencil_op(state->fail_op));
   1172 }
   1173 
   1174 static void *si_create_dsa_state(struct pipe_context *ctx,
   1175 				 const struct pipe_depth_stencil_alpha_state *state)
   1176 {
   1177 	struct si_context *sctx = (struct si_context *)ctx;
   1178 	struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
   1179 	struct si_pm4_state *pm4 = &dsa->pm4;
   1180 	unsigned db_depth_control;
   1181 	uint32_t db_stencil_control = 0;
   1182 
   1183 	if (!dsa) {
   1184 		return NULL;
   1185 	}
   1186 
   1187 	dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
   1188 	dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
   1189 	dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
   1190 	dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
   1191 
   1192 	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
   1193 		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
   1194 		S_028800_ZFUNC(state->depth.func) |
   1195 		S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
   1196 
   1197 	/* stencil */
   1198 	if (state->stencil[0].enabled) {
   1199 		db_depth_control |= S_028800_STENCIL_ENABLE(1);
   1200 		db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
   1201 		db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
   1202 		db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
   1203 		db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
   1204 
   1205 		if (state->stencil[1].enabled) {
   1206 			db_depth_control |= S_028800_BACKFACE_ENABLE(1);
   1207 			db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
   1208 			db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
   1209 			db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
   1210 			db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
   1211 		}
   1212 	}
   1213 
   1214 	/* alpha */
   1215 	if (state->alpha.enabled) {
   1216 		dsa->alpha_func = state->alpha.func;
   1217 
   1218 		si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
   1219 		               SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
   1220 	} else {
   1221 		dsa->alpha_func = PIPE_FUNC_ALWAYS;
   1222 	}
   1223 
   1224 	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
   1225 	if (state->stencil[0].enabled)
   1226 		si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
   1227 	if (state->depth.bounds_test) {
   1228 		si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
   1229 		si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
   1230 	}
   1231 
   1232 	dsa->depth_enabled = state->depth.enabled;
   1233 	dsa->depth_write_enabled = state->depth.enabled &&
   1234 				   state->depth.writemask;
   1235 	dsa->stencil_enabled = state->stencil[0].enabled;
   1236 	dsa->stencil_write_enabled = state->stencil[0].enabled &&
   1237 				     (si_dsa_writes_stencil(&state->stencil[0]) ||
   1238 				      si_dsa_writes_stencil(&state->stencil[1]));
   1239 	dsa->db_can_write = dsa->depth_write_enabled ||
   1240 			    dsa->stencil_write_enabled;
   1241 
   1242 	bool zfunc_is_ordered =
   1243 		state->depth.func == PIPE_FUNC_NEVER ||
   1244 		state->depth.func == PIPE_FUNC_LESS ||
   1245 		state->depth.func == PIPE_FUNC_LEQUAL ||
   1246 		state->depth.func == PIPE_FUNC_GREATER ||
   1247 		state->depth.func == PIPE_FUNC_GEQUAL;
   1248 
   1249 	bool nozwrite_and_order_invariant_stencil =
   1250 		!dsa->db_can_write ||
   1251 		(!dsa->depth_write_enabled &&
   1252 		 si_order_invariant_stencil_state(&state->stencil[0]) &&
   1253 		 si_order_invariant_stencil_state(&state->stencil[1]));
   1254 
   1255 	dsa->order_invariance[1].zs =
   1256 		nozwrite_and_order_invariant_stencil ||
   1257 		(!dsa->stencil_write_enabled && zfunc_is_ordered);
   1258 	dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered;
   1259 
   1260 	dsa->order_invariance[1].pass_set =
   1261 		nozwrite_and_order_invariant_stencil ||
   1262 		(!dsa->stencil_write_enabled &&
   1263 		 (state->depth.func == PIPE_FUNC_ALWAYS ||
   1264 		  state->depth.func == PIPE_FUNC_NEVER));
   1265 	dsa->order_invariance[0].pass_set =
   1266 		!dsa->depth_write_enabled ||
   1267 		(state->depth.func == PIPE_FUNC_ALWAYS ||
   1268 		 state->depth.func == PIPE_FUNC_NEVER);
   1269 
   1270 	dsa->order_invariance[1].pass_last =
   1271 		sctx->screen->assume_no_z_fights &&
   1272 		!dsa->stencil_write_enabled &&
   1273 		dsa->depth_write_enabled && zfunc_is_ordered;
   1274 	dsa->order_invariance[0].pass_last =
   1275 		sctx->screen->assume_no_z_fights &&
   1276 		dsa->depth_write_enabled && zfunc_is_ordered;
   1277 
   1278 	return dsa;
   1279 }
   1280 
   1281 static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
   1282 {
   1283         struct si_context *sctx = (struct si_context *)ctx;
   1284 	struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
   1285         struct si_state_dsa *dsa = state;
   1286 
   1287         if (!state)
   1288                 return;
   1289 
   1290 	si_pm4_bind_state(sctx, dsa, dsa);
   1291 
   1292 	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
   1293 		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
   1294 		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
   1295 		si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
   1296 	}
   1297 
   1298 	if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
   1299 		sctx->do_update_shaders = true;
   1300 
   1301 	if (sctx->screen->dpbb_allowed &&
   1302 	    (!old_dsa ||
   1303 	     (old_dsa->depth_enabled != dsa->depth_enabled ||
   1304 	      old_dsa->stencil_enabled != dsa->stencil_enabled ||
   1305 	      old_dsa->db_can_write != dsa->db_can_write)))
   1306 		si_mark_atom_dirty(sctx, &sctx->dpbb_state);
   1307 
   1308 	if (sctx->screen->has_out_of_order_rast &&
   1309 	    (!old_dsa ||
   1310 	     memcmp(old_dsa->order_invariance, dsa->order_invariance,
   1311 		    sizeof(old_dsa->order_invariance))))
   1312 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
   1313 }
   1314 
   1315 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
   1316 {
   1317 	struct si_context *sctx = (struct si_context *)ctx;
   1318 	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
   1319 }
   1320 
   1321 static void *si_create_db_flush_dsa(struct si_context *sctx)
   1322 {
   1323 	struct pipe_depth_stencil_alpha_state dsa = {};
   1324 
   1325 	return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
   1326 }
   1327 
   1328 /* DB RENDER STATE */
   1329 
   1330 static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
   1331 {
   1332 	struct si_context *sctx = (struct si_context*)ctx;
   1333 
   1334 	/* Pipeline stat & streamout queries. */
   1335 	if (enable) {
   1336 		sctx->b.flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
   1337 		sctx->b.flags |= SI_CONTEXT_START_PIPELINE_STATS;
   1338 	} else {
   1339 		sctx->b.flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
   1340 		sctx->b.flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
   1341 	}
   1342 
   1343 	/* Occlusion queries. */
   1344 	if (sctx->occlusion_queries_disabled != !enable) {
   1345 		sctx->occlusion_queries_disabled = !enable;
   1346 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
   1347 	}
   1348 }
   1349 
   1350 static void si_set_occlusion_query_state(struct pipe_context *ctx,
   1351 					 bool old_enable,
   1352 					 bool old_perfect_enable)
   1353 {
   1354 	struct si_context *sctx = (struct si_context*)ctx;
   1355 
   1356 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
   1357 
   1358 	bool perfect_enable = sctx->b.num_perfect_occlusion_queries != 0;
   1359 
   1360 	if (perfect_enable != old_perfect_enable)
   1361 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
   1362 }
   1363 
   1364 static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
   1365 {
   1366 	struct si_context *sctx = (struct si_context*)ctx;
   1367 
   1368 	st->saved_compute = sctx->cs_shader_state.program;
   1369 
   1370 	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
   1371 	si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
   1372 }
   1373 
/* Emit DB_RENDER_CONTROL, DB_COUNT_CONTROL, DB_RENDER_OVERRIDE2, and
 * DB_SHADER_CONTROL, derived from depth-copy/decompress/clear flags,
 * occlusion-query state, and the bound rasterizer. */
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned db_shader_control;

	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);

	/* DB_RENDER_CONTROL */
	if (sctx->dbcb_depth_copy_enabled ||
	    sctx->dbcb_stencil_copy_enabled) {
		/* DB->CB copy mode (used for reading back depth/stencil). */
		radeon_emit(cs,
			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
			    S_028000_COPY_CENTROID(1) |
			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
	} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
		/* In-place decompression. */
		radeon_emit(cs,
			    S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
			    S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
	} else {
		/* Normal rendering; fast-clear enables as requested. */
		radeon_emit(cs,
			    S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
			    S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
	}

	/* DB_COUNT_CONTROL (occlusion queries) */
	if (sctx->b.num_occlusion_queries > 0 &&
	    !sctx->occlusion_queries_disabled) {
		bool perfect = sctx->b.num_perfect_occlusion_queries > 0;

		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
				    S_028004_ZPASS_ENABLE(1) |
				    S_028004_SLICE_EVEN_ENABLE(1) |
				    S_028004_SLICE_ODD_ENABLE(1));
		} else {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
		}
	} else {
		/* Disable occlusion queries. */
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs, 0);
		} else {
			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
		}
	}

	/* DB_RENDER_OVERRIDE2 */
	radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
		S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));

	db_shader_control = sctx->ps_db_shader_control;

	/* Bug workaround for smoothing (overrasterization) on SI. */
	if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
		db_shader_control &= C_02880C_Z_ORDER;
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
	}

	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
	if (!rs || !rs->multisample_enable)
		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;

	/* Disable RB+ dual-quad mode when the hardware has it but it is not
	 * allowed. */
	if (sctx->screen->has_rbplus &&
	    !sctx->screen->rbplus_allowed)
		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);

	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
			       db_shader_control);
}
   1451 
   1452 /*
   1453  * format translation
   1454  */
/* Translate a gallium color format into the CB_COLOR*_INFO FORMAT field
 * (V_028C70_COLOR_*). Returns V_028C70_COLOR_INVALID for formats the CB
 * cannot render to. */
static uint32_t si_translate_colorformat(enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);
	if (!desc)
		return V_028C70_COLOR_INVALID;

/* True when the per-channel bit sizes match (x,y,z,w) in memory order.
 * NOTE(review): this macro is never #undef'd; confirm nothing later in the
 * file relies on it before scoping it with #undef. */
#define HAS_SIZE(x,y,z,w) \
	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
         desc->channel[2].size == (z) && desc->channel[3].size == (w))

	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
		return V_028C70_COLOR_10_11_11;

	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
		return V_028C70_COLOR_INVALID;

	/* hw cannot support mixed formats (except depth/stencil, since
	 * stencil is not written to). */
	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
		return V_028C70_COLOR_INVALID;

	/* Dispatch on channel count, then on the per-channel bit layout. */
	switch (desc->nr_channels) {
	case 1:
		switch (desc->channel[0].size) {
		case 8:
			return V_028C70_COLOR_8;
		case 16:
			return V_028C70_COLOR_16;
		case 32:
			return V_028C70_COLOR_32;
		}
		break;
	case 2:
		if (desc->channel[0].size == desc->channel[1].size) {
			switch (desc->channel[0].size) {
			case 8:
				return V_028C70_COLOR_8_8;
			case 16:
				return V_028C70_COLOR_16_16;
			case 32:
				return V_028C70_COLOR_32_32;
			}
		} else if (HAS_SIZE(8,24,0,0)) {
			return V_028C70_COLOR_24_8;
		} else if (HAS_SIZE(24,8,0,0)) {
			return V_028C70_COLOR_8_24;
		}
		break;
	case 3:
		if (HAS_SIZE(5,6,5,0)) {
			return V_028C70_COLOR_5_6_5;
		} else if (HAS_SIZE(32,8,24,0)) {
			return V_028C70_COLOR_X24_8_32_FLOAT;
		}
		break;
	case 4:
		if (desc->channel[0].size == desc->channel[1].size &&
		    desc->channel[0].size == desc->channel[2].size &&
		    desc->channel[0].size == desc->channel[3].size) {
			switch (desc->channel[0].size) {
			case 4:
				return V_028C70_COLOR_4_4_4_4;
			case 8:
				return V_028C70_COLOR_8_8_8_8;
			case 16:
				return V_028C70_COLOR_16_16_16_16;
			case 32:
				return V_028C70_COLOR_32_32_32_32;
			}
		} else if (HAS_SIZE(5,5,5,1)) {
			return V_028C70_COLOR_1_5_5_5;
		} else if (HAS_SIZE(1,5,5,5)) {
			return V_028C70_COLOR_5_5_5_1;
		} else if (HAS_SIZE(10,10,10,2)) {
			return V_028C70_COLOR_2_10_10_10;
		}
		break;
	}
	return V_028C70_COLOR_INVALID;
}
   1535 
   1536 static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
   1537 {
   1538 	if (SI_BIG_ENDIAN) {
   1539 		switch(colorformat) {
   1540 		/* 8-bit buffers. */
   1541 		case V_028C70_COLOR_8:
   1542 			return V_028C70_ENDIAN_NONE;
   1543 
   1544 		/* 16-bit buffers. */
   1545 		case V_028C70_COLOR_5_6_5:
   1546 		case V_028C70_COLOR_1_5_5_5:
   1547 		case V_028C70_COLOR_4_4_4_4:
   1548 		case V_028C70_COLOR_16:
   1549 		case V_028C70_COLOR_8_8:
   1550 			return V_028C70_ENDIAN_8IN16;
   1551 
   1552 		/* 32-bit buffers. */
   1553 		case V_028C70_COLOR_8_8_8_8:
   1554 		case V_028C70_COLOR_2_10_10_10:
   1555 		case V_028C70_COLOR_8_24:
   1556 		case V_028C70_COLOR_24_8:
   1557 		case V_028C70_COLOR_16_16:
   1558 			return V_028C70_ENDIAN_8IN32;
   1559 
   1560 		/* 64-bit buffers. */
   1561 		case V_028C70_COLOR_16_16_16_16:
   1562 			return V_028C70_ENDIAN_8IN16;
   1563 
   1564 		case V_028C70_COLOR_32_32:
   1565 			return V_028C70_ENDIAN_8IN32;
   1566 
   1567 		/* 128-bit buffers. */
   1568 		case V_028C70_COLOR_32_32_32_32:
   1569 			return V_028C70_ENDIAN_8IN32;
   1570 		default:
   1571 			return V_028C70_ENDIAN_NONE; /* Unsupported. */
   1572 		}
   1573 	} else {
   1574 		return V_028C70_ENDIAN_NONE;
   1575 	}
   1576 }
   1577 
   1578 static uint32_t si_translate_dbformat(enum pipe_format format)
   1579 {
   1580 	switch (format) {
   1581 	case PIPE_FORMAT_Z16_UNORM:
   1582 		return V_028040_Z_16;
   1583 	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   1584 	case PIPE_FORMAT_X8Z24_UNORM:
   1585 	case PIPE_FORMAT_Z24X8_UNORM:
   1586 	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   1587 		return V_028040_Z_24; /* deprecated on SI */
   1588 	case PIPE_FORMAT_Z32_FLOAT:
   1589 	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
   1590 		return V_028040_Z_32_FLOAT;
   1591 	default:
   1592 		return V_028040_Z_INVALID;
   1593 	}
   1594 }
   1595 
   1596 /*
   1597  * Texture translation
   1598  */
   1599 
/* Translate a gallium pixel format into the IMG_DATA_FORMAT field of an
 * image resource descriptor (V_008F14_IMG_DATA_FORMAT_*).
 *
 * \param screen          pipe screen; cast to si_screen for chip/DRM checks
 * \param format          the format being translated
 * \param desc            util_format description of \p format
 * \param first_non_void  index of the first non-void channel, or -1 if none
 * \return the hw data format, or ~0 if the format is unsupported for sampling
 */
static uint32_t si_translate_texformat(struct pipe_screen *screen,
				       enum pipe_format format,
				       const struct util_format_description *desc,
				       int first_non_void)
{
	struct si_screen *sscreen = (struct si_screen*)screen;
	/* Compressed (BCn) formats need kernel-side support that only exists
	 * in DRM >= 2.31 or any DRM 3.x. */
	bool enable_compressed_formats = (sscreen->info.drm_major == 2 &&
					  sscreen->info.drm_minor >= 31) ||
					 sscreen->info.drm_major == 3;
	bool uniform = true;
	int i;

	/* Colorspace (return non-RGB formats directly). */
	switch (desc->colorspace) {
	/* Depth stencil formats */
	case UTIL_FORMAT_COLORSPACE_ZS:
		switch (format) {
		case PIPE_FORMAT_Z16_UNORM:
			return V_008F14_IMG_DATA_FORMAT_16;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_S8X24_UINT:
			/*
			 * Implemented as an 8_8_8_8 data format to fix texture
			 * gathers in stencil sampling. This affects at least
			 * GL45-CTS.texture_cube_map_array.sampling on VI.
			 */
			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
		case PIPE_FORMAT_Z24X8_UNORM:
		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8_24;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			return V_008F14_IMG_DATA_FORMAT_24_8;
		case PIPE_FORMAT_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8;
		case PIPE_FORMAT_Z32_FLOAT:
			return V_008F14_IMG_DATA_FORMAT_32;
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
		default:
			goto out_unknown;
		}

	case UTIL_FORMAT_COLORSPACE_YUV:
		goto out_unknown; /* TODO */

	case UTIL_FORMAT_COLORSPACE_SRGB:
		/* sRGB is only supported on 1- and 4-channel layouts here. */
		if (desc->nr_channels != 4 && desc->nr_channels != 1)
			goto out_unknown;
		break;

	default:
		break;
	}

	/* RGTC (BC4/BC5) compressed formats. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_RGTC1_SNORM:
		case PIPE_FORMAT_LATC1_SNORM:
		case PIPE_FORMAT_RGTC1_UNORM:
		case PIPE_FORMAT_LATC1_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC4;
		case PIPE_FORMAT_RGTC2_SNORM:
		case PIPE_FORMAT_LATC2_SNORM:
		case PIPE_FORMAT_RGTC2_UNORM:
		case PIPE_FORMAT_LATC2_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC5;
		default:
			goto out_unknown;
		}
	}

	/* ETC2: only Stoney and GFX9+ have native hw decoding. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
	    (sscreen->info.family == CHIP_STONEY ||
	     sscreen->info.chip_class >= GFX9)) {
		switch (format) {
		case PIPE_FORMAT_ETC1_RGB8:
		case PIPE_FORMAT_ETC2_RGB8:
		case PIPE_FORMAT_ETC2_SRGB8:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
		case PIPE_FORMAT_ETC2_RGB8A1:
		case PIPE_FORMAT_ETC2_SRGB8A1:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
		case PIPE_FORMAT_ETC2_RGBA8:
		case PIPE_FORMAT_ETC2_SRGBA8:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
		case PIPE_FORMAT_ETC2_R11_UNORM:
		case PIPE_FORMAT_ETC2_R11_SNORM:
			return V_008F14_IMG_DATA_FORMAT_ETC2_R;
		case PIPE_FORMAT_ETC2_RG11_UNORM:
		case PIPE_FORMAT_ETC2_RG11_SNORM:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
		default:
			goto out_unknown;
		}
	}

	/* BPTC (BC6H/BC7) compressed formats. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_BPTC_RGBA_UNORM:
		case PIPE_FORMAT_BPTC_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC7;
		case PIPE_FORMAT_BPTC_RGB_FLOAT:
		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
			return V_008F14_IMG_DATA_FORMAT_BC6;
		default:
			goto out_unknown;
		}
	}

	/* Subsampled (packed YUV-style RGB) layouts. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
		switch (format) {
		case PIPE_FORMAT_R8G8_B8G8_UNORM:
		case PIPE_FORMAT_G8R8_B8R8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_GB_GR;
		case PIPE_FORMAT_G8R8_G8B8_UNORM:
		case PIPE_FORMAT_R8G8_R8B8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BG_RG;
		default:
			goto out_unknown;
		}
	}

	/* S3TC (DXT/BC1-3) compressed formats. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_DXT1_RGB:
		case PIPE_FORMAT_DXT1_RGBA:
		case PIPE_FORMAT_DXT1_SRGB:
		case PIPE_FORMAT_DXT1_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC1;
		case PIPE_FORMAT_DXT3_RGBA:
		case PIPE_FORMAT_DXT3_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC2;
		case PIPE_FORMAT_DXT5_RGBA:
		case PIPE_FORMAT_DXT5_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC3;
		default:
			goto out_unknown;
		}
	}

	/* Packed float formats that are not "plain" layouts. */
	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_10_11_11;
	}

	/* R8G8Bx_SNORM - TODO CxV8U8 */

	/* hw cannot support mixed formats (except depth/stencil, since only
	 * depth is read).*/
	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
		goto out_unknown;

	/* See whether the components are of the same size. */
	for (i = 1; i < desc->nr_channels; i++) {
		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
	}

	/* Non-uniform formats. */
	if (!uniform) {
		switch(desc->nr_channels) {
		case 3:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 6 &&
			    desc->channel[2].size == 5) {
				return V_008F14_IMG_DATA_FORMAT_5_6_5;
			}
			goto out_unknown;
		case 4:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 5 &&
			    desc->channel[2].size == 5 &&
			    desc->channel[3].size == 1) {
				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
			}
			if (desc->channel[0].size == 1 &&
			    desc->channel[1].size == 5 &&
			    desc->channel[2].size == 5 &&
			    desc->channel[3].size == 5) {
				return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
			}
			if (desc->channel[0].size == 10 &&
			    desc->channel[1].size == 10 &&
			    desc->channel[2].size == 10 &&
			    desc->channel[3].size == 2) {
				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
			}
			goto out_unknown;
		}
		goto out_unknown;
	}

	/* Reject formats with no (or an out-of-range) non-void channel,
	 * since the uniform path below indexes desc->channel[first_non_void]. */
	if (first_non_void < 0 || first_non_void > 3)
		goto out_unknown;

	/* uniform formats */
	switch (desc->channel[first_non_void].size) {
	case 4:
		switch (desc->nr_channels) {
#if 0 /* Not supported for render targets */
		case 2:
			return V_008F14_IMG_DATA_FORMAT_4_4;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
		}
		break;
	case 8:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_8;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_8_8;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_16;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_16_16;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_32;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_32_32;
#if 0 /* Not supported for render targets */
		case 3:
			return V_008F14_IMG_DATA_FORMAT_32_32_32;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
		}
	}

out_unknown:
	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
	return ~0;
}
   1857 
   1858 static unsigned si_tex_wrap(unsigned wrap)
   1859 {
   1860 	switch (wrap) {
   1861 	default:
   1862 	case PIPE_TEX_WRAP_REPEAT:
   1863 		return V_008F30_SQ_TEX_WRAP;
   1864 	case PIPE_TEX_WRAP_CLAMP:
   1865 		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
   1866 	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
   1867 		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
   1868 	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   1869 		return V_008F30_SQ_TEX_CLAMP_BORDER;
   1870 	case PIPE_TEX_WRAP_MIRROR_REPEAT:
   1871 		return V_008F30_SQ_TEX_MIRROR;
   1872 	case PIPE_TEX_WRAP_MIRROR_CLAMP:
   1873 		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
   1874 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   1875 		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
   1876 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   1877 		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
   1878 	}
   1879 }
   1880 
   1881 static unsigned si_tex_mipfilter(unsigned filter)
   1882 {
   1883 	switch (filter) {
   1884 	case PIPE_TEX_MIPFILTER_NEAREST:
   1885 		return V_008F38_SQ_TEX_Z_FILTER_POINT;
   1886 	case PIPE_TEX_MIPFILTER_LINEAR:
   1887 		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
   1888 	default:
   1889 	case PIPE_TEX_MIPFILTER_NONE:
   1890 		return V_008F38_SQ_TEX_Z_FILTER_NONE;
   1891 	}
   1892 }
   1893 
   1894 static unsigned si_tex_compare(unsigned compare)
   1895 {
   1896 	switch (compare) {
   1897 	default:
   1898 	case PIPE_FUNC_NEVER:
   1899 		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
   1900 	case PIPE_FUNC_LESS:
   1901 		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
   1902 	case PIPE_FUNC_EQUAL:
   1903 		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
   1904 	case PIPE_FUNC_LEQUAL:
   1905 		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
   1906 	case PIPE_FUNC_GREATER:
   1907 		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
   1908 	case PIPE_FUNC_NOTEQUAL:
   1909 		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
   1910 	case PIPE_FUNC_GEQUAL:
   1911 		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
   1912 	case PIPE_FUNC_ALWAYS:
   1913 		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
   1914 	}
   1915 }
   1916 
   1917 static unsigned si_tex_dim(struct si_screen *sscreen, struct r600_texture *rtex,
   1918 			   unsigned view_target, unsigned nr_samples)
   1919 {
   1920 	unsigned res_target = rtex->resource.b.b.target;
   1921 
   1922 	if (view_target == PIPE_TEXTURE_CUBE ||
   1923 	    view_target == PIPE_TEXTURE_CUBE_ARRAY)
   1924 		res_target = view_target;
   1925 	/* If interpreting cubemaps as something else, set 2D_ARRAY. */
   1926 	else if (res_target == PIPE_TEXTURE_CUBE ||
   1927 		 res_target == PIPE_TEXTURE_CUBE_ARRAY)
   1928 		res_target = PIPE_TEXTURE_2D_ARRAY;
   1929 
   1930 	/* GFX9 allocates 1D textures as 2D. */
   1931 	if ((res_target == PIPE_TEXTURE_1D ||
   1932 	     res_target == PIPE_TEXTURE_1D_ARRAY) &&
   1933 	    sscreen->info.chip_class >= GFX9 &&
   1934 	    rtex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) {
   1935 		if (res_target == PIPE_TEXTURE_1D)
   1936 			res_target = PIPE_TEXTURE_2D;
   1937 		else
   1938 			res_target = PIPE_TEXTURE_2D_ARRAY;
   1939 	}
   1940 
   1941 	switch (res_target) {
   1942 	default:
   1943 	case PIPE_TEXTURE_1D:
   1944 		return V_008F1C_SQ_RSRC_IMG_1D;
   1945 	case PIPE_TEXTURE_1D_ARRAY:
   1946 		return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
   1947 	case PIPE_TEXTURE_2D:
   1948 	case PIPE_TEXTURE_RECT:
   1949 		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
   1950 					V_008F1C_SQ_RSRC_IMG_2D;
   1951 	case PIPE_TEXTURE_2D_ARRAY:
   1952 		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
   1953 					V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   1954 	case PIPE_TEXTURE_3D:
   1955 		return V_008F1C_SQ_RSRC_IMG_3D;
   1956 	case PIPE_TEXTURE_CUBE:
   1957 	case PIPE_TEXTURE_CUBE_ARRAY:
   1958 		return V_008F1C_SQ_RSRC_IMG_CUBE;
   1959 	}
   1960 }
   1961 
   1962 /*
   1963  * Format support testing
   1964  */
   1965 
   1966 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
   1967 {
   1968 	const struct util_format_description *desc = util_format_description(format);
   1969 	if (!desc)
   1970 		return false;
   1971 
   1972 	return si_translate_texformat(screen, format, desc,
   1973 				      util_format_get_first_non_void_channel(format)) != ~0U;
   1974 }
   1975 
   1976 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
   1977 					       const struct util_format_description *desc,
   1978 					       int first_non_void)
   1979 {
   1980 	int i;
   1981 
   1982 	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
   1983 		return V_008F0C_BUF_DATA_FORMAT_10_11_11;
   1984 
   1985 	assert(first_non_void >= 0);
   1986 
   1987 	if (desc->nr_channels == 4 &&
   1988 	    desc->channel[0].size == 10 &&
   1989 	    desc->channel[1].size == 10 &&
   1990 	    desc->channel[2].size == 10 &&
   1991 	    desc->channel[3].size == 2)
   1992 		return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
   1993 
   1994 	/* See whether the components are of the same size. */
   1995 	for (i = 0; i < desc->nr_channels; i++) {
   1996 		if (desc->channel[first_non_void].size != desc->channel[i].size)
   1997 			return V_008F0C_BUF_DATA_FORMAT_INVALID;
   1998 	}
   1999 
   2000 	switch (desc->channel[first_non_void].size) {
   2001 	case 8:
   2002 		switch (desc->nr_channels) {
   2003 		case 1:
   2004 		case 3: /* 3 loads */
   2005 			return V_008F0C_BUF_DATA_FORMAT_8;
   2006 		case 2:
   2007 			return V_008F0C_BUF_DATA_FORMAT_8_8;
   2008 		case 4:
   2009 			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
   2010 		}
   2011 		break;
   2012 	case 16:
   2013 		switch (desc->nr_channels) {
   2014 		case 1:
   2015 		case 3: /* 3 loads */
   2016 			return V_008F0C_BUF_DATA_FORMAT_16;
   2017 		case 2:
   2018 			return V_008F0C_BUF_DATA_FORMAT_16_16;
   2019 		case 4:
   2020 			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
   2021 		}
   2022 		break;
   2023 	case 32:
   2024 		switch (desc->nr_channels) {
   2025 		case 1:
   2026 			return V_008F0C_BUF_DATA_FORMAT_32;
   2027 		case 2:
   2028 			return V_008F0C_BUF_DATA_FORMAT_32_32;
   2029 		case 3:
   2030 			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
   2031 		case 4:
   2032 			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
   2033 		}
   2034 		break;
   2035 	case 64:
   2036 		/* Legacy double formats. */
   2037 		switch (desc->nr_channels) {
   2038 		case 1: /* 1 load */
   2039 			return V_008F0C_BUF_DATA_FORMAT_32_32;
   2040 		case 2: /* 1 load */
   2041 			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
   2042 		case 3: /* 3 loads */
   2043 			return V_008F0C_BUF_DATA_FORMAT_32_32;
   2044 		case 4: /* 2 loads */
   2045 			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
   2046 		}
   2047 		break;
   2048 	}
   2049 
   2050 	return V_008F0C_BUF_DATA_FORMAT_INVALID;
   2051 }
   2052 
   2053 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
   2054 					      const struct util_format_description *desc,
   2055 					      int first_non_void)
   2056 {
   2057 	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
   2058 		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
   2059 
   2060 	assert(first_non_void >= 0);
   2061 
   2062 	switch (desc->channel[first_non_void].type) {
   2063 	case UTIL_FORMAT_TYPE_SIGNED:
   2064 	case UTIL_FORMAT_TYPE_FIXED:
   2065 		if (desc->channel[first_non_void].size >= 32 ||
   2066 		    desc->channel[first_non_void].pure_integer)
   2067 			return V_008F0C_BUF_NUM_FORMAT_SINT;
   2068 		else if (desc->channel[first_non_void].normalized)
   2069 			return V_008F0C_BUF_NUM_FORMAT_SNORM;
   2070 		else
   2071 			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
   2072 		break;
   2073 	case UTIL_FORMAT_TYPE_UNSIGNED:
   2074 		if (desc->channel[first_non_void].size >= 32 ||
   2075 		    desc->channel[first_non_void].pure_integer)
   2076 			return V_008F0C_BUF_NUM_FORMAT_UINT;
   2077 		else if (desc->channel[first_non_void].normalized)
   2078 			return V_008F0C_BUF_NUM_FORMAT_UNORM;
   2079 		else
   2080 			return V_008F0C_BUF_NUM_FORMAT_USCALED;
   2081 		break;
   2082 	case UTIL_FORMAT_TYPE_FLOAT:
   2083 	default:
   2084 		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
   2085 	}
   2086 }
   2087 
   2088 static unsigned si_is_vertex_format_supported(struct pipe_screen *screen,
   2089 					      enum pipe_format format,
   2090 					      unsigned usage)
   2091 {
   2092 	const struct util_format_description *desc;
   2093 	int first_non_void;
   2094 	unsigned data_format;
   2095 
   2096 	assert((usage & ~(PIPE_BIND_SHADER_IMAGE |
   2097 			  PIPE_BIND_SAMPLER_VIEW |
   2098 			  PIPE_BIND_VERTEX_BUFFER)) == 0);
   2099 
   2100 	desc = util_format_description(format);
   2101 	if (!desc)
   2102 		return 0;
   2103 
   2104 	/* There are no native 8_8_8 or 16_16_16 data formats, and we currently
   2105 	 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well
   2106 	 * for read-only access (with caveats surrounding bounds checks), but
   2107 	 * obviously fails for write access which we have to implement for
   2108 	 * shader images. Luckily, OpenGL doesn't expect this to be supported
   2109 	 * anyway, and so the only impact is on PBO uploads / downloads, which
   2110 	 * shouldn't be expected to be fast for GL_RGB anyway.
   2111 	 */
   2112 	if (desc->block.bits == 3 * 8 ||
   2113 	    desc->block.bits == 3 * 16) {
   2114 		if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) {
   2115 		    usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW);
   2116 			if (!usage)
   2117 				return 0;
   2118 		}
   2119 	}
   2120 
   2121 	first_non_void = util_format_get_first_non_void_channel(format);
   2122 	data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
   2123 	if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID)
   2124 		return 0;
   2125 
   2126 	return usage;
   2127 }
   2128 
   2129 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
   2130 {
   2131 	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
   2132 		si_translate_colorswap(format, false) != ~0U;
   2133 }
   2134 
   2135 static bool si_is_zs_format_supported(enum pipe_format format)
   2136 {
   2137 	return si_translate_dbformat(format) != V_028040_Z_INVALID;
   2138 }
   2139 
/* pipe_screen::is_format_supported implementation.
 *
 * Returns true only if ALL requested \p usage bits are supported for
 * \p format with the given texture target and sample count.
 */
static boolean si_is_format_supported(struct pipe_screen *screen,
				      enum pipe_format format,
				      enum pipe_texture_target target,
				      unsigned sample_count,
				      unsigned usage)
{
	unsigned retval = 0;

	if (target >= PIPE_MAX_TEXTURE_TYPES) {
		R600_ERR("r600: unsupported texture type %d\n", target);
		return false;
	}

	if (!util_format_is_supported(format, usage))
		return false;

	if (sample_count > 1) {
		if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
			return false;

		/* MSAA shader images are not supported. */
		if (usage & PIPE_BIND_SHADER_IMAGE)
			return false;

		switch (sample_count) {
		case 2:
		case 4:
		case 8:
			break;
		case 16:
			/* 16x MSAA is only advertised for the NONE format
			 * (i.e. framebuffers without attachments). */
			if (format == PIPE_FORMAT_NONE)
				return true;
			else
				return false;
		default:
			return false;
		}
	}

	if (usage & (PIPE_BIND_SAMPLER_VIEW |
		     PIPE_BIND_SHADER_IMAGE)) {
		/* Buffers use the vertex-format path; images/textures use the
		 * sampler-format path. */
		if (target == PIPE_BUFFER) {
			retval |= si_is_vertex_format_supported(
				screen, format, usage & (PIPE_BIND_SAMPLER_VIEW |
						         PIPE_BIND_SHADER_IMAGE));
		} else {
			if (si_is_sampler_format_supported(screen, format))
				retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
						   PIPE_BIND_SHADER_IMAGE);
		}
	}

	if ((usage & (PIPE_BIND_RENDER_TARGET |
		      PIPE_BIND_DISPLAY_TARGET |
		      PIPE_BIND_SCANOUT |
		      PIPE_BIND_SHARED |
		      PIPE_BIND_BLENDABLE)) &&
	    si_is_colorbuffer_format_supported(format)) {
		retval |= usage &
			  (PIPE_BIND_RENDER_TARGET |
			   PIPE_BIND_DISPLAY_TARGET |
			   PIPE_BIND_SCANOUT |
			   PIPE_BIND_SHARED);
		/* Pure-integer and Z/S formats are not blendable. */
		if (!util_format_is_pure_integer(format) &&
		    !util_format_is_depth_or_stencil(format))
			retval |= usage & PIPE_BIND_BLENDABLE;
	}

	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
	    si_is_zs_format_supported(format)) {
		retval |= PIPE_BIND_DEPTH_STENCIL;
	}

	if (usage & PIPE_BIND_VERTEX_BUFFER) {
		retval |= si_is_vertex_format_supported(screen, format,
							PIPE_BIND_VERTEX_BUFFER);
	}

	/* Linear layout is fine except for compressed and Z/S resources. */
	if ((usage & PIPE_BIND_LINEAR) &&
	    !util_format_is_compressed(format) &&
	    !(usage & PIPE_BIND_DEPTH_STENCIL))
		retval |= PIPE_BIND_LINEAR;

	/* Success requires every requested bit to be granted. */
	return retval == usage;
}
   2224 
   2225 /*
   2226  * framebuffer handling
   2227  */
   2228 
/* Select the four SPI_SHADER_COL_FORMAT variants for a color surface.
 *
 * \param surf      surface whose spi_shader_col_format* fields are written
 * \param format    hw CB color format (V_028C70_COLOR_*)
 * \param swap      hw component swap (V_028C70_SWAP_*)
 * \param ntype     hw number type (V_028C70_NUMBER_*)
 * \param is_depth  true for depth surfaces (DB->CB copy path)
 */
static void si_choose_spi_color_formats(struct r600_surface *surf,
					unsigned format, unsigned swap,
					unsigned ntype, bool is_depth)
{
	/* Alpha is needed for alpha-to-coverage.
	 * Blending may be with or without alpha.
	 */
	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
	unsigned alpha = 0; /* exports alpha, but may not support blending */
	unsigned blend = 0; /* supports blending, but may not export alpha */
	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */

	/* Choose the SPI color formats. These are required values for RB+.
	 * Other chips have multiple choices, though they are not necessarily better.
	 */
	switch (format) {
	case V_028C70_COLOR_5_6_5:
	case V_028C70_COLOR_1_5_5_5:
	case V_028C70_COLOR_5_5_5_1:
	case V_028C70_COLOR_4_4_4_4:
	case V_028C70_COLOR_10_11_11:
	case V_028C70_COLOR_11_11_10:
	case V_028C70_COLOR_8:
	case V_028C70_COLOR_8_8:
	case V_028C70_COLOR_8_8_8_8:
	case V_028C70_COLOR_10_10_10_2:
	case V_028C70_COLOR_2_10_10_10:
		/* <= 16 bits per channel: a single 16-bit export covers all
		 * four variants. */
		if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		break;

	case V_028C70_COLOR_16:
	case V_028C70_COLOR_16_16:
	case V_028C70_COLOR_16_16_16_16:
		if (ntype == V_028C70_NUMBER_UNORM ||
		    ntype == V_028C70_NUMBER_SNORM) {
			/* UNORM16 and SNORM16 don't support blending */
			if (ntype == V_028C70_NUMBER_UNORM)
				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
			else
				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;

			/* Use 32 bits per channel for blending. */
			if (format == V_028C70_COLOR_16) {
				if (swap == V_028C70_SWAP_STD) { /* R */
					blend = V_028714_SPI_SHADER_32_R;
					blend_alpha = V_028714_SPI_SHADER_32_AR;
				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else if (format == V_028C70_COLOR_16_16) {
				if (swap == V_028C70_SWAP_STD) { /* RG */
					blend = V_028714_SPI_SHADER_32_GR;
					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
				} else if (swap == V_028C70_SWAP_ALT) /* RA */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else /* 16_16_16_16 */
				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_FLOAT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32:
		/* 32-bit single channel: export layout depends on where the
		 * channel sits (R vs. A) per the component swap. */
		if (swap == V_028C70_SWAP_STD) { /* R */
			blend = normal = V_028714_SPI_SHADER_32_R;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32:
		if (swap == V_028C70_SWAP_STD) { /* RG */
			blend = normal = V_028714_SPI_SHADER_32_GR;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (swap == V_028C70_SWAP_ALT) /* RA */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32_32_32:
	case V_028C70_COLOR_8_24:
	case V_028C70_COLOR_24_8:
	case V_028C70_COLOR_X24_8_32_FLOAT:
		/* Full 32-bit-per-channel export for all variants. */
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
		break;

	default:
		assert(0);
		return;
	}

	/* The DB->CB copy needs 32_ABGR. */
	if (is_depth)
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;

	surf->spi_shader_col_format = normal;
	surf->spi_shader_col_format_alpha = alpha;
	surf->spi_shader_col_format_blend = blend;
	surf->spi_shader_col_format_blend_alpha = blend_alpha;
}
   2345 
   2346 static void si_initialize_color_surface(struct si_context *sctx,
   2347 					struct r600_surface *surf)
   2348 {
   2349 	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
   2350 	unsigned color_info, color_attrib;
   2351 	unsigned format, swap, ntype, endian;
   2352 	const struct util_format_description *desc;
   2353 	int firstchan;
   2354 	unsigned blend_clamp = 0, blend_bypass = 0;
   2355 
   2356 	desc = util_format_description(surf->base.format);
   2357 	for (firstchan = 0; firstchan < 4; firstchan++) {
   2358 		if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
   2359 			break;
   2360 		}
   2361 	}
   2362 	if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
   2363 		ntype = V_028C70_NUMBER_FLOAT;
   2364 	} else {
   2365 		ntype = V_028C70_NUMBER_UNORM;
   2366 		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
   2367 			ntype = V_028C70_NUMBER_SRGB;
   2368 		else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
   2369 			if (desc->channel[firstchan].pure_integer) {
   2370 				ntype = V_028C70_NUMBER_SINT;
   2371 			} else {
   2372 				assert(desc->channel[firstchan].normalized);
   2373 				ntype = V_028C70_NUMBER_SNORM;
   2374 			}
   2375 		} else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
   2376 			if (desc->channel[firstchan].pure_integer) {
   2377 				ntype = V_028C70_NUMBER_UINT;
   2378 			} else {
   2379 				assert(desc->channel[firstchan].normalized);
   2380 				ntype = V_028C70_NUMBER_UNORM;
   2381 			}
   2382 		}
   2383 	}
   2384 
   2385 	format = si_translate_colorformat(surf->base.format);
   2386 	if (format == V_028C70_COLOR_INVALID) {
   2387 		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
   2388 	}
   2389 	assert(format != V_028C70_COLOR_INVALID);
   2390 	swap = si_translate_colorswap(surf->base.format, false);
   2391 	endian = si_colorformat_endian_swap(format);
   2392 
   2393 	/* blend clamp should be set for all NORM/SRGB types */
   2394 	if (ntype == V_028C70_NUMBER_UNORM ||
   2395 	    ntype == V_028C70_NUMBER_SNORM ||
   2396 	    ntype == V_028C70_NUMBER_SRGB)
   2397 		blend_clamp = 1;
   2398 
   2399 	/* set blend bypass according to docs if SINT/UINT or
   2400 	   8/24 COLOR variants */
   2401 	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
   2402 	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
   2403 	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
   2404 		blend_clamp = 0;
   2405 		blend_bypass = 1;
   2406 	}
   2407 
   2408 	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) {
   2409 		if (format == V_028C70_COLOR_8 ||
   2410 		    format == V_028C70_COLOR_8_8 ||
   2411 		    format == V_028C70_COLOR_8_8_8_8)
   2412 			surf->color_is_int8 = true;
   2413 		else if (format == V_028C70_COLOR_10_10_10_2 ||
   2414 			 format == V_028C70_COLOR_2_10_10_10)
   2415 			surf->color_is_int10 = true;
   2416 	}
   2417 
   2418 	color_info = S_028C70_FORMAT(format) |
   2419 		S_028C70_COMP_SWAP(swap) |
   2420 		S_028C70_BLEND_CLAMP(blend_clamp) |
   2421 		S_028C70_BLEND_BYPASS(blend_bypass) |
   2422 		S_028C70_SIMPLE_FLOAT(1) |
   2423 		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
   2424 				    ntype != V_028C70_NUMBER_SNORM &&
   2425 				    ntype != V_028C70_NUMBER_SRGB &&
   2426 				    format != V_028C70_COLOR_8_24 &&
   2427 				    format != V_028C70_COLOR_24_8) |
   2428 		S_028C70_NUMBER_TYPE(ntype) |
   2429 		S_028C70_ENDIAN(endian);
   2430 
   2431 	/* Intensity is implemented as Red, so treat it that way. */
   2432 	color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
   2433 						  util_format_is_intensity(surf->base.format));
   2434 
   2435 	if (rtex->resource.b.b.nr_samples > 1) {
   2436 		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
   2437 
   2438 		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
   2439 				S_028C74_NUM_FRAGMENTS(log_samples);
   2440 
   2441 		if (rtex->fmask.size) {
   2442 			color_info |= S_028C70_COMPRESSION(1);
   2443 			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
   2444 
   2445 			if (sctx->b.chip_class == SI) {
   2446 				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
   2447 				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
   2448 			}
   2449 		}
   2450 	}
   2451 
   2452 	if (sctx->b.chip_class >= VI) {
   2453 		unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
   2454 		unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
   2455 
   2456 		/* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
   2457 		   64 for APU because all of our APUs to date use DIMMs which have
   2458 		   a request granularity size of 64B while all other chips have a
   2459 		   32B request size */
   2460 		if (!sctx->screen->info.has_dedicated_vram)
   2461 			min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
   2462 
   2463 		if (rtex->resource.b.b.nr_samples > 1) {
   2464 			if (rtex->surface.bpe == 1)
   2465 				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
   2466 			else if (rtex->surface.bpe == 2)
   2467 				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
   2468 		}
   2469 
   2470 		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
   2471 				       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
   2472 		                       S_028C78_INDEPENDENT_64B_BLOCKS(1);
   2473 	}
   2474 
   2475 	/* This must be set for fast clear to work without FMASK. */
   2476 	if (!rtex->fmask.size && sctx->b.chip_class == SI) {
   2477 		unsigned bankh = util_logbase2(rtex->surface.u.legacy.bankh);
   2478 		color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
   2479 	}
   2480 
   2481 	unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
   2482 			      S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
   2483 
   2484 	if (sctx->b.chip_class >= GFX9) {
   2485 		unsigned mip0_depth = util_max_layer(&rtex->resource.b.b, 0);
   2486 
   2487 		color_view |= S_028C6C_MIP_LEVEL(surf->base.u.tex.level);
   2488 		color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
   2489 				S_028C74_RESOURCE_TYPE(rtex->surface.u.gfx9.resource_type);
   2490 		surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(surf->width0 - 1) |
   2491 					 S_028C68_MIP0_HEIGHT(surf->height0 - 1) |
   2492 					 S_028C68_MAX_MIP(rtex->resource.b.b.last_level);
   2493 	}
   2494 
   2495 	surf->cb_color_view = color_view;
   2496 	surf->cb_color_info = color_info;
   2497 	surf->cb_color_attrib = color_attrib;
   2498 
   2499 	/* Determine pixel shader export format */
   2500 	si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
   2501 
   2502 	surf->color_initialized = true;
   2503 }
   2504 
/* Compute the DB (depth/stencil) register values for a surface and cache
 * them in *surf for later emission by the framebuffer atom.
 *
 * Fills in db_depth_base/db_stencil_base, DB_Z_INFO/DB_STENCIL_INFO,
 * the depth size/slice registers and the HTILE registers, handling the
 * different register layouts of GFX9 vs. SI-CI-VI.
 */
static void si_init_depth_surface(struct si_context *sctx,
				  struct r600_surface *surf)
{
	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
	unsigned level = surf->base.u.tex.level;
	unsigned format, stencil_format;
	uint32_t z_info, s_info;

	format = si_translate_dbformat(rtex->db_render_format);
	/* Stencil, when present, is always an 8-bit buffer. */
	stencil_format = rtex->surface.has_stencil ?
				 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

	assert(format != V_028040_Z_INVALID);
	if (format == V_028040_Z_INVALID)
		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);

	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
	/* HTILE registers default to "disabled"; enabled below if applicable. */
	surf->db_htile_data_base = 0;
	surf->db_htile_surface = 0;

	if (sctx->b.chip_class >= GFX9) {
		/* On GFX9 all mip levels share one base address (MIPID selects
		 * the level), hence the surf_offset == 0 expectation. */
		assert(rtex->surface.u.gfx9.surf_offset == 0);
		surf->db_depth_base = rtex->resource.gpu_address >> 8;
		surf->db_stencil_base = (rtex->resource.gpu_address +
					 rtex->surface.u.gfx9.stencil_offset) >> 8;
		z_info = S_028038_FORMAT(format) |
			 S_028038_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)) |
			 S_028038_SW_MODE(rtex->surface.u.gfx9.surf.swizzle_mode) |
			 S_028038_MAXMIP(rtex->resource.b.b.last_level);
		s_info = S_02803C_FORMAT(stencil_format) |
			 S_02803C_SW_MODE(rtex->surface.u.gfx9.stencil.swizzle_mode);
		surf->db_z_info2 = S_028068_EPITCH(rtex->surface.u.gfx9.surf.epitch);
		surf->db_stencil_info2 = S_02806C_EPITCH(rtex->surface.u.gfx9.stencil.epitch);
		surf->db_depth_view |= S_028008_MIPID(level);
		surf->db_depth_size = S_02801C_X_MAX(rtex->resource.b.b.width0 - 1) |
				      S_02801C_Y_MAX(rtex->resource.b.b.height0 - 1);

		if (si_htile_enabled(rtex, level)) {
			z_info |= S_028038_TILE_SURFACE_ENABLE(1) |
				  S_028038_ALLOW_EXPCLEAR(1);

			if (rtex->tc_compatible_htile) {
				/* Fewer Z planes fit per tile for 16-bit MSAA depth. */
				unsigned max_zplanes = 4;

				if (rtex->db_render_format == PIPE_FORMAT_Z16_UNORM &&
				    rtex->resource.b.b.nr_samples > 1)
					max_zplanes = 2;

				z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
					  S_028038_ITERATE_FLUSH(1);
				s_info |= S_02803C_ITERATE_FLUSH(1);
			}

			if (rtex->surface.has_stencil) {
				/* Stencil buffer workaround ported from the SI-CI-VI code.
				 * See that for explanation.
				 */
				s_info |= S_02803C_ALLOW_EXPCLEAR(rtex->resource.b.b.nr_samples <= 1);
			} else {
				/* Use all HTILE for depth if there's no stencil. */
				s_info |= S_02803C_TILE_STENCIL_DISABLE(1);
			}

			surf->db_htile_data_base = (rtex->resource.gpu_address +
						    rtex->htile_offset) >> 8;
			surf->db_htile_surface = S_028ABC_FULL_CACHE(1) |
						 S_028ABC_PIPE_ALIGNED(rtex->surface.u.gfx9.htile.pipe_aligned) |
						 S_028ABC_RB_ALIGNED(rtex->surface.u.gfx9.htile.rb_aligned);
		}
	} else {
		/* SI-CI-VI */
		struct legacy_surf_level *levelinfo = &rtex->surface.u.legacy.level[level];

		/* Depth/stencil dimensions must be tile (8x8) aligned here. */
		assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);

		/* Each mip level has its own address on pre-GFX9 chips. */
		surf->db_depth_base = (rtex->resource.gpu_address +
				       rtex->surface.u.legacy.level[level].offset) >> 8;
		surf->db_stencil_base = (rtex->resource.gpu_address +
					 rtex->surface.u.legacy.stencil_level[level].offset) >> 8;

		z_info = S_028040_FORMAT(format) |
			 S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
		s_info = S_028044_FORMAT(stencil_format);
		surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);

		if (sctx->b.chip_class >= CIK) {
			/* CIK+ programs tiling parameters directly from the
			 * tile mode arrays instead of a tile mode index. */
			struct radeon_info *info = &sctx->screen->info;
			unsigned index = rtex->surface.u.legacy.tiling_index[level];
			unsigned stencil_index = rtex->surface.u.legacy.stencil_tiling_index[level];
			unsigned macro_index = rtex->surface.u.legacy.macro_tile_index;
			unsigned tile_mode = info->si_tile_mode_array[index];
			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

			surf->db_depth_info |=
				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
			z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
			s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
		} else {
			/* SI uses tile mode indices; depth and stencil can differ. */
			unsigned tile_mode_index = si_tile_mode_index(rtex, level, false);
			z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
			tile_mode_index = si_tile_mode_index(rtex, level, true);
			s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
		}

		surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
				      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
		surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
								levelinfo->nblk_y) / 64 - 1);

		if (si_htile_enabled(rtex, level)) {
			z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
				  S_028040_ALLOW_EXPCLEAR(1);

			if (rtex->surface.has_stencil) {
				/* Workaround: For a not yet understood reason, the
				 * combination of MSAA, fast stencil clear and stencil
				 * decompress messes with subsequent stencil buffer
				 * uses. Problem was reproduced on Verde, Bonaire,
				 * Tonga, and Carrizo.
				 *
				 * Disabling EXPCLEAR works around the problem.
				 *
				 * Check piglit's arb_texture_multisample-stencil-clear
				 * test if you want to try changing this.
				 */
				if (rtex->resource.b.b.nr_samples <= 1)
					s_info |= S_028044_ALLOW_EXPCLEAR(1);
			} else if (!rtex->tc_compatible_htile) {
				/* Use all of the htile_buffer for depth if there's no stencil.
				 * This must not be set when TC-compatible HTILE is enabled
				 * due to a hw bug.
				 */
				s_info |= S_028044_TILE_STENCIL_DISABLE(1);
			}

			surf->db_htile_data_base = (rtex->resource.gpu_address +
						    rtex->htile_offset) >> 8;
			surf->db_htile_surface = S_028ABC_FULL_CACHE(1);

			if (rtex->tc_compatible_htile) {
				surf->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);

				/* More Z planes can be compressed per tile at
				 * lower sample counts. */
				if (rtex->resource.b.b.nr_samples <= 1)
					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
				else if (rtex->resource.b.b.nr_samples <= 4)
					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
				else
					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
			}
		}
	}

	surf->db_z_info = z_info;
	surf->db_stencil_info = s_info;

	surf->depth_initialized = true;
}
   2669 
   2670 void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
   2671 {
   2672 	if (sctx->decompression_enabled)
   2673 		return;
   2674 
   2675 	if (sctx->framebuffer.state.zsbuf) {
   2676 		struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
   2677 		struct r600_texture *rtex = (struct r600_texture *)surf->texture;
   2678 
   2679 		rtex->dirty_level_mask |= 1 << surf->u.tex.level;
   2680 
   2681 		if (rtex->surface.has_stencil)
   2682 			rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
   2683 	}
   2684 
   2685 	unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask;
   2686 	while (compressed_cb_mask) {
   2687 		unsigned i = u_bit_scan(&compressed_cb_mask);
   2688 		struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
   2689 		struct r600_texture *rtex = (struct r600_texture*)surf->texture;
   2690 
   2691 		if (rtex->fmask.size)
   2692 			rtex->dirty_level_mask |= 1 << surf->u.tex.level;
   2693 		if (rtex->dcc_gather_statistics)
   2694 			rtex->separate_dcc_dirty = true;
   2695 	}
   2696 }
   2697 
   2698 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
   2699 {
   2700 	for (int i = 0; i < state->nr_cbufs; ++i) {
   2701 		struct r600_surface *surf = NULL;
   2702 		struct r600_texture *rtex;
   2703 
   2704 		if (!state->cbufs[i])
   2705 			continue;
   2706 		surf = (struct r600_surface*)state->cbufs[i];
   2707 		rtex = (struct r600_texture*)surf->base.texture;
   2708 
   2709 		p_atomic_dec(&rtex->framebuffers_bound);
   2710 	}
   2711 }
   2712 
/* pipe_context::set_framebuffer_state implementation.
 *
 * Stops DCC statistics queries on the outgoing color buffers, disables
 * DCC on surfaces flagged dcc_incompatible, flushes the relevant caches,
 * copies the new state, and recomputes all per-framebuffer derived state
 * (SPI color export formats, compressed buffer masks, MSAA sample
 * locations, etc.), dirtying the affected atoms.
 */
static void si_set_framebuffer_state(struct pipe_context *ctx,
				     const struct pipe_framebuffer_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_constant_buffer constbuf = {0};
	struct r600_surface *surf = NULL;
	struct r600_texture *rtex;
	/* Snapshot of the old state, used below to decide which atoms to dirty. */
	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
	unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit;
	bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf;
	bool old_has_stencil =
		old_has_zsbuf &&
		((struct r600_texture*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;
	bool unbound = false;
	int i;

	si_update_fb_dirtiness_after_rendering(sctx);

	/* Stop the DCC statistics queries on the outgoing color buffers;
	 * they are restarted for the new buffers further down. */
	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
		if (!sctx->framebuffer.state.cbufs[i])
			continue;

		rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
		if (rtex->dcc_gather_statistics)
			vi_separate_dcc_stop_query(ctx, rtex);
	}

	/* Disable DCC if the formats are incompatible. */
	for (i = 0; i < state->nr_cbufs; i++) {
		if (!state->cbufs[i])
			continue;

		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		if (!surf->dcc_incompatible)
			continue;

		/* Since the DCC decompression calls back into set_framebuffer-
		 * _state, we need to unbind the framebuffer, so that
		 * vi_separate_dcc_stop_query isn't called twice with the same
		 * color buffer.
		 */
		if (!unbound) {
			util_copy_framebuffer_state(&sctx->framebuffer.state, NULL);
			unbound = true;
		}

		/* Prefer permanently disabling DCC; fall back to a one-time
		 * decompress if that isn't possible. */
		if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
			if (!si_texture_disable_dcc(&sctx->b, rtex))
				sctx->b.decompress_dcc(ctx, rtex);

		surf->dcc_incompatible = false;
	}

	/* Only flush TC when changing the framebuffer state, because
	 * the only client not using TC that can change textures is
	 * the framebuffer.
	 *
	 * Wait for compute shaders because of possible transitions:
	 * - FB write -> shader read
	 * - shader write -> FB read
	 *
	 * DB caches are flushed on demand (using si_decompress_textures).
	 *
	 * When MSAA is enabled, CB and TC caches are flushed on demand
	 * (after FMASK decompression). Shader write -> FB read transitions
	 * cannot happen for MSAA textures, because MSAA shader images are
	 * not supported.
	 *
	 * Only flush and wait for CB if there is actually a bound color buffer.
	 */
	if (sctx->framebuffer.nr_samples <= 1 &&
	    sctx->framebuffer.state.nr_cbufs)
		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
					   sctx->framebuffer.CB_has_shader_readable_metadata);

	sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;

	/* u_blitter doesn't invoke depth decompression when it does multiple
	 * blits in a row, but the only case when it matters for DB is when
	 * doing generate_mipmap. So here we flush DB manually between
	 * individual generate_mipmap blits.
	 * Note that lower mipmap levels aren't compressed.
	 */
	if (sctx->generate_mipmap_for_depth) {
		si_make_DB_shader_coherent(sctx, 1, false,
					   sctx->framebuffer.DB_has_shader_readable_metadata);
	} else if (sctx->b.chip_class == GFX9) {
		/* It appears that DB metadata "leaks" in a sequence of:
		 *  - depth clear
		 *  - DCC decompress for shader image writes (with DB disabled)
		 *  - render with DEPTH_BEFORE_SHADER=1
		 * Flushing DB metadata works around the problem.
		 */
		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
	}

	/* Take the maximum of the old and new count. If the new count is lower,
	 * dirtying is needed to disable the unbound colorbuffers.
	 */
	sctx->framebuffer.dirty_cbufs |=
		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;

	si_dec_framebuffer_counters(&sctx->framebuffer.state);
	util_copy_framebuffer_state(&sctx->framebuffer.state, state);

	/* Reset all derived per-framebuffer state; it is rebuilt from the
	 * new color buffers in the loop below. */
	sctx->framebuffer.colorbuf_enabled_4bit = 0;
	sctx->framebuffer.spi_shader_col_format = 0;
	sctx->framebuffer.spi_shader_col_format_alpha = 0;
	sctx->framebuffer.spi_shader_col_format_blend = 0;
	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
	sctx->framebuffer.color_is_int8 = 0;
	sctx->framebuffer.color_is_int10 = 0;

	sctx->framebuffer.compressed_cb_mask = 0;
	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
	sctx->framebuffer.any_dst_linear = false;
	sctx->framebuffer.CB_has_shader_readable_metadata = false;
	sctx->framebuffer.DB_has_shader_readable_metadata = false;

	for (i = 0; i < state->nr_cbufs; i++) {
		if (!state->cbufs[i])
			continue;

		surf = (struct r600_surface*)state->cbufs[i];
		rtex = (struct r600_texture*)surf->base.texture;

		/* CB register values are computed lazily on first bind. */
		if (!surf->color_initialized) {
			si_initialize_color_surface(sctx, surf);
		}

		/* Accumulate the per-buffer SPI export formats, 4 bits each. */
		sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4);
		sctx->framebuffer.spi_shader_col_format |=
			surf->spi_shader_col_format << (i * 4);
		sctx->framebuffer.spi_shader_col_format_alpha |=
			surf->spi_shader_col_format_alpha << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend |=
			surf->spi_shader_col_format_blend << (i * 4);
		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
			surf->spi_shader_col_format_blend_alpha << (i * 4);

		if (surf->color_is_int8)
			sctx->framebuffer.color_is_int8 |= 1 << i;
		if (surf->color_is_int10)
			sctx->framebuffer.color_is_int10 |= 1 << i;

		if (rtex->fmask.size) {
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
		}

		if (rtex->surface.is_linear)
			sctx->framebuffer.any_dst_linear = true;

		if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
			sctx->framebuffer.CB_has_shader_readable_metadata = true;

		si_context_add_resource_size(ctx, surf->base.texture);

		p_atomic_inc(&rtex->framebuffers_bound);

		if (rtex->dcc_gather_statistics) {
			/* Dirty tracking must be enabled for DCC usage analysis. */
			sctx->framebuffer.compressed_cb_mask |= 1 << i;
			vi_separate_dcc_start_query(ctx, rtex);
		}
	}

	struct r600_texture *zstex = NULL;

	if (state->zsbuf) {
		surf = (struct r600_surface*)state->zsbuf;
		zstex = (struct r600_texture*)surf->base.texture;

		/* DB register values are computed lazily on first bind. */
		if (!surf->depth_initialized) {
			si_init_depth_surface(sctx, surf);
		}

		if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level))
			sctx->framebuffer.DB_has_shader_readable_metadata = true;

		si_context_add_resource_size(ctx, surf->base.texture);
	}

	si_update_poly_offset_state(sctx);
	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);

	if (sctx->screen->dpbb_allowed)
		si_mark_atom_dirty(sctx, &sctx->dpbb_state);

	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
		si_mark_atom_dirty(sctx, &sctx->msaa_config);

	/* Out-of-order rasterization depends on which buffers are bound,
	 * so re-emit MSAA config when that set changed. */
	if (sctx->screen->has_out_of_order_rast &&
	    (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit ||
	     !!sctx->framebuffer.state.zsbuf != old_has_zsbuf ||
	     (zstex && zstex->surface.has_stencil != old_has_stencil)))
		si_mark_atom_dirty(sctx, &sctx->msaa_config);

	if (sctx->framebuffer.nr_samples != old_nr_samples) {
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
		si_mark_atom_dirty(sctx, &sctx->db_render_state);

		/* Set sample locations as fragment shader constants. */
		switch (sctx->framebuffer.nr_samples) {
		case 1:
			constbuf.user_buffer = sctx->sample_locations_1x;
			break;
		case 2:
			constbuf.user_buffer = sctx->sample_locations_2x;
			break;
		case 4:
			constbuf.user_buffer = sctx->sample_locations_4x;
			break;
		case 8:
			constbuf.user_buffer = sctx->sample_locations_8x;
			break;
		case 16:
			constbuf.user_buffer = sctx->sample_locations_16x;
			break;
		default:
			R600_ERR("Requested an invalid number of samples %i.\n",
				 sctx->framebuffer.nr_samples);
			assert(0);
		}
		/* Two 32-bit floats (x, y) per sample. */
		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);

		si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
	}

	sctx->do_update_shaders = true;

	if (!sctx->decompression_enabled) {
		/* Prevent textures decompression when the framebuffer state
		 * changes come from the decompression passes themselves.
		 */
		sctx->need_check_render_feedback = true;
	}
}
   2957 
   2958 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
   2959 {
   2960 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
   2961 	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
   2962 	unsigned i, nr_cbufs = state->nr_cbufs;
   2963 	struct r600_texture *tex = NULL;
   2964 	struct r600_surface *cb = NULL;
   2965 	unsigned cb_color_info = 0;
   2966 
   2967 	/* Colorbuffers. */
   2968 	for (i = 0; i < nr_cbufs; i++) {
   2969 		uint64_t cb_color_base, cb_color_fmask, cb_dcc_base;
   2970 		unsigned cb_color_attrib;
   2971 
   2972 		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
   2973 			continue;
   2974 
   2975 		cb = (struct r600_surface*)state->cbufs[i];
   2976 		if (!cb) {
   2977 			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
   2978 					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
   2979 			continue;
   2980 		}
   2981 
   2982 		tex = (struct r600_texture *)cb->base.texture;
   2983 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
   2984 				      &tex->resource, RADEON_USAGE_READWRITE,
   2985 				      tex->resource.b.b.nr_samples > 1 ?
   2986 					      RADEON_PRIO_COLOR_BUFFER_MSAA :
   2987 					      RADEON_PRIO_COLOR_BUFFER);
   2988 
   2989 		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
   2990 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
   2991 				tex->cmask_buffer, RADEON_USAGE_READWRITE,
   2992 				RADEON_PRIO_CMASK);
   2993 		}
   2994 
   2995 		if (tex->dcc_separate_buffer)
   2996 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
   2997 						  tex->dcc_separate_buffer,
   2998 						  RADEON_USAGE_READWRITE,
   2999 						  RADEON_PRIO_DCC);
   3000 
   3001 		/* Compute mutable surface parameters. */
   3002 		cb_color_base = tex->resource.gpu_address >> 8;
   3003 		cb_color_fmask = 0;
   3004 		cb_dcc_base = 0;
   3005 		cb_color_info = cb->cb_color_info | tex->cb_color_info;
   3006 		cb_color_attrib = cb->cb_color_attrib;
   3007 
   3008 		if (tex->fmask.size) {
   3009 			cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8;
   3010 			cb_color_fmask |= tex->fmask.tile_swizzle;
   3011 		}
   3012 
   3013 		/* Set up DCC. */
   3014 		if (vi_dcc_enabled(tex, cb->base.u.tex.level)) {
   3015 			bool is_msaa_resolve_dst = state->cbufs[0] &&
   3016 						   state->cbufs[0]->texture->nr_samples > 1 &&
   3017 						   state->cbufs[1] == &cb->base &&
   3018 						   state->cbufs[1]->texture->nr_samples <= 1;
   3019 
   3020 			if (!is_msaa_resolve_dst)
   3021 				cb_color_info |= S_028C70_DCC_ENABLE(1);
   3022 
   3023 			cb_dcc_base = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
   3024 				       tex->dcc_offset) >> 8;
   3025 			cb_dcc_base |= tex->surface.tile_swizzle;
   3026 		}
   3027 
   3028 		if (sctx->b.chip_class >= GFX9) {
   3029 			struct gfx9_surf_meta_flags meta;
   3030 
   3031 			if (tex->dcc_offset)
   3032 				meta = tex->surface.u.gfx9.dcc;
   3033 			else
   3034 				meta = tex->surface.u.gfx9.cmask;
   3035 
   3036 			/* Set mutable surface parameters. */
   3037 			cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
   3038 			cb_color_base |= tex->surface.tile_swizzle;
   3039 			if (!tex->fmask.size)
   3040 				cb_color_fmask = cb_color_base;
   3041 			cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) |
   3042 					   S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) |
   3043 					   S_028C74_RB_ALIGNED(meta.rb_aligned) |
   3044 					   S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
   3045 
   3046 			radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 15);
   3047 			radeon_emit(cs, cb_color_base);		/* CB_COLOR0_BASE */
   3048 			radeon_emit(cs, cb_color_base >> 32);	/* CB_COLOR0_BASE_EXT */
   3049 			radeon_emit(cs, cb->cb_color_attrib2);	/* CB_COLOR0_ATTRIB2 */
   3050 			radeon_emit(cs, cb->cb_color_view);	/* CB_COLOR0_VIEW */
   3051 			radeon_emit(cs, cb_color_info);		/* CB_COLOR0_INFO */
   3052 			radeon_emit(cs, cb_color_attrib);	/* CB_COLOR0_ATTRIB */
   3053 			radeon_emit(cs, cb->cb_dcc_control);	/* CB_COLOR0_DCC_CONTROL */
   3054 			radeon_emit(cs, tex->cmask.base_address_reg); /* CB_COLOR0_CMASK */
   3055 			radeon_emit(cs, tex->cmask.base_address_reg >> 32); /* CB_COLOR0_CMASK_BASE_EXT */
   3056 			radeon_emit(cs, cb_color_fmask);	/* CB_COLOR0_FMASK */
   3057 			radeon_emit(cs, cb_color_fmask >> 32);	/* CB_COLOR0_FMASK_BASE_EXT */
   3058 			radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */
   3059 			radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */
   3060 			radeon_emit(cs, cb_dcc_base);		/* CB_COLOR0_DCC_BASE */
   3061 			radeon_emit(cs, cb_dcc_base >> 32);	/* CB_COLOR0_DCC_BASE_EXT */
   3062 
   3063 			radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i * 4,
   3064 					       S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch));
   3065 		} else {
   3066 			/* Compute mutable surface parameters (SI-CI-VI). */
   3067 			const struct legacy_surf_level *level_info =
   3068 				&tex->surface.u.legacy.level[cb->base.u.tex.level];
   3069 			unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
   3070 			unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;
   3071 
   3072 			cb_color_base += level_info->offset >> 8;
   3073 			/* Only macrotiled modes can set tile swizzle. */
   3074 			if (level_info->mode == RADEON_SURF_MODE_2D)
   3075 				cb_color_base |= tex->surface.tile_swizzle;
   3076 
   3077 			if (!tex->fmask.size)
   3078 				cb_color_fmask = cb_color_base;
   3079 			if (cb_dcc_base)
   3080 				cb_dcc_base += level_info->dcc_offset >> 8;
   3081 
   3082 			pitch_tile_max = level_info->nblk_x / 8 - 1;
   3083 			slice_tile_max = level_info->nblk_x *
   3084 					 level_info->nblk_y / 64 - 1;
   3085 			tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);
   3086 
   3087 			cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
   3088 			cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
   3089 			cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
   3090 
   3091 			if (tex->fmask.size) {
   3092 				if (sctx->b.chip_class >= CIK)
   3093 					cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1);
   3094 				cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index);
   3095 				cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max);
   3096 			} else {
   3097 				/* This must be set for fast clear to work without FMASK. */
   3098 				if (sctx->b.chip_class >= CIK)
   3099 					cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
   3100 				cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
   3101 				cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
   3102 			}
   3103 
   3104 			radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
   3105 						   sctx->b.chip_class >= VI ? 14 : 13);
   3106 			radeon_emit(cs, cb_color_base);		/* CB_COLOR0_BASE */
   3107 			radeon_emit(cs, cb_color_pitch);	/* CB_COLOR0_PITCH */
   3108 			radeon_emit(cs, cb_color_slice);	/* CB_COLOR0_SLICE */
   3109 			radeon_emit(cs, cb->cb_color_view);	/* CB_COLOR0_VIEW */
   3110 			radeon_emit(cs, cb_color_info);		/* CB_COLOR0_INFO */
   3111 			radeon_emit(cs, cb_color_attrib);	/* CB_COLOR0_ATTRIB */
   3112 			radeon_emit(cs, cb->cb_dcc_control);	/* CB_COLOR0_DCC_CONTROL */
   3113 			radeon_emit(cs, tex->cmask.base_address_reg);	/* CB_COLOR0_CMASK */
   3114 			radeon_emit(cs, tex->cmask.slice_tile_max);	/* CB_COLOR0_CMASK_SLICE */
   3115 			radeon_emit(cs, cb_color_fmask);		/* CB_COLOR0_FMASK */
   3116 			radeon_emit(cs, cb_color_fmask_slice);		/* CB_COLOR0_FMASK_SLICE */
   3117 			radeon_emit(cs, tex->color_clear_value[0]);	/* CB_COLOR0_CLEAR_WORD0 */
   3118 			radeon_emit(cs, tex->color_clear_value[1]);	/* CB_COLOR0_CLEAR_WORD1 */
   3119 
   3120 			if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
   3121 				radeon_emit(cs, cb_dcc_base);
   3122 		}
   3123 	}
   3124 	for (; i < 8 ; i++)
   3125 		if (sctx->framebuffer.dirty_cbufs & (1 << i))
   3126 			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
   3127 
   3128 	/* ZS buffer. */
   3129 	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
   3130 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
   3131 		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
   3132 
   3133 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
   3134 				      &rtex->resource, RADEON_USAGE_READWRITE,
   3135 				      zb->base.texture->nr_samples > 1 ?
   3136 					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
   3137 					      RADEON_PRIO_DEPTH_BUFFER);
   3138 
   3139 		if (sctx->b.chip_class >= GFX9) {
   3140 			radeon_set_context_reg_seq(cs, R_028014_DB_HTILE_DATA_BASE, 3);
   3141 			radeon_emit(cs, zb->db_htile_data_base);	/* DB_HTILE_DATA_BASE */
   3142 			radeon_emit(cs, zb->db_htile_data_base >> 32);	/* DB_HTILE_DATA_BASE_HI */
   3143 			radeon_emit(cs, zb->db_depth_size);		/* DB_DEPTH_SIZE */
   3144 
   3145 			radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10);
   3146 			radeon_emit(cs, zb->db_z_info |			/* DB_Z_INFO */
   3147 				    S_028038_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
   3148 			radeon_emit(cs, zb->db_stencil_info);		/* DB_STENCIL_INFO */
   3149 			radeon_emit(cs, zb->db_depth_base);		/* DB_Z_READ_BASE */
   3150 			radeon_emit(cs, zb->db_depth_base >> 32);	/* DB_Z_READ_BASE_HI */
   3151 			radeon_emit(cs, zb->db_stencil_base);		/* DB_STENCIL_READ_BASE */
   3152 			radeon_emit(cs, zb->db_stencil_base >> 32);	/* DB_STENCIL_READ_BASE_HI */
   3153 			radeon_emit(cs, zb->db_depth_base);		/* DB_Z_WRITE_BASE */
   3154 			radeon_emit(cs, zb->db_depth_base >> 32);	/* DB_Z_WRITE_BASE_HI */
   3155 			radeon_emit(cs, zb->db_stencil_base);		/* DB_STENCIL_WRITE_BASE */
   3156 			radeon_emit(cs, zb->db_stencil_base >> 32);	/* DB_STENCIL_WRITE_BASE_HI */
   3157 
   3158 			radeon_set_context_reg_seq(cs, R_028068_DB_Z_INFO2, 2);
   3159 			radeon_emit(cs, zb->db_z_info2);	/* DB_Z_INFO2 */
   3160 			radeon_emit(cs, zb->db_stencil_info2);	/* DB_STENCIL_INFO2 */
   3161 		} else {
   3162 			radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
   3163 
   3164 			radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
   3165 			radeon_emit(cs, zb->db_depth_info);	/* DB_DEPTH_INFO */
   3166 			radeon_emit(cs, zb->db_z_info |		/* DB_Z_INFO */
   3167 				    S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
   3168 			radeon_emit(cs, zb->db_stencil_info);	/* DB_STENCIL_INFO */
   3169 			radeon_emit(cs, zb->db_depth_base);	/* DB_Z_READ_BASE */
   3170 			radeon_emit(cs, zb->db_stencil_base);	/* DB_STENCIL_READ_BASE */
   3171 			radeon_emit(cs, zb->db_depth_base);	/* DB_Z_WRITE_BASE */
   3172 			radeon_emit(cs, zb->db_stencil_base);	/* DB_STENCIL_WRITE_BASE */
   3173 			radeon_emit(cs, zb->db_depth_size);	/* DB_DEPTH_SIZE */
   3174 			radeon_emit(cs, zb->db_depth_slice);	/* DB_DEPTH_SLICE */
   3175 		}
   3176 
   3177 		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
   3178 		radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
   3179 		radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
   3180 
   3181 		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
   3182 		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
   3183 	} else if (sctx->framebuffer.dirty_zsbuf) {
   3184 		if (sctx->b.chip_class >= GFX9)
   3185 			radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 2);
   3186 		else
   3187 			radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
   3188 
   3189 		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
   3190 		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
   3191 	}
   3192 
   3193 	/* Framebuffer dimensions. */
   3194         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
   3195 	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
   3196 			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
   3197 
   3198 	if (sctx->screen->dfsm_allowed) {
   3199 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
   3200 		radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
   3201 	}
   3202 
   3203 	sctx->framebuffer.dirty_cbufs = 0;
   3204 	sctx->framebuffer.dirty_zsbuf = false;
   3205 }
   3206 
   3207 static void si_emit_msaa_sample_locs(struct si_context *sctx,
   3208 				     struct r600_atom *atom)
   3209 {
   3210 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
   3211 	unsigned nr_samples = sctx->framebuffer.nr_samples;
   3212 	bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug;
   3213 
   3214 	/* Smoothing (only possible with nr_samples == 1) uses the same
   3215 	 * sample locations as the MSAA it simulates.
   3216 	 */
   3217 	if (nr_samples <= 1 && sctx->smoothing_enabled)
   3218 		nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
   3219 
   3220 	/* On Polaris, the small primitive filter uses the sample locations
   3221 	 * even when MSAA is off, so we need to make sure they're set to 0.
   3222 	 */
   3223 	if (has_msaa_sample_loc_bug)
   3224 		nr_samples = MAX2(nr_samples, 1);
   3225 
   3226 	if (nr_samples != sctx->msaa_sample_locs.nr_samples) {
   3227 		sctx->msaa_sample_locs.nr_samples = nr_samples;
   3228 		si_emit_sample_locations(cs, nr_samples);
   3229 	}
   3230 
   3231 	if (sctx->b.family >= CHIP_POLARIS10) {
   3232 		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
   3233 		unsigned small_prim_filter_cntl =
   3234 			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
   3235 			/* line bug */
   3236 			S_028830_LINE_FILTER_DISABLE(sctx->b.family <= CHIP_POLARIS12);
   3237 
   3238 		/* The alternative of setting sample locations to 0 would
   3239 		 * require a DB flush to avoid Z errors, see
   3240 		 * https://bugs.freedesktop.org/show_bug.cgi?id=96908
   3241 		 */
   3242 		if (has_msaa_sample_loc_bug &&
   3243 		    sctx->framebuffer.nr_samples > 1 &&
   3244 		    rs && !rs->multisample_enable)
   3245 			small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
   3246 
   3247 		radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
   3248 				       small_prim_filter_cntl);
   3249 	}
   3250 }
   3251 
   3252 static bool si_out_of_order_rasterization(struct si_context *sctx)
   3253 {
   3254 	struct si_state_blend *blend = sctx->queued.named.blend;
   3255 	struct si_state_dsa *dsa = sctx->queued.named.dsa;
   3256 
   3257 	if (!sctx->screen->has_out_of_order_rast)
   3258 		return false;
   3259 
   3260 	unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit;
   3261 
   3262 	if (blend) {
   3263 		colormask &= blend->cb_target_enabled_4bit;
   3264 	} else {
   3265 		colormask = 0;
   3266 	}
   3267 
   3268 	/* Conservative: No logic op. */
   3269 	if (colormask && blend->logicop_enable)
   3270 		return false;
   3271 
   3272 	struct si_dsa_order_invariance dsa_order_invariant = {
   3273 		.zs = true, .pass_set = true, .pass_last = false
   3274 	};
   3275 
   3276 	if (sctx->framebuffer.state.zsbuf) {
   3277 		struct r600_texture *zstex =
   3278 			(struct r600_texture*)sctx->framebuffer.state.zsbuf->texture;
   3279 		bool has_stencil = zstex->surface.has_stencil;
   3280 		dsa_order_invariant = dsa->order_invariance[has_stencil];
   3281 		if (!dsa_order_invariant.zs)
   3282 			return false;
   3283 
   3284 		/* The set of PS invocations is always order invariant,
   3285 		 * except when early Z/S tests are requested. */
   3286 		if (sctx->ps_shader.cso &&
   3287 		    sctx->ps_shader.cso->info.writes_memory &&
   3288 		    sctx->ps_shader.cso->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] &&
   3289 		    !dsa_order_invariant.pass_set)
   3290 			return false;
   3291 
   3292 		if (sctx->b.num_perfect_occlusion_queries != 0 &&
   3293 		    !dsa_order_invariant.pass_set)
   3294 			return false;
   3295 	}
   3296 
   3297 	if (!colormask)
   3298 		return true;
   3299 
   3300 	unsigned blendmask = colormask & blend->blend_enable_4bit;
   3301 
   3302 	if (blendmask) {
   3303 		/* Only commutative blending. */
   3304 		if (blendmask & ~blend->commutative_4bit)
   3305 			return false;
   3306 
   3307 		if (!dsa_order_invariant.pass_set)
   3308 			return false;
   3309 	}
   3310 
   3311 	if (colormask & ~blendmask) {
   3312 		if (!dsa_order_invariant.pass_last)
   3313 			return false;
   3314 	}
   3315 
   3316 	return true;
   3317 }
   3318 
/* Emit the MSAA configuration: PA_SC_LINE_CNTL, PA_SC_AA_CONFIG, DB_EQAA
 * and PA_SC_MODE_CNTL_1, derived from the framebuffer sample count, line
 * smoothing, per-sample shading, and out-of-order rasterization
 * eligibility.
 */
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes;
	/* 33% faster rendering to linear color buffers */
	bool dst_is_linear = sctx->framebuffer.any_dst_linear;
	bool out_of_order_rast = si_out_of_order_rasterization(sctx);
	unsigned sc_mode_cntl_1 =
		S_028A4C_WALK_SIZE(dst_is_linear) |
		S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
		S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
		S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
		/* always 1: */
		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
		S_028A4C_FORCE_EOV_REZ_ENABLE(1);

	/* Effective sample count: real MSAA if enabled, otherwise the
	 * virtual sample count used by smoothing, otherwise 0 (no AA).
	 */
	int setup_samples = sctx->framebuffer.nr_samples > 1 ? sctx->framebuffer.nr_samples :
			    sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0;

	/* Required by OpenGL line rasterization.
	 *
	 * TODO: We should also enable perpendicular endcaps for AA lines,
	 *       but that requires implementing line stippling in the pixel
	 *       shader. SC can only do line stippling with axis-aligned
	 *       endcaps.
	 */
	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);

	if (setup_samples > 1) {
		/* distance from the pixel center, indexed by log2(nr_samples) */
		static unsigned max_dist[] = {
			0, /* unused */
			4, /* 2x MSAA */
			6, /* 4x MSAA */
			7, /* 8x MSAA */
			8, /* 16x MSAA */
		};
		unsigned log_samples = util_logbase2(setup_samples);
		/* ps_iter_samples may not be a power of two; round up. */
		unsigned log_ps_iter_samples =
			util_logbase2(util_next_power_of_two(sctx->ps_iter_samples));

		radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2);
		radeon_emit(cs, sc_line_cntl |
			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */

		if (sctx->framebuffer.nr_samples > 1) {
			/* Real MSAA: full EQAA setup. */
			radeon_set_context_reg(cs, R_028804_DB_EQAA,
					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
			radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
					       S_028A4C_PS_ITER_SAMPLE(sctx->ps_iter_samples > 1) |
					       sc_mode_cntl_1);
		} else if (sctx->smoothing_enabled) {
			/* Smoothing on a 1x framebuffer: overrasterize only. */
			radeon_set_context_reg(cs, R_028804_DB_EQAA,
					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
			radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
					       sc_mode_cntl_1);
		}
	} else {
		/* No AA at all: program the single-sample defaults. */
		radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2);
		radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */

		radeon_set_context_reg(cs, R_028804_DB_EQAA,
				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
		radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1,
				       sc_mode_cntl_1);
	}

	/* GFX9: Flush DFSM when the AA mode changes. */
	if (sctx->screen->dfsm_allowed) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
	}
}
   3409 
   3410 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
   3411 {
   3412 	struct si_context *sctx = (struct si_context *)ctx;
   3413 
   3414 	if (sctx->ps_iter_samples == min_samples)
   3415 		return;
   3416 
   3417 	sctx->ps_iter_samples = min_samples;
   3418 	sctx->do_update_shaders = true;
   3419 
   3420 	if (sctx->framebuffer.nr_samples > 1)
   3421 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
   3422 	if (sctx->screen->dpbb_allowed)
   3423 		si_mark_atom_dirty(sctx, &sctx->dpbb_state);
   3424 }
   3425 
   3426 /*
   3427  * Samplers
   3428  */
   3429 
   3430 /**
   3431  * Build the sampler view descriptor for a buffer texture.
   3432  * @param state 256-bit descriptor; only the high 128 bits are filled in
   3433  */
   3434 void
   3435 si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
   3436 			  enum pipe_format format,
   3437 			  unsigned offset, unsigned size,
   3438 			  uint32_t *state)
   3439 {
   3440 	const struct util_format_description *desc;
   3441 	int first_non_void;
   3442 	unsigned stride;
   3443 	unsigned num_records;
   3444 	unsigned num_format, data_format;
   3445 
   3446 	desc = util_format_description(format);
   3447 	first_non_void = util_format_get_first_non_void_channel(format);
   3448 	stride = desc->block.bits / 8;
   3449 	num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void);
   3450 	data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void);
   3451 
   3452 	num_records = size / stride;
   3453 	num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
   3454 
   3455 	/* The NUM_RECORDS field has a different meaning depending on the chip,
   3456 	 * instruction type, STRIDE, and SWIZZLE_ENABLE.
   3457 	 *
   3458 	 * SI-CIK:
   3459 	 * - If STRIDE == 0, it's in byte units.
   3460 	 * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.
   3461 	 *
   3462 	 * VI:
   3463 	 * - For SMEM and STRIDE == 0, it's in byte units.
   3464 	 * - For SMEM and STRIDE != 0, it's in units of STRIDE.
   3465 	 * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.
   3466 	 * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE.
   3467 	 * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_-
   3468 	 *       ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when
   3469 	 *       using SMEM. This can be done in the shader by clearing STRIDE with s_and.
   3470 	 *       That way the same descriptor can be used by both SMEM and VMEM.
   3471 	 *
   3472 	 * GFX9:
   3473 	 * - For SMEM and STRIDE == 0, it's in byte units.
   3474 	 * - For SMEM and STRIDE != 0, it's in units of STRIDE.
   3475 	 * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.
   3476 	 * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.
   3477 	 */
   3478 	if (screen->info.chip_class >= GFX9)
   3479 		/* When vindex == 0, LLVM sets IDXEN = 0, thus changing units
   3480 		 * from STRIDE to bytes. This works around it by setting
   3481 		 * NUM_RECORDS to at least the size of one element, so that
   3482 		 * the first element is readable when IDXEN == 0.
   3483 		 *
   3484 		 * TODO: Fix this in LLVM, but do we need a new intrinsic where
   3485 		 *       IDXEN is enforced?
   3486 		 */
   3487 		num_records = num_records ? MAX2(num_records, stride) : 0;
   3488 	else if (screen->info.chip_class == VI)
   3489 		num_records *= stride;
   3490 
   3491 	state[4] = 0;
   3492 	state[5] = S_008F04_STRIDE(stride);
   3493 	state[6] = num_records;
   3494 	state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
   3495 		   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
   3496 		   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
   3497 		   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
   3498 		   S_008F0C_NUM_FORMAT(num_format) |
   3499 		   S_008F0C_DATA_FORMAT(data_format);
   3500 }
   3501 
   3502 static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
   3503 {
   3504 	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
   3505 
   3506 	if (swizzle[3] == PIPE_SWIZZLE_X) {
   3507 		/* For the pre-defined border color values (white, opaque
   3508 		 * black, transparent black), the only thing that matters is
   3509 		 * that the alpha channel winds up in the correct place
   3510 		 * (because the RGB channels are all the same) so either of
   3511 		 * these enumerations will work.
   3512 		 */
   3513 		if (swizzle[2] == PIPE_SWIZZLE_Y)
   3514 			bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
   3515 		else
   3516 			bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
   3517 	} else if (swizzle[0] == PIPE_SWIZZLE_X) {
   3518 		if (swizzle[1] == PIPE_SWIZZLE_Y)
   3519 			bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
   3520 		else
   3521 			bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
   3522 	} else if (swizzle[1] == PIPE_SWIZZLE_X) {
   3523 		bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
   3524 	} else if (swizzle[2] == PIPE_SWIZZLE_X) {
   3525 		bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
   3526 	}
   3527 
   3528 	return bc_swizzle;
   3529 }
   3530 
   3531 /**
   3532  * Build the sampler view descriptor for a texture.
   3533  */
   3534 void
   3535 si_make_texture_descriptor(struct si_screen *screen,
   3536 			   struct r600_texture *tex,
   3537 			   bool sampler,
   3538 			   enum pipe_texture_target target,
   3539 			   enum pipe_format pipe_format,
   3540 			   const unsigned char state_swizzle[4],
   3541 			   unsigned first_level, unsigned last_level,
   3542 			   unsigned first_layer, unsigned last_layer,
   3543 			   unsigned width, unsigned height, unsigned depth,
   3544 			   uint32_t *state,
   3545 			   uint32_t *fmask_state)
   3546 {
   3547 	struct pipe_resource *res = &tex->resource.b.b;
   3548 	const struct util_format_description *desc;
   3549 	unsigned char swizzle[4];
   3550 	int first_non_void;
   3551 	unsigned num_format, data_format, type;
   3552 	uint64_t va;
   3553 
   3554 	desc = util_format_description(pipe_format);
   3555 
   3556 	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
   3557 		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
   3558 		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
   3559 		const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
   3560 
   3561 		switch (pipe_format) {
   3562 		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   3563 		case PIPE_FORMAT_X32_S8X24_UINT:
   3564 		case PIPE_FORMAT_X8Z24_UNORM:
   3565 			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
   3566 			break;
   3567 		case PIPE_FORMAT_X24S8_UINT:
   3568 			/*
   3569 			 * X24S8 is implemented as an 8_8_8_8 data format, to
   3570 			 * fix texture gathers. This affects at least
   3571 			 * GL45-CTS.texture_cube_map_array.sampling on VI.
   3572 			 */
   3573 			util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
   3574 			break;
   3575 		default:
   3576 			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
   3577 		}
   3578 	} else {
   3579 		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
   3580 	}
   3581 
   3582 	first_non_void = util_format_get_first_non_void_channel(pipe_format);
   3583 
   3584 	switch (pipe_format) {
   3585 	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   3586 		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
   3587 		break;
   3588 	default:
   3589 		if (first_non_void < 0) {
   3590 			if (util_format_is_compressed(pipe_format)) {
   3591 				switch (pipe_format) {
   3592 				case PIPE_FORMAT_DXT1_SRGB:
   3593 				case PIPE_FORMAT_DXT1_SRGBA:
   3594 				case PIPE_FORMAT_DXT3_SRGBA:
   3595 				case PIPE_FORMAT_DXT5_SRGBA:
   3596 				case PIPE_FORMAT_BPTC_SRGBA:
   3597 				case PIPE_FORMAT_ETC2_SRGB8:
   3598 				case PIPE_FORMAT_ETC2_SRGB8A1:
   3599 				case PIPE_FORMAT_ETC2_SRGBA8:
   3600 					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
   3601 					break;
   3602 				case PIPE_FORMAT_RGTC1_SNORM:
   3603 				case PIPE_FORMAT_LATC1_SNORM:
   3604 				case PIPE_FORMAT_RGTC2_SNORM:
   3605 				case PIPE_FORMAT_LATC2_SNORM:
   3606 				case PIPE_FORMAT_ETC2_R11_SNORM:
   3607 				case PIPE_FORMAT_ETC2_RG11_SNORM:
   3608 				/* implies float, so use SNORM/UNORM to determine
   3609 				   whether data is signed or not */
   3610 				case PIPE_FORMAT_BPTC_RGB_FLOAT:
   3611 					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
   3612 					break;
   3613 				default:
   3614 					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
   3615 					break;
   3616 				}
   3617 			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
   3618 				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
   3619 			} else {
   3620 				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
   3621 			}
   3622 		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
   3623 			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
   3624 		} else {
   3625 			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
   3626 
   3627 			switch (desc->channel[first_non_void].type) {
   3628 			case UTIL_FORMAT_TYPE_FLOAT:
   3629 				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
   3630 				break;
   3631 			case UTIL_FORMAT_TYPE_SIGNED:
   3632 				if (desc->channel[first_non_void].normalized)
   3633 					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
   3634 				else if (desc->channel[first_non_void].pure_integer)
   3635 					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
   3636 				else
   3637 					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
   3638 				break;
   3639 			case UTIL_FORMAT_TYPE_UNSIGNED:
   3640 				if (desc->channel[first_non_void].normalized)
   3641 					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
   3642 				else if (desc->channel[first_non_void].pure_integer)
   3643 					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
   3644 				else
   3645 					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
   3646 			}
   3647 		}
   3648 	}
   3649 
   3650 	data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void);
   3651 	if (data_format == ~0) {
   3652 		data_format = 0;
   3653 	}
   3654 
   3655 	/* S8 with Z32 HTILE needs a special format. */
   3656 	if (screen->info.chip_class >= GFX9 &&
   3657 	    pipe_format == PIPE_FORMAT_S8_UINT &&
   3658 	    tex->tc_compatible_htile)
   3659 		data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
   3660 
   3661 	if (!sampler &&
   3662 	    (res->target == PIPE_TEXTURE_CUBE ||
   3663 	     res->target == PIPE_TEXTURE_CUBE_ARRAY ||
   3664 	     (screen->info.chip_class <= VI &&
   3665 	      res->target == PIPE_TEXTURE_3D))) {
   3666 		/* For the purpose of shader images, treat cube maps and 3D
   3667 		 * textures as 2D arrays. For 3D textures, the address
   3668 		 * calculations for mipmaps are different, so we rely on the
   3669 		 * caller to effectively disable mipmaps.
   3670 		 */
   3671 		type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   3672 
   3673 		assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
   3674 	} else {
   3675 		type = si_tex_dim(screen, tex, target, res->nr_samples);
   3676 	}
   3677 
   3678 	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
   3679 	        height = 1;
   3680 		depth = res->array_size;
   3681 	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
   3682 		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
   3683 		if (sampler || res->target != PIPE_TEXTURE_3D)
   3684 			depth = res->array_size;
   3685 	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
   3686 		depth = res->array_size / 6;
   3687 
   3688 	state[0] = 0;
   3689 	state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
   3690 		    S_008F14_NUM_FORMAT_GFX6(num_format));
   3691 	state[2] = (S_008F18_WIDTH(width - 1) |
   3692 		    S_008F18_HEIGHT(height - 1) |
   3693 		    S_008F18_PERF_MOD(4));
   3694 	state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
   3695 		    S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
   3696 		    S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
   3697 		    S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
   3698 		    S_008F1C_BASE_LEVEL(res->nr_samples > 1 ?
   3699 					0 : first_level) |
   3700 		    S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
   3701 					util_logbase2(res->nr_samples) :
   3702 					last_level) |
   3703 		    S_008F1C_TYPE(type));
   3704 	state[4] = 0;
   3705 	state[5] = S_008F24_BASE_ARRAY(first_layer);
   3706 	state[6] = 0;
   3707 	state[7] = 0;
   3708 
   3709 	if (screen->info.chip_class >= GFX9) {
   3710 		unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
   3711 
   3712 		/* Depth is the the last accessible layer on Gfx9.
   3713 		 * The hw doesn't need to know the total number of layers.
   3714 		 */
   3715 		if (type == V_008F1C_SQ_RSRC_IMG_3D)
   3716 			state[4] |= S_008F20_DEPTH(depth - 1);
   3717 		else
   3718 			state[4] |= S_008F20_DEPTH(last_layer);
   3719 
   3720 		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
   3721 		state[5] |= S_008F24_MAX_MIP(res->nr_samples > 1 ?
   3722 					     util_logbase2(res->nr_samples) :
   3723 					     tex->resource.b.b.last_level);
   3724 	} else {
   3725 		state[3] |= S_008F1C_POW2_PAD(res->last_level > 0);
   3726 		state[4] |= S_008F20_DEPTH(depth - 1);
   3727 		state[5] |= S_008F24_LAST_ARRAY(last_layer);
   3728 	}
   3729 
   3730 	if (tex->dcc_offset) {
   3731 		unsigned swap = si_translate_colorswap(pipe_format, false);
   3732 
   3733 		state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
   3734 	} else {
   3735 		/* The last dword is unused by hw. The shader uses it to clear
   3736 		 * bits in the first dword of sampler state.
   3737 		 */
   3738 		if (screen->info.chip_class <= CIK && res->nr_samples <= 1) {
   3739 			if (first_level == last_level)
   3740 				state[7] = C_008F30_MAX_ANISO_RATIO;
   3741 			else
   3742 				state[7] = 0xffffffff;
   3743 		}
   3744 	}
   3745 
   3746 	/* Initialize the sampler view for FMASK. */
   3747 	if (tex->fmask.size) {
   3748 		uint32_t data_format, num_format;
   3749 
   3750 		va = tex->resource.gpu_address + tex->fmask.offset;
   3751 
   3752 		if (screen->info.chip_class >= GFX9) {
   3753 			data_format = V_008F14_IMG_DATA_FORMAT_FMASK;
   3754 			switch (res->nr_samples) {
   3755 			case 2:
   3756 				num_format = V_008F14_IMG_FMASK_8_2_2;
   3757 				break;
   3758 			case 4:
   3759 				num_format = V_008F14_IMG_FMASK_8_4_4;
   3760 				break;
   3761 			case 8:
   3762 				num_format = V_008F14_IMG_FMASK_32_8_8;
   3763 				break;
   3764 			default:
   3765 				unreachable("invalid nr_samples");
   3766 			}
   3767 		} else {
   3768 			switch (res->nr_samples) {
   3769 			case 2:
   3770 				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
   3771 				break;
   3772 			case 4:
   3773 				data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
   3774 				break;
   3775 			case 8:
   3776 				data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
   3777 				break;
   3778 			default:
   3779 				unreachable("invalid nr_samples");
   3780 			}
   3781 			num_format = V_008F14_IMG_NUM_FORMAT_UINT;
   3782 		}
   3783 
   3784 		fmask_state[0] = (va >> 8) | tex->fmask.tile_swizzle;
   3785 		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
   3786 				 S_008F14_DATA_FORMAT_GFX6(data_format) |
   3787 				 S_008F14_NUM_FORMAT_GFX6(num_format);
   3788 		fmask_state[2] = S_008F18_WIDTH(width - 1) |
   3789 				 S_008F18_HEIGHT(height - 1);
   3790 		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
   3791 				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
   3792 				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
   3793 				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
   3794 				 S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0));
   3795 		fmask_state[4] = 0;
   3796 		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
   3797 		fmask_state[6] = 0;
   3798 		fmask_state[7] = 0;
   3799 
   3800 		if (screen->info.chip_class >= GFX9) {
   3801 			fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode);
   3802 			fmask_state[4] |= S_008F20_DEPTH(last_layer) |
   3803 					  S_008F20_PITCH_GFX9(tex->surface.u.gfx9.fmask.epitch);
   3804 			fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned) |
   3805 					  S_008F24_META_RB_ALIGNED(tex->surface.u.gfx9.cmask.rb_aligned);
   3806 		} else {
   3807 			fmask_state[3] |= S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index);
   3808 			fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
   3809 					  S_008F20_PITCH_GFX6(tex->fmask.pitch_in_pixels - 1);
   3810 			fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
   3811 		}
   3812 	}
   3813 }
   3814 
   3815 /**
   3816  * Create a sampler view.
   3817  *
   3818  * @param ctx		context
   3819  * @param texture	texture
   3820  * @param state		sampler view template
   3821  * @param width0	width0 override (for compressed textures as int)
   3822  * @param height0	height0 override (for compressed textures as int)
   3823  * @param force_level   set the base address to the level (for compressed textures)
   3824  */
struct pipe_sampler_view *
si_create_sampler_view_custom(struct pipe_context *ctx,
			      struct pipe_resource *texture,
			      const struct pipe_sampler_view *state,
			      unsigned width0, unsigned height0,
			      unsigned force_level)
{
	struct si_context *sctx = (struct si_context*)ctx;
	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
	struct r600_texture *tmp = (struct r600_texture*)texture;
	unsigned base_level, first_level, last_level;
	unsigned char state_swizzle[4];
	unsigned height, depth, width;
	unsigned last_layer = state->u.tex.last_layer;
	enum pipe_format pipe_format;
	const struct legacy_surf_level *surflevel;

	if (!view)
		return NULL;

	/* initialize base object */
	view->base = *state;
	view->base.texture = NULL;
	view->base.reference.count = 1;
	view->base.context = ctx;

	assert(texture);
	pipe_resource_reference(&view->base.texture, texture);

	/* Remember whether this view samples the stencil aspect of a Z/S
	 * resource; this selects the stencil surface and S8 format below. */
	if (state->format == PIPE_FORMAT_X24S8_UINT ||
	    state->format == PIPE_FORMAT_S8X24_UINT ||
	    state->format == PIPE_FORMAT_X32_S8X24_UINT ||
	    state->format == PIPE_FORMAT_S8_UINT)
		view->is_stencil_sampler = true;

	/* Buffer resource. */
	if (texture->target == PIPE_BUFFER) {
		si_make_buffer_descriptor(sctx->screen,
					  (struct r600_resource *)texture,
					  state->format,
					  state->u.buf.offset,
					  state->u.buf.size,
					  view->state);
		return &view->base;
	}

	state_swizzle[0] = state->swizzle_r;
	state_swizzle[1] = state->swizzle_g;
	state_swizzle[2] = state->swizzle_b;
	state_swizzle[3] = state->swizzle_a;

	base_level = 0;
	first_level = state->u.tex.first_level;
	last_level = state->u.tex.last_level;
	width = width0;
	height = height0;
	depth = texture->depth0;

	/* Pre-GFX9 only: point the descriptor base at the forced level and
	 * collapse the mip range to that single level (used for compressed
	 * textures, see the function comment). */
	if (sctx->b.chip_class <= VI && force_level) {
		assert(force_level == first_level &&
		       force_level == last_level);
		base_level = force_level;
		first_level = 0;
		last_level = 0;
		width = u_minify(width, force_level);
		height = u_minify(height, force_level);
		depth = u_minify(depth, force_level);
	}

	/* This is not needed if state trackers set last_layer correctly. */
	if (state->target == PIPE_TEXTURE_1D ||
	    state->target == PIPE_TEXTURE_2D ||
	    state->target == PIPE_TEXTURE_RECT ||
	    state->target == PIPE_TEXTURE_CUBE)
		last_layer = state->u.tex.first_layer;

	/* Texturing with separate depth and stencil. */
	pipe_format = state->format;

	/* Depth/stencil texturing sometimes needs separate texture. */
	if (tmp->is_depth && !si_can_sample_zs(tmp, view->is_stencil_sampler)) {
		if (!tmp->flushed_depth_texture &&
		    !si_init_flushed_depth_texture(ctx, texture, NULL)) {
			pipe_resource_reference(&view->base.texture, NULL);
			FREE(view);
			return NULL;
		}

		assert(tmp->flushed_depth_texture);

		/* Override format for the case where the flushed texture
		 * contains only Z or only S.
		 */
		if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format)
			pipe_format = tmp->flushed_depth_texture->resource.b.b.format;

		tmp = tmp->flushed_depth_texture;
	}

	surflevel = tmp->surface.u.legacy.level;

	/* DB-compatible textures need their sampling format remapped to
	 * what is actually stored in memory. */
	if (tmp->db_compatible) {
		if (!view->is_stencil_sampler)
			pipe_format = tmp->db_render_format;

		switch (pipe_format) {
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			pipe_format = PIPE_FORMAT_Z32_FLOAT;
			break;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			/* Z24 is always stored like this for DB
			 * compatibility.
			 */
			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
			break;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_S8X24_UINT:
		case PIPE_FORMAT_X32_S8X24_UINT:
			/* Stencil views sample the separate stencil
			 * surface as S8. */
			pipe_format = PIPE_FORMAT_S8_UINT;
			surflevel = tmp->surface.u.legacy.stencil_level;
			break;
		default:;
		}
	}

	view->dcc_incompatible =
		vi_dcc_formats_are_incompatible(texture,
						state->u.tex.first_level,
						state->format);

	si_make_texture_descriptor(sctx->screen, tmp, true,
				   state->target, pipe_format, state_swizzle,
				   first_level, last_level,
				   state->u.tex.first_layer, last_layer,
				   width, height, depth,
				   view->state, view->fmask_state);

	/* Derive integer-ness from the NUM_FORMAT the descriptor ended up
	 * with, not from the requested pipe format. */
	unsigned num_format = G_008F14_NUM_FORMAT_GFX6(view->state[1]);
	view->is_integer =
		num_format == V_008F14_IMG_NUM_FORMAT_USCALED ||
		num_format == V_008F14_IMG_NUM_FORMAT_SSCALED ||
		num_format == V_008F14_IMG_NUM_FORMAT_UINT ||
		num_format == V_008F14_IMG_NUM_FORMAT_SINT;
	view->base_level_info = &surflevel[base_level];
	view->base_level = base_level;
	view->block_width = util_format_get_blockwidth(pipe_format);
	return &view->base;
}
   3974 
   3975 static struct pipe_sampler_view *
   3976 si_create_sampler_view(struct pipe_context *ctx,
   3977 		       struct pipe_resource *texture,
   3978 		       const struct pipe_sampler_view *state)
   3979 {
   3980 	return si_create_sampler_view_custom(ctx, texture, state,
   3981 					     texture ? texture->width0 : 0,
   3982 					     texture ? texture->height0 : 0, 0);
   3983 }
   3984 
   3985 static void si_sampler_view_destroy(struct pipe_context *ctx,
   3986 				    struct pipe_sampler_view *state)
   3987 {
   3988 	struct si_sampler_view *view = (struct si_sampler_view *)state;
   3989 
   3990 	pipe_resource_reference(&state->texture, NULL);
   3991 	FREE(view);
   3992 }
   3993 
   3994 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
   3995 {
   3996 	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
   3997 	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
   3998 	       (linear_filter &&
   3999 	        (wrap == PIPE_TEX_WRAP_CLAMP ||
   4000 		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
   4001 }
   4002 
/* Translate a border color into the S_008F3C_BORDER_COLOR_* fields of a
 * sampler descriptor.
 *
 * The three trivial colors map to built-in hardware border color types;
 * any other color is uploaded to the per-context border color table and
 * referenced through BORDER_COLOR_PTR.
 */
static uint32_t si_translate_border_color(struct si_context *sctx,
					  const struct pipe_sampler_state *state,
					  const union pipe_color_union *color,
					  bool is_integer)
{
	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;

	/* If no wrap mode can sample the border, the color is irrelevant;
	 * use the cheap built-in transparent black. */
	if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) &&
	    !wrap_mode_uses_border_color(state->wrap_t, linear_filter) &&
	    !wrap_mode_uses_border_color(state->wrap_r, linear_filter))
		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);

	/* Fast paths for (0,0,0,0), (0,0,0,1) and (1,1,1,1), which have
	 * dedicated hardware border color types. */
#define simple_border_types(elt) \
do { \
	if (color->elt[0] == 0 && color->elt[1] == 0 &&                         \
	    color->elt[2] == 0 && color->elt[3] == 0)                           \
		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \
	if (color->elt[0] == 0 && color->elt[1] == 0 &&                         \
	    color->elt[2] == 0 && color->elt[3] == 1)                           \
		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \
	if (color->elt[0] == 1 && color->elt[1] == 1 &&                         \
	    color->elt[2] == 1 && color->elt[3] == 1)                           \
		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \
} while (false)

	/* Compare as ints or floats depending on the sampler's type. */
	if (is_integer)
		simple_border_types(ui);
	else
		simple_border_types(f);

#undef simple_border_types

	int i;

	/* Check if the border has been uploaded already. */
	for (i = 0; i < sctx->border_color_count; i++)
		if (memcmp(&sctx->border_color_table[i], color,
			   sizeof(*color)) == 0)
			break;

	if (i >= SI_MAX_BORDER_COLORS) {
		/* Getting 4096 unique border colors is very unlikely. */
		fprintf(stderr, "radeonsi: The border color table is full. "
			"Any new border colors will be just black. "
			"Please file a bug.\n");
		return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK);
	}

	if (i == sctx->border_color_count) {
		/* Upload a new border color. */
		memcpy(&sctx->border_color_table[i], color,
		       sizeof(*color));
		util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
					color, sizeof(*color));
		sctx->border_color_count++;
	}

	return S_008F3C_BORDER_COLOR_PTR(i) |
	       S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER);
}
   4064 
   4065 static inline int S_FIXED(float value, unsigned frac_bits)
   4066 {
   4067 	return value * (1 << frac_bits);
   4068 }
   4069 
   4070 static inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso)
   4071 {
   4072 	if (filter == PIPE_TEX_FILTER_LINEAR)
   4073 		return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
   4074 				     : V_008F38_SQ_TEX_XY_FILTER_BILINEAR;
   4075 	else
   4076 		return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
   4077 				     : V_008F38_SQ_TEX_XY_FILTER_POINT;
   4078 }
   4079 
   4080 static inline unsigned si_tex_aniso_filter(unsigned filter)
   4081 {
   4082 	if (filter < 2)
   4083 		return 0;
   4084 	if (filter < 4)
   4085 		return 1;
   4086 	if (filter < 8)
   4087 		return 2;
   4088 	if (filter < 16)
   4089 		return 3;
   4090 	return 4;
   4091 }
   4092 
/* pipe_context::create_sampler_state.
 *
 * Precomputes three sampler descriptor variants:
 *  - val:                the regular sampler words,
 *  - integer_val:        same, but with the border color translated for
 *                        integer textures,
 *  - upgraded_depth_val: same, but with the border color clamped to
 *                        [0,1] and UPGRADED_DEPTH set.
 */
static void *si_create_sampler_state(struct pipe_context *ctx,
				     const struct pipe_sampler_state *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_screen *sscreen = sctx->screen;
	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
	/* A non-negative force_aniso overrides the requested anisotropy. */
	unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso
						       : state->max_anisotropy;
	unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);
	union pipe_color_union clamped_border_color;

	if (!rstate) {
		return NULL;
	}

#ifdef DEBUG
	rstate->magic = SI_SAMPLER_STATE_MAGIC;
#endif
	/* Word 0: wrap modes, aniso ratio, depth compare, coordinate
	 * normalization, seamless cube map. */
	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
			  S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
			  S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			  S_008F30_ANISO_BIAS(max_aniso_ratio) |
			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
			  S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
	/* Word 1: LOD range in 4.8 fixed point. */
	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
			  S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	/* Word 2: LOD bias and filters. */
	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) |
			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) |
			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
			  S_008F38_MIP_POINT_PRECLAMP(0) |
			  S_008F38_DISABLE_LSB_CEIL(sctx->b.chip_class <= VI) |
			  S_008F38_FILTER_PREC_FIX(1) |
			  S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
	/* Word 3: border color. */
	rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false);

	/* Create sampler resource for integer textures. */
	memcpy(rstate->integer_val, rstate->val, sizeof(rstate->val));
	rstate->integer_val[3] = si_translate_border_color(sctx, state, &state->border_color, true);

	/* Create sampler resource for upgraded depth textures. */
	memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val));

	for (unsigned i = 0; i < 4; ++i) {
		/* Use channel 0 on purpose, so that we can use OPAQUE_WHITE
		 * when the border color is 1.0. */
		clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1);
	}

	/* If clamping didn't change the color, only the flag needs to be
	 * added; otherwise re-translate with the clamped color. */
	if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0)
		rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1);
	else
		rstate->upgraded_depth_val[3] =
			si_translate_border_color(sctx, state, &clamped_border_color, false) |
			S_008F3C_UPGRADED_DEPTH(1);

	return rstate;
}
   4156 
   4157 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
   4158 {
   4159 	struct si_context *sctx = (struct si_context *)ctx;
   4160 
   4161 	if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
   4162 		return;
   4163 
   4164 	sctx->sample_mask.sample_mask = sample_mask;
   4165 	si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
   4166 }
   4167 
   4168 static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
   4169 {
   4170 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
   4171 	unsigned mask = sctx->sample_mask.sample_mask;
   4172 
   4173 	/* Needed for line and polygon smoothing as well as for the Polaris
   4174 	 * small primitive filter. We expect the state tracker to take care of
   4175 	 * this for us.
   4176 	 */
   4177 	assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
   4178 	       (mask & 1 && sctx->blitter->running));
   4179 
   4180 	radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
   4181 	radeon_emit(cs, mask | (mask << 16));
   4182 	radeon_emit(cs, mask | (mask << 16));
   4183 }
   4184 
   4185 static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
   4186 {
   4187 #ifdef DEBUG
   4188 	struct si_sampler_state *s = state;
   4189 
   4190 	assert(s->magic == SI_SAMPLER_STATE_MAGIC);
   4191 	s->magic = 0;
   4192 #endif
   4193 	free(state);
   4194 }
   4195 
   4196 /*
   4197  * Vertex elements & buffers
   4198  */
   4199 
/* pipe_context::create_vertex_elements_state.
 *
 * Builds per-element buffer descriptor words (rsrc_word3), records
 * instance divisors, and selects fix_fetch shader workarounds for vertex
 * formats the hardware cannot fetch natively.
 */
static void *si_create_vertex_elements(struct pipe_context *ctx,
				       unsigned count,
				       const struct pipe_vertex_element *elements)
{
	struct si_screen *sscreen = (struct si_screen*)ctx->screen;
	struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements);
	bool used[SI_NUM_VERTEX_BUFFERS] = {};
	int i;

	assert(count <= SI_MAX_ATTRIBS);
	if (!v)
		return NULL;

	v->count = count;
	/* 16 bytes per descriptor, padded for CP DMA uploads. */
	v->desc_list_byte_size = align(count * 16, SI_CPDMA_ALIGNMENT);

	for (i = 0; i < count; ++i) {
		const struct util_format_description *desc;
		const struct util_format_channel_description *channel;
		unsigned data_format, num_format;
		int first_non_void;
		unsigned vbo_index = elements[i].vertex_buffer_index;
		unsigned char swizzle[4];

		/* Reject out-of-range vertex buffer indices. */
		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
			FREE(v);
			return NULL;
		}

		/* Divisor of 1 has a fast path; any other non-zero divisor
		 * is fetched by the shader from a constant buffer
		 * (see si_bind_vertex_elements). */
		if (elements[i].instance_divisor) {
			v->uses_instance_divisors = true;
			v->instance_divisors[i] = elements[i].instance_divisor;

			if (v->instance_divisors[i] == 1)
				v->instance_divisor_is_one |= 1u << i;
			else
				v->instance_divisor_is_fetched |= 1u << i;
		}

		/* Track the first element that uses each vertex buffer. */
		if (!used[vbo_index]) {
			v->first_vb_use_mask |= 1 << i;
			used[vbo_index] = true;
		}

		desc = util_format_description(elements[i].src_format);
		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
		channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
		memcpy(swizzle, desc->swizzle, sizeof(swizzle));

		v->format_size[i] = desc->block.bits / 8;
		v->src_offset[i] = elements[i].src_offset;
		v->vertex_buffer_index[i] = vbo_index;

		/* The hardware always treats the 2-bit alpha channel as
		 * unsigned, so a shader workaround is needed. The affected
		 * chips are VI and older except Stoney (GFX8.1).
		 */
		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10 &&
		    sscreen->info.chip_class <= VI &&
		    sscreen->info.family != CHIP_STONEY) {
			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
				v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM;
			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
				v->fix_fetch[i] = SI_FIX_FETCH_A2_SSCALED;
			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
				/* This isn't actually used in OpenGL. */
				v->fix_fetch[i] = SI_FIX_FETCH_A2_SINT;
			}
		} else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
			/* 32-bit fixed point needs a shader-side conversion. */
			if (desc->swizzle[3] == PIPE_SWIZZLE_1)
				v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_FIXED;
			else
				v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_FIXED;
		} else if (channel && channel->size == 32 && !channel->pure_integer) {
			/* 32-bit norm/scaled formats need shader conversion. */
			if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
				if (channel->normalized) {
					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
						v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_SNORM;
					else
						v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SNORM;
				} else {
					v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SSCALED;
				}
			} else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
				if (channel->normalized) {
					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
						v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_UNORM;
					else
						v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_UNORM;
				} else {
					v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_USCALED;
				}
			}
		} else if (channel && channel->size == 64 &&
			   channel->type == UTIL_FORMAT_TYPE_FLOAT) {
			/* Doubles are loaded as 32-bit pairs and reassembled
			 * by the shader; adjust the swizzle accordingly. */
			switch (desc->nr_channels) {
			case 1:
			case 2:
				v->fix_fetch[i] = SI_FIX_FETCH_RG_64_FLOAT;
				swizzle[0] = PIPE_SWIZZLE_X;
				swizzle[1] = PIPE_SWIZZLE_Y;
				swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0;
				swizzle[3] = desc->nr_channels == 2 ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_0;
				break;
			case 3:
				v->fix_fetch[i] = SI_FIX_FETCH_RGB_64_FLOAT;
				swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */
				swizzle[1] = PIPE_SWIZZLE_Y;
				swizzle[2] = PIPE_SWIZZLE_0;
				swizzle[3] = PIPE_SWIZZLE_0;
				break;
			case 4:
				v->fix_fetch[i] = SI_FIX_FETCH_RGBA_64_FLOAT;
				swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */
				swizzle[1] = PIPE_SWIZZLE_Y;
				swizzle[2] = PIPE_SWIZZLE_Z;
				swizzle[3] = PIPE_SWIZZLE_W;
				break;
			default:
				assert(0);
			}
		} else if (channel && desc->nr_channels == 3) {
			/* 3-channel 8/16-bit formats are fetched per
			 * component by the shader. */
			assert(desc->swizzle[0] == PIPE_SWIZZLE_X);

			if (channel->size == 8) {
				if (channel->pure_integer)
					v->fix_fetch[i] = SI_FIX_FETCH_RGB_8_INT;
				else
					v->fix_fetch[i] = SI_FIX_FETCH_RGB_8;
			} else if (channel->size == 16) {
				if (channel->pure_integer)
					v->fix_fetch[i] = SI_FIX_FETCH_RGB_16_INT;
				else
					v->fix_fetch[i] = SI_FIX_FETCH_RGB_16;
			}
		}

		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
				   S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
				   S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
				   S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
				   S_008F0C_NUM_FORMAT(num_format) |
				   S_008F0C_DATA_FORMAT(data_format);
	}
	return v;
}
   4348 
/* pipe_context::bind_vertex_elements_state.
 *
 * Shaders are only flagged for update when something affecting vertex
 * fetch code changed: element count, instance divisor usage, or any
 * fix_fetch workaround.
 */
static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_vertex_elements *old = sctx->vertex_elements;
	struct si_vertex_elements *v = (struct si_vertex_elements*)state;

	sctx->vertex_elements = v;
	sctx->vertex_buffers_dirty = true;

	if (v &&
	    (!old ||
	     old->count != v->count ||
	     old->uses_instance_divisors != v->uses_instance_divisors ||
	     v->uses_instance_divisors || /* we don't check which divisors changed */
	     memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count)))
		sctx->do_update_shaders = true;

	/* Divisors other than 0 and 1 are read by the shader from a
	 * constant buffer; (re)bind it here. */
	if (v && v->instance_divisor_is_fetched) {
		struct pipe_constant_buffer cb;

		cb.buffer = NULL;
		cb.user_buffer = v->instance_divisors;
		cb.buffer_offset = 0;
		cb.buffer_size = sizeof(uint32_t) * v->count;
		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb);
	}
}
   4376 
   4377 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
   4378 {
   4379 	struct si_context *sctx = (struct si_context *)ctx;
   4380 
   4381 	if (sctx->vertex_elements == state)
   4382 		sctx->vertex_elements = NULL;
   4383 	FREE(state);
   4384 }
   4385 
   4386 static void si_set_vertex_buffers(struct pipe_context *ctx,
   4387 				  unsigned start_slot, unsigned count,
   4388 				  const struct pipe_vertex_buffer *buffers)
   4389 {
   4390 	struct si_context *sctx = (struct si_context *)ctx;
   4391 	struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
   4392 	int i;
   4393 
   4394 	assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer));
   4395 
   4396 	if (buffers) {
   4397 		for (i = 0; i < count; i++) {
   4398 			const struct pipe_vertex_buffer *src = buffers + i;
   4399 			struct pipe_vertex_buffer *dsti = dst + i;
   4400 			struct pipe_resource *buf = src->buffer.resource;
   4401 
   4402 			pipe_resource_reference(&dsti->buffer.resource, buf);
   4403 			dsti->buffer_offset = src->buffer_offset;
   4404 			dsti->stride = src->stride;
   4405 			si_context_add_resource_size(ctx, buf);
   4406 			if (buf)
   4407 				r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;
   4408 		}
   4409 	} else {
   4410 		for (i = 0; i < count; i++) {
   4411 			pipe_resource_reference(&dst[i].buffer.resource, NULL);
   4412 		}
   4413 	}
   4414 	sctx->vertex_buffers_dirty = true;
   4415 }
   4416 
   4417 /*
   4418  * Misc
   4419  */
   4420 
   4421 static void si_set_tess_state(struct pipe_context *ctx,
   4422 			      const float default_outer_level[4],
   4423 			      const float default_inner_level[2])
   4424 {
   4425 	struct si_context *sctx = (struct si_context *)ctx;
   4426 	struct pipe_constant_buffer cb;
   4427 	float array[8];
   4428 
   4429 	memcpy(array, default_outer_level, sizeof(float) * 4);
   4430 	memcpy(array+4, default_inner_level, sizeof(float) * 2);
   4431 
   4432 	cb.buffer = NULL;
   4433 	cb.user_buffer = NULL;
   4434 	cb.buffer_size = sizeof(array);
   4435 
   4436 	si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
   4437 			       (void*)array, sizeof(array),
   4438 			       &cb.buffer_offset);
   4439 
   4440 	si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
   4441 	pipe_resource_reference(&cb.buffer, NULL);
   4442 }
   4443 
   4444 static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
   4445 {
   4446 	struct si_context *sctx = (struct si_context *)ctx;
   4447 
   4448 	si_update_fb_dirtiness_after_rendering(sctx);
   4449 
   4450 	/* Multisample surfaces are flushed in si_decompress_textures. */
   4451 	if (sctx->framebuffer.nr_samples <= 1 &&
   4452 	    sctx->framebuffer.state.nr_cbufs)
   4453 		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
   4454 					   sctx->framebuffer.CB_has_shader_readable_metadata);
   4455 }
   4456 
   4457 /* This only ensures coherency for shader image/buffer stores. */
static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
	struct si_context *sctx = (struct si_context *)ctx;

	/* Subsequent commands must wait for all shader invocations to
	 * complete. */
	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
	                 SI_CONTEXT_CS_PARTIAL_FLUSH;

	/* Constant buffers can be read through both scalar (SMEM) and
	 * vector (VMEM) caches; invalidate both. */
	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
				 SI_CONTEXT_INV_VMEM_L1;

	if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
		     PIPE_BARRIER_SHADER_BUFFER |
		     PIPE_BARRIER_TEXTURE |
		     PIPE_BARRIER_IMAGE |
		     PIPE_BARRIER_STREAMOUT_BUFFER |
		     PIPE_BARRIER_GLOBAL_BUFFER)) {
		/* As far as I can tell, L1 contents are written back to L2
		 * automatically at end of shader, but the contents of other
		 * L1 caches might still be stale. */
		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
	}

	if (flags & PIPE_BARRIER_INDEX_BUFFER) {
		/* Indices are read through TC L2 since VI.
		 * L1 isn't used.
		 */
		if (sctx->screen->info.chip_class <= CIK)
			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
	}

	/* MSAA color, any depth and any stencil are flushed in
	 * si_decompress_textures when needed.
	 */
	if (flags & PIPE_BARRIER_FRAMEBUFFER &&
	    sctx->framebuffer.nr_samples <= 1 &&
	    sctx->framebuffer.state.nr_cbufs) {
		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;

		/* CB doesn't go through TC L2 before GFX9; write it back. */
		if (sctx->b.chip_class <= VI)
			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
	}

	/* Indirect buffers use TC L2 on GFX9, but not older hw. */
	if (sctx->screen->info.chip_class <= VI &&
	    flags & PIPE_BARRIER_INDIRECT_BUFFER)
		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
   4508 
   4509 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
   4510 {
   4511 	struct pipe_blend_state blend;
   4512 
   4513 	memset(&blend, 0, sizeof(blend));
   4514 	blend.independent_blend_enable = true;
   4515 	blend.rt[0].colormask = 0xf;
   4516 	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
   4517 }
   4518 
/* Callback assigned to sctx->b.need_gfx_cs_space; the num_dw and
 * include_draw_vbo hints are unused here — si_need_cs_space does the
 * actual check. */
static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
				 bool include_draw_vbo)
{
	si_need_cs_space((struct si_context*)ctx);
}
   4524 
   4525 static void si_init_config(struct si_context *sctx);
   4526 
/* Register all per-context state atoms and pipe_context callbacks
 * implemented in this file, then emit the initial config state. */
void si_init_state_functions(struct si_context *sctx)
{
	/* Atoms owned by shared (../radeon) code. */
	si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
	si_init_external_atom(sctx, &sctx->streamout.begin_atom, &sctx->atoms.s.streamout_begin);
	si_init_external_atom(sctx, &sctx->streamout.enable_atom, &sctx->atoms.s.streamout_enable);
	si_init_external_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors);
	si_init_external_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports);

	/* Atoms emitted by this file. */
	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
	si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
	si_init_atom(sctx, &sctx->dpbb_state, &sctx->atoms.s.dpbb_state, si_emit_dpbb_state);
	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
	si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);

	sctx->b.b.create_blend_state = si_create_blend_state;
	sctx->b.b.bind_blend_state = si_bind_blend_state;
	sctx->b.b.delete_blend_state = si_delete_blend_state;
	sctx->b.b.set_blend_color = si_set_blend_color;

	sctx->b.b.create_rasterizer_state = si_create_rs_state;
	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;

	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;

	/* Internal blend states for decompress/resolve blits. */
	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
	sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
	sctx->custom_blend_eliminate_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
	sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);

	sctx->b.b.set_clip_state = si_set_clip_state;
	sctx->b.b.set_stencil_ref = si_set_stencil_ref;

	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;

	sctx->b.b.create_sampler_state = si_create_sampler_state;
	sctx->b.b.delete_sampler_state = si_delete_sampler_state;

	sctx->b.b.create_sampler_view = si_create_sampler_view;
	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;

	sctx->b.b.set_sample_mask = si_set_sample_mask;

	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;

	sctx->b.b.texture_barrier = si_texture_barrier;
	sctx->b.b.memory_barrier = si_memory_barrier;
	sctx->b.b.set_min_samples = si_set_min_samples;
	sctx->b.b.set_tess_state = si_set_tess_state;

	sctx->b.b.set_active_query_state = si_set_active_query_state;
	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
	sctx->b.save_qbo_state = si_save_qbo_state;
	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;

	sctx->b.b.draw_vbo = si_draw_vbo;

	si_init_config(sctx);
}
   4598 
/* Register screen-level (context-independent) state callbacks. */
void si_init_screen_state_functions(struct si_screen *sscreen)
{
	sscreen->b.is_format_supported = si_is_format_supported;
}
   4603 
   4604 static void si_set_grbm_gfx_index(struct si_context *sctx,
   4605 				  struct si_pm4_state *pm4,  unsigned value)
   4606 {
   4607 	unsigned reg = sctx->b.chip_class >= CIK ? R_030800_GRBM_GFX_INDEX :
   4608 						   R_00802C_GRBM_GFX_INDEX;
   4609 	si_pm4_set_reg(pm4, reg, value);
   4610 }
   4611 
   4612 static void si_set_grbm_gfx_index_se(struct si_context *sctx,
   4613 				     struct si_pm4_state *pm4, unsigned se)
   4614 {
   4615 	assert(se == ~0 || se < sctx->screen->info.max_se);
   4616 	si_set_grbm_gfx_index(sctx, pm4,
   4617 			      (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) :
   4618 					  S_030800_SE_INDEX(se)) |
   4619 			      S_030800_SH_BROADCAST_WRITES(1) |
   4620 			      S_030800_INSTANCE_BROADCAST_WRITES(1));
   4621 }
   4622 
/* Program PA_SC_RASTER_CONFIG (and PA_SC_RASTER_CONFIG_1 on CIK+) for chips
 * where some render backends (RBs) are harvested, i.e. rb_mask has holes.
 *
 * The incoming raster_config values assume all RBs are present, so every
 * field that would route work to a missing SE / packer / RB is remapped to
 * a live one. The per-SE config is written by selecting each SE in turn
 * through GRBM_GFX_INDEX; broadcast mode is restored afterwards.
 */
static void
si_write_harvested_raster_configs(struct si_context *sctx,
				  struct si_pm4_state *pm4,
				  unsigned raster_config,
				  unsigned raster_config_1)
{
	unsigned sh_per_se = MAX2(sctx->screen->info.max_sh_per_se, 1);
	unsigned num_se = MAX2(sctx->screen->info.max_se, 1);
	unsigned rb_mask = sctx->screen->info.enabled_rb_mask;
	unsigned num_rb = MIN2(sctx->screen->info.num_render_backends, 16);
	unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Mask of physically-possible RBs per SE (SE i owns bits
	 * [i*rb_per_se, (i+1)*rb_per_se)), then intersected with the
	 * actually-enabled RB mask below. */
	se_mask[0] = ((1 << rb_per_se) - 1);
	se_mask[1] = (se_mask[0] << rb_per_se);
	se_mask[2] = (se_mask[1] << rb_per_se);
	se_mask[3] = (se_mask[2] << rb_per_se);

	se_mask[0] &= rb_mask;
	se_mask[1] &= rb_mask;
	se_mask[2] &= rb_mask;
	se_mask[3] &= rb_mask;

	assert(num_se == 1 || num_se == 2 || num_se == 4);
	assert(sh_per_se == 1 || sh_per_se == 2);
	assert(rb_per_pkr == 1 || rb_per_pkr == 2);

	/* XXX: I can't figure out what the *_XSEL and *_YSEL
	 * fields are for, so I'm leaving them as their default
	 * values. */

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* SEs are paired; idx is the first SE of this SE's pair. */
		int idx = (se / 2) * 2;

		/* If one SE of the pair has no enabled RBs, remap SE_MAP
		 * so work lands on the live SE. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= C_028350_SE_MAP;

			if (!se_mask[idx]) {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |=
					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea for the two packers within this SE. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= C_028350_PKR_MAP;

			if (!pkr0_mask) {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |=
					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* Remap RB_MAP_PKR0 if one of packer 0's two RBs is
			 * missing. */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= C_028350_RB_MAP_PKR0;

				if (!rb0_mask) {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* And likewise for packer 1's RBs (only exists when
			 * there are more than 2 RBs per SE). */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= C_028350_RB_MAP_PKR1;

					if (!rb0_mask) {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* Write this SE's config, then restore broadcast mode. */
		si_set_grbm_gfx_index_se(sctx, pm4, se);
		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
	}
	si_set_grbm_gfx_index(sctx, pm4, ~0);

	if (sctx->b.chip_class >= CIK) {
		/* RASTER_CONFIG_1 maps SE pairs; redirect if a whole pair is
		 * harvested. It is broadcast to all SEs. */
		if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
		                     (!se_mask[2] && !se_mask[3]))) {
			raster_config_1 &= C_028354_SE_PAIR_MAP;

			if (!se_mask[0] && !se_mask[1]) {
				raster_config_1 |=
					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
			} else {
				raster_config_1 |=
					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
			}
		}

		si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
	}
}
   4747 
   4748 static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4)
   4749 {
   4750 	struct si_screen *sscreen = sctx->screen;
   4751 	unsigned num_rb = MIN2(sctx->screen->info.num_render_backends, 16);
   4752 	unsigned rb_mask = sctx->screen->info.enabled_rb_mask;
   4753 	unsigned raster_config, raster_config_1;
   4754 
   4755 	switch (sctx->b.family) {
   4756 	case CHIP_TAHITI:
   4757 	case CHIP_PITCAIRN:
   4758 		raster_config = 0x2a00126a;
   4759 		raster_config_1 = 0x00000000;
   4760 		break;
   4761 	case CHIP_VERDE:
   4762 		raster_config = 0x0000124a;
   4763 		raster_config_1 = 0x00000000;
   4764 		break;
   4765 	case CHIP_OLAND:
   4766 		raster_config = 0x00000082;
   4767 		raster_config_1 = 0x00000000;
   4768 		break;
   4769 	case CHIP_HAINAN:
   4770 		raster_config = 0x00000000;
   4771 		raster_config_1 = 0x00000000;
   4772 		break;
   4773 	case CHIP_BONAIRE:
   4774 		raster_config = 0x16000012;
   4775 		raster_config_1 = 0x00000000;
   4776 		break;
   4777 	case CHIP_HAWAII:
   4778 		raster_config = 0x3a00161a;
   4779 		raster_config_1 = 0x0000002e;
   4780 		break;
   4781 	case CHIP_FIJI:
   4782 		if (sscreen->info.cik_macrotile_mode_array[0] == 0x000000e8) {
   4783 			/* old kernels with old tiling config */
   4784 			raster_config = 0x16000012;
   4785 			raster_config_1 = 0x0000002a;
   4786 		} else {
   4787 			raster_config = 0x3a00161a;
   4788 			raster_config_1 = 0x0000002e;
   4789 		}
   4790 		break;
   4791 	case CHIP_POLARIS10:
   4792 		raster_config = 0x16000012;
   4793 		raster_config_1 = 0x0000002a;
   4794 		break;
   4795 	case CHIP_POLARIS11:
   4796 	case CHIP_POLARIS12:
   4797 		raster_config = 0x16000012;
   4798 		raster_config_1 = 0x00000000;
   4799 		break;
   4800 	case CHIP_TONGA:
   4801 		raster_config = 0x16000012;
   4802 		raster_config_1 = 0x0000002a;
   4803 		break;
   4804 	case CHIP_ICELAND:
   4805 		if (num_rb == 1)
   4806 			raster_config = 0x00000000;
   4807 		else
   4808 			raster_config = 0x00000002;
   4809 		raster_config_1 = 0x00000000;
   4810 		break;
   4811 	case CHIP_CARRIZO:
   4812 		raster_config = 0x00000002;
   4813 		raster_config_1 = 0x00000000;
   4814 		break;
   4815 	case CHIP_KAVERI:
   4816 		/* KV should be 0x00000002, but that causes problems with radeon */
   4817 		raster_config = 0x00000000; /* 0x00000002 */
   4818 		raster_config_1 = 0x00000000;
   4819 		break;
   4820 	case CHIP_KABINI:
   4821 	case CHIP_MULLINS:
   4822 	case CHIP_STONEY:
   4823 		raster_config = 0x00000000;
   4824 		raster_config_1 = 0x00000000;
   4825 		break;
   4826 	default:
   4827 		fprintf(stderr,
   4828 			"radeonsi: Unknown GPU, using 0 for raster_config\n");
   4829 		raster_config = 0x00000000;
   4830 		raster_config_1 = 0x00000000;
   4831 	}
   4832 
   4833 	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
   4834 		/* Always use the default config when all backends are enabled
   4835 		 * (or when we failed to determine the enabled backends).
   4836 		 */
   4837 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
   4838 			       raster_config);
   4839 		if (sctx->b.chip_class >= CIK)
   4840 			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
   4841 				       raster_config_1);
   4842 	} else {
   4843 		si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
   4844 	}
   4845 }
   4846 
/* Build the initial GPU register state (sctx->init_config): a PM4 state
 * uploaded as an indirect buffer and kept for the context's lifetime.
 * Registers that CLEAR_STATE already initializes correctly are skipped
 * when the chip supports it. */
static void si_init_config(struct si_context *sctx)
{
	struct si_screen *sscreen = sctx->screen;
	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
	bool has_clear_state = sscreen->has_clear_state;
	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);

	/* Only SI can disable CLEAR_STATE for now. */
	assert(has_clear_state || sscreen->info.chip_class == SI);

	if (!pm4)
		return;

	/* Enable state load and shadowing via CONTEXT_CONTROL. */
	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
	si_pm4_cmd_end(pm4, false);

	/* Reset the context registers to their defaults where supported. */
	if (has_clear_state) {
		si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE);
		si_pm4_cmd_add(pm4, 0);
		si_pm4_cmd_end(pm4, false);
	}

	if (sctx->b.chip_class <= VI)
		si_set_raster_config(sctx, pm4);

	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
	if (!has_clear_state)
		si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

	/* FIXME calculate these values somehow ??? */
	if (sctx->b.chip_class <= VI) {
		si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
		si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
	}

	if (!has_clear_state) {
		si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
		si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
		si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
	}

	si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
	if (!has_clear_state)
		si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
	if (sctx->b.chip_class < CIK)
		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
			       S_008A14_CLIP_VTX_REORDER_ENA(1));

	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);

	if (!has_clear_state)
		si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);

	/* CLEAR_STATE doesn't clear these correctly on certain generations.
	 * I don't know why. Deduced by trial and error.
	 */
	if (sctx->b.chip_class <= CIK) {
		si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
		si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
		si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
		si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
			       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
		si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
		si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
			       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
	}

	if (!has_clear_state) {
		si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
		si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
			       S_028230_ER_TRI(0xA) |
			       S_028230_ER_POINT(0xA) |
			       S_028230_ER_RECT(0xA) |
			       /* Required by DX10_DIAMOND_TEST_ENA: */
			       S_028230_ER_LINE_LR(0x1A) |
			       S_028230_ER_LINE_RL(0x26) |
			       S_028230_ER_LINE_TB(0xA) |
			       S_028230_ER_LINE_BT(0xA));
		/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
		si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
		si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
		si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
		si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
		si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
		si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
	}

	/* VGT index bounds/offset registers moved on GFX9. */
	if (sctx->b.chip_class >= GFX9) {
		si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
		si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
		si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
	} else {
		/* These registers, when written, also overwrite the CLEAR_STATE
		 * context, so we can't rely on CLEAR_STATE setting them.
		 * It would be an issue if there was another UMD changing them.
		 */
		si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
		si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
		si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
	}

	if (sctx->b.chip_class >= CIK) {
		/* Enable all CUs and cap the wave limit for each shader
		 * stage; the HS/LS/ES stage registers differ on GFX9 because
		 * of the merged-shader layout. */
		if (sctx->b.chip_class >= GFX9) {
			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
				       S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
		} else {
			si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
				       S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
				       S_00B41C_WAVE_LIMIT(0x3F));
			si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
				       S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));

			/* If this is 0, Bonaire can hang even if GS isn't being used.
			 * Other chips are unaffected. These are suboptimal values,
			 * but we don't use on-chip GS.
			 */
			si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
				       S_028A44_ES_VERTS_PER_SUBGRP(64) |
				       S_028A44_GS_PRIMS_PER_SUBGRP(4));
		}
		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
			       S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));

		/* Compute LATE_ALLOC_VS.LIMIT. */
		unsigned num_cu_per_sh = sscreen->info.num_good_compute_units /
					 (sscreen->info.max_se *
					  sscreen->info.max_sh_per_se);
		unsigned late_alloc_limit; /* The limit is per SH. */

		if (sctx->b.family == CHIP_KABINI) {
			late_alloc_limit = 0; /* Potential hang on Kabini. */
		} else if (num_cu_per_sh <= 4) {
			/* Too few available compute units per SH. Disallowing
			 * VS to run on one CU could hurt us more than late VS
			 * allocation would help.
			 *
			 * 2 is the highest safe number that allows us to keep
			 * all CUs enabled.
			 */
			late_alloc_limit = 2;
		} else {
			/* This is a good initial value, allowing 1 late_alloc
			 * wave per SIMD on num_cu - 2.
			 */
			late_alloc_limit = (num_cu_per_sh - 2) * 4;

			/* The limit is 0-based, so 0 means 1. */
			assert(late_alloc_limit > 0 && late_alloc_limit <= 64);
			late_alloc_limit -= 1;
		}

		/* VS can't execute on one CU if the limit is > 2. */
		si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
			       S_00B118_CU_EN(late_alloc_limit > 2 ? 0xfffe : 0xffff) |
			       S_00B118_WAVE_LIMIT(0x3F));
		si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS,
			       S_00B11C_LIMIT(late_alloc_limit));
		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
			       S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
	}

	if (sctx->b.chip_class >= VI) {
		unsigned vgt_tess_distribution;

		vgt_tess_distribution =
			S_028B50_ACCUM_ISOLINE(32) |
			S_028B50_ACCUM_TRI(11) |
			S_028B50_ACCUM_QUAD(11) |
			S_028B50_DONUT_SPLIT(16);

		/* Testing with Unigine Heaven extreme tesselation yielded best results
		 * with TRAP_SPLIT = 3.
		 */
		if (sctx->b.family == CHIP_FIJI ||
		    sctx->b.family >= CHIP_POLARIS10)
			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);

		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
	} else if (!has_clear_state) {
		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
	}

	/* Point the texture unit at the border color buffer and keep the
	 * buffer referenced by this state. */
	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
	if (sctx->b.chip_class >= CIK)
		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
		      RADEON_PRIO_BORDER_COLORS);

	if (sctx->b.chip_class >= GFX9) {
		unsigned num_se = sscreen->info.max_se;
		unsigned pc_lines = 0;

		/* Parameter-cache line count per chip, used to size the
		 * binner allocation below. */
		switch (sctx->b.family) {
		case CHIP_VEGA10:
			pc_lines = 4096;
			break;
		case CHIP_RAVEN:
			pc_lines = 1024;
			break;
		default:
			assert(0);
		}

		si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
			       S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
			       S_028C48_MAX_PRIM_PER_BATCH(1023));
		si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
			       S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
		si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
	}

	si_pm4_upload_indirect_buffer(sctx, pm4);
	sctx->init_config = pm4;
}
   5066