Home | History | Annotate | Download | only in radeonsi
      1 /*
      2  * Copyright 2012 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Christian König <christian.koenig (at) amd.com>
     25  */
     26 
     27 #include "si_pipe.h"
     28 #include "sid.h"
     29 #include "radeon/r600_cs.h"
     30 #include "radeon/r600_query.h"
     31 
     32 #include "util/u_dual_blend.h"
     33 #include "util/u_format.h"
     34 #include "util/u_format_s3tc.h"
     35 #include "util/u_memory.h"
     36 #include "util/u_resource.h"
     37 
     38 /* Initialize an external atom (owned by ../radeon). */
     39 static void
     40 si_init_external_atom(struct si_context *sctx, struct r600_atom *atom,
     41 		      struct r600_atom **list_elem)
     42 {
     43 	atom->id = list_elem - sctx->atoms.array + 1;
     44 	*list_elem = atom;
     45 }
     46 
     47 /* Initialize an atom owned by radeonsi.  */
     48 void si_init_atom(struct si_context *sctx, struct r600_atom *atom,
     49 		  struct r600_atom **list_elem,
     50 		  void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
     51 {
     52 	atom->emit = (void*)emit_func;
     53 	atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */
     54 	*list_elem = atom;
     55 }
     56 
     57 static unsigned si_map_swizzle(unsigned swizzle)
     58 {
     59 	switch (swizzle) {
     60 	case PIPE_SWIZZLE_Y:
     61 		return V_008F0C_SQ_SEL_Y;
     62 	case PIPE_SWIZZLE_Z:
     63 		return V_008F0C_SQ_SEL_Z;
     64 	case PIPE_SWIZZLE_W:
     65 		return V_008F0C_SQ_SEL_W;
     66 	case PIPE_SWIZZLE_0:
     67 		return V_008F0C_SQ_SEL_0;
     68 	case PIPE_SWIZZLE_1:
     69 		return V_008F0C_SQ_SEL_1;
     70 	default: /* PIPE_SWIZZLE_X */
     71 		return V_008F0C_SQ_SEL_X;
     72 	}
     73 }
     74 
     75 static uint32_t S_FIXED(float value, uint32_t frac_bits)
     76 {
     77 	return value * (1 << frac_bits);
     78 }
     79 
     80 /* 12.4 fixed-point */
     81 static unsigned si_pack_float_12p4(float x)
     82 {
     83 	return x <= 0    ? 0 :
     84 	       x >= 4096 ? 0xffff : x * 16;
     85 }
     86 
/*
 * Inferred framebuffer and blender state.
 *
 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending
 * if there is not enough PS outputs.
 *
 * The mask starts from the framebuffer's enabled-colorbuffer bits (4 bits
 * per colorbuffer) and is narrowed by the queued blend state's
 * cb_target_mask when a blend state is bound. On CHIP_STONEY the function
 * additionally derives and emits SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON
 * and SX_BLEND_OPT_CONTROL from the bound colorbuffers and the pixel
 * shader's export formats.
 */
static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_state_blend *blend = sctx->queued.named.blend;
	/* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
	 * but you never know. */
	uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit;
	unsigned i;

	if (blend)
		cb_target_mask &= blend->cb_target_mask;

	/* Avoid a hang that happens when dual source blending is enabled
	 * but there is not enough color outputs. This is undefined behavior,
	 * so disable color writes completely.
	 *
	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
	 */
	if (blend && blend->dual_src_blend &&
	    sctx->ps_shader.cso &&
	    (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
		cb_target_mask = 0;

	radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);

	/* STONEY-specific register settings. */
	if (sctx->b.family == CHIP_STONEY) {
		/* NOTE(review): the guard tests ps_shader.cso but the value is
		 * read through ps_shader.current - presumably current is always
		 * set whenever cso is; confirm against the shader binding code.
		 */
		unsigned spi_shader_col_format =
			sctx->ps_shader.cso ?
			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
		unsigned sx_ps_downconvert = 0;
		unsigned sx_blend_opt_epsilon = 0;
		unsigned sx_blend_opt_control = 0;

		/* Each colorbuffer i owns the 4-bit field at bit (i * 4) in
		 * every value accumulated by this loop.
		 */
		for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
			struct r600_surface *surf =
				(struct r600_surface*)sctx->framebuffer.state.cbufs[i];
			unsigned format, swap, spi_format, colormask;
			bool has_alpha, has_rgb;

			/* Unbound slot: leave all of its fields at zero. */
			if (!surf)
				continue;

			format = G_028C70_FORMAT(surf->cb_color_info);
			swap = G_028C70_COMP_SWAP(surf->cb_color_info);
			spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
			colormask = (cb_target_mask >> (i * 4)) & 0xf;

			/* Set if RGB and A are present. */
			has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);

			/* A single-channel format holds either alpha or color,
			 * never both.
			 */
			if (format == V_028C70_COLOR_8 ||
			    format == V_028C70_COLOR_16 ||
			    format == V_028C70_COLOR_32)
				has_rgb = !has_alpha;
			else
				has_rgb = true;

			/* Check the colormask and export format. */
			if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
				has_rgb = false;
			if (!(colormask & PIPE_MASK_A))
				has_alpha = false;

			/* Nothing is exported by the shader for this target. */
			if (spi_format == V_028714_SPI_SHADER_ZERO) {
				has_rgb = false;
				has_alpha = false;
			}

			/* Disable value checking for disabled channels. */
			if (!has_rgb)
				sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
			if (!has_alpha)
				sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);

			/* Enable down-conversion for 32bpp and smaller formats.
			 * Formats not listed below leave the fields at zero.
			 */
			switch (format) {
			case V_028C70_COLOR_8:
			case V_028C70_COLOR_8_8:
			case V_028C70_COLOR_8_8_8_8:
				/* For 1 and 2-channel formats, use the superset thereof. */
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_5_6_5:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_1_5_5_5:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_4_4_4_4:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_32:
				/* Only the R-only and A-only layouts are handled;
				 * no epsilon is set for this format.
				 */
				if (swap == V_0280A0_SWAP_STD &&
				    spi_format == V_028714_SPI_SHADER_32_R)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
				else if (swap == V_0280A0_SWAP_ALT_REV &&
					 spi_format == V_028714_SPI_SHADER_32_AR)
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
				break;

			case V_028C70_COLOR_16:
			case V_028C70_COLOR_16_16:
				/* For 1-channel formats, use the superset thereof. */
				if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
				    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
					/* The component swap selects GR or AR. */
					if (swap == V_0280A0_SWAP_STD ||
					    swap == V_0280A0_SWAP_STD_REV)
						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
					else
						sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
				}
				break;

			case V_028C70_COLOR_10_11_11:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
				}
				break;

			case V_028C70_COLOR_2_10_10_10:
				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
					sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
				}
				break;
			}
		}

		/* The DBG_NO_RB_PLUS debug flag zeroes all three registers. */
		if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
			sx_ps_downconvert = 0;
			sx_blend_opt_epsilon = 0;
			sx_blend_opt_control = 0;
		}

		/* The three registers are written as one sequence. */
		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
		radeon_emit(cs, sx_ps_downconvert);	/* R_028754_SX_PS_DOWNCONVERT */
		radeon_emit(cs, sx_blend_opt_epsilon);	/* R_028758_SX_BLEND_OPT_EPSILON */
		radeon_emit(cs, sx_blend_opt_control);	/* R_02875C_SX_BLEND_OPT_CONTROL */
	}
}
    255 
    256 /*
    257  * Blender functions
    258  */
    259 
    260 static uint32_t si_translate_blend_function(int blend_func)
    261 {
    262 	switch (blend_func) {
    263 	case PIPE_BLEND_ADD:
    264 		return V_028780_COMB_DST_PLUS_SRC;
    265 	case PIPE_BLEND_SUBTRACT:
    266 		return V_028780_COMB_SRC_MINUS_DST;
    267 	case PIPE_BLEND_REVERSE_SUBTRACT:
    268 		return V_028780_COMB_DST_MINUS_SRC;
    269 	case PIPE_BLEND_MIN:
    270 		return V_028780_COMB_MIN_DST_SRC;
    271 	case PIPE_BLEND_MAX:
    272 		return V_028780_COMB_MAX_DST_SRC;
    273 	default:
    274 		R600_ERR("Unknown blend function %d\n", blend_func);
    275 		assert(0);
    276 		break;
    277 	}
    278 	return 0;
    279 }
    280 
    281 static uint32_t si_translate_blend_factor(int blend_fact)
    282 {
    283 	switch (blend_fact) {
    284 	case PIPE_BLENDFACTOR_ONE:
    285 		return V_028780_BLEND_ONE;
    286 	case PIPE_BLENDFACTOR_SRC_COLOR:
    287 		return V_028780_BLEND_SRC_COLOR;
    288 	case PIPE_BLENDFACTOR_SRC_ALPHA:
    289 		return V_028780_BLEND_SRC_ALPHA;
    290 	case PIPE_BLENDFACTOR_DST_ALPHA:
    291 		return V_028780_BLEND_DST_ALPHA;
    292 	case PIPE_BLENDFACTOR_DST_COLOR:
    293 		return V_028780_BLEND_DST_COLOR;
    294 	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    295 		return V_028780_BLEND_SRC_ALPHA_SATURATE;
    296 	case PIPE_BLENDFACTOR_CONST_COLOR:
    297 		return V_028780_BLEND_CONSTANT_COLOR;
    298 	case PIPE_BLENDFACTOR_CONST_ALPHA:
    299 		return V_028780_BLEND_CONSTANT_ALPHA;
    300 	case PIPE_BLENDFACTOR_ZERO:
    301 		return V_028780_BLEND_ZERO;
    302 	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    303 		return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
    304 	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    305 		return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
    306 	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
    307 		return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
    308 	case PIPE_BLENDFACTOR_INV_DST_COLOR:
    309 		return V_028780_BLEND_ONE_MINUS_DST_COLOR;
    310 	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
    311 		return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
    312 	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    313 		return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
    314 	case PIPE_BLENDFACTOR_SRC1_COLOR:
    315 		return V_028780_BLEND_SRC1_COLOR;
    316 	case PIPE_BLENDFACTOR_SRC1_ALPHA:
    317 		return V_028780_BLEND_SRC1_ALPHA;
    318 	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
    319 		return V_028780_BLEND_INV_SRC1_COLOR;
    320 	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
    321 		return V_028780_BLEND_INV_SRC1_ALPHA;
    322 	default:
    323 		R600_ERR("Bad blend factor %d not supported!\n", blend_fact);
    324 		assert(0);
    325 		break;
    326 	}
    327 	return 0;
    328 }
    329 
    330 static uint32_t si_translate_blend_opt_function(int blend_func)
    331 {
    332 	switch (blend_func) {
    333 	case PIPE_BLEND_ADD:
    334 		return V_028760_OPT_COMB_ADD;
    335 	case PIPE_BLEND_SUBTRACT:
    336 		return V_028760_OPT_COMB_SUBTRACT;
    337 	case PIPE_BLEND_REVERSE_SUBTRACT:
    338 		return V_028760_OPT_COMB_REVSUBTRACT;
    339 	case PIPE_BLEND_MIN:
    340 		return V_028760_OPT_COMB_MIN;
    341 	case PIPE_BLEND_MAX:
    342 		return V_028760_OPT_COMB_MAX;
    343 	default:
    344 		return V_028760_OPT_COMB_BLEND_DISABLED;
    345 	}
    346 }
    347 
    348 static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
    349 {
    350 	switch (blend_fact) {
    351 	case PIPE_BLENDFACTOR_ZERO:
    352 		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
    353 	case PIPE_BLENDFACTOR_ONE:
    354 		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
    355 	case PIPE_BLENDFACTOR_SRC_COLOR:
    356 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
    357 				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
    358 	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    359 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
    360 				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
    361 	case PIPE_BLENDFACTOR_SRC_ALPHA:
    362 		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
    363 	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    364 		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
    365 	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    366 		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
    367 				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
    368 	default:
    369 		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
    370 	}
    371 }
    372 
    373 /**
    374  * Get rid of DST in the blend factors by commuting the operands:
    375  *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
    376  */
    377 static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
    378 				unsigned *dst_factor, unsigned expected_dst,
    379 				unsigned replacement_src)
    380 {
    381 	if (*src_factor == expected_dst &&
    382 	    *dst_factor == PIPE_BLENDFACTOR_ZERO) {
    383 		*src_factor = PIPE_BLENDFACTOR_ZERO;
    384 		*dst_factor = replacement_src;
    385 
    386 		/* Commuting the operands requires reversing subtractions. */
    387 		if (*func == PIPE_BLEND_SUBTRACT)
    388 			*func = PIPE_BLEND_REVERSE_SUBTRACT;
    389 		else if (*func == PIPE_BLEND_REVERSE_SUBTRACT)
    390 			*func = PIPE_BLEND_SUBTRACT;
    391 	}
    392 }
    393 
    394 static bool si_blend_factor_uses_dst(unsigned factor)
    395 {
    396 	return factor == PIPE_BLENDFACTOR_DST_COLOR ||
    397 		factor == PIPE_BLENDFACTOR_DST_ALPHA ||
    398 		factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
    399 		factor == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
    400 		factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
    401 }
    402 
    403 static void *si_create_blend_state_mode(struct pipe_context *ctx,
    404 					const struct pipe_blend_state *state,
    405 					unsigned mode)
    406 {
    407 	struct si_context *sctx = (struct si_context*)ctx;
    408 	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
    409 	struct si_pm4_state *pm4 = &blend->pm4;
    410 	uint32_t sx_mrt_blend_opt[8] = {0};
    411 	uint32_t color_control = 0;
    412 
    413 	if (!blend)
    414 		return NULL;
    415 
    416 	blend->alpha_to_coverage = state->alpha_to_coverage;
    417 	blend->alpha_to_one = state->alpha_to_one;
    418 	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
    419 
    420 	if (state->logicop_enable) {
    421 		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
    422 	} else {
    423 		color_control |= S_028808_ROP3(0xcc);
    424 	}
    425 
    426 	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
    427 		       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
    428 		       S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
    429 		       S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
    430 		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
    431 		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
    432 
    433 	if (state->alpha_to_coverage)
    434 		blend->need_src_alpha_4bit |= 0xf;
    435 
    436 	blend->cb_target_mask = 0;
    437 	for (int i = 0; i < 8; i++) {
    438 		/* state->rt entries > 0 only written if independent blending */
    439 		const int j = state->independent_blend_enable ? i : 0;
    440 
    441 		unsigned eqRGB = state->rt[j].rgb_func;
    442 		unsigned srcRGB = state->rt[j].rgb_src_factor;
    443 		unsigned dstRGB = state->rt[j].rgb_dst_factor;
    444 		unsigned eqA = state->rt[j].alpha_func;
    445 		unsigned srcA = state->rt[j].alpha_src_factor;
    446 		unsigned dstA = state->rt[j].alpha_dst_factor;
    447 
    448 		unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
    449 		unsigned blend_cntl = 0;
    450 
    451 		sx_mrt_blend_opt[i] =
    452 			S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
    453 			S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
    454 
    455 		/* Only set dual source blending for MRT0 to avoid a hang. */
    456 		if (i >= 1 && blend->dual_src_blend) {
    457 			/* Vulkan does this for dual source blending. */
    458 			if (i == 1)
    459 				blend_cntl |= S_028780_ENABLE(1);
    460 
    461 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
    462 			continue;
    463 		}
    464 
    465 		/* Only addition and subtraction equations are supported with
    466 		 * dual source blending.
    467 		 */
    468 		if (blend->dual_src_blend &&
    469 		    (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
    470 		     eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
    471 			assert(!"Unsupported equation for dual source blending");
    472 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
    473 			continue;
    474 		}
    475 
    476 		/* cb_render_state will disable unused ones */
    477 		blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
    478 
    479 		if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
    480 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
    481 			continue;
    482 		}
    483 
    484 		/* Blending optimizations for Stoney.
    485 		 * These transformations don't change the behavior.
    486 		 *
    487 		 * First, get rid of DST in the blend factors:
    488 		 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
    489 		 */
    490 		si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
    491 				    PIPE_BLENDFACTOR_DST_COLOR,
    492 				    PIPE_BLENDFACTOR_SRC_COLOR);
    493 		si_blend_remove_dst(&eqA, &srcA, &dstA,
    494 				    PIPE_BLENDFACTOR_DST_COLOR,
    495 				    PIPE_BLENDFACTOR_SRC_COLOR);
    496 		si_blend_remove_dst(&eqA, &srcA, &dstA,
    497 				    PIPE_BLENDFACTOR_DST_ALPHA,
    498 				    PIPE_BLENDFACTOR_SRC_ALPHA);
    499 
    500 		/* Look up the ideal settings from tables. */
    501 		srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
    502 		dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
    503 		srcA_opt = si_translate_blend_opt_factor(srcA, true);
    504 		dstA_opt = si_translate_blend_opt_factor(dstA, true);
    505 
    506 		/* Handle interdependencies. */
    507 		if (si_blend_factor_uses_dst(srcRGB))
    508 			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
    509 		if (si_blend_factor_uses_dst(srcA))
    510 			dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
    511 
    512 		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
    513 		    (dstRGB == PIPE_BLENDFACTOR_ZERO ||
    514 		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
    515 		     dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
    516 			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
    517 
    518 		/* Set the final value. */
    519 		sx_mrt_blend_opt[i] =
    520 			S_028760_COLOR_SRC_OPT(srcRGB_opt) |
    521 			S_028760_COLOR_DST_OPT(dstRGB_opt) |
    522 			S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
    523 			S_028760_ALPHA_SRC_OPT(srcA_opt) |
    524 			S_028760_ALPHA_DST_OPT(dstA_opt) |
    525 			S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
    526 
    527 		/* Set blend state. */
    528 		blend_cntl |= S_028780_ENABLE(1);
    529 		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
    530 		blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
    531 		blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
    532 
    533 		if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
    534 			blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
    535 			blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
    536 			blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
    537 			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
    538 		}
    539 		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
    540 
    541 		blend->blend_enable_4bit |= 0xfu << (i * 4);
    542 
    543 		/* This is only important for formats without alpha. */
    544 		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
    545 		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
    546 		    srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
    547 		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
    548 		    srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
    549 		    dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
    550 			blend->need_src_alpha_4bit |= 0xfu << (i * 4);
    551 	}
    552 
    553 	if (blend->cb_target_mask) {
    554 		color_control |= S_028808_MODE(mode);
    555 	} else {
    556 		color_control |= S_028808_MODE(V_028808_CB_DISABLE);
    557 	}
    558 
    559 	if (sctx->b.family == CHIP_STONEY) {
    560 		/* Disable RB+ blend optimizations for dual source blending.
    561 		 * Vulkan does this.
    562 		 */
    563 		if (blend->dual_src_blend) {
    564 			for (int i = 0; i < 8; i++) {
    565 				sx_mrt_blend_opt[i] =
    566 					S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
    567 					S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
    568 			}
    569 		}
    570 
    571 		for (int i = 0; i < 8; i++)
    572 			si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
    573 				       sx_mrt_blend_opt[i]);
    574 
    575 		/* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
    576 		if (blend->dual_src_blend || state->logicop_enable ||
    577 		    mode == V_028808_CB_RESOLVE)
    578 			color_control |= S_028808_DISABLE_DUAL_QUAD(1);
    579 	}
    580 
    581 	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
    582 	return blend;
    583 }
    584 
    585 static void *si_create_blend_state(struct pipe_context *ctx,
    586 				   const struct pipe_blend_state *state)
    587 {
    588 	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL);
    589 }
    590 
    591 static void si_bind_blend_state(struct pipe_context *ctx, void *state)
    592 {
    593 	struct si_context *sctx = (struct si_context *)ctx;
    594 	si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
    595 	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
    596 	sctx->do_update_shaders = true;
    597 }
    598 
    599 static void si_delete_blend_state(struct pipe_context *ctx, void *state)
    600 {
    601 	struct si_context *sctx = (struct si_context *)ctx;
    602 	si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
    603 }
    604 
    605 static void si_set_blend_color(struct pipe_context *ctx,
    606 			       const struct pipe_blend_color *state)
    607 {
    608 	struct si_context *sctx = (struct si_context *)ctx;
    609 
    610 	if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0)
    611 		return;
    612 
    613 	sctx->blend_color.state = *state;
    614 	si_mark_atom_dirty(sctx, &sctx->blend_color.atom);
    615 }
    616 
    617 static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
    618 {
    619 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
    620 
    621 	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
    622 	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
    623 }
    624 
    625 /*
    626  * Clipping
    627  */
    628 
    629 static void si_set_clip_state(struct pipe_context *ctx,
    630 			      const struct pipe_clip_state *state)
    631 {
    632 	struct si_context *sctx = (struct si_context *)ctx;
    633 	struct pipe_constant_buffer cb;
    634 
    635 	if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0)
    636 		return;
    637 
    638 	sctx->clip_state.state = *state;
    639 	si_mark_atom_dirty(sctx, &sctx->clip_state.atom);
    640 
    641 	cb.buffer = NULL;
    642 	cb.user_buffer = state->ucp;
    643 	cb.buffer_offset = 0;
    644 	cb.buffer_size = 4*4*8;
    645 	si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb);
    646 	pipe_resource_reference(&cb.buffer, NULL);
    647 }
    648 
    649 static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
    650 {
    651 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
    652 
    653 	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
    654 	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
    655 }
    656 
    657 #define SIX_BITS 0x3F
    658 
    659 static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
    660 {
    661 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
    662 	struct si_shader *vs = si_get_vs_state(sctx);
    663 	struct tgsi_shader_info *info = si_get_vs_info(sctx);
    664 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
    665 	unsigned window_space =
    666 	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
    667 	unsigned clipdist_mask =
    668 		info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask;
    669 	unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS;
    670 	unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance;
    671 	unsigned total_mask;
    672 	bool misc_vec_ena;
    673 
    674 	if (vs->key.opt.hw_vs.clip_disable) {
    675 		assert(!info->culldist_writemask);
    676 		clipdist_mask = 0;
    677 		culldist_mask = 0;
    678 	}
    679 	total_mask = clipdist_mask | culldist_mask;
    680 
    681 	/* Clip distances on points have no effect, so need to be implemented
    682 	 * as cull distances. This applies for the clipvertex case as well.
    683 	 *
    684 	 * Setting this for primitives other than points should have no adverse
    685 	 * effects.
    686 	 */
    687 	clipdist_mask &= rs->clip_plane_enable;
    688 	culldist_mask |= clipdist_mask;
    689 
    690 	misc_vec_ena = info->writes_psize || info->writes_edgeflag ||
    691 		       info->writes_layer || info->writes_viewport_index;
    692 
    693 	radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
    694 		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
    695 		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
    696 		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
    697 	        S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
    698 		S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
    699 		S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
    700 		S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
    701 		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
    702 		clipdist_mask | (culldist_mask << 8));
    703 	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
    704 		rs->pa_cl_clip_cntl |
    705 		ucp_mask |
    706 		S_028810_CLIP_DISABLE(window_space));
    707 
    708 	/* reuse needs to be set off if we write oViewport */
    709 	radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
    710 			       S_028AB4_REUSE_OFF(info->writes_viewport_index));
    711 }
    712 
    713 /*
    714  * inferred state between framebuffer and rasterizer
    715  */
    716 static void si_update_poly_offset_state(struct si_context *sctx)
    717 {
    718 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
    719 
    720 	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
    721 		si_pm4_bind_state(sctx, poly_offset, NULL);
    722 		return;
    723 	}
    724 
    725 	/* Use the user format, not db_render_format, so that the polygon
    726 	 * offset behaves as expected by applications.
    727 	 */
    728 	switch (sctx->framebuffer.state.zsbuf->texture->format) {
    729 	case PIPE_FORMAT_Z16_UNORM:
    730 		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
    731 		break;
    732 	default: /* 24-bit */
    733 		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
    734 		break;
    735 	case PIPE_FORMAT_Z32_FLOAT:
    736 	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
    737 		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
    738 		break;
    739 	}
    740 }
    741 
    742 /*
    743  * Rasterizer
    744  */
    745 
    746 static uint32_t si_translate_fill(uint32_t func)
    747 {
    748 	switch(func) {
    749 	case PIPE_POLYGON_MODE_FILL:
    750 		return V_028814_X_DRAW_TRIANGLES;
    751 	case PIPE_POLYGON_MODE_LINE:
    752 		return V_028814_X_DRAW_LINES;
    753 	case PIPE_POLYGON_MODE_POINT:
    754 		return V_028814_X_DRAW_POINTS;
    755 	default:
    756 		assert(0);
    757 		return V_028814_X_DRAW_POINTS;
    758 	}
    759 }
    760 
    761 static void *si_create_rs_state(struct pipe_context *ctx,
    762 				const struct pipe_rasterizer_state *state)
    763 {
    764 	struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer);
    765 	struct si_pm4_state *pm4 = &rs->pm4;
    766 	unsigned tmp, i;
    767 	float psize_min, psize_max;
    768 
    769 	if (!rs) {
    770 		return NULL;
    771 	}
    772 
    773 	rs->scissor_enable = state->scissor;
    774 	rs->clip_halfz = state->clip_halfz;
    775 	rs->two_side = state->light_twoside;
    776 	rs->multisample_enable = state->multisample;
    777 	rs->force_persample_interp = state->force_persample_interp;
    778 	rs->clip_plane_enable = state->clip_plane_enable;
    779 	rs->line_stipple_enable = state->line_stipple_enable;
    780 	rs->poly_stipple_enable = state->poly_stipple_enable;
    781 	rs->line_smooth = state->line_smooth;
    782 	rs->poly_smooth = state->poly_smooth;
    783 	rs->uses_poly_offset = state->offset_point || state->offset_line ||
    784 			       state->offset_tri;
    785 	rs->clamp_fragment_color = state->clamp_fragment_color;
    786 	rs->flatshade = state->flatshade;
    787 	rs->sprite_coord_enable = state->sprite_coord_enable;
    788 	rs->rasterizer_discard = state->rasterizer_discard;
    789 	rs->pa_sc_line_stipple = state->line_stipple_enable ?
    790 				S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
    791 				S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
    792 	rs->pa_cl_clip_cntl =
    793 		S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
    794 		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
    795 		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
    796 		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) |
    797 		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
    798 
    799 	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
    800 		S_0286D4_FLAT_SHADE_ENA(1) |
    801 		S_0286D4_PNT_SPRITE_ENA(1) |
    802 		S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
    803 		S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
    804 		S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
    805 		S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
    806 		S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
    807 
    808 	/* point size 12.4 fixed point */
    809 	tmp = (unsigned)(state->point_size * 8.0);
    810 	si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
    811 
    812 	if (state->point_size_per_vertex) {
    813 		psize_min = util_get_min_point_size(state);
    814 		psize_max = 8192;
    815 	} else {
    816 		/* Force the point size to be as if the vertex output was disabled. */
    817 		psize_min = state->point_size;
    818 		psize_max = state->point_size;
    819 	}
    820 	/* Divide by two, because 0.5 = 1 pixel. */
    821 	si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX,
    822 			S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) |
    823 			S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2)));
    824 
    825 	tmp = (unsigned)state->line_width * 8;
    826 	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
    827 	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
    828 		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
    829 		       S_028A48_MSAA_ENABLE(state->multisample ||
    830 					    state->poly_smooth ||
    831 					    state->line_smooth) |
    832 		       S_028A48_VPORT_SCISSOR_ENABLE(1));
    833 
    834 	si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
    835 		       S_028BE4_PIX_CENTER(state->half_pixel_center) |
    836 		       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
    837 
    838 	si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
    839 	si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL,
    840 		S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
    841 		S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
    842 		S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
    843 		S_028814_FACE(!state->front_ccw) |
    844 		S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
    845 		S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
    846 		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
    847 		S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
    848 				   state->fill_back != PIPE_POLYGON_MODE_FILL) |
    849 		S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
    850 		S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
    851 	si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 +
    852 		       SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color);
    853 
    854 	/* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */
    855 	for (i = 0; i < 3; i++) {
    856 		struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i];
    857 		float offset_units = state->offset_units;
    858 		float offset_scale = state->offset_scale * 16.0f;
    859 		uint32_t pa_su_poly_offset_db_fmt_cntl = 0;
    860 
    861 		if (!state->offset_units_unscaled) {
    862 			switch (i) {
    863 			case 0: /* 16-bit zbuffer */
    864 				offset_units *= 4.0f;
    865 				pa_su_poly_offset_db_fmt_cntl =
    866 					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
    867 				break;
    868 			case 1: /* 24-bit zbuffer */
    869 				offset_units *= 2.0f;
    870 				pa_su_poly_offset_db_fmt_cntl =
    871 					S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
    872 				break;
    873 			case 2: /* 32-bit zbuffer */
    874 				offset_units *= 1.0f;
    875 				pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
    876 								S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
    877 				break;
    878 			}
    879 		}
    880 
    881 		si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
    882 			       fui(offset_scale));
    883 		si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
    884 			       fui(offset_units));
    885 		si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
    886 			       fui(offset_scale));
    887 		si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
    888 			       fui(offset_units));
    889 		si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
    890 			       pa_su_poly_offset_db_fmt_cntl);
    891 	}
    892 
    893 	return rs;
    894 }
    895 
    896 static void si_bind_rs_state(struct pipe_context *ctx, void *state)
    897 {
    898 	struct si_context *sctx = (struct si_context *)ctx;
    899 	struct si_state_rasterizer *old_rs =
    900 		(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
    901 	struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;
    902 
    903 	if (!state)
    904 		return;
    905 
    906 	if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) {
    907 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
    908 
    909 		/* Update the small primitive filter workaround if necessary. */
    910 		if (sctx->b.family >= CHIP_POLARIS10 &&
    911 		    sctx->framebuffer.nr_samples > 1)
    912 			si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
    913 	}
    914 
    915 	r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
    916 
    917 	si_pm4_bind_state(sctx, rasterizer, rs);
    918 	si_update_poly_offset_state(sctx);
    919 
    920 	si_mark_atom_dirty(sctx, &sctx->clip_regs);
    921 	sctx->do_update_shaders = true;
    922 }
    923 
    924 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
    925 {
    926 	struct si_context *sctx = (struct si_context *)ctx;
    927 
    928 	if (sctx->queued.named.rasterizer == state)
    929 		si_pm4_bind_state(sctx, poly_offset, NULL);
    930 	si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state);
    931 }
    932 
    933 /*
    934  * inferred state between dsa and stencil ref
    935  */
    936 static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
    937 {
    938 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
    939 	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
    940 	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
    941 
    942 	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
    943 	radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) |
    944 			S_028430_STENCILMASK(dsa->valuemask[0]) |
    945 			S_028430_STENCILWRITEMASK(dsa->writemask[0]) |
    946 			S_028430_STENCILOPVAL(1));
    947 	radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) |
    948 			S_028434_STENCILMASK_BF(dsa->valuemask[1]) |
    949 			S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) |
    950 			S_028434_STENCILOPVAL_BF(1));
    951 }
    952 
    953 static void si_set_stencil_ref(struct pipe_context *ctx,
    954 			       const struct pipe_stencil_ref *state)
    955 {
    956         struct si_context *sctx = (struct si_context *)ctx;
    957 
    958 	if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0)
    959 		return;
    960 
    961 	sctx->stencil_ref.state = *state;
    962 	si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
    963 }
    964 
    965 
    966 /*
    967  * DSA
    968  */
    969 
    970 static uint32_t si_translate_stencil_op(int s_op)
    971 {
    972 	switch (s_op) {
    973 	case PIPE_STENCIL_OP_KEEP:
    974 		return V_02842C_STENCIL_KEEP;
    975 	case PIPE_STENCIL_OP_ZERO:
    976 		return V_02842C_STENCIL_ZERO;
    977 	case PIPE_STENCIL_OP_REPLACE:
    978 		return V_02842C_STENCIL_REPLACE_TEST;
    979 	case PIPE_STENCIL_OP_INCR:
    980 		return V_02842C_STENCIL_ADD_CLAMP;
    981 	case PIPE_STENCIL_OP_DECR:
    982 		return V_02842C_STENCIL_SUB_CLAMP;
    983 	case PIPE_STENCIL_OP_INCR_WRAP:
    984 		return V_02842C_STENCIL_ADD_WRAP;
    985 	case PIPE_STENCIL_OP_DECR_WRAP:
    986 		return V_02842C_STENCIL_SUB_WRAP;
    987 	case PIPE_STENCIL_OP_INVERT:
    988 		return V_02842C_STENCIL_INVERT;
    989 	default:
    990 		R600_ERR("Unknown stencil op %d", s_op);
    991 		assert(0);
    992 		break;
    993 	}
    994 	return 0;
    995 }
    996 
    997 static void *si_create_dsa_state(struct pipe_context *ctx,
    998 				 const struct pipe_depth_stencil_alpha_state *state)
    999 {
   1000 	struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
   1001 	struct si_pm4_state *pm4 = &dsa->pm4;
   1002 	unsigned db_depth_control;
   1003 	uint32_t db_stencil_control = 0;
   1004 
   1005 	if (!dsa) {
   1006 		return NULL;
   1007 	}
   1008 
   1009 	dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
   1010 	dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask;
   1011 	dsa->stencil_ref.writemask[0] = state->stencil[0].writemask;
   1012 	dsa->stencil_ref.writemask[1] = state->stencil[1].writemask;
   1013 
   1014 	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
   1015 		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
   1016 		S_028800_ZFUNC(state->depth.func) |
   1017 		S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
   1018 
   1019 	/* stencil */
   1020 	if (state->stencil[0].enabled) {
   1021 		db_depth_control |= S_028800_STENCIL_ENABLE(1);
   1022 		db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func);
   1023 		db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op));
   1024 		db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op));
   1025 		db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op));
   1026 
   1027 		if (state->stencil[1].enabled) {
   1028 			db_depth_control |= S_028800_BACKFACE_ENABLE(1);
   1029 			db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func);
   1030 			db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op));
   1031 			db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op));
   1032 			db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op));
   1033 		}
   1034 	}
   1035 
   1036 	/* alpha */
   1037 	if (state->alpha.enabled) {
   1038 		dsa->alpha_func = state->alpha.func;
   1039 
   1040 		si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 +
   1041 		               SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value));
   1042 	} else {
   1043 		dsa->alpha_func = PIPE_FUNC_ALWAYS;
   1044 	}
   1045 
   1046 	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
   1047 	si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
   1048 	if (state->depth.bounds_test) {
   1049 		si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
   1050 		si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
   1051 	}
   1052 
   1053 	return dsa;
   1054 }
   1055 
   1056 static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
   1057 {
   1058         struct si_context *sctx = (struct si_context *)ctx;
   1059         struct si_state_dsa *dsa = state;
   1060 
   1061         if (!state)
   1062                 return;
   1063 
   1064 	si_pm4_bind_state(sctx, dsa, dsa);
   1065 
   1066 	if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part,
   1067 		   sizeof(struct si_dsa_stencil_ref_part)) != 0) {
   1068 		sctx->stencil_ref.dsa_part = dsa->stencil_ref;
   1069 		si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom);
   1070 	}
   1071 	sctx->do_update_shaders = true;
   1072 }
   1073 
   1074 static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
   1075 {
   1076 	struct si_context *sctx = (struct si_context *)ctx;
   1077 	si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
   1078 }
   1079 
   1080 static void *si_create_db_flush_dsa(struct si_context *sctx)
   1081 {
   1082 	struct pipe_depth_stencil_alpha_state dsa = {};
   1083 
   1084 	return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
   1085 }
   1086 
   1087 /* DB RENDER STATE */
   1088 
   1089 static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
   1090 {
   1091 	struct si_context *sctx = (struct si_context*)ctx;
   1092 
   1093 	/* Pipeline stat & streamout queries. */
   1094 	if (enable) {
   1095 		sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS;
   1096 		sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS;
   1097 	} else {
   1098 		sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS;
   1099 		sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS;
   1100 	}
   1101 
   1102 	/* Occlusion queries. */
   1103 	if (sctx->occlusion_queries_disabled != !enable) {
   1104 		sctx->occlusion_queries_disabled = !enable;
   1105 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
   1106 	}
   1107 }
   1108 
   1109 static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
   1110 {
   1111 	struct si_context *sctx = (struct si_context*)ctx;
   1112 
   1113 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
   1114 }
   1115 
   1116 static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
   1117 {
   1118 	struct si_context *sctx = (struct si_context*)ctx;
   1119 
   1120 	st->saved_compute = sctx->cs_shader_state.program;
   1121 
   1122 	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
   1123 	si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
   1124 }
   1125 
   1126 static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
   1127 {
   1128 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
   1129 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
   1130 	unsigned db_shader_control;
   1131 
   1132 	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
   1133 
   1134 	/* DB_RENDER_CONTROL */
   1135 	if (sctx->dbcb_depth_copy_enabled ||
   1136 	    sctx->dbcb_stencil_copy_enabled) {
   1137 		radeon_emit(cs,
   1138 			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
   1139 			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
   1140 			    S_028000_COPY_CENTROID(1) |
   1141 			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
   1142 	} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
   1143 		radeon_emit(cs,
   1144 			    S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
   1145 			    S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
   1146 	} else {
   1147 		radeon_emit(cs,
   1148 			    S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
   1149 			    S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
   1150 	}
   1151 
   1152 	/* DB_COUNT_CONTROL (occlusion queries) */
   1153 	if (sctx->b.num_occlusion_queries > 0 &&
   1154 	    !sctx->occlusion_queries_disabled) {
   1155 		bool perfect = sctx->b.num_perfect_occlusion_queries > 0;
   1156 
   1157 		if (sctx->b.chip_class >= CIK) {
   1158 			radeon_emit(cs,
   1159 				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
   1160 				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
   1161 				    S_028004_ZPASS_ENABLE(1) |
   1162 				    S_028004_SLICE_EVEN_ENABLE(1) |
   1163 				    S_028004_SLICE_ODD_ENABLE(1));
   1164 		} else {
   1165 			radeon_emit(cs,
   1166 				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
   1167 				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
   1168 		}
   1169 	} else {
   1170 		/* Disable occlusion queries. */
   1171 		if (sctx->b.chip_class >= CIK) {
   1172 			radeon_emit(cs, 0);
   1173 		} else {
   1174 			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
   1175 		}
   1176 	}
   1177 
   1178 	/* DB_RENDER_OVERRIDE2 */
   1179 	radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
   1180 		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
   1181 		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
   1182 		S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
   1183 
   1184 	db_shader_control = sctx->ps_db_shader_control;
   1185 
   1186 	/* Bug workaround for smoothing (overrasterization) on SI. */
   1187 	if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
   1188 		db_shader_control &= C_02880C_Z_ORDER;
   1189 		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
   1190 	}
   1191 
   1192 	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
   1193 	if (!rs || !rs->multisample_enable)
   1194 		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
   1195 
   1196 	if (sctx->b.family == CHIP_STONEY &&
   1197 	    sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
   1198 		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
   1199 
   1200 	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
   1201 			       db_shader_control);
   1202 }
   1203 
   1204 /*
   1205  * format translation
   1206  */
   1207 static uint32_t si_translate_colorformat(enum pipe_format format)
   1208 {
   1209 	const struct util_format_description *desc = util_format_description(format);
   1210 
   1211 #define HAS_SIZE(x,y,z,w) \
   1212 	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
   1213          desc->channel[2].size == (z) && desc->channel[3].size == (w))
   1214 
   1215 	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
   1216 		return V_028C70_COLOR_10_11_11;
   1217 
   1218 	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
   1219 		return V_028C70_COLOR_INVALID;
   1220 
   1221 	/* hw cannot support mixed formats (except depth/stencil, since
   1222 	 * stencil is not written to). */
   1223 	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
   1224 		return V_028C70_COLOR_INVALID;
   1225 
   1226 	switch (desc->nr_channels) {
   1227 	case 1:
   1228 		switch (desc->channel[0].size) {
   1229 		case 8:
   1230 			return V_028C70_COLOR_8;
   1231 		case 16:
   1232 			return V_028C70_COLOR_16;
   1233 		case 32:
   1234 			return V_028C70_COLOR_32;
   1235 		}
   1236 		break;
   1237 	case 2:
   1238 		if (desc->channel[0].size == desc->channel[1].size) {
   1239 			switch (desc->channel[0].size) {
   1240 			case 8:
   1241 				return V_028C70_COLOR_8_8;
   1242 			case 16:
   1243 				return V_028C70_COLOR_16_16;
   1244 			case 32:
   1245 				return V_028C70_COLOR_32_32;
   1246 			}
   1247 		} else if (HAS_SIZE(8,24,0,0)) {
   1248 			return V_028C70_COLOR_24_8;
   1249 		} else if (HAS_SIZE(24,8,0,0)) {
   1250 			return V_028C70_COLOR_8_24;
   1251 		}
   1252 		break;
   1253 	case 3:
   1254 		if (HAS_SIZE(5,6,5,0)) {
   1255 			return V_028C70_COLOR_5_6_5;
   1256 		} else if (HAS_SIZE(32,8,24,0)) {
   1257 			return V_028C70_COLOR_X24_8_32_FLOAT;
   1258 		}
   1259 		break;
   1260 	case 4:
   1261 		if (desc->channel[0].size == desc->channel[1].size &&
   1262 		    desc->channel[0].size == desc->channel[2].size &&
   1263 		    desc->channel[0].size == desc->channel[3].size) {
   1264 			switch (desc->channel[0].size) {
   1265 			case 4:
   1266 				return V_028C70_COLOR_4_4_4_4;
   1267 			case 8:
   1268 				return V_028C70_COLOR_8_8_8_8;
   1269 			case 16:
   1270 				return V_028C70_COLOR_16_16_16_16;
   1271 			case 32:
   1272 				return V_028C70_COLOR_32_32_32_32;
   1273 			}
   1274 		} else if (HAS_SIZE(5,5,5,1)) {
   1275 			return V_028C70_COLOR_1_5_5_5;
   1276 		} else if (HAS_SIZE(10,10,10,2)) {
   1277 			return V_028C70_COLOR_2_10_10_10;
   1278 		}
   1279 		break;
   1280 	}
   1281 	return V_028C70_COLOR_INVALID;
   1282 }
   1283 
   1284 static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
   1285 {
   1286 	if (SI_BIG_ENDIAN) {
   1287 		switch(colorformat) {
   1288 		/* 8-bit buffers. */
   1289 		case V_028C70_COLOR_8:
   1290 			return V_028C70_ENDIAN_NONE;
   1291 
   1292 		/* 16-bit buffers. */
   1293 		case V_028C70_COLOR_5_6_5:
   1294 		case V_028C70_COLOR_1_5_5_5:
   1295 		case V_028C70_COLOR_4_4_4_4:
   1296 		case V_028C70_COLOR_16:
   1297 		case V_028C70_COLOR_8_8:
   1298 			return V_028C70_ENDIAN_8IN16;
   1299 
   1300 		/* 32-bit buffers. */
   1301 		case V_028C70_COLOR_8_8_8_8:
   1302 		case V_028C70_COLOR_2_10_10_10:
   1303 		case V_028C70_COLOR_8_24:
   1304 		case V_028C70_COLOR_24_8:
   1305 		case V_028C70_COLOR_16_16:
   1306 			return V_028C70_ENDIAN_8IN32;
   1307 
   1308 		/* 64-bit buffers. */
   1309 		case V_028C70_COLOR_16_16_16_16:
   1310 			return V_028C70_ENDIAN_8IN16;
   1311 
   1312 		case V_028C70_COLOR_32_32:
   1313 			return V_028C70_ENDIAN_8IN32;
   1314 
   1315 		/* 128-bit buffers. */
   1316 		case V_028C70_COLOR_32_32_32_32:
   1317 			return V_028C70_ENDIAN_8IN32;
   1318 		default:
   1319 			return V_028C70_ENDIAN_NONE; /* Unsupported. */
   1320 		}
   1321 	} else {
   1322 		return V_028C70_ENDIAN_NONE;
   1323 	}
   1324 }
   1325 
   1326 static uint32_t si_translate_dbformat(enum pipe_format format)
   1327 {
   1328 	switch (format) {
   1329 	case PIPE_FORMAT_Z16_UNORM:
   1330 		return V_028040_Z_16;
   1331 	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   1332 	case PIPE_FORMAT_X8Z24_UNORM:
   1333 	case PIPE_FORMAT_Z24X8_UNORM:
   1334 	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   1335 		return V_028040_Z_24; /* deprecated on SI */
   1336 	case PIPE_FORMAT_Z32_FLOAT:
   1337 	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
   1338 		return V_028040_Z_32_FLOAT;
   1339 	default:
   1340 		return V_028040_Z_INVALID;
   1341 	}
   1342 }
   1343 
   1344 /*
   1345  * Texture translation
   1346  */
   1347 
   1348 static uint32_t si_translate_texformat(struct pipe_screen *screen,
   1349 				       enum pipe_format format,
   1350 				       const struct util_format_description *desc,
   1351 				       int first_non_void)
   1352 {
   1353 	struct si_screen *sscreen = (struct si_screen*)screen;
   1354 	bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
   1355 					  sscreen->b.info.drm_minor >= 31) ||
   1356 					 sscreen->b.info.drm_major == 3;
   1357 	bool uniform = true;
   1358 	int i;
   1359 
   1360 	/* Colorspace (return non-RGB formats directly). */
   1361 	switch (desc->colorspace) {
   1362 	/* Depth stencil formats */
   1363 	case UTIL_FORMAT_COLORSPACE_ZS:
   1364 		switch (format) {
   1365 		case PIPE_FORMAT_Z16_UNORM:
   1366 			return V_008F14_IMG_DATA_FORMAT_16;
   1367 		case PIPE_FORMAT_X24S8_UINT:
   1368 		case PIPE_FORMAT_S8X24_UINT:
   1369 			/*
   1370 			 * Implemented as an 8_8_8_8 data format to fix texture
   1371 			 * gathers in stencil sampling. This affects at least
   1372 			 * GL45-CTS.texture_cube_map_array.sampling on VI.
   1373 			 */
   1374 			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
   1375 		case PIPE_FORMAT_Z24X8_UNORM:
   1376 		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   1377 			return V_008F14_IMG_DATA_FORMAT_8_24;
   1378 		case PIPE_FORMAT_X8Z24_UNORM:
   1379 		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   1380 			return V_008F14_IMG_DATA_FORMAT_24_8;
   1381 		case PIPE_FORMAT_S8_UINT:
   1382 			return V_008F14_IMG_DATA_FORMAT_8;
   1383 		case PIPE_FORMAT_Z32_FLOAT:
   1384 			return V_008F14_IMG_DATA_FORMAT_32;
   1385 		case PIPE_FORMAT_X32_S8X24_UINT:
   1386 		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
   1387 			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
   1388 		default:
   1389 			goto out_unknown;
   1390 		}
   1391 
   1392 	case UTIL_FORMAT_COLORSPACE_YUV:
   1393 		goto out_unknown; /* TODO */
   1394 
   1395 	case UTIL_FORMAT_COLORSPACE_SRGB:
   1396 		if (desc->nr_channels != 4 && desc->nr_channels != 1)
   1397 			goto out_unknown;
   1398 		break;
   1399 
   1400 	default:
   1401 		break;
   1402 	}
   1403 
   1404 	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
   1405 		if (!enable_compressed_formats)
   1406 			goto out_unknown;
   1407 
   1408 		switch (format) {
   1409 		case PIPE_FORMAT_RGTC1_SNORM:
   1410 		case PIPE_FORMAT_LATC1_SNORM:
   1411 		case PIPE_FORMAT_RGTC1_UNORM:
   1412 		case PIPE_FORMAT_LATC1_UNORM:
   1413 			return V_008F14_IMG_DATA_FORMAT_BC4;
   1414 		case PIPE_FORMAT_RGTC2_SNORM:
   1415 		case PIPE_FORMAT_LATC2_SNORM:
   1416 		case PIPE_FORMAT_RGTC2_UNORM:
   1417 		case PIPE_FORMAT_LATC2_UNORM:
   1418 			return V_008F14_IMG_DATA_FORMAT_BC5;
   1419 		default:
   1420 			goto out_unknown;
   1421 		}
   1422 	}
   1423 
   1424 	if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
   1425 	    sscreen->b.family == CHIP_STONEY) {
   1426 		switch (format) {
   1427 		case PIPE_FORMAT_ETC1_RGB8:
   1428 		case PIPE_FORMAT_ETC2_RGB8:
   1429 		case PIPE_FORMAT_ETC2_SRGB8:
   1430 			return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
   1431 		case PIPE_FORMAT_ETC2_RGB8A1:
   1432 		case PIPE_FORMAT_ETC2_SRGB8A1:
   1433 			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
   1434 		case PIPE_FORMAT_ETC2_RGBA8:
   1435 		case PIPE_FORMAT_ETC2_SRGBA8:
   1436 			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
   1437 		case PIPE_FORMAT_ETC2_R11_UNORM:
   1438 		case PIPE_FORMAT_ETC2_R11_SNORM:
   1439 			return V_008F14_IMG_DATA_FORMAT_ETC2_R;
   1440 		case PIPE_FORMAT_ETC2_RG11_UNORM:
   1441 		case PIPE_FORMAT_ETC2_RG11_SNORM:
   1442 			return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
   1443 		default:
   1444 			goto out_unknown;
   1445 		}
   1446 	}
   1447 
   1448 	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
   1449 		if (!enable_compressed_formats)
   1450 			goto out_unknown;
   1451 
   1452 		switch (format) {
   1453 		case PIPE_FORMAT_BPTC_RGBA_UNORM:
   1454 		case PIPE_FORMAT_BPTC_SRGBA:
   1455 			return V_008F14_IMG_DATA_FORMAT_BC7;
   1456 		case PIPE_FORMAT_BPTC_RGB_FLOAT:
   1457 		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
   1458 			return V_008F14_IMG_DATA_FORMAT_BC6;
   1459 		default:
   1460 			goto out_unknown;
   1461 		}
   1462 	}
   1463 
   1464 	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
   1465 		switch (format) {
   1466 		case PIPE_FORMAT_R8G8_B8G8_UNORM:
   1467 		case PIPE_FORMAT_G8R8_B8R8_UNORM:
   1468 			return V_008F14_IMG_DATA_FORMAT_GB_GR;
   1469 		case PIPE_FORMAT_G8R8_G8B8_UNORM:
   1470 		case PIPE_FORMAT_R8G8_R8B8_UNORM:
   1471 			return V_008F14_IMG_DATA_FORMAT_BG_RG;
   1472 		default:
   1473 			goto out_unknown;
   1474 		}
   1475 	}
   1476 
   1477 	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
   1478 		if (!enable_compressed_formats)
   1479 			goto out_unknown;
   1480 
   1481 		if (!util_format_s3tc_enabled) {
   1482 			goto out_unknown;
   1483 		}
   1484 
   1485 		switch (format) {
   1486 		case PIPE_FORMAT_DXT1_RGB:
   1487 		case PIPE_FORMAT_DXT1_RGBA:
   1488 		case PIPE_FORMAT_DXT1_SRGB:
   1489 		case PIPE_FORMAT_DXT1_SRGBA:
   1490 			return V_008F14_IMG_DATA_FORMAT_BC1;
   1491 		case PIPE_FORMAT_DXT3_RGBA:
   1492 		case PIPE_FORMAT_DXT3_SRGBA:
   1493 			return V_008F14_IMG_DATA_FORMAT_BC2;
   1494 		case PIPE_FORMAT_DXT5_RGBA:
   1495 		case PIPE_FORMAT_DXT5_SRGBA:
   1496 			return V_008F14_IMG_DATA_FORMAT_BC3;
   1497 		default:
   1498 			goto out_unknown;
   1499 		}
   1500 	}
   1501 
   1502 	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
   1503 		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
   1504 	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
   1505 		return V_008F14_IMG_DATA_FORMAT_10_11_11;
   1506 	}
   1507 
   1508 	/* R8G8Bx_SNORM - TODO CxV8U8 */
   1509 
   1510 	/* hw cannot support mixed formats (except depth/stencil, since only
   1511 	 * depth is read).*/
   1512 	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
   1513 		goto out_unknown;
   1514 
   1515 	/* See whether the components are of the same size. */
   1516 	for (i = 1; i < desc->nr_channels; i++) {
   1517 		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
   1518 	}
   1519 
   1520 	/* Non-uniform formats. */
   1521 	if (!uniform) {
   1522 		switch(desc->nr_channels) {
   1523 		case 3:
   1524 			if (desc->channel[0].size == 5 &&
   1525 			    desc->channel[1].size == 6 &&
   1526 			    desc->channel[2].size == 5) {
   1527 				return V_008F14_IMG_DATA_FORMAT_5_6_5;
   1528 			}
   1529 			goto out_unknown;
   1530 		case 4:
   1531 			if (desc->channel[0].size == 5 &&
   1532 			    desc->channel[1].size == 5 &&
   1533 			    desc->channel[2].size == 5 &&
   1534 			    desc->channel[3].size == 1) {
   1535 				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
   1536 			}
   1537 			if (desc->channel[0].size == 10 &&
   1538 			    desc->channel[1].size == 10 &&
   1539 			    desc->channel[2].size == 10 &&
   1540 			    desc->channel[3].size == 2) {
   1541 				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
   1542 			}
   1543 			goto out_unknown;
   1544 		}
   1545 		goto out_unknown;
   1546 	}
   1547 
   1548 	if (first_non_void < 0 || first_non_void > 3)
   1549 		goto out_unknown;
   1550 
   1551 	/* uniform formats */
   1552 	switch (desc->channel[first_non_void].size) {
   1553 	case 4:
   1554 		switch (desc->nr_channels) {
   1555 #if 0 /* Not supported for render targets */
   1556 		case 2:
   1557 			return V_008F14_IMG_DATA_FORMAT_4_4;
   1558 #endif
   1559 		case 4:
   1560 			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
   1561 		}
   1562 		break;
   1563 	case 8:
   1564 		switch (desc->nr_channels) {
   1565 		case 1:
   1566 			return V_008F14_IMG_DATA_FORMAT_8;
   1567 		case 2:
   1568 			return V_008F14_IMG_DATA_FORMAT_8_8;
   1569 		case 4:
   1570 			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
   1571 		}
   1572 		break;
   1573 	case 16:
   1574 		switch (desc->nr_channels) {
   1575 		case 1:
   1576 			return V_008F14_IMG_DATA_FORMAT_16;
   1577 		case 2:
   1578 			return V_008F14_IMG_DATA_FORMAT_16_16;
   1579 		case 4:
   1580 			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
   1581 		}
   1582 		break;
   1583 	case 32:
   1584 		switch (desc->nr_channels) {
   1585 		case 1:
   1586 			return V_008F14_IMG_DATA_FORMAT_32;
   1587 		case 2:
   1588 			return V_008F14_IMG_DATA_FORMAT_32_32;
   1589 #if 0 /* Not supported for render targets */
   1590 		case 3:
   1591 			return V_008F14_IMG_DATA_FORMAT_32_32_32;
   1592 #endif
   1593 		case 4:
   1594 			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
   1595 		}
   1596 	}
   1597 
   1598 out_unknown:
   1599 	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
   1600 	return ~0;
   1601 }
   1602 
   1603 static unsigned si_tex_wrap(unsigned wrap)
   1604 {
   1605 	switch (wrap) {
   1606 	default:
   1607 	case PIPE_TEX_WRAP_REPEAT:
   1608 		return V_008F30_SQ_TEX_WRAP;
   1609 	case PIPE_TEX_WRAP_CLAMP:
   1610 		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
   1611 	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
   1612 		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
   1613 	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   1614 		return V_008F30_SQ_TEX_CLAMP_BORDER;
   1615 	case PIPE_TEX_WRAP_MIRROR_REPEAT:
   1616 		return V_008F30_SQ_TEX_MIRROR;
   1617 	case PIPE_TEX_WRAP_MIRROR_CLAMP:
   1618 		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
   1619 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   1620 		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
   1621 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   1622 		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
   1623 	}
   1624 }
   1625 
   1626 static unsigned si_tex_mipfilter(unsigned filter)
   1627 {
   1628 	switch (filter) {
   1629 	case PIPE_TEX_MIPFILTER_NEAREST:
   1630 		return V_008F38_SQ_TEX_Z_FILTER_POINT;
   1631 	case PIPE_TEX_MIPFILTER_LINEAR:
   1632 		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
   1633 	default:
   1634 	case PIPE_TEX_MIPFILTER_NONE:
   1635 		return V_008F38_SQ_TEX_Z_FILTER_NONE;
   1636 	}
   1637 }
   1638 
   1639 static unsigned si_tex_compare(unsigned compare)
   1640 {
   1641 	switch (compare) {
   1642 	default:
   1643 	case PIPE_FUNC_NEVER:
   1644 		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
   1645 	case PIPE_FUNC_LESS:
   1646 		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
   1647 	case PIPE_FUNC_EQUAL:
   1648 		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
   1649 	case PIPE_FUNC_LEQUAL:
   1650 		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
   1651 	case PIPE_FUNC_GREATER:
   1652 		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
   1653 	case PIPE_FUNC_NOTEQUAL:
   1654 		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
   1655 	case PIPE_FUNC_GEQUAL:
   1656 		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
   1657 	case PIPE_FUNC_ALWAYS:
   1658 		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
   1659 	}
   1660 }
   1661 
   1662 static unsigned si_tex_dim(unsigned res_target, unsigned view_target,
   1663 			   unsigned nr_samples)
   1664 {
   1665 	if (view_target == PIPE_TEXTURE_CUBE ||
   1666 	    view_target == PIPE_TEXTURE_CUBE_ARRAY)
   1667 		res_target = view_target;
   1668 	/* If interpreting cubemaps as something else, set 2D_ARRAY. */
   1669 	else if (res_target == PIPE_TEXTURE_CUBE ||
   1670 		 res_target == PIPE_TEXTURE_CUBE_ARRAY)
   1671 		res_target = PIPE_TEXTURE_2D_ARRAY;
   1672 
   1673 	switch (res_target) {
   1674 	default:
   1675 	case PIPE_TEXTURE_1D:
   1676 		return V_008F1C_SQ_RSRC_IMG_1D;
   1677 	case PIPE_TEXTURE_1D_ARRAY:
   1678 		return V_008F1C_SQ_RSRC_IMG_1D_ARRAY;
   1679 	case PIPE_TEXTURE_2D:
   1680 	case PIPE_TEXTURE_RECT:
   1681 		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA :
   1682 					V_008F1C_SQ_RSRC_IMG_2D;
   1683 	case PIPE_TEXTURE_2D_ARRAY:
   1684 		return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY :
   1685 					V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   1686 	case PIPE_TEXTURE_3D:
   1687 		return V_008F1C_SQ_RSRC_IMG_3D;
   1688 	case PIPE_TEXTURE_CUBE:
   1689 	case PIPE_TEXTURE_CUBE_ARRAY:
   1690 		return V_008F1C_SQ_RSRC_IMG_CUBE;
   1691 	}
   1692 }
   1693 
   1694 /*
   1695  * Format support testing
   1696  */
   1697 
   1698 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
   1699 {
   1700 	return si_translate_texformat(screen, format, util_format_description(format),
   1701 				      util_format_get_first_non_void_channel(format)) != ~0U;
   1702 }
   1703 
   1704 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
   1705 					       const struct util_format_description *desc,
   1706 					       int first_non_void)
   1707 {
   1708 	int i;
   1709 
   1710 	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
   1711 		return V_008F0C_BUF_DATA_FORMAT_10_11_11;
   1712 
   1713 	assert(first_non_void >= 0);
   1714 
   1715 	if (desc->nr_channels == 4 &&
   1716 	    desc->channel[0].size == 10 &&
   1717 	    desc->channel[1].size == 10 &&
   1718 	    desc->channel[2].size == 10 &&
   1719 	    desc->channel[3].size == 2)
   1720 		return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
   1721 
   1722 	/* See whether the components are of the same size. */
   1723 	for (i = 0; i < desc->nr_channels; i++) {
   1724 		if (desc->channel[first_non_void].size != desc->channel[i].size)
   1725 			return V_008F0C_BUF_DATA_FORMAT_INVALID;
   1726 	}
   1727 
   1728 	switch (desc->channel[first_non_void].size) {
   1729 	case 8:
   1730 		switch (desc->nr_channels) {
   1731 		case 1:
   1732 			return V_008F0C_BUF_DATA_FORMAT_8;
   1733 		case 2:
   1734 			return V_008F0C_BUF_DATA_FORMAT_8_8;
   1735 		case 3:
   1736 		case 4:
   1737 			return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
   1738 		}
   1739 		break;
   1740 	case 16:
   1741 		switch (desc->nr_channels) {
   1742 		case 1:
   1743 			return V_008F0C_BUF_DATA_FORMAT_16;
   1744 		case 2:
   1745 			return V_008F0C_BUF_DATA_FORMAT_16_16;
   1746 		case 3:
   1747 		case 4:
   1748 			return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
   1749 		}
   1750 		break;
   1751 	case 32:
   1752 		switch (desc->nr_channels) {
   1753 		case 1:
   1754 			return V_008F0C_BUF_DATA_FORMAT_32;
   1755 		case 2:
   1756 			return V_008F0C_BUF_DATA_FORMAT_32_32;
   1757 		case 3:
   1758 			return V_008F0C_BUF_DATA_FORMAT_32_32_32;
   1759 		case 4:
   1760 			return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
   1761 		}
   1762 		break;
   1763 	}
   1764 
   1765 	return V_008F0C_BUF_DATA_FORMAT_INVALID;
   1766 }
   1767 
   1768 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen,
   1769 					      const struct util_format_description *desc,
   1770 					      int first_non_void)
   1771 {
   1772 	if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
   1773 		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
   1774 
   1775 	assert(first_non_void >= 0);
   1776 
   1777 	switch (desc->channel[first_non_void].type) {
   1778 	case UTIL_FORMAT_TYPE_SIGNED:
   1779 	case UTIL_FORMAT_TYPE_FIXED:
   1780 		if (desc->channel[first_non_void].size >= 32 ||
   1781 		    desc->channel[first_non_void].pure_integer)
   1782 			return V_008F0C_BUF_NUM_FORMAT_SINT;
   1783 		else if (desc->channel[first_non_void].normalized)
   1784 			return V_008F0C_BUF_NUM_FORMAT_SNORM;
   1785 		else
   1786 			return V_008F0C_BUF_NUM_FORMAT_SSCALED;
   1787 		break;
   1788 	case UTIL_FORMAT_TYPE_UNSIGNED:
   1789 		if (desc->channel[first_non_void].size >= 32 ||
   1790 		    desc->channel[first_non_void].pure_integer)
   1791 			return V_008F0C_BUF_NUM_FORMAT_UINT;
   1792 		else if (desc->channel[first_non_void].normalized)
   1793 			return V_008F0C_BUF_NUM_FORMAT_UNORM;
   1794 		else
   1795 			return V_008F0C_BUF_NUM_FORMAT_USCALED;
   1796 		break;
   1797 	case UTIL_FORMAT_TYPE_FLOAT:
   1798 	default:
   1799 		return V_008F0C_BUF_NUM_FORMAT_FLOAT;
   1800 	}
   1801 }
   1802 
   1803 static unsigned si_is_vertex_format_supported(struct pipe_screen *screen,
   1804 					      enum pipe_format format,
   1805 					      unsigned usage)
   1806 {
   1807 	const struct util_format_description *desc;
   1808 	int first_non_void;
   1809 	unsigned data_format;
   1810 
   1811 	assert((usage & ~(PIPE_BIND_SHADER_IMAGE |
   1812 			  PIPE_BIND_SAMPLER_VIEW |
   1813 			  PIPE_BIND_VERTEX_BUFFER)) == 0);
   1814 
   1815 	desc = util_format_description(format);
   1816 
   1817 	/* There are no native 8_8_8 or 16_16_16 data formats, and we currently
   1818 	 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well
   1819 	 * for read-only access (with caveats surrounding bounds checks), but
   1820 	 * obviously fails for write access which we have to implement for
   1821 	 * shader images. Luckily, OpenGL doesn't expect this to be supported
   1822 	 * anyway, and so the only impact is on PBO uploads / downloads, which
   1823 	 * shouldn't be expected to be fast for GL_RGB anyway.
   1824 	 */
   1825 	if (desc->block.bits == 3 * 8 ||
   1826 	    desc->block.bits == 3 * 16) {
   1827 		if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) {
   1828 		    usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW);
   1829 			if (!usage)
   1830 				return 0;
   1831 		}
   1832 	}
   1833 
   1834 	first_non_void = util_format_get_first_non_void_channel(format);
   1835 	data_format = si_translate_buffer_dataformat(screen, desc, first_non_void);
   1836 	if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID)
   1837 		return 0;
   1838 
   1839 	return usage;
   1840 }
   1841 
   1842 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
   1843 {
   1844 	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
   1845 		r600_translate_colorswap(format, false) != ~0U;
   1846 }
   1847 
   1848 static bool si_is_zs_format_supported(enum pipe_format format)
   1849 {
   1850 	return si_translate_dbformat(format) != V_028040_Z_INVALID;
   1851 }
   1852 
   1853 static boolean si_is_format_supported(struct pipe_screen *screen,
   1854 				      enum pipe_format format,
   1855 				      enum pipe_texture_target target,
   1856 				      unsigned sample_count,
   1857 				      unsigned usage)
   1858 {
   1859 	unsigned retval = 0;
   1860 
   1861 	if (target >= PIPE_MAX_TEXTURE_TYPES) {
   1862 		R600_ERR("r600: unsupported texture type %d\n", target);
   1863 		return false;
   1864 	}
   1865 
   1866 	if (!util_format_is_supported(format, usage))
   1867 		return false;
   1868 
   1869 	if (sample_count > 1) {
   1870 		if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
   1871 			return false;
   1872 
   1873 		if (usage & PIPE_BIND_SHADER_IMAGE)
   1874 			return false;
   1875 
   1876 		switch (sample_count) {
   1877 		case 2:
   1878 		case 4:
   1879 		case 8:
   1880 			break;
   1881 		case 16:
   1882 			if (format == PIPE_FORMAT_NONE)
   1883 				return true;
   1884 			else
   1885 				return false;
   1886 		default:
   1887 			return false;
   1888 		}
   1889 	}
   1890 
   1891 	if (usage & (PIPE_BIND_SAMPLER_VIEW |
   1892 		     PIPE_BIND_SHADER_IMAGE)) {
   1893 		if (target == PIPE_BUFFER) {
   1894 			retval |= si_is_vertex_format_supported(
   1895 				screen, format, usage & (PIPE_BIND_SAMPLER_VIEW |
   1896 						         PIPE_BIND_SHADER_IMAGE));
   1897 		} else {
   1898 			if (si_is_sampler_format_supported(screen, format))
   1899 				retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
   1900 						   PIPE_BIND_SHADER_IMAGE);
   1901 		}
   1902 	}
   1903 
   1904 	if ((usage & (PIPE_BIND_RENDER_TARGET |
   1905 		      PIPE_BIND_DISPLAY_TARGET |
   1906 		      PIPE_BIND_SCANOUT |
   1907 		      PIPE_BIND_SHARED |
   1908 		      PIPE_BIND_BLENDABLE)) &&
   1909 	    si_is_colorbuffer_format_supported(format)) {
   1910 		retval |= usage &
   1911 			  (PIPE_BIND_RENDER_TARGET |
   1912 			   PIPE_BIND_DISPLAY_TARGET |
   1913 			   PIPE_BIND_SCANOUT |
   1914 			   PIPE_BIND_SHARED);
   1915 		if (!util_format_is_pure_integer(format) &&
   1916 		    !util_format_is_depth_or_stencil(format))
   1917 			retval |= usage & PIPE_BIND_BLENDABLE;
   1918 	}
   1919 
   1920 	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
   1921 	    si_is_zs_format_supported(format)) {
   1922 		retval |= PIPE_BIND_DEPTH_STENCIL;
   1923 	}
   1924 
   1925 	if (usage & PIPE_BIND_VERTEX_BUFFER) {
   1926 		retval |= si_is_vertex_format_supported(screen, format,
   1927 							PIPE_BIND_VERTEX_BUFFER);
   1928 	}
   1929 
   1930 	if ((usage & PIPE_BIND_LINEAR) &&
   1931 	    !util_format_is_compressed(format) &&
   1932 	    !(usage & PIPE_BIND_DEPTH_STENCIL))
   1933 		retval |= PIPE_BIND_LINEAR;
   1934 
   1935 	return retval == usage;
   1936 }
   1937 
   1938 /*
   1939  * framebuffer handling
   1940  */
   1941 
        /* Pick the four SPI shader color export formats for a color surface and
         * store them in surf->spi_shader_col_format{,_alpha,_blend,_blend_alpha}.
         *
         * format, swap and ntype are compared against V_028C70_COLOR_*,
         * V_028C70_SWAP_* and V_028C70_NUMBER_* values respectively. When is_depth
         * is set, all four choices are overridden with 32_ABGR. For a format that
         * is not listed in the switch the function asserts and returns without
         * writing to surf.
         */
   1942 static void si_choose_spi_color_formats(struct r600_surface *surf,
   1943 					unsigned format, unsigned swap,
   1944 					unsigned ntype, bool is_depth)
   1945 {
   1946 	/* Alpha is needed for alpha-to-coverage.
   1947 	 * Blending may be with or without alpha.
   1948 	 */
   1949 	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
   1950 	unsigned alpha = 0; /* exports alpha, but may not support blending */
   1951 	unsigned blend = 0; /* supports blending, but may not export alpha */
   1952 	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
   1953 
   1954 	/* Choose the SPI color formats. These are required values for Stoney/RB+.
   1955 	 * Other chips have multiple choices, though they are not necessarily better.
   1956 	 */
   1957 	switch (format) {
        	/* Formats with at most 11 bits per channel: one 16-bit ABGR export
        	 * format, selected by number type, serves all four cases.
        	 */
   1958 	case V_028C70_COLOR_5_6_5:
   1959 	case V_028C70_COLOR_1_5_5_5:
   1960 	case V_028C70_COLOR_5_5_5_1:
   1961 	case V_028C70_COLOR_4_4_4_4:
   1962 	case V_028C70_COLOR_10_11_11:
   1963 	case V_028C70_COLOR_11_11_10:
   1964 	case V_028C70_COLOR_8:
   1965 	case V_028C70_COLOR_8_8:
   1966 	case V_028C70_COLOR_8_8_8_8:
   1967 	case V_028C70_COLOR_10_10_10_2:
   1968 	case V_028C70_COLOR_2_10_10_10:
   1969 		if (ntype == V_028C70_NUMBER_UINT)
   1970 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
   1971 		else if (ntype == V_028C70_NUMBER_SINT)
   1972 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
   1973 		else
   1974 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
   1975 		break;
   1976 
   1977 	case V_028C70_COLOR_16:
   1978 	case V_028C70_COLOR_16_16:
   1979 	case V_028C70_COLOR_16_16_16_16:
   1980 		if (ntype == V_028C70_NUMBER_UNORM ||
   1981 		    ntype == V_028C70_NUMBER_SNORM) {
   1982 			/* UNORM16 and SNORM16 don't support blending */
   1983 			if (ntype == V_028C70_NUMBER_UNORM)
   1984 				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
   1985 			else
   1986 				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
   1987 
   1988 			/* Use 32 bits per channel for blending. */
        			/* For an unexpected swap the assert fires; if asserts are
        			 * compiled out, blend and blend_alpha keep their initial 0.
        			 */
   1989 			if (format == V_028C70_COLOR_16) {
   1990 				if (swap == V_028C70_SWAP_STD) { /* R */
   1991 					blend = V_028714_SPI_SHADER_32_R;
   1992 					blend_alpha = V_028714_SPI_SHADER_32_AR;
   1993 				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
   1994 					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
   1995 				else
   1996 					assert(0);
   1997 			} else if (format == V_028C70_COLOR_16_16) {
   1998 				if (swap == V_028C70_SWAP_STD) { /* RG */
   1999 					blend = V_028714_SPI_SHADER_32_GR;
   2000 					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
   2001 				} else if (swap == V_028C70_SWAP_ALT) /* RA */
   2002 					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
   2003 				else
   2004 					assert(0);
   2005 			} else /* 16_16_16_16 */
   2006 				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
   2007 		} else if (ntype == V_028C70_NUMBER_UINT)
   2008 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
   2009 		else if (ntype == V_028C70_NUMBER_SINT)
   2010 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
   2011 		else if (ntype == V_028C70_NUMBER_FLOAT)
   2012 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
   2013 		else
   2014 			assert(0);
   2015 		break;
   2016 
        	/* Single 32-bit channel: the swap tells whether it holds R or A. */
   2017 	case V_028C70_COLOR_32:
   2018 		if (swap == V_028C70_SWAP_STD) { /* R */
   2019 			blend = normal = V_028714_SPI_SHADER_32_R;
   2020 			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
   2021 		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
   2022 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
   2023 		else
   2024 			assert(0);
   2025 		break;
   2026 
        	/* Two 32-bit channels: the swap tells whether they hold RG or RA. */
   2027 	case V_028C70_COLOR_32_32:
   2028 		if (swap == V_028C70_SWAP_STD) { /* RG */
   2029 			blend = normal = V_028714_SPI_SHADER_32_GR;
   2030 			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
   2031 		} else if (swap == V_028C70_SWAP_ALT) /* RA */
   2032 			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
   2033 		else
   2034 			assert(0);
   2035 		break;
   2036 
   2037 	case V_028C70_COLOR_32_32_32_32:
   2038 	case V_028C70_COLOR_8_24:
   2039 	case V_028C70_COLOR_24_8:
   2040 	case V_028C70_COLOR_X24_8_32_FLOAT:
   2041 		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
   2042 		break;
   2043 
   2044 	default:
        		/* Unknown format: surf is left untouched. */
   2045 		assert(0);
   2046 		return;
   2047 	}
   2048 
   2049 	/* The DB->CB copy needs 32_ABGR. */
   2050 	if (is_depth)
   2051 		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
   2052 
   2053 	surf->spi_shader_col_format = normal;
   2054 	surf->spi_shader_col_format_alpha = alpha;
   2055 	surf->spi_shader_col_format_blend = blend;
   2056 	surf->spi_shader_col_format_blend_alpha = blend_alpha;
   2057 }
   2058 
        /* Compute the per-surface colorbuffer state for surf and cache it in the
         * surface: cb_color_view, cb_color_info, cb_color_attrib, cb_dcc_control
         * (only when chip_class >= VI), the color_is_int8 / color_is_int10 flags
         * and the SPI shader export formats. Sets surf->color_initialized when
         * done.
         *
         * sctx is only read for its chip class. The surface format is taken from
         * surf->base.format and the texture from surf->base.texture.
         */
   2059 static void si_initialize_color_surface(struct si_context *sctx,
   2060 					struct r600_surface *surf)
   2061 {
   2062 	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
   2063 	unsigned color_info, color_attrib, color_view;
   2064 	unsigned format, swap, ntype, endian;
   2065 	const struct util_format_description *desc;
   2066 	int i;
   2067 	unsigned blend_clamp = 0, blend_bypass = 0;
   2068 
   2069 	color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
   2070 		     S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
   2071 
        	/* Find the first non-void channel; i == 4 means there is none. */
   2072 	desc = util_format_description(surf->base.format);
   2073 	for (i = 0; i < 4; i++) {
   2074 		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
   2075 			break;
   2076 		}
   2077 	}
        	/* Derive the number type from that channel. Formats without a
        	 * non-void channel are treated as FLOAT; SRGB colorspace takes
        	 * precedence over the signed/unsigned distinction.
        	 */
   2078 	if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) {
   2079 		ntype = V_028C70_NUMBER_FLOAT;
   2080 	} else {
   2081 		ntype = V_028C70_NUMBER_UNORM;
   2082 		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
   2083 			ntype = V_028C70_NUMBER_SRGB;
   2084 		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
   2085 			if (desc->channel[i].pure_integer) {
   2086 				ntype = V_028C70_NUMBER_SINT;
   2087 			} else {
   2088 				assert(desc->channel[i].normalized);
   2089 				ntype = V_028C70_NUMBER_SNORM;
   2090 			}
   2091 		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
   2092 			if (desc->channel[i].pure_integer) {
   2093 				ntype = V_028C70_NUMBER_UINT;
   2094 			} else {
   2095 				assert(desc->channel[i].normalized);
   2096 				ntype = V_028C70_NUMBER_UNORM;
   2097 			}
   2098 		}
   2099 	}
   2100 
        	/* An invalid format is logged and asserted on; when asserts are
        	 * compiled out, execution continues with the invalid value.
        	 */
   2101 	format = si_translate_colorformat(surf->base.format);
   2102 	if (format == V_028C70_COLOR_INVALID) {
   2103 		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
   2104 	}
   2105 	assert(format != V_028C70_COLOR_INVALID);
   2106 	swap = r600_translate_colorswap(surf->base.format, false);
   2107 	endian = si_colorformat_endian_swap(format);
   2108 
   2109 	/* blend clamp should be set for all NORM/SRGB types */
   2110 	if (ntype == V_028C70_NUMBER_UNORM ||
   2111 	    ntype == V_028C70_NUMBER_SNORM ||
   2112 	    ntype == V_028C70_NUMBER_SRGB)
   2113 		blend_clamp = 1;
   2114 
   2115 	/* set blend bypass according to docs if SINT/UINT or
   2116 	   8/24 COLOR variants */
   2117 	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
   2118 	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
   2119 	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
   2120 		blend_clamp = 0;
   2121 		blend_bypass = 1;
   2122 	}
   2123 
        	/* Remember 8-bit and 10-bit integer formats on the surface. */
   2124 	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) {
   2125 		if (format == V_028C70_COLOR_8 ||
   2126 		    format == V_028C70_COLOR_8_8 ||
   2127 		    format == V_028C70_COLOR_8_8_8_8)
   2128 			surf->color_is_int8 = true;
   2129 		else if (format == V_028C70_COLOR_10_10_10_2 ||
   2130 			 format == V_028C70_COLOR_2_10_10_10)
   2131 			surf->color_is_int10 = true;
   2132 	}
   2133 
        	/* ROUND_MODE is 1 unless the number type is UNORM/SNORM/SRGB or the
        	 * format is 8_24 / 24_8.
        	 */
   2134 	color_info = S_028C70_FORMAT(format) |
   2135 		S_028C70_COMP_SWAP(swap) |
   2136 		S_028C70_BLEND_CLAMP(blend_clamp) |
   2137 		S_028C70_BLEND_BYPASS(blend_bypass) |
   2138 		S_028C70_SIMPLE_FLOAT(1) |
   2139 		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
   2140 				    ntype != V_028C70_NUMBER_SNORM &&
   2141 				    ntype != V_028C70_NUMBER_SRGB &&
   2142 				    format != V_028C70_COLOR_8_24 &&
   2143 				    format != V_028C70_COLOR_24_8) |
   2144 		S_028C70_NUMBER_TYPE(ntype) |
   2145 		S_028C70_ENDIAN(endian);
   2146 
   2147 	/* Intensity is implemented as Red, so treat it that way. */
   2148 	color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
   2149 						  util_format_is_intensity(surf->base.format));
   2150 
        	/* MSAA: the sample and fragment counts are programmed as log2 of
        	 * nr_samples; compression is enabled when the texture has an FMASK.
        	 */
   2151 	if (rtex->resource.b.b.nr_samples > 1) {
   2152 		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
   2153 
   2154 		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
   2155 				S_028C74_NUM_FRAGMENTS(log_samples);
   2156 
   2157 		if (rtex->fmask.size) {
   2158 			color_info |= S_028C70_COMPRESSION(1);
   2159 			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
   2160 
   2161 			if (sctx->b.chip_class == SI) {
   2162 				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
   2163 				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
   2164 			}
   2165 		}
   2166 	}
   2167 
   2168 	surf->cb_color_view = color_view;
   2169 	surf->cb_color_info = color_info;
   2170 	surf->cb_color_attrib = color_attrib;
   2171 
        	/* DCC control (VI and later). The maximum uncompressed block size
        	 * value is 2, lowered to 0 or 1 for multisampled surfaces with 1 or
        	 * 2 bytes per element.
        	 */
   2172 	if (sctx->b.chip_class >= VI) {
   2173 		unsigned max_uncompressed_block_size = 2;
   2174 
   2175 		if (rtex->resource.b.b.nr_samples > 1) {
   2176 			if (rtex->surface.bpe == 1)
   2177 				max_uncompressed_block_size = 0;
   2178 			else if (rtex->surface.bpe == 2)
   2179 				max_uncompressed_block_size = 1;
   2180 		}
   2181 
   2182 		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
   2183 		                       S_028C78_INDEPENDENT_64B_BLOCKS(1);
   2184 	}
   2185 
   2186 	/* This must be set for fast clear to work without FMASK. */
   2187 	if (!rtex->fmask.size && sctx->b.chip_class == SI) {
   2188 		unsigned bankh = util_logbase2(rtex->surface.bankh);
   2189 		surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
   2190 	}
   2191 
   2192 	/* Determine pixel shader export format */
   2193 	si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
   2194 
   2195 	surf->color_initialized = true;
   2196 }
   2197 
        /* Compute the per-surface depth/stencil state for surf and cache it in the
         * surface: db_depth_view, db_depth_info, db_z_info, db_stencil_info,
         * db_depth_base, db_stencil_base, db_depth_size, db_depth_slice,
         * db_htile_data_base and db_htile_surface. Sets surf->depth_initialized
         * when done.
         *
         * sctx is read for its chip class and, on CIK and later, for the tile
         * mode arrays in the screen info. The mip level comes from
         * surf->base.u.tex.level.
         */
   2198 static void si_init_depth_surface(struct si_context *sctx,
   2199 				  struct r600_surface *surf)
   2200 {
   2201 	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
   2202 	unsigned level = surf->base.u.tex.level;
   2203 	struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
   2204 	unsigned format;
   2205 	uint32_t z_info, s_info, db_depth_info;
   2206 	uint64_t z_offs, s_offs;
   2207 	uint32_t db_htile_data_base, db_htile_surface;
   2208 
   2209 	format = si_translate_dbformat(rtex->db_render_format);
   2210 
        	/* NOTE(review): the message prints the resource format, while the
        	 * value translated above is rtex->db_render_format - confirm that
        	 * this is intended.
        	 */
   2211 	if (format == V_028040_Z_INVALID) {
   2212 		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
   2213 	}
   2214 	assert(format != V_028040_Z_INVALID);
   2215 
        	/* Depth and stencil both start at the resource address and add
        	 * their own per-level offset.
        	 */
   2216 	s_offs = z_offs = rtex->resource.gpu_address;
   2217 	z_offs += rtex->surface.level[level].offset;
   2218 	s_offs += rtex->surface.stencil_level[level].offset;
   2219 
   2220 	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
   2221 
   2222 	z_info = S_028040_FORMAT(format);
   2223 	if (rtex->resource.b.b.nr_samples > 1) {
   2224 		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
   2225 	}
   2226 
   2227 	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
   2228 		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
   2229 	else
   2230 		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
   2231 
        	/* CIK and later: tiling parameters are extracted from the tile mode
        	 * and macrotile mode arrays and programmed field by field. Older
        	 * chips get a tile mode index instead.
        	 */
   2232 	if (sctx->b.chip_class >= CIK) {
   2233 		struct radeon_info *info = &sctx->screen->b.info;
   2234 		unsigned index = rtex->surface.tiling_index[level];
   2235 		unsigned stencil_index = rtex->surface.stencil_tiling_index[level];
   2236 		unsigned macro_index = rtex->surface.macro_tile_index;
   2237 		unsigned tile_mode = info->si_tile_mode_array[index];
   2238 		unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
   2239 		unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
   2240 
   2241 		db_depth_info |=
   2242 			S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
   2243 			S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
   2244 			S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
   2245 			S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
   2246 			S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
   2247 			S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
   2248 		z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
   2249 		s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
   2250 	} else {
   2251 		unsigned tile_mode_index = si_tile_mode_index(rtex, level, false);
   2252 		z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
   2253 		tile_mode_index = si_tile_mode_index(rtex, level, true);
   2254 		s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
   2255 	}
   2256 
   2257 	/* HiZ aka depth buffer htile */
   2258 	/* use htile only for first level */
   2259 	if (rtex->htile_buffer && !level) {
   2260 		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
   2261 			  S_028040_ALLOW_EXPCLEAR(1);
   2262 
   2263 		if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
   2264 			/* Workaround: For a not yet understood reason, the
   2265 			 * combination of MSAA, fast stencil clear and stencil
   2266 			 * decompress messes with subsequent stencil buffer
   2267 			 * uses. Problem was reproduced on Verde, Bonaire,
   2268 			 * Tonga, and Carrizo.
   2269 			 *
   2270 			 * Disabling EXPCLEAR works around the problem.
   2271 			 *
   2272 			 * Check piglit's arb_texture_multisample-stencil-clear
   2273 			 * test if you want to try changing this.
   2274 			 */
   2275 			if (rtex->resource.b.b.nr_samples <= 1)
   2276 				s_info |= S_028044_ALLOW_EXPCLEAR(1);
   2277 		} else if (!rtex->tc_compatible_htile) {
   2278 			/* Use all of the htile_buffer for depth if there's no stencil.
   2279 			 * This must not be set when TC-compatible HTILE is enabled
   2280 			 * due to a hw bug.
   2281 			 */
   2282 			s_info |= S_028044_TILE_STENCIL_DISABLE(1);
   2283 		}
   2284 
        		/* The HTILE base is stored as the GPU address shifted right
        		 * by 8 bits.
        		 */
   2285 		uint64_t va = rtex->htile_buffer->gpu_address;
   2286 		db_htile_data_base = va >> 8;
   2287 		db_htile_surface = S_028ABC_FULL_CACHE(1);
   2288 
        		/* TC-compatible HTILE: DECOMPRESS_ON_N_ZPLANES is chosen
        		 * from the sample count (5 for 0-1, 3 for 2-4, 2 for 8).
        		 */
   2289 		if (rtex->tc_compatible_htile) {
   2290 			db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
   2291 
   2292 			switch (rtex->resource.b.b.nr_samples) {
   2293 			case 0:
   2294 			case 1:
   2295 				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
   2296 				break;
   2297 			case 2:
   2298 			case 4:
   2299 				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
   2300 				break;
   2301 			case 8:
   2302 				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
   2303 				break;
   2304 			default:
   2305 				assert(0);
   2306 			}
   2307 		}
   2308 	} else {
        		/* No HTILE for this level. */
   2309 		db_htile_data_base = 0;
   2310 		db_htile_surface = 0;
   2311 	}
   2312 
        	/* The pitch/height/slice fields below are expressed in units of 8
        	 * blocks (64 for the slice), so both dimensions must be multiples
        	 * of 8.
        	 */
   2313 	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
   2314 
   2315 	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
   2316 			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
   2317 	surf->db_htile_data_base = db_htile_data_base;
   2318 	surf->db_depth_info = db_depth_info;
   2319 	surf->db_z_info = z_info;
   2320 	surf->db_stencil_info = s_info;
        	/* Base addresses are stored shifted right by 8 bits. */
   2321 	surf->db_depth_base = z_offs >> 8;
   2322 	surf->db_stencil_base = s_offs >> 8;
   2323 	surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
   2324 			      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
   2325 	surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
   2326 							levelinfo->nblk_y) / 64 - 1);
   2327 	surf->db_htile_surface = db_htile_surface;
   2328 
   2329 	surf->depth_initialized = true;
   2330 }
   2331 
   2332 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
   2333 {
   2334 	for (int i = 0; i < state->nr_cbufs; ++i) {
   2335 		struct r600_surface *surf = NULL;
   2336 		struct r600_texture *rtex;
   2337 
   2338 		if (!state->cbufs[i])
   2339 			continue;
   2340 		surf = (struct r600_surface*)state->cbufs[i];
   2341 		rtex = (struct r600_texture*)surf->base.texture;
   2342 
   2343 		p_atomic_dec(&rtex->framebuffers_bound);
   2344 	}
   2345 }
   2346 
   2347 static void si_set_framebuffer_state(struct pipe_context *ctx,
   2348 				     const struct pipe_framebuffer_state *state)
   2349 {
   2350 	struct si_context *sctx = (struct si_context *)ctx;
   2351 	struct pipe_constant_buffer constbuf = {0};
   2352 	struct r600_surface *surf = NULL;
   2353 	struct r600_texture *rtex;
   2354 	bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
   2355 	unsigned old_nr_samples = sctx->framebuffer.nr_samples;
   2356 	int i;
   2357 
   2358 	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
   2359 		if (!sctx->framebuffer.state.cbufs[i])
   2360 			continue;
   2361 
   2362 		rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
   2363 		if (rtex->dcc_gather_statistics)
   2364 			vi_separate_dcc_stop_query(ctx, rtex);
   2365 	}
   2366 
   2367 	/* Only flush TC when changing the framebuffer state, because
   2368 	 * the only client not using TC that can change textures is
   2369 	 * the framebuffer.
   2370 	 *
   2371 	 * Flush all CB and DB caches here because all buffers can be used
   2372 	 * for write by both TC (with shader image stores) and CB/DB.
   2373 	 */
   2374 	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
   2375 			 SI_CONTEXT_INV_GLOBAL_L2 |
   2376 			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
   2377 			 SI_CONTEXT_CS_PARTIAL_FLUSH;
   2378 
   2379 	/* Take the maximum of the old and new count. If the new count is lower,
   2380 	 * dirtying is needed to disable the unbound colorbuffers.
   2381 	 */
   2382 	sctx->framebuffer.dirty_cbufs |=
   2383 		(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
   2384 	sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
   2385 
   2386 	si_dec_framebuffer_counters(&sctx->framebuffer.state);
   2387 	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
   2388 
   2389 	sctx->framebuffer.colorbuf_enabled_4bit = 0;
   2390 	sctx->framebuffer.spi_shader_col_format = 0;
   2391 	sctx->framebuffer.spi_shader_col_format_alpha = 0;
   2392 	sctx->framebuffer.spi_shader_col_format_blend = 0;
   2393 	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
   2394 	sctx->framebuffer.color_is_int8 = 0;
   2395 	sctx->framebuffer.color_is_int10 = 0;
   2396 
   2397 	sctx->framebuffer.compressed_cb_mask = 0;
   2398 	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
   2399 	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
   2400 	sctx->framebuffer.any_dst_linear = false;
   2401 
   2402 	for (i = 0; i < state->nr_cbufs; i++) {
   2403 		if (!state->cbufs[i])
   2404 			continue;
   2405 
   2406 		surf = (struct r600_surface*)state->cbufs[i];
   2407 		rtex = (struct r600_texture*)surf->base.texture;
   2408 
   2409 		if (!surf->color_initialized) {
   2410 			si_initialize_color_surface(sctx, surf);
   2411 		}
   2412 
   2413 		sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4);
   2414 		sctx->framebuffer.spi_shader_col_format |=
   2415 			surf->spi_shader_col_format << (i * 4);
   2416 		sctx->framebuffer.spi_shader_col_format_alpha |=
   2417 			surf->spi_shader_col_format_alpha << (i * 4);
   2418 		sctx->framebuffer.spi_shader_col_format_blend |=
   2419 			surf->spi_shader_col_format_blend << (i * 4);
   2420 		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
   2421 			surf->spi_shader_col_format_blend_alpha << (i * 4);
   2422 
   2423 		if (surf->color_is_int8)
   2424 			sctx->framebuffer.color_is_int8 |= 1 << i;
   2425 		if (surf->color_is_int10)
   2426 			sctx->framebuffer.color_is_int10 |= 1 << i;
   2427 
   2428 		if (rtex->fmask.size) {
   2429 			sctx->framebuffer.compressed_cb_mask |= 1 << i;
   2430 		}
   2431 
   2432 		if (rtex->surface.is_linear)
   2433 			sctx->framebuffer.any_dst_linear = true;
   2434 
   2435 		r600_context_add_resource_size(ctx, surf->base.texture);
   2436 
   2437 		p_atomic_inc(&rtex->framebuffers_bound);
   2438 
   2439 		if (rtex->dcc_gather_statistics) {
   2440 			/* Dirty tracking must be enabled for DCC usage analysis. */
   2441 			sctx->framebuffer.compressed_cb_mask |= 1 << i;
   2442 			vi_separate_dcc_start_query(ctx, rtex);
   2443 		}
   2444 	}
   2445 
   2446 	if (state->zsbuf) {
   2447 		surf = (struct r600_surface*)state->zsbuf;
   2448 		rtex = (struct r600_texture*)surf->base.texture;
   2449 
   2450 		if (!surf->depth_initialized) {
   2451 			si_init_depth_surface(sctx, surf);
   2452 		}
   2453 		r600_context_add_resource_size(ctx, surf->base.texture);
   2454 	}
   2455 
   2456 	si_update_poly_offset_state(sctx);
   2457 	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
   2458 	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
   2459 
   2460 	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
   2461 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
   2462 
   2463 	if (sctx->framebuffer.nr_samples != old_nr_samples) {
   2464 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
   2465 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
   2466 
   2467 		/* Set sample locations as fragment shader constants. */
   2468 		switch (sctx->framebuffer.nr_samples) {
   2469 		case 1:
   2470 			constbuf.user_buffer = sctx->b.sample_locations_1x;
   2471 			break;
   2472 		case 2:
   2473 			constbuf.user_buffer = sctx->b.sample_locations_2x;
   2474 			break;
   2475 		case 4:
   2476 			constbuf.user_buffer = sctx->b.sample_locations_4x;
   2477 			break;
   2478 		case 8:
   2479 			constbuf.user_buffer = sctx->b.sample_locations_8x;
   2480 			break;
   2481 		case 16:
   2482 			constbuf.user_buffer = sctx->b.sample_locations_16x;
   2483 			break;
   2484 		default:
   2485 			R600_ERR("Requested an invalid number of samples %i.\n",
   2486 				 sctx->framebuffer.nr_samples);
   2487 			assert(0);
   2488 		}
   2489 		constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
   2490 		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf);
   2491 
   2492 		si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
   2493 	}
   2494 
   2495 	sctx->need_check_render_feedback = true;
   2496 	sctx->do_update_shaders = true;
   2497 }
   2498 
   /**
    * Emit the register state of the currently bound framebuffer.
    *
    * Colorbuffer slots are only written when their bit is set in
    * sctx->framebuffer.dirty_cbufs, and the depth/stencil registers only when
    * sctx->framebuffer.dirty_zsbuf is set. PA_SC_WINDOW_SCISSOR_BR is written
    * on every call. Both dirty trackers are cleared before returning.
    *
    * @param sctx  context; reads sctx->framebuffer and writes to sctx->b.gfx.cs
    * @param atom  not referenced by this function
    */
   2499 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
   2500 {
   2501 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
   2502 	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
   2503 	unsigned i, nr_cbufs = state->nr_cbufs;
   2504 	struct r600_texture *tex = NULL;
   2505 	struct r600_surface *cb = NULL;
   2506 	unsigned cb_color_info = 0;
   2507 
   2508 	/* Colorbuffers. */
   2509 	for (i = 0; i < nr_cbufs; i++) {
   2510 		const struct radeon_surf_level *level_info;
   2511 		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
   2512 		unsigned cb_color_base, cb_color_fmask, cb_color_attrib;
   2513 		unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice;
   2514 
        		/* Slots that are not flagged dirty are left untouched. */
   2515 		if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
   2516 			continue;
   2517 
        		/* Unbound slot below nr_cbufs: only CB_COLORn_INFO is written,
        		 * with the INVALID format. Per-slot registers are 0x3C apart.
        		 */
   2518 		cb = (struct r600_surface*)state->cbufs[i];
   2519 		if (!cb) {
   2520 			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
   2521 					       S_028C70_FORMAT(V_028C70_COLOR_INVALID));
   2522 			continue;
   2523 		}
   2524 
        		/* Add the texture, and a separate CMASK / DCC buffer when one
        		 * exists, to the gfx buffer list.
        		 */
   2525 		tex = (struct r600_texture *)cb->base.texture;
   2526 		level_info =  &tex->surface.level[cb->base.u.tex.level];
   2527 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
   2528 				      &tex->resource, RADEON_USAGE_READWRITE,
   2529 				      tex->resource.b.b.nr_samples > 1 ?
   2530 					      RADEON_PRIO_COLOR_BUFFER_MSAA :
   2531 					      RADEON_PRIO_COLOR_BUFFER);
   2532 
   2533 		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
   2534 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
   2535 				tex->cmask_buffer, RADEON_USAGE_READWRITE,
   2536 				RADEON_PRIO_CMASK);
   2537 		}
   2538 
   2539 		if (tex->dcc_separate_buffer)
   2540 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
   2541 						  tex->dcc_separate_buffer,
   2542 						  RADEON_USAGE_READWRITE,
   2543 						  RADEON_PRIO_DCC);
   2544 
   2545 		/* Compute mutable surface parameters. */
   2546 		pitch_tile_max = level_info->nblk_x / 8 - 1;
   2547 		slice_tile_max = level_info->nblk_x *
   2548 				 level_info->nblk_y / 64 - 1;
   2549 		tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false);
   2550 
        		/* The base register value is the GPU address shifted right by 8. */
   2551 		cb_color_base = (tex->resource.gpu_address + level_info->offset) >> 8;
   2552 		cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
   2553 		cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
   2554 		cb_color_attrib = cb->cb_color_attrib |
   2555 				  S_028C74_TILE_MODE_INDEX(tile_mode_index);
   2556 
   2557 		if (tex->fmask.size) {
   2558 			if (sctx->b.chip_class >= CIK)
   2559 				cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1);
   2560 			cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index);
   2561 			cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8;
   2562 			cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max);
   2563 		} else {
   2564 			/* This must be set for fast clear to work without FMASK. */
   2565 			if (sctx->b.chip_class >= CIK)
   2566 				cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
   2567 			cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
   2568 			cb_color_fmask = cb_color_base;
   2569 			cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
   2570 		}
   2571 
   2572 		cb_color_info = cb->cb_color_info | tex->cb_color_info;
   2573 
        		/* DCC is enabled only for levels below num_dcc_levels, and is
        		 * left off when this surface is bound as cbufs[1] with at most
        		 * one sample while cbufs[0] is multisampled.
        		 */
   2574 		if (tex->dcc_offset && cb->base.u.tex.level < tex->surface.num_dcc_levels) {
   2575 			bool is_msaa_resolve_dst = state->cbufs[0] &&
   2576 						   state->cbufs[0]->texture->nr_samples > 1 &&
   2577 						   state->cbufs[1] == &cb->base &&
   2578 						   state->cbufs[1]->texture->nr_samples <= 1;
   2579 
   2580 			if (!is_msaa_resolve_dst)
   2581 				cb_color_info |= S_028C70_DCC_ENABLE(1);
   2582 		}
   2583 
        		/* 13 consecutive dwords starting at CB_COLORn_BASE; VI and later
        		 * take a 14th dword (DCC_BASE), emitted last.
        		 */
   2584 		radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
   2585 					   sctx->b.chip_class >= VI ? 14 : 13);
   2586 		radeon_emit(cs, cb_color_base);		/* R_028C60_CB_COLOR0_BASE */
   2587 		radeon_emit(cs, cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
   2588 		radeon_emit(cs, cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
   2589 		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
   2590 		radeon_emit(cs, cb_color_info);		/* R_028C70_CB_COLOR0_INFO */
   2591 		radeon_emit(cs, cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
   2592 		radeon_emit(cs, cb->cb_dcc_control);	/* R_028C78_CB_COLOR0_DCC_CONTROL */
   2593 		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
   2594 		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
   2595 		radeon_emit(cs, cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
   2596 		radeon_emit(cs, cb_color_fmask_slice);		/* R_028C88_CB_COLOR0_FMASK_SLICE */
   2597 		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
   2598 		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */
   2599 
        		/* With a separate DCC buffer the texture's own GPU address is
        		 * not added to the DCC offsets.
        		 */
   2600 		if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
   2601 			radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
   2602 					 tex->dcc_offset +
   2603 				         tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8);
   2604 	}
        	/* Dirty slots from nr_cbufs up to 7 get CB_COLORn_INFO = 0. */
   2605 	for (; i < 8 ; i++)
   2606 		if (sctx->framebuffer.dirty_cbufs & (1 << i))
   2607 			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
   2608 
   2609 	/* ZS buffer. */
   2610 	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
   2611 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
   2612 		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
   2613 
   2614 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
   2615 				      &rtex->resource, RADEON_USAGE_READWRITE,
   2616 				      zb->base.texture->nr_samples > 1 ?
   2617 					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
   2618 					      RADEON_PRIO_DEPTH_BUFFER);
   2619 
        		/* The HTILE buffer is only referenced when the surface has a
        		 * non-zero HTILE base.
        		 */
   2620 		if (zb->db_htile_data_base) {
   2621 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
   2622 					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
   2623 					      RADEON_PRIO_HTILE);
   2624 		}
   2625 
   2626 		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
   2627 		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
   2628 
        		/* The same depth/stencil base is used for both the READ and the
        		 * WRITE base registers.
        		 */
   2629 		radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
   2630 		radeon_emit(cs, zb->db_depth_info);	/* R_02803C_DB_DEPTH_INFO */
   2631 		radeon_emit(cs, zb->db_z_info |		/* R_028040_DB_Z_INFO */
   2632 			    S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
   2633 		radeon_emit(cs, zb->db_stencil_info);	/* R_028044_DB_STENCIL_INFO */
   2634 		radeon_emit(cs, zb->db_depth_base);	/* R_028048_DB_Z_READ_BASE */
   2635 		radeon_emit(cs, zb->db_stencil_base);	/* R_02804C_DB_STENCIL_READ_BASE */
   2636 		radeon_emit(cs, zb->db_depth_base);	/* R_028050_DB_Z_WRITE_BASE */
   2637 		radeon_emit(cs, zb->db_stencil_base);	/* R_028054_DB_STENCIL_WRITE_BASE */
   2638 		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
   2639 		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */
   2640 
   2641 		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
   2642 		radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
   2643 		radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
   2644 
   2645 		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
   2646 	} else if (sctx->framebuffer.dirty_zsbuf) {
        		/* Dirty but no depth/stencil buffer bound: write the INVALID
        		 * Z and stencil formats.
        		 */
   2647 		radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
   2648 		radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
   2649 		radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
   2650 	}
   2651 
   2652 	/* Framebuffer dimensions. */
   2653         /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
   2654 	radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
   2655 			       S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
   2656 
        	/* Everything flagged dirty has been handled above. */
   2657 	sctx->framebuffer.dirty_cbufs = 0;
   2658 	sctx->framebuffer.dirty_zsbuf = false;
   2659 }
   2660 
   2661 static void si_emit_msaa_sample_locs(struct si_context *sctx,
   2662 				     struct r600_atom *atom)
   2663 {
   2664 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
   2665 	unsigned nr_samples = sctx->framebuffer.nr_samples;
   2666 
   2667 	/* Smoothing (only possible with nr_samples == 1) uses the same
   2668 	 * sample locations as the MSAA it simulates.
   2669 	 */
   2670 	if (nr_samples <= 1 && sctx->smoothing_enabled)
   2671 		nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
   2672 
   2673 	/* On Polaris, the small primitive filter uses the sample locations
   2674 	 * even when MSAA is off, so we need to make sure they're set to 0.
   2675 	 */
   2676 	if (sctx->b.family >= CHIP_POLARIS10)
   2677 		nr_samples = MAX2(nr_samples, 1);
   2678 
   2679 	if (nr_samples >= 1 &&
   2680 	    (nr_samples != sctx->msaa_sample_locs.nr_samples)) {
   2681 		sctx->msaa_sample_locs.nr_samples = nr_samples;
   2682 		cayman_emit_msaa_sample_locs(cs, nr_samples);
   2683 	}
   2684 
   2685 	if (sctx->b.family >= CHIP_POLARIS10) {
   2686 		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
   2687 		unsigned small_prim_filter_cntl =
   2688 			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
   2689 			S_028830_LINE_FILTER_DISABLE(1); /* line bug */
   2690 
   2691 		/* The alternative of setting sample locations to 0 would
   2692 		 * require a DB flush to avoid Z errors, see
   2693 		 * https://bugs.freedesktop.org/show_bug.cgi?id=96908
   2694 		 */
   2695 		if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable)
   2696 			small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
   2697 
   2698 		radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
   2699 				       small_prim_filter_cntl);
   2700 	}
   2701 }
   2702 
   2703 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
   2704 {
   2705 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
   2706 	unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes;
   2707 	/* 33% faster rendering to linear color buffers */
   2708 	bool dst_is_linear = sctx->framebuffer.any_dst_linear;
   2709 	unsigned sc_mode_cntl_1 =
   2710 		S_028A4C_WALK_SIZE(dst_is_linear) |
   2711 		S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
   2712 		S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
   2713 		/* always 1: */
   2714 		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
   2715 		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
   2716 		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
   2717 		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
   2718 		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
   2719 		S_028A4C_FORCE_EOV_REZ_ENABLE(1);
   2720 
   2721 	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
   2722 				sctx->ps_iter_samples,
   2723 				sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
   2724 				sc_mode_cntl_1);
   2725 }
   2726 
   2727 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
   2728 {
   2729 	struct si_context *sctx = (struct si_context *)ctx;
   2730 
   2731 	if (sctx->ps_iter_samples == min_samples)
   2732 		return;
   2733 
   2734 	sctx->ps_iter_samples = min_samples;
   2735 	sctx->do_update_shaders = true;
   2736 
   2737 	if (sctx->framebuffer.nr_samples > 1)
   2738 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
   2739 }
   2740 
   2741 /*
   2742  * Samplers
   2743  */
   2744 
   2745 /**
   2746  * Build the sampler view descriptor for a buffer texture.
   2747  * @param state 256-bit descriptor; only the high 128 bits are filled in
   2748  */
   2749 void
   2750 si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
   2751 			  enum pipe_format format,
   2752 			  unsigned offset, unsigned size,
   2753 			  uint32_t *state)
   2754 {
   2755 	const struct util_format_description *desc;
   2756 	int first_non_void;
   2757 	unsigned stride;
   2758 	unsigned num_records;
   2759 	unsigned num_format, data_format;
   2760 
   2761 	desc = util_format_description(format);
   2762 	first_non_void = util_format_get_first_non_void_channel(format);
   2763 	stride = desc->block.bits / 8;
   2764 	num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void);
   2765 	data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void);
   2766 
   2767 	num_records = size / stride;
   2768 	num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
   2769 
   2770 	if (screen->b.chip_class >= VI)
   2771 		num_records *= stride;
   2772 
   2773 	state[4] = 0;
   2774 	state[5] = S_008F04_STRIDE(stride);
   2775 	state[6] = num_records;
   2776 	state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
   2777 		   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
   2778 		   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
   2779 		   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
   2780 		   S_008F0C_NUM_FORMAT(num_format) |
   2781 		   S_008F0C_DATA_FORMAT(data_format);
   2782 }
   2783 
   2784 /**
   2785  * Build the sampler view descriptor for a texture.
         *
         * @param screen        screen; used for format translation and chip class
         * @param tex           texture; its resource, dcc_offset and fmask are read
         * @param sampler       when false, cube, cube array and 3D resources are
         *                      described as 2D arrays (shader image use)
         * @param target        view target, passed on to si_tex_dim()
         * @param pipe_format   format of the view
         * @param state_swizzle view swizzle, composed with the format swizzle
         * @param first_level   BASE_LEVEL for resources with at most one sample
         * @param last_level    LAST_LEVEL for resources with at most one sample
         * @param first_layer   BASE_ARRAY
         * @param last_layer    LAST_ARRAY
         * @param width         width, encoded as width - 1
         * @param height        height, encoded as height - 1 (forced to 1 for
         *                      1D arrays)
         * @param depth         depth, encoded as depth - 1; replaced by a value
         *                      derived from res->array_size for array and cube
         *                      types
         * @param state         receives 8 dwords; state[0] is written as 0
         * @param fmask_state   receives 8 dwords, written only when
         *                      tex->fmask.size is non-zero
   2786  */
   2787 void
   2788 si_make_texture_descriptor(struct si_screen *screen,
   2789 			   struct r600_texture *tex,
   2790 			   bool sampler,
   2791 			   enum pipe_texture_target target,
   2792 			   enum pipe_format pipe_format,
   2793 			   const unsigned char state_swizzle[4],
   2794 			   unsigned first_level, unsigned last_level,
   2795 			   unsigned first_layer, unsigned last_layer,
   2796 			   unsigned width, unsigned height, unsigned depth,
   2797 			   uint32_t *state,
   2798 			   uint32_t *fmask_state)
   2799 {
   2800 	struct pipe_resource *res = &tex->resource.b.b;
   2801 	const struct util_format_description *desc;
   2802 	unsigned char swizzle[4];
   2803 	int first_non_void;
   2804 	unsigned num_format, data_format, type;
   2805 	uint64_t va;
   2806 
   2807 	desc = util_format_description(pipe_format);
   2808 
        	/* Depth/stencil formats read from a single source channel, which
        	 * is replicated before the view swizzle is applied: Y, W or X
        	 * depending on the format. Other formats use the format's own
        	 * swizzle.
        	 */
   2809 	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
   2810 		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
   2811 		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
   2812 		const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
   2813 
   2814 		switch (pipe_format) {
   2815 		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   2816 		case PIPE_FORMAT_X32_S8X24_UINT:
   2817 		case PIPE_FORMAT_X8Z24_UNORM:
   2818 			util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
   2819 			break;
   2820 		case PIPE_FORMAT_X24S8_UINT:
   2821 			/*
   2822 			 * X24S8 is implemented as an 8_8_8_8 data format, to
   2823 			 * fix texture gathers. This affects at least
   2824 			 * GL45-CTS.texture_cube_map_array.sampling on VI.
   2825 			 */
   2826 			util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
   2827 			break;
   2828 		default:
   2829 			util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
   2830 		}
   2831 	} else {
   2832 		util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
   2833 	}
   2834 
   2835 	first_non_void = util_format_get_first_non_void_channel(pipe_format);
   2836 
        	/* Select the hardware number format. */
   2837 	switch (pipe_format) {
   2838 	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   2839 		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
   2840 		break;
   2841 	default:
        		/* A negative first_non_void means the choice cannot be made
        		 * from a channel description: compressed formats are mapped
        		 * by name, subsampled layouts are UNORM, the rest is FLOAT.
        		 */
   2842 		if (first_non_void < 0) {
   2843 			if (util_format_is_compressed(pipe_format)) {
   2844 				switch (pipe_format) {
   2845 				case PIPE_FORMAT_DXT1_SRGB:
   2846 				case PIPE_FORMAT_DXT1_SRGBA:
   2847 				case PIPE_FORMAT_DXT3_SRGBA:
   2848 				case PIPE_FORMAT_DXT5_SRGBA:
   2849 				case PIPE_FORMAT_BPTC_SRGBA:
   2850 				case PIPE_FORMAT_ETC2_SRGB8:
   2851 				case PIPE_FORMAT_ETC2_SRGB8A1:
   2852 				case PIPE_FORMAT_ETC2_SRGBA8:
   2853 					num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
   2854 					break;
   2855 				case PIPE_FORMAT_RGTC1_SNORM:
   2856 				case PIPE_FORMAT_LATC1_SNORM:
   2857 				case PIPE_FORMAT_RGTC2_SNORM:
   2858 				case PIPE_FORMAT_LATC2_SNORM:
   2859 				case PIPE_FORMAT_ETC2_R11_SNORM:
   2860 				case PIPE_FORMAT_ETC2_RG11_SNORM:
   2861 				/* implies float, so use SNORM/UNORM to determine
   2862 				   whether data is signed or not */
   2863 				case PIPE_FORMAT_BPTC_RGB_FLOAT:
   2864 					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
   2865 					break;
   2866 				default:
   2867 					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
   2868 					break;
   2869 				}
   2870 			} else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
   2871 				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
   2872 			} else {
   2873 				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
   2874 			}
   2875 		} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
   2876 			num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
   2877 		} else {
        			/* UNORM is the fallback for channel types not handled
        			 * by the switch below.
        			 */
   2878 			num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
   2879 
   2880 			switch (desc->channel[first_non_void].type) {
   2881 			case UTIL_FORMAT_TYPE_FLOAT:
   2882 				num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
   2883 				break;
   2884 			case UTIL_FORMAT_TYPE_SIGNED:
   2885 				if (desc->channel[first_non_void].normalized)
   2886 					num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
   2887 				else if (desc->channel[first_non_void].pure_integer)
   2888 					num_format = V_008F14_IMG_NUM_FORMAT_SINT;
   2889 				else
   2890 					num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
   2891 				break;
   2892 			case UTIL_FORMAT_TYPE_UNSIGNED:
   2893 				if (desc->channel[first_non_void].normalized)
   2894 					num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
   2895 				else if (desc->channel[first_non_void].pure_integer)
   2896 					num_format = V_008F14_IMG_NUM_FORMAT_UINT;
   2897 				else
   2898 					num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
   2899 			}
   2900 		}
   2901 	}
   2902 
        	/* A result of ~0 from si_translate_texformat() is replaced by data
        	 * format 0 (presumably ~0 signals an untranslatable format -
        	 * confirm against si_translate_texformat).
        	 */
   2903 	data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void);
   2904 	if (data_format == ~0) {
   2905 		data_format = 0;
   2906 	}
   2907 
   2908 	if (!sampler &&
   2909 	    (res->target == PIPE_TEXTURE_CUBE ||
   2910 	     res->target == PIPE_TEXTURE_CUBE_ARRAY ||
   2911 	     res->target == PIPE_TEXTURE_3D)) {
   2912 		/* For the purpose of shader images, treat cube maps and 3D
   2913 		 * textures as 2D arrays. For 3D textures, the address
   2914 		 * calculations for mipmaps are different, so we rely on the
   2915 		 * caller to effectively disable mipmaps.
   2916 		 */
   2917 		type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   2918 
   2919 		assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
   2920 	} else {
   2921 		type = si_tex_dim(res->target, target, res->nr_samples);
   2922 	}
   2923 
        	/* Override height/depth according to the resource type: array
        	 * types take their depth from array_size (except a 3D resource
        	 * viewed as a 2D array for images, which keeps the caller's depth),
        	 * and cubes use array_size / 6.
        	 */
   2924 	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
   2925 	        height = 1;
   2926 		depth = res->array_size;
   2927 	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
   2928 		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
   2929 		if (sampler || res->target != PIPE_TEXTURE_3D)
   2930 			depth = res->array_size;
   2931 	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
   2932 		depth = res->array_size / 6;
   2933 
        	/* For resources with more than one sample, BASE_LEVEL is 0 and
        	 * LAST_LEVEL carries log2(nr_samples) instead of a mip level.
        	 */
   2934 	state[0] = 0;
   2935 	state[1] = (S_008F14_DATA_FORMAT(data_format) |
   2936 		    S_008F14_NUM_FORMAT(num_format));
   2937 	state[2] = (S_008F18_WIDTH(width - 1) |
   2938 		    S_008F18_HEIGHT(height - 1) |
   2939 		    S_008F18_PERF_MOD(4));
   2940 	state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
   2941 		    S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
   2942 		    S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
   2943 		    S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
   2944 		    S_008F1C_BASE_LEVEL(res->nr_samples > 1 ?
   2945 					0 : first_level) |
   2946 		    S_008F1C_LAST_LEVEL(res->nr_samples > 1 ?
   2947 					util_logbase2(res->nr_samples) :
   2948 					last_level) |
   2949 		    S_008F1C_POW2_PAD(res->last_level > 0) |
   2950 		    S_008F1C_TYPE(type));
   2951 	state[4] = S_008F20_DEPTH(depth - 1);
   2952 	state[5] = (S_008F24_BASE_ARRAY(first_layer) |
   2953 		    S_008F24_LAST_ARRAY(last_layer));
   2954 	state[6] = 0;
   2955 	state[7] = 0;
   2956 
        	/* With DCC, state[6] gets ALPHA_IS_ON_MSB when the color swap
        	 * value is 0 or 1; state[7] stays 0.
        	 */
   2957 	if (tex->dcc_offset) {
   2958 		unsigned swap = r600_translate_colorswap(pipe_format, false);
   2959 
   2960 		state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
   2961 	} else {
   2962 		/* The last dword is unused by hw. The shader uses it to clear
   2963 		 * bits in the first dword of sampler state.
   2964 		 */
   2965 		if (screen->b.chip_class <= CIK && res->nr_samples <= 1) {
   2966 			if (first_level == last_level)
   2967 				state[7] = C_008F30_MAX_ANISO_RATIO;
   2968 			else
   2969 				state[7] = 0xffffffff;
   2970 		}
   2971 	}
   2972 
   2973 	/* Initialize the sampler view for FMASK. */
   2974 	if (tex->fmask.size) {
   2975 		uint32_t fmask_format;
   2976 
   2977 		va = tex->resource.gpu_address + tex->fmask.offset;
   2978 
        		/* Only 2, 4 and 8 samples have an FMASK data format here;
        		 * anything else asserts and falls back to INVALID.
        		 */
   2979 		switch (res->nr_samples) {
   2980 		case 2:
   2981 			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
   2982 			break;
   2983 		case 4:
   2984 			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
   2985 			break;
   2986 		case 8:
   2987 			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
   2988 			break;
   2989 		default:
   2990 			assert(0);
   2991 			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
   2992 		}
   2993 
        		/* Unlike state[], the FMASK descriptor carries its address:
        		 * va >> 8 in dword 0 and va >> 40 in BASE_ADDRESS_HI. All four
        		 * destination channels select X.
        		 */
   2994 		fmask_state[0] = va >> 8;
   2995 		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
   2996 				 S_008F14_DATA_FORMAT(fmask_format) |
   2997 				 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT);
   2998 		fmask_state[2] = S_008F18_WIDTH(width - 1) |
   2999 				 S_008F18_HEIGHT(height - 1);
   3000 		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
   3001 				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
   3002 				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
   3003 				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
   3004 				 S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) |
   3005 				 S_008F1C_TYPE(si_tex_dim(res->target, target, 0));
   3006 		fmask_state[4] = S_008F20_DEPTH(depth - 1) |
   3007 				 S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1);
   3008 		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
   3009 				 S_008F24_LAST_ARRAY(last_layer);
   3010 		fmask_state[6] = 0;
   3011 		fmask_state[7] = 0;
   3012 	}
   3013 }
   3014 
   3015 /**
   3016  * Create a sampler view.
   3017  *
   3018  * @param ctx		context
   3019  * @param texture	texture
   3020  * @param state		sampler view template
   3021  * @param width0	width0 override (for compressed textures as int)
   3022  * @param height0	height0 override (for compressed textures as int)
   3023  * @param force_level   set the base address to the level (for compressed textures)
         *
         * @return the base object of the new view (reference count 1, holding a
         *         reference on texture), or NULL if the view cannot be allocated
         *         or the flushed depth texture cannot be created
   3024  */
   3025 struct pipe_sampler_view *
   3026 si_create_sampler_view_custom(struct pipe_context *ctx,
   3027 			      struct pipe_resource *texture,
   3028 			      const struct pipe_sampler_view *state,
   3029 			      unsigned width0, unsigned height0,
   3030 			      unsigned force_level)
   3031 {
   3032 	struct si_context *sctx = (struct si_context*)ctx;
   3033 	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
   3034 	struct r600_texture *tmp = (struct r600_texture*)texture;
   3035 	unsigned base_level, first_level, last_level;
   3036 	unsigned char state_swizzle[4];
   3037 	unsigned height, depth, width;
   3038 	unsigned last_layer = state->u.tex.last_layer;
   3039 	enum pipe_format pipe_format;
   3040 	const struct radeon_surf_level *surflevel;
   3041 
   3042 	if (!view)
   3043 		return NULL;
   3044 
   3045 	/* initialize base object */
        	/* The template's texture pointer is dropped first so that
        	 * pipe_resource_reference() below starts from NULL.
        	 */
   3046 	view->base = *state;
   3047 	view->base.texture = NULL;
   3048 	view->base.reference.count = 1;
   3049 	view->base.context = ctx;
   3050 
        	/* NOTE(review): texture is dereferenced below (texture->target), so
        	 * a NULL texture is only caught in builds where assert() is active.
        	 */
   3051 	assert(texture);
   3052 	pipe_resource_reference(&view->base.texture, texture);
   3053 
   3054 	if (state->format == PIPE_FORMAT_X24S8_UINT ||
   3055 	    state->format == PIPE_FORMAT_S8X24_UINT ||
   3056 	    state->format == PIPE_FORMAT_X32_S8X24_UINT ||
   3057 	    state->format == PIPE_FORMAT_S8_UINT)
   3058 		view->is_stencil_sampler = true;
   3059 
   3060 	/* Buffer resource. */
        	/* Buffers only need the buffer descriptor; everything below is
        	 * texture-specific.
        	 */
   3061 	if (texture->target == PIPE_BUFFER) {
   3062 		si_make_buffer_descriptor(sctx->screen,
   3063 					  (struct r600_resource *)texture,
   3064 					  state->format,
   3065 					  state->u.buf.offset,
   3066 					  state->u.buf.size,
   3067 					  view->state);
   3068 		return &view->base;
   3069 	}
   3070 
   3071 	state_swizzle[0] = state->swizzle_r;
   3072 	state_swizzle[1] = state->swizzle_g;
   3073 	state_swizzle[2] = state->swizzle_b;
   3074 	state_swizzle[3] = state->swizzle_a;
   3075 
   3076 	base_level = 0;
   3077 	first_level = state->u.tex.first_level;
   3078 	last_level = state->u.tex.last_level;
   3079 	width = width0;
   3080 	height = height0;
   3081 	depth = texture->depth0;
   3082 
        	/* With force_level the view covers exactly that one level: it
        	 * becomes the base level, the descriptor's level range collapses to
        	 * 0..0, and the dimensions are minified to that level.
        	 */
   3083 	if (force_level) {
   3084 		assert(force_level == first_level &&
   3085 		       force_level == last_level);
   3086 		base_level = force_level;
   3087 		first_level = 0;
   3088 		last_level = 0;
   3089 		width = u_minify(width, force_level);
   3090 		height = u_minify(height, force_level);
   3091 		depth = u_minify(depth, force_level);
   3092 	}
   3093 
   3094 	/* This is not needed if state trackers set last_layer correctly. */
   3095 	if (state->target == PIPE_TEXTURE_1D ||
   3096 	    state->target == PIPE_TEXTURE_2D ||
   3097 	    state->target == PIPE_TEXTURE_RECT ||
   3098 	    state->target == PIPE_TEXTURE_CUBE)
   3099 		last_layer = state->u.tex.first_layer;
   3100 
   3101 	/* Texturing with separate depth and stencil. */
   3102 	pipe_format = state->format;
   3103 
   3104 	/* Depth/stencil texturing sometimes needs separate texture. */
   3105 	if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) {
        		/* Create the flushed depth texture on first use; on failure
        		 * drop the texture reference taken above and free the view.
        		 */
   3106 		if (!tmp->flushed_depth_texture &&
   3107 		    !r600_init_flushed_depth_texture(ctx, texture, NULL)) {
   3108 			pipe_resource_reference(&view->base.texture, NULL);
   3109 			FREE(view);
   3110 			return NULL;
   3111 		}
   3112 
   3113 		assert(tmp->flushed_depth_texture);
   3114 
   3115 		/* Override format for the case where the flushed texture
   3116 		 * contains only Z or only S.
   3117 		 */
   3118 		if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format)
   3119 			pipe_format = tmp->flushed_depth_texture->resource.b.b.format;
   3120 
        		/* From here on the descriptor is built from the flushed copy. */
   3121 		tmp = tmp->flushed_depth_texture;
   3122 	}
   3123 
   3124 	surflevel = tmp->surface.level;
   3125 
        	/* For DB-compatible textures, depth views use the texture's
        	 * db_render_format, combined Z/S formats are narrowed to the single
        	 * aspect being sampled, and stencil views switch to the stencil
        	 * level table.
        	 */
   3126 	if (tmp->db_compatible) {
   3127 		if (!view->is_stencil_sampler)
   3128 			pipe_format = tmp->db_render_format;
   3129 
   3130 		switch (pipe_format) {
   3131 		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
   3132 			pipe_format = PIPE_FORMAT_Z32_FLOAT;
   3133 			break;
   3134 		case PIPE_FORMAT_X8Z24_UNORM:
   3135 		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   3136 			/* Z24 is always stored like this for DB
   3137 			 * compatibility.
   3138 			 */
   3139 			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
   3140 			break;
   3141 		case PIPE_FORMAT_X24S8_UINT:
   3142 		case PIPE_FORMAT_S8X24_UINT:
   3143 		case PIPE_FORMAT_X32_S8X24_UINT:
   3144 			pipe_format = PIPE_FORMAT_S8_UINT;
   3145 			surflevel = tmp->surface.stencil_level;
   3146 			break;
   3147 		default:;
   3148 		}
   3149 	}
   3150 
        	/* Note: this is called with the original texture and the template's
        	 * format and first_level, not the overridden tmp / pipe_format.
        	 */
   3151 	vi_dcc_disable_if_incompatible_format(&sctx->b, texture,
   3152 					      state->u.tex.first_level,
   3153 					      state->format);
   3154 
   3155 	si_make_texture_descriptor(sctx->screen, tmp, true,
   3156 				   state->target, pipe_format, state_swizzle,
   3157 				   first_level, last_level,
   3158 				   state->u.tex.first_layer, last_layer,
   3159 				   width, height, depth,
   3160 				   view->state, view->fmask_state);
   3161 
        	/* Remember which surface level the view is based on. */
   3162 	view->base_level_info = &surflevel[base_level];
   3163 	view->base_level = base_level;
   3164 	view->block_width = util_format_get_blockwidth(pipe_format);
   3165 	return &view->base;
   3166 }
   3167 
   3168 static struct pipe_sampler_view *
   3169 si_create_sampler_view(struct pipe_context *ctx,
   3170 		       struct pipe_resource *texture,
   3171 		       const struct pipe_sampler_view *state)
   3172 {
   3173 	return si_create_sampler_view_custom(ctx, texture, state,
   3174 					     texture ? texture->width0 : 0,
   3175 					     texture ? texture->height0 : 0, 0);
   3176 }
   3177 
   3178 static void si_sampler_view_destroy(struct pipe_context *ctx,
   3179 				    struct pipe_sampler_view *state)
   3180 {
   3181 	struct si_sampler_view *view = (struct si_sampler_view *)state;
   3182 
   3183 	pipe_resource_reference(&state->texture, NULL);
   3184 	FREE(view);
   3185 }
   3186 
   3187 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
   3188 {
   3189 	return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
   3190 	       wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER ||
   3191 	       (linear_filter &&
   3192 	        (wrap == PIPE_TEX_WRAP_CLAMP ||
   3193 		 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP));
   3194 }
   3195 
   3196 static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
   3197 {
   3198 	bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
   3199 			     state->mag_img_filter != PIPE_TEX_FILTER_NEAREST;
   3200 
   3201 	return (state->border_color.ui[0] || state->border_color.ui[1] ||
   3202 		state->border_color.ui[2] || state->border_color.ui[3]) &&
   3203 	       (wrap_mode_uses_border_color(state->wrap_s, linear_filter) ||
   3204 		wrap_mode_uses_border_color(state->wrap_t, linear_filter) ||
   3205 		wrap_mode_uses_border_color(state->wrap_r, linear_filter));
   3206 }
   3207 
   3208 static void *si_create_sampler_state(struct pipe_context *ctx,
   3209 				     const struct pipe_sampler_state *state)
   3210 {
   3211 	struct si_context *sctx = (struct si_context *)ctx;
   3212 	struct r600_common_screen *rscreen = sctx->b.screen;
   3213 	struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
   3214 	unsigned border_color_type, border_color_index = 0;
   3215 	unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
   3216 						       : state->max_anisotropy;
   3217 	unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
   3218 
   3219 	if (!rstate) {
   3220 		return NULL;
   3221 	}
   3222 
   3223 	if (!sampler_state_needs_border_color(state))
   3224 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
   3225 	else if (state->border_color.f[0] == 0 &&
   3226 		 state->border_color.f[1] == 0 &&
   3227 		 state->border_color.f[2] == 0 &&
   3228 		 state->border_color.f[3] == 0)
   3229 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
   3230 	else if (state->border_color.f[0] == 0 &&
   3231 		 state->border_color.f[1] == 0 &&
   3232 		 state->border_color.f[2] == 0 &&
   3233 		 state->border_color.f[3] == 1)
   3234 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
   3235 	else if (state->border_color.f[0] == 1 &&
   3236 		 state->border_color.f[1] == 1 &&
   3237 		 state->border_color.f[2] == 1 &&
   3238 		 state->border_color.f[3] == 1)
   3239 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
   3240 	else {
   3241 		int i;
   3242 
   3243 		border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
   3244 
   3245 		/* Check if the border has been uploaded already. */
   3246 		for (i = 0; i < sctx->border_color_count; i++)
   3247 			if (memcmp(&sctx->border_color_table[i], &state->border_color,
   3248 				   sizeof(state->border_color)) == 0)
   3249 				break;
   3250 
   3251 		if (i >= SI_MAX_BORDER_COLORS) {
   3252 			/* Getting 4096 unique border colors is very unlikely. */
   3253 			fprintf(stderr, "radeonsi: The border color table is full. "
   3254 				"Any new border colors will be just black. "
   3255 				"Please file a bug.\n");
   3256 			border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
   3257 		} else {
   3258 			if (i == sctx->border_color_count) {
   3259 				/* Upload a new border color. */
   3260 				memcpy(&sctx->border_color_table[i], &state->border_color,
   3261 				       sizeof(state->border_color));
   3262 				util_memcpy_cpu_to_le32(&sctx->border_color_map[i],
   3263 							&state->border_color,
   3264 							sizeof(state->border_color));
   3265 				sctx->border_color_count++;
   3266 			}
   3267 
   3268 			border_color_index = i;
   3269 		}
   3270 	}
   3271 
   3272 #ifdef DEBUG
   3273 	rstate->magic = SI_SAMPLER_STATE_MAGIC;
   3274 #endif
   3275 	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
   3276 			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
   3277 			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
   3278 			  S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
   3279 			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
   3280 			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
   3281 			  S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
   3282 			  S_008F30_ANISO_BIAS(max_aniso_ratio) |
   3283 			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
   3284 			  S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
   3285 	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
   3286 			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
   3287 			  S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
   3288 	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
   3289 			  S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
   3290 			  S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
   3291 			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
   3292 			  S_008F38_MIP_POINT_PRECLAMP(1) |
   3293 			  S_008F38_DISABLE_LSB_CEIL(1) |
   3294 			  S_008F38_FILTER_PREC_FIX(1) |
   3295 			  S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
   3296 	rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) |
   3297 			 S_008F3C_BORDER_COLOR_TYPE(border_color_type);
   3298 	return rstate;
   3299 }
   3300 
   3301 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
   3302 {
   3303 	struct si_context *sctx = (struct si_context *)ctx;
   3304 
   3305 	if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask)
   3306 		return;
   3307 
   3308 	sctx->sample_mask.sample_mask = sample_mask;
   3309 	si_mark_atom_dirty(sctx, &sctx->sample_mask.atom);
   3310 }
   3311 
   3312 static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
   3313 {
   3314 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
   3315 	unsigned mask = sctx->sample_mask.sample_mask;
   3316 
   3317 	/* Needed for line and polygon smoothing as well as for the Polaris
   3318 	 * small primitive filter. We expect the state tracker to take care of
   3319 	 * this for us.
   3320 	 */
   3321 	assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
   3322 	       (mask & 1 && sctx->blitter->running));
   3323 
   3324 	radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
   3325 	radeon_emit(cs, mask | (mask << 16));
   3326 	radeon_emit(cs, mask | (mask << 16));
   3327 }
   3328 
   3329 static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
   3330 {
   3331 #ifdef DEBUG
   3332 	struct si_sampler_state *s = state;
   3333 
   3334 	assert(s->magic == SI_SAMPLER_STATE_MAGIC);
   3335 	s->magic = 0;
   3336 #endif
   3337 	free(state);
   3338 }
   3339 
   3340 /*
   3341  * Vertex elements & buffers
   3342  */
   3343 
   3344 static void *si_create_vertex_elements(struct pipe_context *ctx,
   3345 				       unsigned count,
   3346 				       const struct pipe_vertex_element *elements)
   3347 {
   3348 	struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
   3349 	bool used[SI_NUM_VERTEX_BUFFERS] = {};
   3350 	int i;
   3351 
   3352 	assert(count <= SI_MAX_ATTRIBS);
   3353 	if (!v)
   3354 		return NULL;
   3355 
   3356 	v->count = count;
   3357 	for (i = 0; i < count; ++i) {
   3358 		const struct util_format_description *desc;
   3359 		const struct util_format_channel_description *channel;
   3360 		unsigned data_format, num_format;
   3361 		int first_non_void;
   3362 		unsigned vbo_index = elements[i].vertex_buffer_index;
   3363 
   3364 		if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
   3365 			FREE(v);
   3366 			return NULL;
   3367 		}
   3368 
   3369 		if (!used[vbo_index]) {
   3370 			v->first_vb_use_mask |= 1 << i;
   3371 			used[vbo_index] = true;
   3372 		}
   3373 
   3374 		desc = util_format_description(elements[i].src_format);
   3375 		first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
   3376 		data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
   3377 		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
   3378 		channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
   3379 
   3380 		v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
   3381 				   S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
   3382 				   S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
   3383 				   S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
   3384 				   S_008F0C_NUM_FORMAT(num_format) |
   3385 				   S_008F0C_DATA_FORMAT(data_format);
   3386 		v->format_size[i] = desc->block.bits / 8;
   3387 
   3388 		/* The hardware always treats the 2-bit alpha channel as
   3389 		 * unsigned, so a shader workaround is needed.
   3390 		 */
   3391 		if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
   3392 			if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
   3393 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i);
   3394 			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
   3395 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i);
   3396 			} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
   3397 				/* This isn't actually used in OpenGL. */
   3398 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i);
   3399 			}
   3400 		} else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
   3401 			if (desc->swizzle[3] == PIPE_SWIZZLE_1)
   3402 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i);
   3403 			else
   3404 				v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i);
   3405 		} else if (channel && channel->size == 32 && !channel->pure_integer) {
   3406 			if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
   3407 				if (channel->normalized) {
   3408 					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
   3409 						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i);
   3410 					else
   3411 						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i);
   3412 				} else {
   3413 					v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i);
   3414 				}
   3415 			} else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
   3416 				if (channel->normalized) {
   3417 					if (desc->swizzle[3] == PIPE_SWIZZLE_1)
   3418 						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i);
   3419 					else
   3420 						v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i);
   3421 				} else {
   3422 					v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i);
   3423 				}
   3424 			}
   3425 		}
   3426 
   3427 		/* We work around the fact that 8_8_8 and 16_16_16 data formats
   3428 		 * do not exist by using the corresponding 4-component formats.
   3429 		 * This requires a fixup of the descriptor for bounds checks.
   3430 		 */
   3431 		if (desc->block.bits == 3 * 8 ||
   3432 		    desc->block.bits == 3 * 16) {
   3433 			v->fix_size3 |= (desc->block.bits / 24) << (2 * i);
   3434 		}
   3435 	}
   3436 	memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
   3437 
   3438 	return v;
   3439 }
   3440 
   3441 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
   3442 {
   3443 	struct si_context *sctx = (struct si_context *)ctx;
   3444 	struct si_vertex_element *v = (struct si_vertex_element*)state;
   3445 
   3446 	sctx->vertex_elements = v;
   3447 	sctx->vertex_buffers_dirty = true;
   3448 	sctx->do_update_shaders = true;
   3449 }
   3450 
   3451 static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
   3452 {
   3453 	struct si_context *sctx = (struct si_context *)ctx;
   3454 
   3455 	if (sctx->vertex_elements == state)
   3456 		sctx->vertex_elements = NULL;
   3457 	FREE(state);
   3458 }
   3459 
   3460 static void si_set_vertex_buffers(struct pipe_context *ctx,
   3461 				  unsigned start_slot, unsigned count,
   3462 				  const struct pipe_vertex_buffer *buffers)
   3463 {
   3464 	struct si_context *sctx = (struct si_context *)ctx;
   3465 	struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
   3466 	int i;
   3467 
   3468 	assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer));
   3469 
   3470 	if (buffers) {
   3471 		for (i = 0; i < count; i++) {
   3472 			const struct pipe_vertex_buffer *src = buffers + i;
   3473 			struct pipe_vertex_buffer *dsti = dst + i;
   3474 			struct pipe_resource *buf = src->buffer;
   3475 
   3476 			pipe_resource_reference(&dsti->buffer, buf);
   3477 			dsti->buffer_offset = src->buffer_offset;
   3478 			dsti->stride = src->stride;
   3479 			r600_context_add_resource_size(ctx, buf);
   3480 			if (buf)
   3481 				r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER;
   3482 		}
   3483 	} else {
   3484 		for (i = 0; i < count; i++) {
   3485 			pipe_resource_reference(&dst[i].buffer, NULL);
   3486 		}
   3487 	}
   3488 	sctx->vertex_buffers_dirty = true;
   3489 }
   3490 
   3491 static void si_set_index_buffer(struct pipe_context *ctx,
   3492 				const struct pipe_index_buffer *ib)
   3493 {
   3494 	struct si_context *sctx = (struct si_context *)ctx;
   3495 
   3496 	if (ib) {
   3497 		struct pipe_resource *buf = ib->buffer;
   3498 
   3499 		pipe_resource_reference(&sctx->index_buffer.buffer, buf);
   3500 	        memcpy(&sctx->index_buffer, ib, sizeof(*ib));
   3501 		r600_context_add_resource_size(ctx, buf);
   3502 		if (buf)
   3503 			r600_resource(buf)->bind_history |= PIPE_BIND_INDEX_BUFFER;
   3504 	} else {
   3505 		pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
   3506 	}
   3507 }
   3508 
   3509 /*
   3510  * Misc
   3511  */
   3512 
   3513 static void si_set_tess_state(struct pipe_context *ctx,
   3514 			      const float default_outer_level[4],
   3515 			      const float default_inner_level[2])
   3516 {
   3517 	struct si_context *sctx = (struct si_context *)ctx;
   3518 	struct pipe_constant_buffer cb;
   3519 	float array[8];
   3520 
   3521 	memcpy(array, default_outer_level, sizeof(float) * 4);
   3522 	memcpy(array+4, default_inner_level, sizeof(float) * 2);
   3523 
   3524 	cb.buffer = NULL;
   3525 	cb.user_buffer = NULL;
   3526 	cb.buffer_size = sizeof(array);
   3527 
   3528 	si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
   3529 			       (void*)array, sizeof(array),
   3530 			       &cb.buffer_offset);
   3531 
   3532 	si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
   3533 	pipe_resource_reference(&cb.buffer, NULL);
   3534 }
   3535 
   3536 static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
   3537 {
   3538 	struct si_context *sctx = (struct si_context *)ctx;
   3539 
   3540 	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
   3541 			 SI_CONTEXT_INV_GLOBAL_L2 |
   3542 			 SI_CONTEXT_FLUSH_AND_INV_CB;
   3543 }
   3544 
   3545 /* This only ensures coherency for shader image/buffer stores. */
   3546 static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
   3547 {
   3548 	struct si_context *sctx = (struct si_context *)ctx;
   3549 
   3550 	/* Subsequent commands must wait for all shader invocations to
   3551 	 * complete. */
   3552 	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
   3553 	                 SI_CONTEXT_CS_PARTIAL_FLUSH;
   3554 
   3555 	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
   3556 		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
   3557 				 SI_CONTEXT_INV_VMEM_L1;
   3558 
   3559 	if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
   3560 		     PIPE_BARRIER_SHADER_BUFFER |
   3561 		     PIPE_BARRIER_TEXTURE |
   3562 		     PIPE_BARRIER_IMAGE |
   3563 		     PIPE_BARRIER_STREAMOUT_BUFFER |
   3564 		     PIPE_BARRIER_GLOBAL_BUFFER)) {
   3565 		/* As far as I can tell, L1 contents are written back to L2
   3566 		 * automatically at end of shader, but the contents of other
   3567 		 * L1 caches might still be stale. */
   3568 		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
   3569 	}
   3570 
   3571 	if (flags & PIPE_BARRIER_INDEX_BUFFER) {
   3572 		/* Indices are read through TC L2 since VI.
   3573 		 * L1 isn't used.
   3574 		 */
   3575 		if (sctx->screen->b.chip_class <= CIK)
   3576 			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
   3577 	}
   3578 
   3579 	if (flags & PIPE_BARRIER_FRAMEBUFFER)
   3580 		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
   3581 
   3582 	if (flags & (PIPE_BARRIER_FRAMEBUFFER |
   3583 		     PIPE_BARRIER_INDIRECT_BUFFER))
   3584 		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
   3585 }
   3586 
   3587 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
   3588 {
   3589 	struct pipe_blend_state blend;
   3590 
   3591 	memset(&blend, 0, sizeof(blend));
   3592 	blend.independent_blend_enable = true;
   3593 	blend.rt[0].colormask = 0xf;
   3594 	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
   3595 }
   3596 
   3597 static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
   3598 				 bool include_draw_vbo)
   3599 {
   3600 	si_need_cs_space((struct si_context*)ctx);
   3601 }
   3602 
   3603 static void si_init_config(struct si_context *sctx);
   3604 
   3605 void si_init_state_functions(struct si_context *sctx)
   3606 {
   3607 	si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
   3608 	si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
   3609 	si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
   3610 	si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors);
   3611 	si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports);
   3612 
   3613 	si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state);
   3614 	si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs);
   3615 	si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
   3616 	si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
   3617 	si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
   3618 	si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
   3619 	si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
   3620 	si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
   3621 	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
   3622 	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
   3623 
   3624 	sctx->b.b.create_blend_state = si_create_blend_state;
   3625 	sctx->b.b.bind_blend_state = si_bind_blend_state;
   3626 	sctx->b.b.delete_blend_state = si_delete_blend_state;
   3627 	sctx->b.b.set_blend_color = si_set_blend_color;
   3628 
   3629 	sctx->b.b.create_rasterizer_state = si_create_rs_state;
   3630 	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
   3631 	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;
   3632 
   3633 	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
   3634 	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
   3635 	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
   3636 
   3637 	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
   3638 	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
   3639 	sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
   3640 	sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
   3641 	sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);
   3642 
   3643 	sctx->b.b.set_clip_state = si_set_clip_state;
   3644 	sctx->b.b.set_stencil_ref = si_set_stencil_ref;
   3645 
   3646 	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
   3647 	sctx->b.b.get_sample_position = cayman_get_sample_position;
   3648 
   3649 	sctx->b.b.create_sampler_state = si_create_sampler_state;
   3650 	sctx->b.b.delete_sampler_state = si_delete_sampler_state;
   3651 
   3652 	sctx->b.b.create_sampler_view = si_create_sampler_view;
   3653 	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;
   3654 
   3655 	sctx->b.b.set_sample_mask = si_set_sample_mask;
   3656 
   3657 	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
   3658 	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
   3659 	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
   3660 	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
   3661 	sctx->b.b.set_index_buffer = si_set_index_buffer;
   3662 
   3663 	sctx->b.b.texture_barrier = si_texture_barrier;
   3664 	sctx->b.b.memory_barrier = si_memory_barrier;
   3665 	sctx->b.b.set_min_samples = si_set_min_samples;
   3666 	sctx->b.b.set_tess_state = si_set_tess_state;
   3667 
   3668 	sctx->b.b.set_active_query_state = si_set_active_query_state;
   3669 	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
   3670 	sctx->b.save_qbo_state = si_save_qbo_state;
   3671 	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
   3672 
   3673 	sctx->b.b.draw_vbo = si_draw_vbo;
   3674 
   3675 	si_init_config(sctx);
   3676 }
   3677 
   3678 static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen)
   3679 {
   3680 	return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id;
   3681 }
   3682 
   3683 static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
   3684 				     struct r600_texture *rtex,
   3685 			             struct radeon_bo_metadata *md)
   3686 {
   3687 	struct si_screen *sscreen = (struct si_screen*)rscreen;
   3688 	struct pipe_resource *res = &rtex->resource.b.b;
   3689 	static const unsigned char swizzle[] = {
   3690 		PIPE_SWIZZLE_X,
   3691 		PIPE_SWIZZLE_Y,
   3692 		PIPE_SWIZZLE_Z,
   3693 		PIPE_SWIZZLE_W
   3694 	};
   3695 	uint32_t desc[8], i;
   3696 	bool is_array = util_resource_is_array_texture(res);
   3697 
   3698 	/* DRM 2.x.x doesn't support this. */
   3699 	if (rscreen->info.drm_major != 3)
   3700 		return;
   3701 
   3702 	assert(rtex->dcc_separate_buffer == NULL);
   3703 	assert(rtex->fmask.size == 0);
   3704 
   3705 	/* Metadata image format format version 1:
   3706 	 * [0] = 1 (metadata format identifier)
   3707 	 * [1] = (VENDOR_ID << 16) | PCI_ID
   3708 	 * [2:9] = image descriptor for the whole resource
   3709 	 *         [2] is always 0, because the base address is cleared
   3710 	 *         [9] is the DCC offset bits [39:8] from the beginning of
   3711 	 *             the buffer
   3712 	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
   3713 	 */
   3714 
   3715 	md->metadata[0] = 1; /* metadata image format version 1 */
   3716 
   3717 	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
   3718 	md->metadata[1] = si_get_bo_metadata_word1(rscreen);
   3719 
   3720 	si_make_texture_descriptor(sscreen, rtex, true,
   3721 				   res->target, res->format,
   3722 				   swizzle, 0, res->last_level, 0,
   3723 				   is_array ? res->array_size - 1 : 0,
   3724 				   res->width0, res->height0, res->depth0,
   3725 				   desc, NULL);
   3726 
   3727 	si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0,
   3728 				       rtex->surface.blk_w, false, desc);
   3729 
   3730 	/* Clear the base address and set the relative DCC offset. */
   3731 	desc[0] = 0;
   3732 	desc[1] &= C_008F14_BASE_ADDRESS_HI;
   3733 	desc[7] = rtex->dcc_offset >> 8;
   3734 
   3735 	/* Dwords [2:9] contain the image descriptor. */
   3736 	memcpy(&md->metadata[2], desc, sizeof(desc));
   3737 
   3738 	/* Dwords [10:..] contain the mipmap level offsets. */
   3739 	for (i = 0; i <= res->last_level; i++)
   3740 		md->metadata[10+i] = rtex->surface.level[i].offset >> 8;
   3741 
   3742 	md->size_metadata = (11 + res->last_level) * 4;
   3743 }
   3744 
   3745 static void si_apply_opaque_metadata(struct r600_common_screen *rscreen,
   3746 				     struct r600_texture *rtex,
   3747 			             struct radeon_bo_metadata *md)
   3748 {
   3749 	uint32_t *desc = &md->metadata[2];
   3750 
   3751 	if (rscreen->chip_class < VI)
   3752 		return;
   3753 
   3754 	/* Return if DCC is enabled. The texture should be set up with it
   3755 	 * already.
   3756 	 */
   3757 	if (md->size_metadata >= 11 * 4 &&
   3758 	    md->metadata[0] != 0 &&
   3759 	    md->metadata[1] == si_get_bo_metadata_word1(rscreen) &&
   3760 	    G_008F28_COMPRESSION_EN(desc[6])) {
   3761 		assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8));
   3762 		return;
   3763 	}
   3764 
   3765 	/* Disable DCC. These are always set by texture_from_handle and must
   3766 	 * be cleared here.
   3767 	 */
   3768 	rtex->dcc_offset = 0;
   3769 }
   3770 
   3771 void si_init_screen_state_functions(struct si_screen *sscreen)
   3772 {
   3773 	sscreen->b.b.is_format_supported = si_is_format_supported;
   3774 	sscreen->b.query_opaque_metadata = si_query_opaque_metadata;
   3775 	sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata;
   3776 }
   3777 
   3778 static void
   3779 si_write_harvested_raster_configs(struct si_context *sctx,
   3780 				  struct si_pm4_state *pm4,
   3781 				  unsigned raster_config,
   3782 				  unsigned raster_config_1)
   3783 {
   3784 	unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
   3785 	unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
   3786 	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
   3787 	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
   3788 	unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
   3789 	unsigned rb_per_se = num_rb / num_se;
   3790 	unsigned se_mask[4];
   3791 	unsigned se;
   3792 
   3793 	se_mask[0] = ((1 << rb_per_se) - 1);
   3794 	se_mask[1] = (se_mask[0] << rb_per_se);
   3795 	se_mask[2] = (se_mask[1] << rb_per_se);
   3796 	se_mask[3] = (se_mask[2] << rb_per_se);
   3797 
   3798 	se_mask[0] &= rb_mask;
   3799 	se_mask[1] &= rb_mask;
   3800 	se_mask[2] &= rb_mask;
   3801 	se_mask[3] &= rb_mask;
   3802 
   3803 	assert(num_se == 1 || num_se == 2 || num_se == 4);
   3804 	assert(sh_per_se == 1 || sh_per_se == 2);
   3805 	assert(rb_per_pkr == 1 || rb_per_pkr == 2);
   3806 
   3807 	/* XXX: I can't figure out what the *_XSEL and *_YSEL
   3808 	 * fields are for, so I'm leaving them as their default
   3809 	 * values. */
   3810 
   3811 	for (se = 0; se < num_se; se++) {
   3812 		unsigned raster_config_se = raster_config;
   3813 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
   3814 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
   3815 		int idx = (se / 2) * 2;
   3816 
   3817 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
   3818 			raster_config_se &= C_028350_SE_MAP;
   3819 
   3820 			if (!se_mask[idx]) {
   3821 				raster_config_se |=
   3822 					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
   3823 			} else {
   3824 				raster_config_se |=
   3825 					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
   3826 			}
   3827 		}
   3828 
   3829 		pkr0_mask &= rb_mask;
   3830 		pkr1_mask &= rb_mask;
   3831 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
   3832 			raster_config_se &= C_028350_PKR_MAP;
   3833 
   3834 			if (!pkr0_mask) {
   3835 				raster_config_se |=
   3836 					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
   3837 			} else {
   3838 				raster_config_se |=
   3839 					S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
   3840 			}
   3841 		}
   3842 
   3843 		if (rb_per_se >= 2) {
   3844 			unsigned rb0_mask = 1 << (se * rb_per_se);
   3845 			unsigned rb1_mask = rb0_mask << 1;
   3846 
   3847 			rb0_mask &= rb_mask;
   3848 			rb1_mask &= rb_mask;
   3849 			if (!rb0_mask || !rb1_mask) {
   3850 				raster_config_se &= C_028350_RB_MAP_PKR0;
   3851 
   3852 				if (!rb0_mask) {
   3853 					raster_config_se |=
   3854 						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
   3855 				} else {
   3856 					raster_config_se |=
   3857 						S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
   3858 				}
   3859 			}
   3860 
   3861 			if (rb_per_se > 2) {
   3862 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
   3863 				rb1_mask = rb0_mask << 1;
   3864 				rb0_mask &= rb_mask;
   3865 				rb1_mask &= rb_mask;
   3866 				if (!rb0_mask || !rb1_mask) {
   3867 					raster_config_se &= C_028350_RB_MAP_PKR1;
   3868 
   3869 					if (!rb0_mask) {
   3870 						raster_config_se |=
   3871 							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
   3872 					} else {
   3873 						raster_config_se |=
   3874 							S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
   3875 					}
   3876 				}
   3877 			}
   3878 		}
   3879 
   3880 		/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
   3881 		if (sctx->b.chip_class < CIK)
   3882 			si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
   3883 				       SE_INDEX(se) | SH_BROADCAST_WRITES |
   3884 				       INSTANCE_BROADCAST_WRITES);
   3885 		else
   3886 			si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
   3887 				       S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
   3888 				       S_030800_INSTANCE_BROADCAST_WRITES(1));
   3889 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
   3890 	}
   3891 
   3892 	/* GRBM_GFX_INDEX has a different offset on SI and CI+ */
   3893 	if (sctx->b.chip_class < CIK)
   3894 		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
   3895 			       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
   3896 			       INSTANCE_BROADCAST_WRITES);
   3897 	else {
   3898 		si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX,
   3899 			       S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
   3900 			       S_030800_INSTANCE_BROADCAST_WRITES(1));
   3901 
   3902 		if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
   3903 		                     (!se_mask[2] && !se_mask[3]))) {
   3904 			raster_config_1 &= C_028354_SE_PAIR_MAP;
   3905 
   3906 			if (!se_mask[0] && !se_mask[1]) {
   3907 				raster_config_1 |=
   3908 					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
   3909 			} else {
   3910 				raster_config_1 |=
   3911 					S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
   3912 			}
   3913 		}
   3914 
   3915 		si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
   3916 	}
   3917 }
   3918 
   3919 static void si_init_config(struct si_context *sctx)
   3920 {
   3921 	struct si_screen *sscreen = sctx->screen;
   3922 	unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
   3923 	unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
   3924 	unsigned raster_config, raster_config_1;
   3925 	uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
   3926 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
   3927 
   3928 	if (!pm4)
   3929 		return;
   3930 
   3931 	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
   3932 	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
   3933 	si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
   3934 	si_pm4_cmd_end(pm4, false);
   3935 
   3936 	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
   3937 	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
   3938 
   3939 	/* FIXME calculate these values somehow ??? */
   3940 	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
   3941 	si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
   3942 	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
   3943 
   3944 	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
   3945 	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
   3946 
   3947 	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
   3948 	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
   3949 	if (sctx->b.chip_class < CIK)
   3950 		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
   3951 			       S_008A14_CLIP_VTX_REORDER_ENA(1));
   3952 
   3953 	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
   3954 	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
   3955 
   3956 	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
   3957 
   3958 	switch (sctx->screen->b.family) {
   3959 	case CHIP_TAHITI:
   3960 	case CHIP_PITCAIRN:
   3961 		raster_config = 0x2a00126a;
   3962 		raster_config_1 = 0x00000000;
   3963 		break;
   3964 	case CHIP_VERDE:
   3965 		raster_config = 0x0000124a;
   3966 		raster_config_1 = 0x00000000;
   3967 		break;
   3968 	case CHIP_OLAND:
   3969 		raster_config = 0x00000082;
   3970 		raster_config_1 = 0x00000000;
   3971 		break;
   3972 	case CHIP_HAINAN:
   3973 		raster_config = 0x00000000;
   3974 		raster_config_1 = 0x00000000;
   3975 		break;
   3976 	case CHIP_BONAIRE:
   3977 		raster_config = 0x16000012;
   3978 		raster_config_1 = 0x00000000;
   3979 		break;
   3980 	case CHIP_HAWAII:
   3981 		raster_config = 0x3a00161a;
   3982 		raster_config_1 = 0x0000002e;
   3983 		break;
   3984 	case CHIP_FIJI:
   3985 		if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
   3986 			/* old kernels with old tiling config */
   3987 			raster_config = 0x16000012;
   3988 			raster_config_1 = 0x0000002a;
   3989 		} else {
   3990 			raster_config = 0x3a00161a;
   3991 			raster_config_1 = 0x0000002e;
   3992 		}
   3993 		break;
   3994 	case CHIP_POLARIS10:
   3995 		raster_config = 0x16000012;
   3996 		raster_config_1 = 0x0000002a;
   3997 		break;
   3998 	case CHIP_POLARIS11:
   3999 	case CHIP_POLARIS12:
   4000 		raster_config = 0x16000012;
   4001 		raster_config_1 = 0x00000000;
   4002 		break;
   4003 	case CHIP_TONGA:
   4004 		raster_config = 0x16000012;
   4005 		raster_config_1 = 0x0000002a;
   4006 		break;
   4007 	case CHIP_ICELAND:
   4008 		if (num_rb == 1)
   4009 			raster_config = 0x00000000;
   4010 		else
   4011 			raster_config = 0x00000002;
   4012 		raster_config_1 = 0x00000000;
   4013 		break;
   4014 	case CHIP_CARRIZO:
   4015 		raster_config = 0x00000002;
   4016 		raster_config_1 = 0x00000000;
   4017 		break;
   4018 	case CHIP_KAVERI:
   4019 		/* KV should be 0x00000002, but that causes problems with radeon */
   4020 		raster_config = 0x00000000; /* 0x00000002 */
   4021 		raster_config_1 = 0x00000000;
   4022 		break;
   4023 	case CHIP_KABINI:
   4024 	case CHIP_MULLINS:
   4025 	case CHIP_STONEY:
   4026 		raster_config = 0x00000000;
   4027 		raster_config_1 = 0x00000000;
   4028 		break;
   4029 	default:
   4030 		fprintf(stderr,
   4031 			"radeonsi: Unknown GPU, using 0 for raster_config\n");
   4032 		raster_config = 0x00000000;
   4033 		raster_config_1 = 0x00000000;
   4034 		break;
   4035 	}
   4036 
   4037 	/* Always use the default config when all backends are enabled
   4038 	 * (or when we failed to determine the enabled backends).
   4039 	 */
   4040 	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
   4041 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
   4042 			       raster_config);
   4043 		if (sctx->b.chip_class >= CIK)
   4044 			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
   4045 				       raster_config_1);
   4046 	} else {
   4047 		si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
   4048 	}
   4049 
   4050 	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
   4051 	si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
   4052 	si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
   4053 		       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
   4054 	si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
   4055 	si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
   4056 		       S_028034_BR_X(16384) | S_028034_BR_Y(16384));
   4057 
   4058 	si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
   4059 	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
   4060 		       S_028230_ER_TRI(0xA) |
   4061 		       S_028230_ER_POINT(0xA) |
   4062 		       S_028230_ER_RECT(0xA) |
   4063 		       /* Required by DX10_DIAMOND_TEST_ENA: */
   4064 		       S_028230_ER_LINE_LR(0x1A) |
   4065 		       S_028230_ER_LINE_RL(0x26) |
   4066 		       S_028230_ER_LINE_TB(0xA) |
   4067 		       S_028230_ER_LINE_BT(0xA));
   4068 	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
   4069 	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
   4070 	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
   4071 	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
   4072 	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
   4073 	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
   4074 	si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
   4075 
   4076 	si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
   4077 	si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
   4078 	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
   4079 
   4080 	if (sctx->b.chip_class >= CIK) {
   4081 		/* If this is 0, Bonaire can hang even if GS isn't being used.
   4082 		 * Other chips are unaffected. These are suboptimal values,
   4083 		 * but we don't use on-chip GS.
   4084 		 */
   4085 		si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
   4086 			       S_028A44_ES_VERTS_PER_SUBGRP(64) |
   4087 			       S_028A44_GS_PRIMS_PER_SUBGRP(4));
   4088 
   4089 		si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
   4090 		si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
   4091 		si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
   4092 		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
   4093 
   4094 		if (sscreen->b.info.num_good_compute_units /
   4095 		    (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) {
   4096 			/* Too few available compute units per SH. Disallowing
   4097 			 * VS to run on CU0 could hurt us more than late VS
   4098 			 * allocation would help.
   4099 			 *
   4100 			 * LATE_ALLOC_VS = 2 is the highest safe number.
   4101 			 */
   4102 			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
   4103 			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
   4104 		} else {
   4105 			/* Set LATE_ALLOC_VS == 31. It should be less than
   4106 			 * the number of scratch waves. Limitations:
   4107 			 * - VS can't execute on CU0.
   4108 			 * - If HS writes outputs to LDS, LS can't execute on CU0.
   4109 			 */
   4110 			si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
   4111 			si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
   4112 		}
   4113 
   4114 		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
   4115 	}
   4116 
   4117 	if (sctx->b.chip_class >= VI) {
   4118 		unsigned vgt_tess_distribution;
   4119 
   4120 		si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
   4121 			       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
   4122 			       S_028424_OVERWRITE_COMBINER_WATERMARK(4));
   4123 		if (sctx->b.family < CHIP_POLARIS10)
   4124 			si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
   4125 		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
   4126 
   4127 		vgt_tess_distribution =
   4128 			S_028B50_ACCUM_ISOLINE(32) |
   4129 			S_028B50_ACCUM_TRI(11) |
   4130 			S_028B50_ACCUM_QUAD(11) |
   4131 			S_028B50_DONUT_SPLIT(16);
   4132 
   4133 		/* Testing with Unigine Heaven extreme tesselation yielded best results
   4134 		 * with TRAP_SPLIT = 3.
   4135 		 */
   4136 		if (sctx->b.family == CHIP_FIJI ||
   4137 		    sctx->b.family >= CHIP_POLARIS10)
   4138 			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
   4139 
   4140 		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
   4141 	} else {
   4142 		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
   4143 		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
   4144 	}
   4145 
   4146 	if (sctx->b.family == CHIP_STONEY)
   4147 		si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
   4148 
   4149 	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
   4150 	if (sctx->b.chip_class >= CIK)
   4151 		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
   4152 	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
   4153 		      RADEON_PRIO_BORDER_COLORS);
   4154 
   4155 	si_pm4_upload_indirect_buffer(sctx, pm4);
   4156 	sctx->init_config = pm4;
   4157 }
   4158