1 /* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "si_pipe.h" 25 #include "sid.h" 26 #include "gfx9d.h" 27 #include "radeon/r600_cs.h" 28 #include "radeon/r600_query.h" 29 30 #include "util/u_dual_blend.h" 31 #include "util/u_format.h" 32 #include "util/u_format_s3tc.h" 33 #include "util/u_memory.h" 34 #include "util/u_resource.h" 35 #include "util/u_upload_mgr.h" 36 37 /* Initialize an external atom (owned by ../radeon). */ 38 static void 39 si_init_external_atom(struct si_context *sctx, struct r600_atom *atom, 40 struct r600_atom **list_elem) 41 { 42 atom->id = list_elem - sctx->atoms.array; 43 *list_elem = atom; 44 } 45 46 /* Initialize an atom owned by radeonsi. 
*/ 47 void si_init_atom(struct si_context *sctx, struct r600_atom *atom, 48 struct r600_atom **list_elem, 49 void (*emit_func)(struct si_context *ctx, struct r600_atom *state)) 50 { 51 atom->emit = (void*)emit_func; 52 atom->id = list_elem - sctx->atoms.array; 53 *list_elem = atom; 54 } 55 56 static unsigned si_map_swizzle(unsigned swizzle) 57 { 58 switch (swizzle) { 59 case PIPE_SWIZZLE_Y: 60 return V_008F0C_SQ_SEL_Y; 61 case PIPE_SWIZZLE_Z: 62 return V_008F0C_SQ_SEL_Z; 63 case PIPE_SWIZZLE_W: 64 return V_008F0C_SQ_SEL_W; 65 case PIPE_SWIZZLE_0: 66 return V_008F0C_SQ_SEL_0; 67 case PIPE_SWIZZLE_1: 68 return V_008F0C_SQ_SEL_1; 69 default: /* PIPE_SWIZZLE_X */ 70 return V_008F0C_SQ_SEL_X; 71 } 72 } 73 74 /* 12.4 fixed-point */ 75 static unsigned si_pack_float_12p4(float x) 76 { 77 return x <= 0 ? 0 : 78 x >= 4096 ? 0xffff : x * 16; 79 } 80 81 /* 82 * Inferred framebuffer and blender state. 83 * 84 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending 85 * if there is not enough PS outputs. 86 */ 87 static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom) 88 { 89 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 90 struct si_state_blend *blend = sctx->queued.named.blend; 91 /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers, 92 * but you never know. */ 93 uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit; 94 unsigned i; 95 96 if (blend) 97 cb_target_mask &= blend->cb_target_mask; 98 99 /* Avoid a hang that happens when dual source blending is enabled 100 * but there is not enough color outputs. This is undefined behavior, 101 * so disable color writes completely. 102 * 103 * Reproducible with Unigine Heaven 4.0 and drirc missing. 
104 */ 105 if (blend && blend->dual_src_blend && 106 sctx->ps_shader.cso && 107 (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) 108 cb_target_mask = 0; 109 110 radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask); 111 112 /* GFX9: Flush DFSM when CB_TARGET_MASK changes. 113 * I think we don't have to do anything between IBs. 114 */ 115 if (sctx->screen->dfsm_allowed && 116 sctx->last_cb_target_mask != cb_target_mask) { 117 sctx->last_cb_target_mask = cb_target_mask; 118 119 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 120 radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); 121 } 122 123 if (sctx->b.chip_class >= VI) { 124 /* DCC MSAA workaround for blending. 125 * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_- 126 * COMBINER_DISABLE, but that would be more complicated. 127 */ 128 bool oc_disable = (sctx->b.chip_class == VI || 129 sctx->b.chip_class == GFX9) && 130 blend && 131 blend->blend_enable_4bit & cb_target_mask && 132 sctx->framebuffer.nr_samples >= 2; 133 134 radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL, 135 S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | 136 S_028424_OVERWRITE_COMBINER_WATERMARK(4) | 137 S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable)); 138 } 139 140 /* RB+ register settings. */ 141 if (sctx->screen->rbplus_allowed) { 142 unsigned spi_shader_col_format = 143 sctx->ps_shader.cso ? 
144 sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0; 145 unsigned sx_ps_downconvert = 0; 146 unsigned sx_blend_opt_epsilon = 0; 147 unsigned sx_blend_opt_control = 0; 148 149 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 150 struct r600_surface *surf = 151 (struct r600_surface*)sctx->framebuffer.state.cbufs[i]; 152 unsigned format, swap, spi_format, colormask; 153 bool has_alpha, has_rgb; 154 155 if (!surf) 156 continue; 157 158 format = G_028C70_FORMAT(surf->cb_color_info); 159 swap = G_028C70_COMP_SWAP(surf->cb_color_info); 160 spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 161 colormask = (cb_target_mask >> (i * 4)) & 0xf; 162 163 /* Set if RGB and A are present. */ 164 has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); 165 166 if (format == V_028C70_COLOR_8 || 167 format == V_028C70_COLOR_16 || 168 format == V_028C70_COLOR_32) 169 has_rgb = !has_alpha; 170 else 171 has_rgb = true; 172 173 /* Check the colormask and export format. */ 174 if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 175 has_rgb = false; 176 if (!(colormask & PIPE_MASK_A)) 177 has_alpha = false; 178 179 if (spi_format == V_028714_SPI_SHADER_ZERO) { 180 has_rgb = false; 181 has_alpha = false; 182 } 183 184 /* Disable value checking for disabled channels. */ 185 if (!has_rgb) 186 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 187 if (!has_alpha) 188 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 189 190 /* Enable down-conversion for 32bpp and smaller formats. */ 191 switch (format) { 192 case V_028C70_COLOR_8: 193 case V_028C70_COLOR_8_8: 194 case V_028C70_COLOR_8_8_8_8: 195 /* For 1 and 2-channel formats, use the superset thereof. 
*/ 196 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 197 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 198 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 199 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 200 sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 201 } 202 break; 203 204 case V_028C70_COLOR_5_6_5: 205 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 206 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 207 sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 208 } 209 break; 210 211 case V_028C70_COLOR_1_5_5_5: 212 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 213 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 214 sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 215 } 216 break; 217 218 case V_028C70_COLOR_4_4_4_4: 219 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 220 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 221 sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 222 } 223 break; 224 225 case V_028C70_COLOR_32: 226 if (swap == V_028C70_SWAP_STD && 227 spi_format == V_028714_SPI_SHADER_32_R) 228 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 229 else if (swap == V_028C70_SWAP_ALT_REV && 230 spi_format == V_028714_SPI_SHADER_32_AR) 231 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 232 break; 233 234 case V_028C70_COLOR_16: 235 case V_028C70_COLOR_16_16: 236 /* For 1-channel formats, use the superset thereof. 
*/ 237 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 238 spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 239 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 240 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 241 if (swap == V_028C70_SWAP_STD || 242 swap == V_028C70_SWAP_STD_REV) 243 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 244 else 245 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 246 } 247 break; 248 249 case V_028C70_COLOR_10_11_11: 250 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 251 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 252 sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4); 253 } 254 break; 255 256 case V_028C70_COLOR_2_10_10_10: 257 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 258 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 259 sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 260 } 261 break; 262 } 263 } 264 265 radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); 266 radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ 267 radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ 268 radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ 269 } else if (sctx->screen->has_rbplus) { 270 radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); 271 radeon_emit(cs, 0); /* R_028754_SX_PS_DOWNCONVERT */ 272 radeon_emit(cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */ 273 radeon_emit(cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */ 274 } 275 } 276 277 /* 278 * Blender functions 279 */ 280 281 static uint32_t si_translate_blend_function(int blend_func) 282 { 283 switch (blend_func) { 284 case PIPE_BLEND_ADD: 285 return V_028780_COMB_DST_PLUS_SRC; 286 case PIPE_BLEND_SUBTRACT: 287 return V_028780_COMB_SRC_MINUS_DST; 288 case PIPE_BLEND_REVERSE_SUBTRACT: 289 return V_028780_COMB_DST_MINUS_SRC; 290 case PIPE_BLEND_MIN: 291 return V_028780_COMB_MIN_DST_SRC; 292 case PIPE_BLEND_MAX: 293 
return V_028780_COMB_MAX_DST_SRC; 294 default: 295 R600_ERR("Unknown blend function %d\n", blend_func); 296 assert(0); 297 break; 298 } 299 return 0; 300 } 301 302 static uint32_t si_translate_blend_factor(int blend_fact) 303 { 304 switch (blend_fact) { 305 case PIPE_BLENDFACTOR_ONE: 306 return V_028780_BLEND_ONE; 307 case PIPE_BLENDFACTOR_SRC_COLOR: 308 return V_028780_BLEND_SRC_COLOR; 309 case PIPE_BLENDFACTOR_SRC_ALPHA: 310 return V_028780_BLEND_SRC_ALPHA; 311 case PIPE_BLENDFACTOR_DST_ALPHA: 312 return V_028780_BLEND_DST_ALPHA; 313 case PIPE_BLENDFACTOR_DST_COLOR: 314 return V_028780_BLEND_DST_COLOR; 315 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 316 return V_028780_BLEND_SRC_ALPHA_SATURATE; 317 case PIPE_BLENDFACTOR_CONST_COLOR: 318 return V_028780_BLEND_CONSTANT_COLOR; 319 case PIPE_BLENDFACTOR_CONST_ALPHA: 320 return V_028780_BLEND_CONSTANT_ALPHA; 321 case PIPE_BLENDFACTOR_ZERO: 322 return V_028780_BLEND_ZERO; 323 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 324 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 325 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 326 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 327 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 328 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 329 case PIPE_BLENDFACTOR_INV_DST_COLOR: 330 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 331 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 332 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 333 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 334 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 335 case PIPE_BLENDFACTOR_SRC1_COLOR: 336 return V_028780_BLEND_SRC1_COLOR; 337 case PIPE_BLENDFACTOR_SRC1_ALPHA: 338 return V_028780_BLEND_SRC1_ALPHA; 339 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 340 return V_028780_BLEND_INV_SRC1_COLOR; 341 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 342 return V_028780_BLEND_INV_SRC1_ALPHA; 343 default: 344 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 345 assert(0); 346 break; 347 } 348 return 0; 349 } 350 351 static uint32_t si_translate_blend_opt_function(int blend_func) 352 
{ 353 switch (blend_func) { 354 case PIPE_BLEND_ADD: 355 return V_028760_OPT_COMB_ADD; 356 case PIPE_BLEND_SUBTRACT: 357 return V_028760_OPT_COMB_SUBTRACT; 358 case PIPE_BLEND_REVERSE_SUBTRACT: 359 return V_028760_OPT_COMB_REVSUBTRACT; 360 case PIPE_BLEND_MIN: 361 return V_028760_OPT_COMB_MIN; 362 case PIPE_BLEND_MAX: 363 return V_028760_OPT_COMB_MAX; 364 default: 365 return V_028760_OPT_COMB_BLEND_DISABLED; 366 } 367 } 368 369 static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 370 { 371 switch (blend_fact) { 372 case PIPE_BLENDFACTOR_ZERO: 373 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 374 case PIPE_BLENDFACTOR_ONE: 375 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 376 case PIPE_BLENDFACTOR_SRC_COLOR: 377 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 378 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 379 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 380 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 381 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 382 case PIPE_BLENDFACTOR_SRC_ALPHA: 383 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 384 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 385 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 386 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 387 return is_alpha ? 
V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 388 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 389 default: 390 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 391 } 392 } 393 394 static void si_blend_check_commutativity(struct si_screen *sscreen, 395 struct si_state_blend *blend, 396 enum pipe_blend_func func, 397 enum pipe_blendfactor src, 398 enum pipe_blendfactor dst, 399 unsigned chanmask) 400 { 401 /* Src factor is allowed when it does not depend on Dst */ 402 static const uint32_t src_allowed = 403 (1u << PIPE_BLENDFACTOR_ONE) | 404 (1u << PIPE_BLENDFACTOR_SRC_COLOR) | 405 (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | 406 (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) | 407 (1u << PIPE_BLENDFACTOR_CONST_COLOR) | 408 (1u << PIPE_BLENDFACTOR_CONST_ALPHA) | 409 (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | 410 (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) | 411 (1u << PIPE_BLENDFACTOR_ZERO) | 412 (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) | 413 (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | 414 (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) | 415 (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | 416 (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) | 417 (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA); 418 419 if (dst == PIPE_BLENDFACTOR_ONE && 420 (src_allowed & (1u << src))) { 421 /* Addition is commutative, but floating point addition isn't 422 * associative: subtle changes can be introduced via different 423 * rounding. 424 * 425 * Out-of-order is also non-deterministic, which means that 426 * this breaks OpenGL invariance requirements. So only enable 427 * out-of-order additive blending if explicitly allowed by a 428 * setting. 
429 */ 430 if (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN || 431 (func == PIPE_BLEND_ADD && sscreen->commutative_blend_add)) 432 blend->commutative_4bit |= chanmask; 433 } 434 } 435 436 /** 437 * Get rid of DST in the blend factors by commuting the operands: 438 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 439 */ 440 static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, 441 unsigned *dst_factor, unsigned expected_dst, 442 unsigned replacement_src) 443 { 444 if (*src_factor == expected_dst && 445 *dst_factor == PIPE_BLENDFACTOR_ZERO) { 446 *src_factor = PIPE_BLENDFACTOR_ZERO; 447 *dst_factor = replacement_src; 448 449 /* Commuting the operands requires reversing subtractions. */ 450 if (*func == PIPE_BLEND_SUBTRACT) 451 *func = PIPE_BLEND_REVERSE_SUBTRACT; 452 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 453 *func = PIPE_BLEND_SUBTRACT; 454 } 455 } 456 457 static bool si_blend_factor_uses_dst(unsigned factor) 458 { 459 return factor == PIPE_BLENDFACTOR_DST_COLOR || 460 factor == PIPE_BLENDFACTOR_DST_ALPHA || 461 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 462 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || 463 factor == PIPE_BLENDFACTOR_INV_DST_COLOR; 464 } 465 466 static void *si_create_blend_state_mode(struct pipe_context *ctx, 467 const struct pipe_blend_state *state, 468 unsigned mode) 469 { 470 struct si_context *sctx = (struct si_context*)ctx; 471 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 472 struct si_pm4_state *pm4 = &blend->pm4; 473 uint32_t sx_mrt_blend_opt[8] = {0}; 474 uint32_t color_control = 0; 475 476 if (!blend) 477 return NULL; 478 479 blend->alpha_to_coverage = state->alpha_to_coverage; 480 blend->alpha_to_one = state->alpha_to_one; 481 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 482 blend->logicop_enable = state->logicop_enable; 483 484 if (state->logicop_enable) { 485 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 486 } else { 487 
color_control |= S_028808_ROP3(0xcc); 488 } 489 490 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 491 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 492 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 493 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 494 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 495 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 496 497 if (state->alpha_to_coverage) 498 blend->need_src_alpha_4bit |= 0xf; 499 500 blend->cb_target_mask = 0; 501 blend->cb_target_enabled_4bit = 0; 502 503 for (int i = 0; i < 8; i++) { 504 /* state->rt entries > 0 only written if independent blending */ 505 const int j = state->independent_blend_enable ? i : 0; 506 507 unsigned eqRGB = state->rt[j].rgb_func; 508 unsigned srcRGB = state->rt[j].rgb_src_factor; 509 unsigned dstRGB = state->rt[j].rgb_dst_factor; 510 unsigned eqA = state->rt[j].alpha_func; 511 unsigned srcA = state->rt[j].alpha_src_factor; 512 unsigned dstA = state->rt[j].alpha_dst_factor; 513 514 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 515 unsigned blend_cntl = 0; 516 517 sx_mrt_blend_opt[i] = 518 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 519 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 520 521 /* Only set dual source blending for MRT0 to avoid a hang. */ 522 if (i >= 1 && blend->dual_src_blend) { 523 /* Vulkan does this for dual source blending. */ 524 if (i == 1) 525 blend_cntl |= S_028780_ENABLE(1); 526 527 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 528 continue; 529 } 530 531 /* Only addition and subtraction equations are supported with 532 * dual source blending. 
533 */ 534 if (blend->dual_src_blend && 535 (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 536 eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 537 assert(!"Unsupported equation for dual source blending"); 538 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 539 continue; 540 } 541 542 /* cb_render_state will disable unused ones */ 543 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 544 if (state->rt[j].colormask) 545 blend->cb_target_enabled_4bit |= 0xf << (4 * i); 546 547 if (!state->rt[j].colormask || !state->rt[j].blend_enable) { 548 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 549 continue; 550 } 551 552 si_blend_check_commutativity(sctx->screen, blend, 553 eqRGB, srcRGB, dstRGB, 0x7 << (4 * i)); 554 si_blend_check_commutativity(sctx->screen, blend, 555 eqA, srcA, dstA, 0x8 << (4 * i)); 556 557 /* Blending optimizations for RB+. 558 * These transformations don't change the behavior. 559 * 560 * First, get rid of DST in the blend factors: 561 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 562 */ 563 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 564 PIPE_BLENDFACTOR_DST_COLOR, 565 PIPE_BLENDFACTOR_SRC_COLOR); 566 si_blend_remove_dst(&eqA, &srcA, &dstA, 567 PIPE_BLENDFACTOR_DST_COLOR, 568 PIPE_BLENDFACTOR_SRC_COLOR); 569 si_blend_remove_dst(&eqA, &srcA, &dstA, 570 PIPE_BLENDFACTOR_DST_ALPHA, 571 PIPE_BLENDFACTOR_SRC_ALPHA); 572 573 /* Look up the ideal settings from tables. */ 574 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 575 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 576 srcA_opt = si_translate_blend_opt_factor(srcA, true); 577 dstA_opt = si_translate_blend_opt_factor(dstA, true); 578 579 /* Handle interdependencies. 
*/ 580 if (si_blend_factor_uses_dst(srcRGB)) 581 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 582 if (si_blend_factor_uses_dst(srcA)) 583 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 584 585 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 586 (dstRGB == PIPE_BLENDFACTOR_ZERO || 587 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 588 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 589 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 590 591 /* Set the final value. */ 592 sx_mrt_blend_opt[i] = 593 S_028760_COLOR_SRC_OPT(srcRGB_opt) | 594 S_028760_COLOR_DST_OPT(dstRGB_opt) | 595 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 596 S_028760_ALPHA_SRC_OPT(srcA_opt) | 597 S_028760_ALPHA_DST_OPT(dstA_opt) | 598 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 599 600 /* Set blend state. */ 601 blend_cntl |= S_028780_ENABLE(1); 602 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 603 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 604 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 605 606 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 607 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 608 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 609 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 610 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 611 } 612 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 613 614 blend->blend_enable_4bit |= 0xfu << (i * 4); 615 616 /* This is only important for formats without alpha. 
*/ 617 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 618 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 619 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 620 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 621 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || 622 dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 623 blend->need_src_alpha_4bit |= 0xfu << (i * 4); 624 } 625 626 if (blend->cb_target_mask) { 627 color_control |= S_028808_MODE(mode); 628 } else { 629 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 630 } 631 632 if (sctx->screen->has_rbplus) { 633 /* Disable RB+ blend optimizations for dual source blending. 634 * Vulkan does this. 635 */ 636 if (blend->dual_src_blend) { 637 for (int i = 0; i < 8; i++) { 638 sx_mrt_blend_opt[i] = 639 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 640 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 641 } 642 } 643 644 for (int i = 0; i < 8; i++) 645 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, 646 sx_mrt_blend_opt[i]); 647 648 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. 
*/ 649 if (blend->dual_src_blend || state->logicop_enable || 650 mode == V_028808_CB_RESOLVE) 651 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 652 } 653 654 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 655 return blend; 656 } 657 658 static void *si_create_blend_state(struct pipe_context *ctx, 659 const struct pipe_blend_state *state) 660 { 661 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 662 } 663 664 static void si_bind_blend_state(struct pipe_context *ctx, void *state) 665 { 666 struct si_context *sctx = (struct si_context *)ctx; 667 struct si_state_blend *old_blend = sctx->queued.named.blend; 668 struct si_state_blend *blend = (struct si_state_blend *)state; 669 670 if (!state) 671 return; 672 673 si_pm4_bind_state(sctx, blend, state); 674 675 if (!old_blend || 676 old_blend->cb_target_mask != blend->cb_target_mask || 677 old_blend->dual_src_blend != blend->dual_src_blend || 678 (old_blend->blend_enable_4bit != blend->blend_enable_4bit && 679 sctx->framebuffer.nr_samples >= 2 && 680 sctx->screen->dcc_msaa_allowed)) 681 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 682 683 if (!old_blend || 684 old_blend->cb_target_mask != blend->cb_target_mask || 685 old_blend->alpha_to_coverage != blend->alpha_to_coverage || 686 old_blend->alpha_to_one != blend->alpha_to_one || 687 old_blend->dual_src_blend != blend->dual_src_blend || 688 old_blend->blend_enable_4bit != blend->blend_enable_4bit || 689 old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit) 690 sctx->do_update_shaders = true; 691 692 if (sctx->screen->dpbb_allowed && 693 (!old_blend || 694 old_blend->alpha_to_coverage != blend->alpha_to_coverage || 695 old_blend->blend_enable_4bit != blend->blend_enable_4bit || 696 old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit)) 697 si_mark_atom_dirty(sctx, &sctx->dpbb_state); 698 699 if (sctx->screen->has_out_of_order_rast && 700 (!old_blend || 701 (old_blend->blend_enable_4bit != 
blend->blend_enable_4bit || 702 old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit || 703 old_blend->commutative_4bit != blend->commutative_4bit || 704 old_blend->logicop_enable != blend->logicop_enable))) 705 si_mark_atom_dirty(sctx, &sctx->msaa_config); 706 } 707 708 static void si_delete_blend_state(struct pipe_context *ctx, void *state) 709 { 710 struct si_context *sctx = (struct si_context *)ctx; 711 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 712 } 713 714 static void si_set_blend_color(struct pipe_context *ctx, 715 const struct pipe_blend_color *state) 716 { 717 struct si_context *sctx = (struct si_context *)ctx; 718 static const struct pipe_blend_color zeros; 719 720 sctx->blend_color.state = *state; 721 sctx->blend_color.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 722 si_mark_atom_dirty(sctx, &sctx->blend_color.atom); 723 } 724 725 static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom) 726 { 727 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 728 729 radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); 730 radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4); 731 } 732 733 /* 734 * Clipping 735 */ 736 737 static void si_set_clip_state(struct pipe_context *ctx, 738 const struct pipe_clip_state *state) 739 { 740 struct si_context *sctx = (struct si_context *)ctx; 741 struct pipe_constant_buffer cb; 742 static const struct pipe_clip_state zeros; 743 744 if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0) 745 return; 746 747 sctx->clip_state.state = *state; 748 sctx->clip_state.any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 749 si_mark_atom_dirty(sctx, &sctx->clip_state.atom); 750 751 cb.buffer = NULL; 752 cb.user_buffer = state->ucp; 753 cb.buffer_offset = 0; 754 cb.buffer_size = 4*4*8; 755 si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); 756 pipe_resource_reference(&cb.buffer, NULL); 757 } 758 759 static void si_emit_clip_state(struct 
si_context *sctx, struct r600_atom *atom) 760 { 761 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 762 763 radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4); 764 radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4); 765 } 766 767 static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) 768 { 769 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 770 struct si_shader *vs = si_get_vs_state(sctx); 771 struct si_shader_selector *vs_sel = vs->selector; 772 struct tgsi_shader_info *info = &vs_sel->info; 773 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 774 unsigned window_space = 775 info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 776 unsigned clipdist_mask = vs_sel->clipdist_mask; 777 unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS; 778 unsigned culldist_mask = vs_sel->culldist_mask; 779 unsigned total_mask; 780 781 if (vs->key.opt.clip_disable) { 782 assert(!info->culldist_writemask); 783 clipdist_mask = 0; 784 culldist_mask = 0; 785 } 786 total_mask = clipdist_mask | culldist_mask; 787 788 /* Clip distances on points have no effect, so need to be implemented 789 * as cull distances. This applies for the clipvertex case as well. 790 * 791 * Setting this for primitives other than points should have no adverse 792 * effects. 
793 */ 794 clipdist_mask &= rs->clip_plane_enable; 795 culldist_mask |= clipdist_mask; 796 797 radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, 798 vs_sel->pa_cl_vs_out_cntl | 799 S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | 800 S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | 801 clipdist_mask | (culldist_mask << 8)); 802 radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, 803 rs->pa_cl_clip_cntl | 804 ucp_mask | 805 S_028810_CLIP_DISABLE(window_space)); 806 } 807 808 /* 809 * inferred state between framebuffer and rasterizer 810 */ 811 static void si_update_poly_offset_state(struct si_context *sctx) 812 { 813 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 814 815 if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) { 816 si_pm4_bind_state(sctx, poly_offset, NULL); 817 return; 818 } 819 820 /* Use the user format, not db_render_format, so that the polygon 821 * offset behaves as expected by applications. 822 */ 823 switch (sctx->framebuffer.state.zsbuf->texture->format) { 824 case PIPE_FORMAT_Z16_UNORM: 825 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 826 break; 827 default: /* 24-bit */ 828 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 829 break; 830 case PIPE_FORMAT_Z32_FLOAT: 831 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 832 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 833 break; 834 } 835 } 836 837 /* 838 * Rasterizer 839 */ 840 841 static uint32_t si_translate_fill(uint32_t func) 842 { 843 switch(func) { 844 case PIPE_POLYGON_MODE_FILL: 845 return V_028814_X_DRAW_TRIANGLES; 846 case PIPE_POLYGON_MODE_LINE: 847 return V_028814_X_DRAW_LINES; 848 case PIPE_POLYGON_MODE_POINT: 849 return V_028814_X_DRAW_POINTS; 850 default: 851 assert(0); 852 return V_028814_X_DRAW_POINTS; 853 } 854 } 855 856 static void *si_create_rs_state(struct pipe_context *ctx, 857 const struct pipe_rasterizer_state *state) 858 { 859 struct si_screen *sscreen = ((struct 
si_context *)ctx)->screen; 860 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 861 struct si_pm4_state *pm4 = &rs->pm4; 862 unsigned tmp, i; 863 float psize_min, psize_max; 864 865 if (!rs) { 866 return NULL; 867 } 868 869 rs->scissor_enable = state->scissor; 870 rs->clip_halfz = state->clip_halfz; 871 rs->two_side = state->light_twoside; 872 rs->multisample_enable = state->multisample; 873 rs->force_persample_interp = state->force_persample_interp; 874 rs->clip_plane_enable = state->clip_plane_enable; 875 rs->line_stipple_enable = state->line_stipple_enable; 876 rs->poly_stipple_enable = state->poly_stipple_enable; 877 rs->line_smooth = state->line_smooth; 878 rs->line_width = state->line_width; 879 rs->poly_smooth = state->poly_smooth; 880 rs->uses_poly_offset = state->offset_point || state->offset_line || 881 state->offset_tri; 882 rs->clamp_fragment_color = state->clamp_fragment_color; 883 rs->clamp_vertex_color = state->clamp_vertex_color; 884 rs->flatshade = state->flatshade; 885 rs->sprite_coord_enable = state->sprite_coord_enable; 886 rs->rasterizer_discard = state->rasterizer_discard; 887 rs->pa_sc_line_stipple = state->line_stipple_enable ? 
888 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 889 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 890 rs->pa_cl_clip_cntl = 891 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 892 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 893 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 894 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 895 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 896 897 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, 898 S_0286D4_FLAT_SHADE_ENA(1) | 899 S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) | 900 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 901 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 902 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 903 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 904 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 905 906 /* point size 12.4 fixed point */ 907 tmp = (unsigned)(state->point_size * 8.0); 908 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 909 910 if (state->point_size_per_vertex) { 911 psize_min = util_get_min_point_size(state); 912 psize_max = 8192; 913 } else { 914 /* Force the point size to be as if the vertex output was disabled. */ 915 psize_min = state->point_size; 916 psize_max = state->point_size; 917 } 918 rs->max_point_size = psize_max; 919 920 /* Divide by two, because 0.5 = 1 pixel. 
*/ 921 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 922 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 923 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 924 925 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, 926 S_028A08_WIDTH(si_pack_float_12p4(state->line_width/2))); 927 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 928 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 929 S_028A48_MSAA_ENABLE(state->multisample || 930 state->poly_smooth || 931 state->line_smooth) | 932 S_028A48_VPORT_SCISSOR_ENABLE(1) | 933 S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.chip_class >= GFX9)); 934 935 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 936 S_028BE4_PIX_CENTER(state->half_pixel_center) | 937 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 938 939 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 940 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 941 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 942 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 943 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 944 S_028814_FACE(!state->front_ccw) | 945 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 946 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 947 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 948 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 949 state->fill_back != PIPE_POLYGON_MODE_FILL) | 950 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 951 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); 952 953 if (!rs->uses_poly_offset) 954 return rs; 955 956 rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state)); 957 if (!rs->pm4_poly_offset) { 958 FREE(rs); 959 return NULL; 960 } 961 962 /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. 
*/ 963 for (i = 0; i < 3; i++) { 964 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 965 float offset_units = state->offset_units; 966 float offset_scale = state->offset_scale * 16.0f; 967 uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 968 969 if (!state->offset_units_unscaled) { 970 switch (i) { 971 case 0: /* 16-bit zbuffer */ 972 offset_units *= 4.0f; 973 pa_su_poly_offset_db_fmt_cntl = 974 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 975 break; 976 case 1: /* 24-bit zbuffer */ 977 offset_units *= 2.0f; 978 pa_su_poly_offset_db_fmt_cntl = 979 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 980 break; 981 case 2: /* 32-bit zbuffer */ 982 offset_units *= 1.0f; 983 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 984 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 985 break; 986 } 987 } 988 989 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 990 fui(offset_scale)); 991 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 992 fui(offset_units)); 993 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 994 fui(offset_scale)); 995 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 996 fui(offset_units)); 997 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 998 pa_su_poly_offset_db_fmt_cntl); 999 } 1000 1001 return rs; 1002 } 1003 1004 static void si_bind_rs_state(struct pipe_context *ctx, void *state) 1005 { 1006 struct si_context *sctx = (struct si_context *)ctx; 1007 struct si_state_rasterizer *old_rs = 1008 (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 1009 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 1010 1011 if (!state) 1012 return; 1013 1014 if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) { 1015 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1016 1017 /* Update the small primitive filter workaround if necessary. 
*/ 1018 if (sctx->screen->has_msaa_sample_loc_bug && 1019 sctx->framebuffer.nr_samples > 1) 1020 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 1021 } 1022 1023 sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR; 1024 sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color); 1025 1026 si_pm4_bind_state(sctx, rasterizer, rs); 1027 si_update_poly_offset_state(sctx); 1028 1029 if (!old_rs || 1030 (old_rs->scissor_enable != rs->scissor_enable || 1031 old_rs->line_width != rs->line_width || 1032 old_rs->max_point_size != rs->max_point_size)) { 1033 sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; 1034 si_mark_atom_dirty(sctx, &sctx->scissors.atom); 1035 } 1036 1037 if (!old_rs || 1038 old_rs->clip_halfz != rs->clip_halfz) { 1039 sctx->viewports.depth_range_dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; 1040 si_mark_atom_dirty(sctx, &sctx->viewports.atom); 1041 } 1042 1043 if (!old_rs || 1044 old_rs->clip_plane_enable != rs->clip_plane_enable || 1045 old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl) 1046 si_mark_atom_dirty(sctx, &sctx->clip_regs); 1047 1048 sctx->ia_multi_vgt_param_key.u.line_stipple_enabled = 1049 rs->line_stipple_enable; 1050 1051 if (!old_rs || 1052 old_rs->clip_plane_enable != rs->clip_plane_enable || 1053 old_rs->rasterizer_discard != rs->rasterizer_discard || 1054 old_rs->sprite_coord_enable != rs->sprite_coord_enable || 1055 old_rs->flatshade != rs->flatshade || 1056 old_rs->two_side != rs->two_side || 1057 old_rs->multisample_enable != rs->multisample_enable || 1058 old_rs->poly_stipple_enable != rs->poly_stipple_enable || 1059 old_rs->poly_smooth != rs->poly_smooth || 1060 old_rs->line_smooth != rs->line_smooth || 1061 old_rs->clamp_fragment_color != rs->clamp_fragment_color || 1062 old_rs->force_persample_interp != rs->force_persample_interp) 1063 sctx->do_update_shaders = true; 1064 } 1065 1066 static void si_delete_rs_state(struct pipe_context *ctx, void *state) 1067 { 1068 struct si_context *sctx = 
(struct si_context *)ctx; 1069 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 1070 1071 if (sctx->queued.named.rasterizer == state) 1072 si_pm4_bind_state(sctx, poly_offset, NULL); 1073 1074 FREE(rs->pm4_poly_offset); 1075 si_pm4_delete_state(sctx, rasterizer, rs); 1076 } 1077 1078 /* 1079 * infeered state between dsa and stencil ref 1080 */ 1081 static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom) 1082 { 1083 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 1084 struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 1085 struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 1086 1087 radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); 1088 radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) | 1089 S_028430_STENCILMASK(dsa->valuemask[0]) | 1090 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 1091 S_028430_STENCILOPVAL(1)); 1092 radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 1093 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 1094 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 1095 S_028434_STENCILOPVAL_BF(1)); 1096 } 1097 1098 static void si_set_stencil_ref(struct pipe_context *ctx, 1099 const struct pipe_stencil_ref *state) 1100 { 1101 struct si_context *sctx = (struct si_context *)ctx; 1102 1103 if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0) 1104 return; 1105 1106 sctx->stencil_ref.state = *state; 1107 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 1108 } 1109 1110 1111 /* 1112 * DSA 1113 */ 1114 1115 static uint32_t si_translate_stencil_op(int s_op) 1116 { 1117 switch (s_op) { 1118 case PIPE_STENCIL_OP_KEEP: 1119 return V_02842C_STENCIL_KEEP; 1120 case PIPE_STENCIL_OP_ZERO: 1121 return V_02842C_STENCIL_ZERO; 1122 case PIPE_STENCIL_OP_REPLACE: 1123 return V_02842C_STENCIL_REPLACE_TEST; 1124 case PIPE_STENCIL_OP_INCR: 1125 return V_02842C_STENCIL_ADD_CLAMP; 1126 case PIPE_STENCIL_OP_DECR: 1127 return V_02842C_STENCIL_SUB_CLAMP; 1128 case 
PIPE_STENCIL_OP_INCR_WRAP: 1129 return V_02842C_STENCIL_ADD_WRAP; 1130 case PIPE_STENCIL_OP_DECR_WRAP: 1131 return V_02842C_STENCIL_SUB_WRAP; 1132 case PIPE_STENCIL_OP_INVERT: 1133 return V_02842C_STENCIL_INVERT; 1134 default: 1135 R600_ERR("Unknown stencil op %d", s_op); 1136 assert(0); 1137 break; 1138 } 1139 return 0; 1140 } 1141 1142 static bool si_dsa_writes_stencil(const struct pipe_stencil_state *s) 1143 { 1144 return s->enabled && s->writemask && 1145 (s->fail_op != PIPE_STENCIL_OP_KEEP || 1146 s->zfail_op != PIPE_STENCIL_OP_KEEP || 1147 s->zpass_op != PIPE_STENCIL_OP_KEEP); 1148 } 1149 1150 static bool si_order_invariant_stencil_op(enum pipe_stencil_op op) 1151 { 1152 /* REPLACE is normally order invariant, except when the stencil 1153 * reference value is written by the fragment shader. Tracking this 1154 * interaction does not seem worth the effort, so be conservative. */ 1155 return op != PIPE_STENCIL_OP_INCR && 1156 op != PIPE_STENCIL_OP_DECR && 1157 op != PIPE_STENCIL_OP_REPLACE; 1158 } 1159 1160 /* Compute whether, assuming Z writes are disabled, this stencil state is order 1161 * invariant in the sense that the set of passing fragments as well as the 1162 * final stencil buffer result does not depend on the order of fragments. */ 1163 static bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state) 1164 { 1165 return !state->enabled || !state->writemask || 1166 /* The following assumes that Z writes are disabled. 
*/ 1167 (state->func == PIPE_FUNC_ALWAYS && 1168 si_order_invariant_stencil_op(state->zpass_op) && 1169 si_order_invariant_stencil_op(state->zfail_op)) || 1170 (state->func == PIPE_FUNC_NEVER && 1171 si_order_invariant_stencil_op(state->fail_op)); 1172 } 1173 1174 static void *si_create_dsa_state(struct pipe_context *ctx, 1175 const struct pipe_depth_stencil_alpha_state *state) 1176 { 1177 struct si_context *sctx = (struct si_context *)ctx; 1178 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 1179 struct si_pm4_state *pm4 = &dsa->pm4; 1180 unsigned db_depth_control; 1181 uint32_t db_stencil_control = 0; 1182 1183 if (!dsa) { 1184 return NULL; 1185 } 1186 1187 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 1188 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 1189 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 1190 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 1191 1192 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 1193 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 1194 S_028800_ZFUNC(state->depth.func) | 1195 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 1196 1197 /* stencil */ 1198 if (state->stencil[0].enabled) { 1199 db_depth_control |= S_028800_STENCIL_ENABLE(1); 1200 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 1201 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 1202 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 1203 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 1204 1205 if (state->stencil[1].enabled) { 1206 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 1207 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 1208 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 1209 db_stencil_control |= 
S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 1210 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 1211 } 1212 } 1213 1214 /* alpha */ 1215 if (state->alpha.enabled) { 1216 dsa->alpha_func = state->alpha.func; 1217 1218 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 1219 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 1220 } else { 1221 dsa->alpha_func = PIPE_FUNC_ALWAYS; 1222 } 1223 1224 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1225 if (state->stencil[0].enabled) 1226 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1227 if (state->depth.bounds_test) { 1228 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 1229 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 1230 } 1231 1232 dsa->depth_enabled = state->depth.enabled; 1233 dsa->depth_write_enabled = state->depth.enabled && 1234 state->depth.writemask; 1235 dsa->stencil_enabled = state->stencil[0].enabled; 1236 dsa->stencil_write_enabled = state->stencil[0].enabled && 1237 (si_dsa_writes_stencil(&state->stencil[0]) || 1238 si_dsa_writes_stencil(&state->stencil[1])); 1239 dsa->db_can_write = dsa->depth_write_enabled || 1240 dsa->stencil_write_enabled; 1241 1242 bool zfunc_is_ordered = 1243 state->depth.func == PIPE_FUNC_NEVER || 1244 state->depth.func == PIPE_FUNC_LESS || 1245 state->depth.func == PIPE_FUNC_LEQUAL || 1246 state->depth.func == PIPE_FUNC_GREATER || 1247 state->depth.func == PIPE_FUNC_GEQUAL; 1248 1249 bool nozwrite_and_order_invariant_stencil = 1250 !dsa->db_can_write || 1251 (!dsa->depth_write_enabled && 1252 si_order_invariant_stencil_state(&state->stencil[0]) && 1253 si_order_invariant_stencil_state(&state->stencil[1])); 1254 1255 dsa->order_invariance[1].zs = 1256 nozwrite_and_order_invariant_stencil || 1257 (!dsa->stencil_write_enabled && zfunc_is_ordered); 1258 dsa->order_invariance[0].zs 
= !dsa->depth_write_enabled || zfunc_is_ordered; 1259 1260 dsa->order_invariance[1].pass_set = 1261 nozwrite_and_order_invariant_stencil || 1262 (!dsa->stencil_write_enabled && 1263 (state->depth.func == PIPE_FUNC_ALWAYS || 1264 state->depth.func == PIPE_FUNC_NEVER)); 1265 dsa->order_invariance[0].pass_set = 1266 !dsa->depth_write_enabled || 1267 (state->depth.func == PIPE_FUNC_ALWAYS || 1268 state->depth.func == PIPE_FUNC_NEVER); 1269 1270 dsa->order_invariance[1].pass_last = 1271 sctx->screen->assume_no_z_fights && 1272 !dsa->stencil_write_enabled && 1273 dsa->depth_write_enabled && zfunc_is_ordered; 1274 dsa->order_invariance[0].pass_last = 1275 sctx->screen->assume_no_z_fights && 1276 dsa->depth_write_enabled && zfunc_is_ordered; 1277 1278 return dsa; 1279 } 1280 1281 static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1282 { 1283 struct si_context *sctx = (struct si_context *)ctx; 1284 struct si_state_dsa *old_dsa = sctx->queued.named.dsa; 1285 struct si_state_dsa *dsa = state; 1286 1287 if (!state) 1288 return; 1289 1290 si_pm4_bind_state(sctx, dsa, dsa); 1291 1292 if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1293 sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1294 sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1295 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 1296 } 1297 1298 if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func) 1299 sctx->do_update_shaders = true; 1300 1301 if (sctx->screen->dpbb_allowed && 1302 (!old_dsa || 1303 (old_dsa->depth_enabled != dsa->depth_enabled || 1304 old_dsa->stencil_enabled != dsa->stencil_enabled || 1305 old_dsa->db_can_write != dsa->db_can_write))) 1306 si_mark_atom_dirty(sctx, &sctx->dpbb_state); 1307 1308 if (sctx->screen->has_out_of_order_rast && 1309 (!old_dsa || 1310 memcmp(old_dsa->order_invariance, dsa->order_invariance, 1311 sizeof(old_dsa->order_invariance)))) 1312 si_mark_atom_dirty(sctx, &sctx->msaa_config); 1313 } 1314 1315 static void si_delete_dsa_state(struct 
/*
 * Create a DSA CSO from an all-zero gallium DSA template, going through the
 * context's own create_depth_stencil_alpha_state hook.
 */
static void *si_create_db_flush_dsa(struct si_context *sctx)
{
	struct pipe_depth_stencil_alpha_state dsa = {};

	return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
}

/* DB RENDER STATE */

/*
 * Enable or disable active queries.
 *
 * Swaps the start/stop pipeline-stats context flags and, when the value
 * actually changes, updates occlusion_queries_disabled and flags the
 * db_render_state atom.
 */
static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
{
	struct si_context *sctx = (struct si_context*)ctx;

	/* Pipeline stat & streamout queries. */
	if (enable) {
		sctx->b.flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
		sctx->b.flags |= SI_CONTEXT_START_PIPELINE_STATS;
	} else {
		sctx->b.flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
		sctx->b.flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
	}

	/* Occlusion queries. */
	if (sctx->occlusion_queries_disabled != !enable) {
		sctx->occlusion_queries_disabled = !enable;
		si_mark_atom_dirty(sctx, &sctx->db_render_state);
	}
}

/*
 * Called when the occlusion query state changes.
 *
 * Always flags db_render_state; msaa_config is flagged only when the
 * "perfect" occlusion query enable differs from old_perfect_enable.
 * old_enable is not used by this implementation.
 */
static void si_set_occlusion_query_state(struct pipe_context *ctx,
					 bool old_enable,
					 bool old_perfect_enable)
{
	struct si_context *sctx = (struct si_context*)ctx;

	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	bool perfect_enable = sctx->b.num_perfect_occlusion_queries != 0;

	if (perfect_enable != old_perfect_enable)
		si_mark_atom_dirty(sctx, &sctx->msaa_config);
}

/*
 * Save the compute state into st: the current compute program, compute
 * constant buffer 0 and compute shader buffers
 * (NOTE(review): the "0, 3" arguments look like start slot 0, count 3 --
 * confirm against si_get_shader_buffers).
 */
static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
{
	struct si_context *sctx = (struct si_context*)ctx;

	st->saved_compute = sctx->cs_shader_state.program;

	si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
	si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
}

/*
 * Emit the DB render state atom: DB_RENDER_CONTROL and DB_COUNT_CONTROL
 * (as a two-register sequence), then DB_RENDER_OVERRIDE2 and
 * DB_SHADER_CONTROL.
 *
 * The emission order matters: exactly two radeon_emit() values must follow
 * radeon_set_context_reg_seq(..., 2).
 */
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
{
	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
	unsigned db_shader_control;

	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);

	/* DB_RENDER_CONTROL: DB->CB copy, in-place flush, or clear mode,
	 * checked in that priority order. */
	if (sctx->dbcb_depth_copy_enabled ||
	    sctx->dbcb_stencil_copy_enabled) {
		radeon_emit(cs,
			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
			    S_028000_COPY_CENTROID(1) |
			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
	} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
		radeon_emit(cs,
			    S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
			    S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
	} else {
		radeon_emit(cs,
			    S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
			    S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
	}

	/* DB_COUNT_CONTROL (occlusion queries) */
	if (sctx->b.num_occlusion_queries > 0 &&
	    !sctx->occlusion_queries_disabled) {
		bool perfect = sctx->b.num_perfect_occlusion_queries > 0;

		/* CIK and newer additionally set the ZPASS/SLICE enables. */
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
				    S_028004_ZPASS_ENABLE(1) |
				    S_028004_SLICE_EVEN_ENABLE(1) |
				    S_028004_SLICE_ODD_ENABLE(1));
		} else {
			radeon_emit(cs,
				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
		}
	} else {
		/* Disable occlusion queries. */
		if (sctx->b.chip_class >= CIK) {
			radeon_emit(cs, 0);
		} else {
			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
		}
	}

	/* DB_RENDER_OVERRIDE2 */
	radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
		S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));

	/* Start from the value derived from the pixel shader, then apply
	 * the overrides below. */
	db_shader_control = sctx->ps_db_shader_control;

	/* Bug workaround for smoothing (overrasterization) on SI. */
	if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
		db_shader_control &= C_02880C_Z_ORDER;
		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
	}

	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
	if (!rs || !rs->multisample_enable)
		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;

	if (sctx->screen->has_rbplus &&
	    !sctx->screen->rbplus_allowed)
		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);

	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
			       db_shader_control);
}

/*
 * format translation
 */

/*
 * Translate a gallium format into a CB color format (V_028C70_COLOR_*).
 *
 * The decision is based on the channel count and per-channel bit sizes of
 * the format description. Returns V_028C70_COLOR_INVALID when the format
 * has no description, is not a plain layout (R11G11B10_FLOAT excepted), is
 * a mixed non-depth/stencil format, or matches none of the size patterns.
 */
static uint32_t si_translate_colorformat(enum pipe_format format)
{
	const struct util_format_description *desc = util_format_description(format);
	if (!desc)
		return V_028C70_COLOR_INVALID;

/* True when the four channel sizes are exactly (x, y, z, w).
 * Note: the macro is not #undef'd and stays defined after this function. */
#define HAS_SIZE(x,y,z,w) \
	(desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
	 desc->channel[2].size == (z) && desc->channel[3].size == (w))

	if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
		return V_028C70_COLOR_10_11_11;

	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
		return V_028C70_COLOR_INVALID;

	/* hw cannot support mixed formats (except depth/stencil, since
	 * stencil is not written to). */
	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
		return V_028C70_COLOR_INVALID;

	switch (desc->nr_channels) {
	case 1:
		switch (desc->channel[0].size) {
		case 8:
			return V_028C70_COLOR_8;
		case 16:
			return V_028C70_COLOR_16;
		case 32:
			return V_028C70_COLOR_32;
		}
		break;
	case 2:
		if (desc->channel[0].size == desc->channel[1].size) {
			switch (desc->channel[0].size) {
			case 8:
				return V_028C70_COLOR_8_8;
			case 16:
				return V_028C70_COLOR_16_16;
			case 32:
				return V_028C70_COLOR_32_32;
			}
		} else if (HAS_SIZE(8,24,0,0)) {
			return V_028C70_COLOR_24_8;
		} else if (HAS_SIZE(24,8,0,0)) {
			return V_028C70_COLOR_8_24;
		}
		break;
	case 3:
		if (HAS_SIZE(5,6,5,0)) {
			return V_028C70_COLOR_5_6_5;
		} else if (HAS_SIZE(32,8,24,0)) {
			return V_028C70_COLOR_X24_8_32_FLOAT;
		}
		break;
	case 4:
		if (desc->channel[0].size == desc->channel[1].size &&
		    desc->channel[0].size == desc->channel[2].size &&
		    desc->channel[0].size == desc->channel[3].size) {
			switch (desc->channel[0].size) {
			case 4:
				return V_028C70_COLOR_4_4_4_4;
			case 8:
				return V_028C70_COLOR_8_8_8_8;
			case 16:
				return V_028C70_COLOR_16_16_16_16;
			case 32:
				return V_028C70_COLOR_32_32_32_32;
			}
		} else if (HAS_SIZE(5,5,5,1)) {
			return V_028C70_COLOR_1_5_5_5;
		} else if (HAS_SIZE(1,5,5,5)) {
			return V_028C70_COLOR_5_5_5_1;
		} else if (HAS_SIZE(10,10,10,2)) {
			return V_028C70_COLOR_2_10_10_10;
		}
		break;
	}
	/* No size pattern matched. */
	return V_028C70_COLOR_INVALID;
}
/*
 * Translate a gallium depth/stencil format into a DB Z format
 * (V_028040_Z_*). Formats not listed yield V_028040_Z_INVALID.
 */
static uint32_t si_translate_dbformat(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_Z16_UNORM:
		return V_028040_Z_16;
	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
	case PIPE_FORMAT_X8Z24_UNORM:
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
		return V_028040_Z_24; /* deprecated on SI */
	case PIPE_FORMAT_Z32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
		return V_028040_Z_32_FLOAT;
	default:
		return V_028040_Z_INVALID;
	}
}

/*
 * Texture translation
 */

/*
 * Translate a gallium format into an image data format
 * (V_008F14_IMG_DATA_FORMAT_*).
 *
 * screen          - used for the DRM version and chip family/class checks.
 * format          - the gallium format.
 * desc            - format description; dereferenced without a NULL check.
 * first_non_void  - index of the first non-void channel; values outside
 *                   0..3 are rejected before the uniform-format lookup.
 *
 * The checks run in this order: depth/stencil and other non-RGB
 * colorspaces, compressed and subsampled layouts, the two packed float
 * formats, non-uniform channel sizes, and finally uniform channel sizes.
 * Returns ~0 when no translation exists.
 */
static uint32_t si_translate_texformat(struct pipe_screen *screen,
				       enum pipe_format format,
				       const struct util_format_description *desc,
				       int first_non_void)
{
	struct si_screen *sscreen = (struct si_screen*)screen;
	/* RGTC, BPTC and S3TC are only exposed for DRM 2.31+ or DRM 3.x. */
	bool enable_compressed_formats = (sscreen->info.drm_major == 2 &&
					  sscreen->info.drm_minor >= 31) ||
					 sscreen->info.drm_major == 3;
	bool uniform = true;
	int i;

	/* Colorspace (return non-RGB formats directly). */
	switch (desc->colorspace) {
	/* Depth stencil formats */
	case UTIL_FORMAT_COLORSPACE_ZS:
		switch (format) {
		case PIPE_FORMAT_Z16_UNORM:
			return V_008F14_IMG_DATA_FORMAT_16;
		case PIPE_FORMAT_X24S8_UINT:
		case PIPE_FORMAT_S8X24_UINT:
			/*
			 * Implemented as an 8_8_8_8 data format to fix texture
			 * gathers in stencil sampling. This affects at least
			 * GL45-CTS.texture_cube_map_array.sampling on VI.
			 */
			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
		case PIPE_FORMAT_Z24X8_UNORM:
		case PIPE_FORMAT_Z24_UNORM_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8_24;
		case PIPE_FORMAT_X8Z24_UNORM:
		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
			return V_008F14_IMG_DATA_FORMAT_24_8;
		case PIPE_FORMAT_S8_UINT:
			return V_008F14_IMG_DATA_FORMAT_8;
		case PIPE_FORMAT_Z32_FLOAT:
			return V_008F14_IMG_DATA_FORMAT_32;
		case PIPE_FORMAT_X32_S8X24_UINT:
		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
			return V_008F14_IMG_DATA_FORMAT_X24_8_32;
		default:
			goto out_unknown;
		}

	case UTIL_FORMAT_COLORSPACE_YUV:
		goto out_unknown; /* TODO */

	case UTIL_FORMAT_COLORSPACE_SRGB:
		/* Only 1- and 4-channel sRGB formats are accepted. */
		if (desc->nr_channels != 4 && desc->nr_channels != 1)
			goto out_unknown;
		break;

	default:
		break;
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_RGTC1_SNORM:
		case PIPE_FORMAT_LATC1_SNORM:
		case PIPE_FORMAT_RGTC1_UNORM:
		case PIPE_FORMAT_LATC1_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC4;
		case PIPE_FORMAT_RGTC2_SNORM:
		case PIPE_FORMAT_LATC2_SNORM:
		case PIPE_FORMAT_RGTC2_UNORM:
		case PIPE_FORMAT_LATC2_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BC5;
		default:
			goto out_unknown;
		}
	}

	/* ETC is only translated on Stoney and GFX9+; on other chips ETC
	 * formats fall through to the generic checks below. */
	if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
	    (sscreen->info.family == CHIP_STONEY ||
	     sscreen->info.chip_class >= GFX9)) {
		switch (format) {
		case PIPE_FORMAT_ETC1_RGB8:
		case PIPE_FORMAT_ETC2_RGB8:
		case PIPE_FORMAT_ETC2_SRGB8:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
		case PIPE_FORMAT_ETC2_RGB8A1:
		case PIPE_FORMAT_ETC2_SRGB8A1:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
		case PIPE_FORMAT_ETC2_RGBA8:
		case PIPE_FORMAT_ETC2_SRGBA8:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
		case PIPE_FORMAT_ETC2_R11_UNORM:
		case PIPE_FORMAT_ETC2_R11_SNORM:
			return V_008F14_IMG_DATA_FORMAT_ETC2_R;
		case PIPE_FORMAT_ETC2_RG11_UNORM:
		case PIPE_FORMAT_ETC2_RG11_SNORM:
			return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_BPTC_RGBA_UNORM:
		case PIPE_FORMAT_BPTC_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC7;
		case PIPE_FORMAT_BPTC_RGB_FLOAT:
		case PIPE_FORMAT_BPTC_RGB_UFLOAT:
			return V_008F14_IMG_DATA_FORMAT_BC6;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
		switch (format) {
		case PIPE_FORMAT_R8G8_B8G8_UNORM:
		case PIPE_FORMAT_G8R8_B8R8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_GB_GR;
		case PIPE_FORMAT_G8R8_G8B8_UNORM:
		case PIPE_FORMAT_R8G8_R8B8_UNORM:
			return V_008F14_IMG_DATA_FORMAT_BG_RG;
		default:
			goto out_unknown;
		}
	}

	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
		if (!enable_compressed_formats)
			goto out_unknown;

		switch (format) {
		case PIPE_FORMAT_DXT1_RGB:
		case PIPE_FORMAT_DXT1_RGBA:
		case PIPE_FORMAT_DXT1_SRGB:
		case PIPE_FORMAT_DXT1_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC1;
		case PIPE_FORMAT_DXT3_RGBA:
		case PIPE_FORMAT_DXT3_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC2;
		case PIPE_FORMAT_DXT5_RGBA:
		case PIPE_FORMAT_DXT5_SRGBA:
			return V_008F14_IMG_DATA_FORMAT_BC3;
		default:
			goto out_unknown;
		}
	}

	if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
	} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
		return V_008F14_IMG_DATA_FORMAT_10_11_11;
	}

	/* R8G8Bx_SNORM - TODO CxV8U8 */

	/* hw cannot support mixed formats (except depth/stencil, since only
	 * depth is read).*/
	if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
		goto out_unknown;

	/* See whether the components are of the same size. */
	for (i = 1; i < desc->nr_channels; i++) {
		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
	}

	/* Non-uniform formats. */
	if (!uniform) {
		switch(desc->nr_channels) {
		case 3:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 6 &&
			    desc->channel[2].size == 5) {
				return V_008F14_IMG_DATA_FORMAT_5_6_5;
			}
			goto out_unknown;
		case 4:
			if (desc->channel[0].size == 5 &&
			    desc->channel[1].size == 5 &&
			    desc->channel[2].size == 5 &&
			    desc->channel[3].size == 1) {
				return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
			}
			if (desc->channel[0].size == 1 &&
			    desc->channel[1].size == 5 &&
			    desc->channel[2].size == 5 &&
			    desc->channel[3].size == 5) {
				return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
			}
			if (desc->channel[0].size == 10 &&
			    desc->channel[1].size == 10 &&
			    desc->channel[2].size == 10 &&
			    desc->channel[3].size == 2) {
				return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
			}
			goto out_unknown;
		}
		goto out_unknown;
	}

	/* Guards the channel[] index used below. */
	if (first_non_void < 0 || first_non_void > 3)
		goto out_unknown;

	/* uniform formats */
	switch (desc->channel[first_non_void].size) {
	case 4:
		switch (desc->nr_channels) {
#if 0 /* Not supported for render targets */
		case 2:
			return V_008F14_IMG_DATA_FORMAT_4_4;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
		}
		break;
	case 8:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_8;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_8_8;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
		}
		break;
	case 16:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_16;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_16_16;
		case 4:
			return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
		}
		break;
	case 32:
		switch (desc->nr_channels) {
		case 1:
			return V_008F14_IMG_DATA_FORMAT_32;
		case 2:
			return V_008F14_IMG_DATA_FORMAT_32_32;
#if 0 /* Not supported for render targets */
		case 3:
			return V_008F14_IMG_DATA_FORMAT_32_32_32;
#endif
		case 4:
			return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
		}
		/* No break needed: falls into out_unknown below. */
	}

out_unknown:
	/* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */
	return ~0;
}

/*
 * Translate a PIPE_TEX_WRAP_* mode into a V_008F30_SQ_TEX_* value.
 * Unknown modes are treated like PIPE_TEX_WRAP_REPEAT.
 */
static unsigned si_tex_wrap(unsigned wrap)
{
	switch (wrap) {
	default:
	case PIPE_TEX_WRAP_REPEAT:
		return V_008F30_SQ_TEX_WRAP;
	case PIPE_TEX_WRAP_CLAMP:
		return V_008F30_SQ_TEX_CLAMP_HALF_BORDER;
	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
		return V_008F30_SQ_TEX_CLAMP_BORDER;
	case PIPE_TEX_WRAP_MIRROR_REPEAT:
		return V_008F30_SQ_TEX_MIRROR;
	case PIPE_TEX_WRAP_MIRROR_CLAMP:
		return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER;
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
		return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER;
	}
}
(filter) { 1884 case PIPE_TEX_MIPFILTER_NEAREST: 1885 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1886 case PIPE_TEX_MIPFILTER_LINEAR: 1887 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1888 default: 1889 case PIPE_TEX_MIPFILTER_NONE: 1890 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1891 } 1892 } 1893 1894 static unsigned si_tex_compare(unsigned compare) 1895 { 1896 switch (compare) { 1897 default: 1898 case PIPE_FUNC_NEVER: 1899 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1900 case PIPE_FUNC_LESS: 1901 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1902 case PIPE_FUNC_EQUAL: 1903 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1904 case PIPE_FUNC_LEQUAL: 1905 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1906 case PIPE_FUNC_GREATER: 1907 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1908 case PIPE_FUNC_NOTEQUAL: 1909 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1910 case PIPE_FUNC_GEQUAL: 1911 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1912 case PIPE_FUNC_ALWAYS: 1913 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1914 } 1915 } 1916 1917 static unsigned si_tex_dim(struct si_screen *sscreen, struct r600_texture *rtex, 1918 unsigned view_target, unsigned nr_samples) 1919 { 1920 unsigned res_target = rtex->resource.b.b.target; 1921 1922 if (view_target == PIPE_TEXTURE_CUBE || 1923 view_target == PIPE_TEXTURE_CUBE_ARRAY) 1924 res_target = view_target; 1925 /* If interpreting cubemaps as something else, set 2D_ARRAY. */ 1926 else if (res_target == PIPE_TEXTURE_CUBE || 1927 res_target == PIPE_TEXTURE_CUBE_ARRAY) 1928 res_target = PIPE_TEXTURE_2D_ARRAY; 1929 1930 /* GFX9 allocates 1D textures as 2D. 
*/ 1931 if ((res_target == PIPE_TEXTURE_1D || 1932 res_target == PIPE_TEXTURE_1D_ARRAY) && 1933 sscreen->info.chip_class >= GFX9 && 1934 rtex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) { 1935 if (res_target == PIPE_TEXTURE_1D) 1936 res_target = PIPE_TEXTURE_2D; 1937 else 1938 res_target = PIPE_TEXTURE_2D_ARRAY; 1939 } 1940 1941 switch (res_target) { 1942 default: 1943 case PIPE_TEXTURE_1D: 1944 return V_008F1C_SQ_RSRC_IMG_1D; 1945 case PIPE_TEXTURE_1D_ARRAY: 1946 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1947 case PIPE_TEXTURE_2D: 1948 case PIPE_TEXTURE_RECT: 1949 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1950 V_008F1C_SQ_RSRC_IMG_2D; 1951 case PIPE_TEXTURE_2D_ARRAY: 1952 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1953 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1954 case PIPE_TEXTURE_3D: 1955 return V_008F1C_SQ_RSRC_IMG_3D; 1956 case PIPE_TEXTURE_CUBE: 1957 case PIPE_TEXTURE_CUBE_ARRAY: 1958 return V_008F1C_SQ_RSRC_IMG_CUBE; 1959 } 1960 } 1961 1962 /* 1963 * Format support testing 1964 */ 1965 1966 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1967 { 1968 const struct util_format_description *desc = util_format_description(format); 1969 if (!desc) 1970 return false; 1971 1972 return si_translate_texformat(screen, format, desc, 1973 util_format_get_first_non_void_channel(format)) != ~0U; 1974 } 1975 1976 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1977 const struct util_format_description *desc, 1978 int first_non_void) 1979 { 1980 int i; 1981 1982 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1983 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1984 1985 assert(first_non_void >= 0); 1986 1987 if (desc->nr_channels == 4 && 1988 desc->channel[0].size == 10 && 1989 desc->channel[1].size == 10 && 1990 desc->channel[2].size == 10 && 1991 desc->channel[3].size == 2) 1992 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1993 1994 /* See whether the components are of the same 
size. */ 1995 for (i = 0; i < desc->nr_channels; i++) { 1996 if (desc->channel[first_non_void].size != desc->channel[i].size) 1997 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1998 } 1999 2000 switch (desc->channel[first_non_void].size) { 2001 case 8: 2002 switch (desc->nr_channels) { 2003 case 1: 2004 case 3: /* 3 loads */ 2005 return V_008F0C_BUF_DATA_FORMAT_8; 2006 case 2: 2007 return V_008F0C_BUF_DATA_FORMAT_8_8; 2008 case 4: 2009 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 2010 } 2011 break; 2012 case 16: 2013 switch (desc->nr_channels) { 2014 case 1: 2015 case 3: /* 3 loads */ 2016 return V_008F0C_BUF_DATA_FORMAT_16; 2017 case 2: 2018 return V_008F0C_BUF_DATA_FORMAT_16_16; 2019 case 4: 2020 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 2021 } 2022 break; 2023 case 32: 2024 switch (desc->nr_channels) { 2025 case 1: 2026 return V_008F0C_BUF_DATA_FORMAT_32; 2027 case 2: 2028 return V_008F0C_BUF_DATA_FORMAT_32_32; 2029 case 3: 2030 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 2031 case 4: 2032 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2033 } 2034 break; 2035 case 64: 2036 /* Legacy double formats. 
*/ 2037 switch (desc->nr_channels) { 2038 case 1: /* 1 load */ 2039 return V_008F0C_BUF_DATA_FORMAT_32_32; 2040 case 2: /* 1 load */ 2041 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2042 case 3: /* 3 loads */ 2043 return V_008F0C_BUF_DATA_FORMAT_32_32; 2044 case 4: /* 2 loads */ 2045 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2046 } 2047 break; 2048 } 2049 2050 return V_008F0C_BUF_DATA_FORMAT_INVALID; 2051 } 2052 2053 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 2054 const struct util_format_description *desc, 2055 int first_non_void) 2056 { 2057 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 2058 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 2059 2060 assert(first_non_void >= 0); 2061 2062 switch (desc->channel[first_non_void].type) { 2063 case UTIL_FORMAT_TYPE_SIGNED: 2064 case UTIL_FORMAT_TYPE_FIXED: 2065 if (desc->channel[first_non_void].size >= 32 || 2066 desc->channel[first_non_void].pure_integer) 2067 return V_008F0C_BUF_NUM_FORMAT_SINT; 2068 else if (desc->channel[first_non_void].normalized) 2069 return V_008F0C_BUF_NUM_FORMAT_SNORM; 2070 else 2071 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 2072 break; 2073 case UTIL_FORMAT_TYPE_UNSIGNED: 2074 if (desc->channel[first_non_void].size >= 32 || 2075 desc->channel[first_non_void].pure_integer) 2076 return V_008F0C_BUF_NUM_FORMAT_UINT; 2077 else if (desc->channel[first_non_void].normalized) 2078 return V_008F0C_BUF_NUM_FORMAT_UNORM; 2079 else 2080 return V_008F0C_BUF_NUM_FORMAT_USCALED; 2081 break; 2082 case UTIL_FORMAT_TYPE_FLOAT: 2083 default: 2084 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 2085 } 2086 } 2087 2088 static unsigned si_is_vertex_format_supported(struct pipe_screen *screen, 2089 enum pipe_format format, 2090 unsigned usage) 2091 { 2092 const struct util_format_description *desc; 2093 int first_non_void; 2094 unsigned data_format; 2095 2096 assert((usage & ~(PIPE_BIND_SHADER_IMAGE | 2097 PIPE_BIND_SAMPLER_VIEW | 2098 PIPE_BIND_VERTEX_BUFFER)) == 0); 2099 2100 desc = 
util_format_description(format); 2101 if (!desc) 2102 return 0; 2103 2104 /* There are no native 8_8_8 or 16_16_16 data formats, and we currently 2105 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well 2106 * for read-only access (with caveats surrounding bounds checks), but 2107 * obviously fails for write access which we have to implement for 2108 * shader images. Luckily, OpenGL doesn't expect this to be supported 2109 * anyway, and so the only impact is on PBO uploads / downloads, which 2110 * shouldn't be expected to be fast for GL_RGB anyway. 2111 */ 2112 if (desc->block.bits == 3 * 8 || 2113 desc->block.bits == 3 * 16) { 2114 if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) { 2115 usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW); 2116 if (!usage) 2117 return 0; 2118 } 2119 } 2120 2121 first_non_void = util_format_get_first_non_void_channel(format); 2122 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 2123 if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID) 2124 return 0; 2125 2126 return usage; 2127 } 2128 2129 static bool si_is_colorbuffer_format_supported(enum pipe_format format) 2130 { 2131 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 2132 si_translate_colorswap(format, false) != ~0U; 2133 } 2134 2135 static bool si_is_zs_format_supported(enum pipe_format format) 2136 { 2137 return si_translate_dbformat(format) != V_028040_Z_INVALID; 2138 } 2139 2140 static boolean si_is_format_supported(struct pipe_screen *screen, 2141 enum pipe_format format, 2142 enum pipe_texture_target target, 2143 unsigned sample_count, 2144 unsigned usage) 2145 { 2146 unsigned retval = 0; 2147 2148 if (target >= PIPE_MAX_TEXTURE_TYPES) { 2149 R600_ERR("r600: unsupported texture type %d\n", target); 2150 return false; 2151 } 2152 2153 if (!util_format_is_supported(format, usage)) 2154 return false; 2155 2156 if (sample_count > 1) { 2157 if (!screen->get_param(screen, 
/*
 * Choose the four SPI shader color export formats for a color surface and
 * store them in surf->spi_shader_col_format{,_alpha,_blend,_blend_alpha}.
 *
 * \param surf      surface whose spi_shader_col_format* fields are written
 * \param format    V_028C70_COLOR_* color buffer format
 * \param swap      V_028C70_SWAP_* component swap; only consulted for the
 *                  one- and two-channel 16-bit and 32-bit formats
 * \param ntype     V_028C70_NUMBER_* number type
 * \param is_depth  if true, all four variants are forced to 32_ABGR
 *
 * An unhandled color format asserts and returns without touching \p surf.
 * An unhandled swap or number type only asserts; if assert() is compiled
 * out, the affected variants keep their initial value of 0.
 */
static void si_choose_spi_color_formats(struct r600_surface *surf,
					unsigned format, unsigned swap,
					unsigned ntype, bool is_depth)
{
	/* Alpha is needed for alpha-to-coverage.
	 * Blending may be with or without alpha.
	 */
	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
	unsigned alpha = 0; /* exports alpha, but may not support blending */
	unsigned blend = 0; /* supports blending, but may not export alpha */
	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */

	/* Choose the SPI color formats. These are required values for RB+.
	 * Other chips have multiple choices, though they are not necessarily better.
	 */
	switch (format) {
	/* Formats listed here use a 16-bit-per-channel export for every
	 * variant; only the number type selects UINT16 / SINT16 / FP16. */
	case V_028C70_COLOR_5_6_5:
	case V_028C70_COLOR_1_5_5_5:
	case V_028C70_COLOR_5_5_5_1:
	case V_028C70_COLOR_4_4_4_4:
	case V_028C70_COLOR_10_11_11:
	case V_028C70_COLOR_11_11_10:
	case V_028C70_COLOR_8:
	case V_028C70_COLOR_8_8:
	case V_028C70_COLOR_8_8_8_8:
	case V_028C70_COLOR_10_10_10_2:
	case V_028C70_COLOR_2_10_10_10:
		if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		break;

	case V_028C70_COLOR_16:
	case V_028C70_COLOR_16_16:
	case V_028C70_COLOR_16_16_16_16:
		if (ntype == V_028C70_NUMBER_UNORM ||
		    ntype == V_028C70_NUMBER_SNORM) {
			/* UNORM16 and SNORM16 don't support blending */
			if (ntype == V_028C70_NUMBER_UNORM)
				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
			else
				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;

			/* Use 32 bits per channel for blending. */
			if (format == V_028C70_COLOR_16) {
				if (swap == V_028C70_SWAP_STD) { /* R */
					blend = V_028714_SPI_SHADER_32_R;
					blend_alpha = V_028714_SPI_SHADER_32_AR;
				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else if (format == V_028C70_COLOR_16_16) {
				if (swap == V_028C70_SWAP_STD) { /* RG */
					blend = V_028714_SPI_SHADER_32_GR;
					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
				} else if (swap == V_028C70_SWAP_ALT) /* RA */
					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
				else
					assert(0);
			} else /* 16_16_16_16 */
				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (ntype == V_028C70_NUMBER_UINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_SINT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
		else if (ntype == V_028C70_NUMBER_FLOAT)
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
		else
			assert(0);
		break;

	/* Single 32-bit channel: the variants that export alpha need 32_AR. */
	case V_028C70_COLOR_32:
		if (swap == V_028C70_SWAP_STD) { /* R */
			blend = normal = V_028714_SPI_SHADER_32_R;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	/* Two 32-bit channels: RG needs the full 32_ABGR export to carry alpha. */
	case V_028C70_COLOR_32_32:
		if (swap == V_028C70_SWAP_STD) { /* RG */
			blend = normal = V_028714_SPI_SHADER_32_GR;
			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
		} else if (swap == V_028C70_SWAP_ALT) /* RA */
			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
		else
			assert(0);
		break;

	case V_028C70_COLOR_32_32_32_32:
	case V_028C70_COLOR_8_24:
	case V_028C70_COLOR_24_8:
	case V_028C70_COLOR_X24_8_32_FLOAT:
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
		break;

	default:
		/* Unknown color format: leave surf untouched. */
		assert(0);
		return;
	}

	/* The DB->CB copy needs 32_ABGR. */
	if (is_depth)
		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;

	surf->spi_shader_col_format = normal;
	surf->spi_shader_col_format_alpha = alpha;
	surf->spi_shader_col_format_blend = blend;
	surf->spi_shader_col_format_blend_alpha = blend_alpha;
}
R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 2388 } 2389 assert(format != V_028C70_COLOR_INVALID); 2390 swap = si_translate_colorswap(surf->base.format, false); 2391 endian = si_colorformat_endian_swap(format); 2392 2393 /* blend clamp should be set for all NORM/SRGB types */ 2394 if (ntype == V_028C70_NUMBER_UNORM || 2395 ntype == V_028C70_NUMBER_SNORM || 2396 ntype == V_028C70_NUMBER_SRGB) 2397 blend_clamp = 1; 2398 2399 /* set blend bypass according to docs if SINT/UINT or 2400 8/24 COLOR variants */ 2401 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 2402 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 2403 format == V_028C70_COLOR_X24_8_32_FLOAT) { 2404 blend_clamp = 0; 2405 blend_bypass = 1; 2406 } 2407 2408 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) { 2409 if (format == V_028C70_COLOR_8 || 2410 format == V_028C70_COLOR_8_8 || 2411 format == V_028C70_COLOR_8_8_8_8) 2412 surf->color_is_int8 = true; 2413 else if (format == V_028C70_COLOR_10_10_10_2 || 2414 format == V_028C70_COLOR_2_10_10_10) 2415 surf->color_is_int10 = true; 2416 } 2417 2418 color_info = S_028C70_FORMAT(format) | 2419 S_028C70_COMP_SWAP(swap) | 2420 S_028C70_BLEND_CLAMP(blend_clamp) | 2421 S_028C70_BLEND_BYPASS(blend_bypass) | 2422 S_028C70_SIMPLE_FLOAT(1) | 2423 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && 2424 ntype != V_028C70_NUMBER_SNORM && 2425 ntype != V_028C70_NUMBER_SRGB && 2426 format != V_028C70_COLOR_8_24 && 2427 format != V_028C70_COLOR_24_8) | 2428 S_028C70_NUMBER_TYPE(ntype) | 2429 S_028C70_ENDIAN(endian); 2430 2431 /* Intensity is implemented as Red, so treat it that way. 
*/ 2432 color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 || 2433 util_format_is_intensity(surf->base.format)); 2434 2435 if (rtex->resource.b.b.nr_samples > 1) { 2436 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 2437 2438 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 2439 S_028C74_NUM_FRAGMENTS(log_samples); 2440 2441 if (rtex->fmask.size) { 2442 color_info |= S_028C70_COMPRESSION(1); 2443 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 2444 2445 if (sctx->b.chip_class == SI) { 2446 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 2447 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 2448 } 2449 } 2450 } 2451 2452 if (sctx->b.chip_class >= VI) { 2453 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B; 2454 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B; 2455 2456 /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and 2457 64 for APU because all of our APUs to date use DIMMs which have 2458 a request granularity size of 64B while all other chips have a 2459 32B request size */ 2460 if (!sctx->screen->info.has_dedicated_vram) 2461 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B; 2462 2463 if (rtex->resource.b.b.nr_samples > 1) { 2464 if (rtex->surface.bpe == 1) 2465 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 2466 else if (rtex->surface.bpe == 2) 2467 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B; 2468 } 2469 2470 surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | 2471 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | 2472 S_028C78_INDEPENDENT_64B_BLOCKS(1); 2473 } 2474 2475 /* This must be set for fast clear to work without FMASK. 
*/ 2476 if (!rtex->fmask.size && sctx->b.chip_class == SI) { 2477 unsigned bankh = util_logbase2(rtex->surface.u.legacy.bankh); 2478 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 2479 } 2480 2481 unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 2482 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 2483 2484 if (sctx->b.chip_class >= GFX9) { 2485 unsigned mip0_depth = util_max_layer(&rtex->resource.b.b, 0); 2486 2487 color_view |= S_028C6C_MIP_LEVEL(surf->base.u.tex.level); 2488 color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | 2489 S_028C74_RESOURCE_TYPE(rtex->surface.u.gfx9.resource_type); 2490 surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(surf->width0 - 1) | 2491 S_028C68_MIP0_HEIGHT(surf->height0 - 1) | 2492 S_028C68_MAX_MIP(rtex->resource.b.b.last_level); 2493 } 2494 2495 surf->cb_color_view = color_view; 2496 surf->cb_color_info = color_info; 2497 surf->cb_color_attrib = color_attrib; 2498 2499 /* Determine pixel shader export format */ 2500 si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth); 2501 2502 surf->color_initialized = true; 2503 } 2504 2505 static void si_init_depth_surface(struct si_context *sctx, 2506 struct r600_surface *surf) 2507 { 2508 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 2509 unsigned level = surf->base.u.tex.level; 2510 unsigned format, stencil_format; 2511 uint32_t z_info, s_info; 2512 2513 format = si_translate_dbformat(rtex->db_render_format); 2514 stencil_format = rtex->surface.has_stencil ? 
2515 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID; 2516 2517 assert(format != V_028040_Z_INVALID); 2518 if (format == V_028040_Z_INVALID) 2519 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 2520 2521 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 2522 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 2523 surf->db_htile_data_base = 0; 2524 surf->db_htile_surface = 0; 2525 2526 if (sctx->b.chip_class >= GFX9) { 2527 assert(rtex->surface.u.gfx9.surf_offset == 0); 2528 surf->db_depth_base = rtex->resource.gpu_address >> 8; 2529 surf->db_stencil_base = (rtex->resource.gpu_address + 2530 rtex->surface.u.gfx9.stencil_offset) >> 8; 2531 z_info = S_028038_FORMAT(format) | 2532 S_028038_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)) | 2533 S_028038_SW_MODE(rtex->surface.u.gfx9.surf.swizzle_mode) | 2534 S_028038_MAXMIP(rtex->resource.b.b.last_level); 2535 s_info = S_02803C_FORMAT(stencil_format) | 2536 S_02803C_SW_MODE(rtex->surface.u.gfx9.stencil.swizzle_mode); 2537 surf->db_z_info2 = S_028068_EPITCH(rtex->surface.u.gfx9.surf.epitch); 2538 surf->db_stencil_info2 = S_02806C_EPITCH(rtex->surface.u.gfx9.stencil.epitch); 2539 surf->db_depth_view |= S_028008_MIPID(level); 2540 surf->db_depth_size = S_02801C_X_MAX(rtex->resource.b.b.width0 - 1) | 2541 S_02801C_Y_MAX(rtex->resource.b.b.height0 - 1); 2542 2543 if (si_htile_enabled(rtex, level)) { 2544 z_info |= S_028038_TILE_SURFACE_ENABLE(1) | 2545 S_028038_ALLOW_EXPCLEAR(1); 2546 2547 if (rtex->tc_compatible_htile) { 2548 unsigned max_zplanes = 4; 2549 2550 if (rtex->db_render_format == PIPE_FORMAT_Z16_UNORM && 2551 rtex->resource.b.b.nr_samples > 1) 2552 max_zplanes = 2; 2553 2554 z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) | 2555 S_028038_ITERATE_FLUSH(1); 2556 s_info |= S_02803C_ITERATE_FLUSH(1); 2557 } 2558 2559 if (rtex->surface.has_stencil) { 2560 /* Stencil buffer workaround ported from the SI-CI-VI code. 2561 * See that for explanation. 
2562 */ 2563 s_info |= S_02803C_ALLOW_EXPCLEAR(rtex->resource.b.b.nr_samples <= 1); 2564 } else { 2565 /* Use all HTILE for depth if there's no stencil. */ 2566 s_info |= S_02803C_TILE_STENCIL_DISABLE(1); 2567 } 2568 2569 surf->db_htile_data_base = (rtex->resource.gpu_address + 2570 rtex->htile_offset) >> 8; 2571 surf->db_htile_surface = S_028ABC_FULL_CACHE(1) | 2572 S_028ABC_PIPE_ALIGNED(rtex->surface.u.gfx9.htile.pipe_aligned) | 2573 S_028ABC_RB_ALIGNED(rtex->surface.u.gfx9.htile.rb_aligned); 2574 } 2575 } else { 2576 /* SI-CI-VI */ 2577 struct legacy_surf_level *levelinfo = &rtex->surface.u.legacy.level[level]; 2578 2579 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 2580 2581 surf->db_depth_base = (rtex->resource.gpu_address + 2582 rtex->surface.u.legacy.level[level].offset) >> 8; 2583 surf->db_stencil_base = (rtex->resource.gpu_address + 2584 rtex->surface.u.legacy.stencil_level[level].offset) >> 8; 2585 2586 z_info = S_028040_FORMAT(format) | 2587 S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 2588 s_info = S_028044_FORMAT(stencil_format); 2589 surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile); 2590 2591 if (sctx->b.chip_class >= CIK) { 2592 struct radeon_info *info = &sctx->screen->info; 2593 unsigned index = rtex->surface.u.legacy.tiling_index[level]; 2594 unsigned stencil_index = rtex->surface.u.legacy.stencil_tiling_index[level]; 2595 unsigned macro_index = rtex->surface.u.legacy.macro_tile_index; 2596 unsigned tile_mode = info->si_tile_mode_array[index]; 2597 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; 2598 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index]; 2599 2600 surf->db_depth_info |= 2601 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | 2602 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | 2603 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) | 2604 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) | 2605 
S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) | 2606 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode)); 2607 z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode)); 2608 s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode)); 2609 } else { 2610 unsigned tile_mode_index = si_tile_mode_index(rtex, level, false); 2611 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 2612 tile_mode_index = si_tile_mode_index(rtex, level, true); 2613 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 2614 } 2615 2616 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 2617 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 2618 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * 2619 levelinfo->nblk_y) / 64 - 1); 2620 2621 if (si_htile_enabled(rtex, level)) { 2622 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | 2623 S_028040_ALLOW_EXPCLEAR(1); 2624 2625 if (rtex->surface.has_stencil) { 2626 /* Workaround: For a not yet understood reason, the 2627 * combination of MSAA, fast stencil clear and stencil 2628 * decompress messes with subsequent stencil buffer 2629 * uses. Problem was reproduced on Verde, Bonaire, 2630 * Tonga, and Carrizo. 2631 * 2632 * Disabling EXPCLEAR works around the problem. 2633 * 2634 * Check piglit's arb_texture_multisample-stencil-clear 2635 * test if you want to try changing this. 2636 */ 2637 if (rtex->resource.b.b.nr_samples <= 1) 2638 s_info |= S_028044_ALLOW_EXPCLEAR(1); 2639 } else if (!rtex->tc_compatible_htile) { 2640 /* Use all of the htile_buffer for depth if there's no stencil. 2641 * This must not be set when TC-compatible HTILE is enabled 2642 * due to a hw bug. 
2643 */ 2644 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 2645 } 2646 2647 surf->db_htile_data_base = (rtex->resource.gpu_address + 2648 rtex->htile_offset) >> 8; 2649 surf->db_htile_surface = S_028ABC_FULL_CACHE(1); 2650 2651 if (rtex->tc_compatible_htile) { 2652 surf->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); 2653 2654 if (rtex->resource.b.b.nr_samples <= 1) 2655 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); 2656 else if (rtex->resource.b.b.nr_samples <= 4) 2657 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); 2658 else 2659 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); 2660 } 2661 } 2662 } 2663 2664 surf->db_z_info = z_info; 2665 surf->db_stencil_info = s_info; 2666 2667 surf->depth_initialized = true; 2668 } 2669 2670 void si_update_fb_dirtiness_after_rendering(struct si_context *sctx) 2671 { 2672 if (sctx->decompression_enabled) 2673 return; 2674 2675 if (sctx->framebuffer.state.zsbuf) { 2676 struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; 2677 struct r600_texture *rtex = (struct r600_texture *)surf->texture; 2678 2679 rtex->dirty_level_mask |= 1 << surf->u.tex.level; 2680 2681 if (rtex->surface.has_stencil) 2682 rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; 2683 } 2684 2685 unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask; 2686 while (compressed_cb_mask) { 2687 unsigned i = u_bit_scan(&compressed_cb_mask); 2688 struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 2689 struct r600_texture *rtex = (struct r600_texture*)surf->texture; 2690 2691 if (rtex->fmask.size) 2692 rtex->dirty_level_mask |= 1 << surf->u.tex.level; 2693 if (rtex->dcc_gather_statistics) 2694 rtex->separate_dcc_dirty = true; 2695 } 2696 } 2697 2698 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 2699 { 2700 for (int i = 0; i < state->nr_cbufs; ++i) { 2701 struct r600_surface *surf = NULL; 2702 struct r600_texture *rtex; 2703 2704 if (!state->cbufs[i]) 2705 continue; 2706 surf = (struct 
r600_surface*)state->cbufs[i]; 2707 rtex = (struct r600_texture*)surf->base.texture; 2708 2709 p_atomic_dec(&rtex->framebuffers_bound); 2710 } 2711 } 2712 2713 static void si_set_framebuffer_state(struct pipe_context *ctx, 2714 const struct pipe_framebuffer_state *state) 2715 { 2716 struct si_context *sctx = (struct si_context *)ctx; 2717 struct pipe_constant_buffer constbuf = {0}; 2718 struct r600_surface *surf = NULL; 2719 struct r600_texture *rtex; 2720 bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 2721 unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2722 unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit; 2723 bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf; 2724 bool old_has_stencil = 2725 old_has_zsbuf && 2726 ((struct r600_texture*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil; 2727 bool unbound = false; 2728 int i; 2729 2730 si_update_fb_dirtiness_after_rendering(sctx); 2731 2732 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 2733 if (!sctx->framebuffer.state.cbufs[i]) 2734 continue; 2735 2736 rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture; 2737 if (rtex->dcc_gather_statistics) 2738 vi_separate_dcc_stop_query(ctx, rtex); 2739 } 2740 2741 /* Disable DCC if the formats are incompatible. */ 2742 for (i = 0; i < state->nr_cbufs; i++) { 2743 if (!state->cbufs[i]) 2744 continue; 2745 2746 surf = (struct r600_surface*)state->cbufs[i]; 2747 rtex = (struct r600_texture*)surf->base.texture; 2748 2749 if (!surf->dcc_incompatible) 2750 continue; 2751 2752 /* Since the DCC decompression calls back into set_framebuffer- 2753 * _state, we need to unbind the framebuffer, so that 2754 * vi_separate_dcc_stop_query isn't called twice with the same 2755 * color buffer. 
2756 */ 2757 if (!unbound) { 2758 util_copy_framebuffer_state(&sctx->framebuffer.state, NULL); 2759 unbound = true; 2760 } 2761 2762 if (vi_dcc_enabled(rtex, surf->base.u.tex.level)) 2763 if (!si_texture_disable_dcc(&sctx->b, rtex)) 2764 sctx->b.decompress_dcc(ctx, rtex); 2765 2766 surf->dcc_incompatible = false; 2767 } 2768 2769 /* Only flush TC when changing the framebuffer state, because 2770 * the only client not using TC that can change textures is 2771 * the framebuffer. 2772 * 2773 * Wait for compute shaders because of possible transitions: 2774 * - FB write -> shader read 2775 * - shader write -> FB read 2776 * 2777 * DB caches are flushed on demand (using si_decompress_textures). 2778 * 2779 * When MSAA is enabled, CB and TC caches are flushed on demand 2780 * (after FMASK decompression). Shader write -> FB read transitions 2781 * cannot happen for MSAA textures, because MSAA shader images are 2782 * not supported. 2783 * 2784 * Only flush and wait for CB if there is actually a bound color buffer. 2785 */ 2786 if (sctx->framebuffer.nr_samples <= 1 && 2787 sctx->framebuffer.state.nr_cbufs) 2788 si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 2789 sctx->framebuffer.CB_has_shader_readable_metadata); 2790 2791 sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; 2792 2793 /* u_blitter doesn't invoke depth decompression when it does multiple 2794 * blits in a row, but the only case when it matters for DB is when 2795 * doing generate_mipmap. So here we flush DB manually between 2796 * individual generate_mipmap blits. 2797 * Note that lower mipmap levels aren't compressed. 
2798 */ 2799 if (sctx->generate_mipmap_for_depth) { 2800 si_make_DB_shader_coherent(sctx, 1, false, 2801 sctx->framebuffer.DB_has_shader_readable_metadata); 2802 } else if (sctx->b.chip_class == GFX9) { 2803 /* It appears that DB metadata "leaks" in a sequence of: 2804 * - depth clear 2805 * - DCC decompress for shader image writes (with DB disabled) 2806 * - render with DEPTH_BEFORE_SHADER=1 2807 * Flushing DB metadata works around the problem. 2808 */ 2809 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META; 2810 } 2811 2812 /* Take the maximum of the old and new count. If the new count is lower, 2813 * dirtying is needed to disable the unbound colorbuffers. 2814 */ 2815 sctx->framebuffer.dirty_cbufs |= 2816 (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 2817 sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 2818 2819 si_dec_framebuffer_counters(&sctx->framebuffer.state); 2820 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2821 2822 sctx->framebuffer.colorbuf_enabled_4bit = 0; 2823 sctx->framebuffer.spi_shader_col_format = 0; 2824 sctx->framebuffer.spi_shader_col_format_alpha = 0; 2825 sctx->framebuffer.spi_shader_col_format_blend = 0; 2826 sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 2827 sctx->framebuffer.color_is_int8 = 0; 2828 sctx->framebuffer.color_is_int10 = 0; 2829 2830 sctx->framebuffer.compressed_cb_mask = 0; 2831 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2832 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2833 sctx->framebuffer.any_dst_linear = false; 2834 sctx->framebuffer.CB_has_shader_readable_metadata = false; 2835 sctx->framebuffer.DB_has_shader_readable_metadata = false; 2836 2837 for (i = 0; i < state->nr_cbufs; i++) { 2838 if (!state->cbufs[i]) 2839 continue; 2840 2841 surf = (struct r600_surface*)state->cbufs[i]; 2842 rtex = (struct r600_texture*)surf->base.texture; 2843 2844 if (!surf->color_initialized) { 
2845 si_initialize_color_surface(sctx, surf); 2846 } 2847 2848 sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4); 2849 sctx->framebuffer.spi_shader_col_format |= 2850 surf->spi_shader_col_format << (i * 4); 2851 sctx->framebuffer.spi_shader_col_format_alpha |= 2852 surf->spi_shader_col_format_alpha << (i * 4); 2853 sctx->framebuffer.spi_shader_col_format_blend |= 2854 surf->spi_shader_col_format_blend << (i * 4); 2855 sctx->framebuffer.spi_shader_col_format_blend_alpha |= 2856 surf->spi_shader_col_format_blend_alpha << (i * 4); 2857 2858 if (surf->color_is_int8) 2859 sctx->framebuffer.color_is_int8 |= 1 << i; 2860 if (surf->color_is_int10) 2861 sctx->framebuffer.color_is_int10 |= 1 << i; 2862 2863 if (rtex->fmask.size) { 2864 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2865 } 2866 2867 if (rtex->surface.is_linear) 2868 sctx->framebuffer.any_dst_linear = true; 2869 2870 if (vi_dcc_enabled(rtex, surf->base.u.tex.level)) 2871 sctx->framebuffer.CB_has_shader_readable_metadata = true; 2872 2873 si_context_add_resource_size(ctx, surf->base.texture); 2874 2875 p_atomic_inc(&rtex->framebuffers_bound); 2876 2877 if (rtex->dcc_gather_statistics) { 2878 /* Dirty tracking must be enabled for DCC usage analysis. 
*/ 2879 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2880 vi_separate_dcc_start_query(ctx, rtex); 2881 } 2882 } 2883 2884 struct r600_texture *zstex = NULL; 2885 2886 if (state->zsbuf) { 2887 surf = (struct r600_surface*)state->zsbuf; 2888 zstex = (struct r600_texture*)surf->base.texture; 2889 2890 if (!surf->depth_initialized) { 2891 si_init_depth_surface(sctx, surf); 2892 } 2893 2894 if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level)) 2895 sctx->framebuffer.DB_has_shader_readable_metadata = true; 2896 2897 si_context_add_resource_size(ctx, surf->base.texture); 2898 } 2899 2900 si_update_poly_offset_state(sctx); 2901 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 2902 si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); 2903 2904 if (sctx->screen->dpbb_allowed) 2905 si_mark_atom_dirty(sctx, &sctx->dpbb_state); 2906 2907 if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 2908 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2909 2910 if (sctx->screen->has_out_of_order_rast && 2911 (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit || 2912 !!sctx->framebuffer.state.zsbuf != old_has_zsbuf || 2913 (zstex && zstex->surface.has_stencil != old_has_stencil))) 2914 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2915 2916 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2917 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2918 si_mark_atom_dirty(sctx, &sctx->db_render_state); 2919 2920 /* Set sample locations as fragment shader constants. 
*/ 2921 switch (sctx->framebuffer.nr_samples) { 2922 case 1: 2923 constbuf.user_buffer = sctx->sample_locations_1x; 2924 break; 2925 case 2: 2926 constbuf.user_buffer = sctx->sample_locations_2x; 2927 break; 2928 case 4: 2929 constbuf.user_buffer = sctx->sample_locations_4x; 2930 break; 2931 case 8: 2932 constbuf.user_buffer = sctx->sample_locations_8x; 2933 break; 2934 case 16: 2935 constbuf.user_buffer = sctx->sample_locations_16x; 2936 break; 2937 default: 2938 R600_ERR("Requested an invalid number of samples %i.\n", 2939 sctx->framebuffer.nr_samples); 2940 assert(0); 2941 } 2942 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 2943 si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 2944 2945 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 2946 } 2947 2948 sctx->do_update_shaders = true; 2949 2950 if (!sctx->decompression_enabled) { 2951 /* Prevent textures decompression when the framebuffer state 2952 * changes come from the decompression passes themselves. 2953 */ 2954 sctx->need_check_render_feedback = true; 2955 } 2956 } 2957 2958 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 2959 { 2960 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2961 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2962 unsigned i, nr_cbufs = state->nr_cbufs; 2963 struct r600_texture *tex = NULL; 2964 struct r600_surface *cb = NULL; 2965 unsigned cb_color_info = 0; 2966 2967 /* Colorbuffers. 
*/ 2968 for (i = 0; i < nr_cbufs; i++) { 2969 uint64_t cb_color_base, cb_color_fmask, cb_dcc_base; 2970 unsigned cb_color_attrib; 2971 2972 if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 2973 continue; 2974 2975 cb = (struct r600_surface*)state->cbufs[i]; 2976 if (!cb) { 2977 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 2978 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 2979 continue; 2980 } 2981 2982 tex = (struct r600_texture *)cb->base.texture; 2983 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2984 &tex->resource, RADEON_USAGE_READWRITE, 2985 tex->resource.b.b.nr_samples > 1 ? 2986 RADEON_PRIO_COLOR_BUFFER_MSAA : 2987 RADEON_PRIO_COLOR_BUFFER); 2988 2989 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 2990 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2991 tex->cmask_buffer, RADEON_USAGE_READWRITE, 2992 RADEON_PRIO_CMASK); 2993 } 2994 2995 if (tex->dcc_separate_buffer) 2996 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2997 tex->dcc_separate_buffer, 2998 RADEON_USAGE_READWRITE, 2999 RADEON_PRIO_DCC); 3000 3001 /* Compute mutable surface parameters. */ 3002 cb_color_base = tex->resource.gpu_address >> 8; 3003 cb_color_fmask = 0; 3004 cb_dcc_base = 0; 3005 cb_color_info = cb->cb_color_info | tex->cb_color_info; 3006 cb_color_attrib = cb->cb_color_attrib; 3007 3008 if (tex->fmask.size) { 3009 cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8; 3010 cb_color_fmask |= tex->fmask.tile_swizzle; 3011 } 3012 3013 /* Set up DCC. */ 3014 if (vi_dcc_enabled(tex, cb->base.u.tex.level)) { 3015 bool is_msaa_resolve_dst = state->cbufs[0] && 3016 state->cbufs[0]->texture->nr_samples > 1 && 3017 state->cbufs[1] == &cb->base && 3018 state->cbufs[1]->texture->nr_samples <= 1; 3019 3020 if (!is_msaa_resolve_dst) 3021 cb_color_info |= S_028C70_DCC_ENABLE(1); 3022 3023 cb_dcc_base = ((!tex->dcc_separate_buffer ? 
tex->resource.gpu_address : 0) + 3024 tex->dcc_offset) >> 8; 3025 cb_dcc_base |= tex->surface.tile_swizzle; 3026 } 3027 3028 if (sctx->b.chip_class >= GFX9) { 3029 struct gfx9_surf_meta_flags meta; 3030 3031 if (tex->dcc_offset) 3032 meta = tex->surface.u.gfx9.dcc; 3033 else 3034 meta = tex->surface.u.gfx9.cmask; 3035 3036 /* Set mutable surface parameters. */ 3037 cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 3038 cb_color_base |= tex->surface.tile_swizzle; 3039 if (!tex->fmask.size) 3040 cb_color_fmask = cb_color_base; 3041 cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) | 3042 S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) | 3043 S_028C74_RB_ALIGNED(meta.rb_aligned) | 3044 S_028C74_PIPE_ALIGNED(meta.pipe_aligned); 3045 3046 radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 15); 3047 radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */ 3048 radeon_emit(cs, cb_color_base >> 32); /* CB_COLOR0_BASE_EXT */ 3049 radeon_emit(cs, cb->cb_color_attrib2); /* CB_COLOR0_ATTRIB2 */ 3050 radeon_emit(cs, cb->cb_color_view); /* CB_COLOR0_VIEW */ 3051 radeon_emit(cs, cb_color_info); /* CB_COLOR0_INFO */ 3052 radeon_emit(cs, cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3053 radeon_emit(cs, cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3054 radeon_emit(cs, tex->cmask.base_address_reg); /* CB_COLOR0_CMASK */ 3055 radeon_emit(cs, tex->cmask.base_address_reg >> 32); /* CB_COLOR0_CMASK_BASE_EXT */ 3056 radeon_emit(cs, cb_color_fmask); /* CB_COLOR0_FMASK */ 3057 radeon_emit(cs, cb_color_fmask >> 32); /* CB_COLOR0_FMASK_BASE_EXT */ 3058 radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3059 radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3060 radeon_emit(cs, cb_dcc_base); /* CB_COLOR0_DCC_BASE */ 3061 radeon_emit(cs, cb_dcc_base >> 32); /* CB_COLOR0_DCC_BASE_EXT */ 3062 3063 radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i * 4, 3064 
S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch)); 3065 } else { 3066 /* Compute mutable surface parameters (SI-CI-VI). */ 3067 const struct legacy_surf_level *level_info = 3068 &tex->surface.u.legacy.level[cb->base.u.tex.level]; 3069 unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 3070 unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 3071 3072 cb_color_base += level_info->offset >> 8; 3073 /* Only macrotiled modes can set tile swizzle. */ 3074 if (level_info->mode == RADEON_SURF_MODE_2D) 3075 cb_color_base |= tex->surface.tile_swizzle; 3076 3077 if (!tex->fmask.size) 3078 cb_color_fmask = cb_color_base; 3079 if (cb_dcc_base) 3080 cb_dcc_base += level_info->dcc_offset >> 8; 3081 3082 pitch_tile_max = level_info->nblk_x / 8 - 1; 3083 slice_tile_max = level_info->nblk_x * 3084 level_info->nblk_y / 64 - 1; 3085 tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 3086 3087 cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); 3088 cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 3089 cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 3090 3091 if (tex->fmask.size) { 3092 if (sctx->b.chip_class >= CIK) 3093 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1); 3094 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index); 3095 cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max); 3096 } else { 3097 /* This must be set for fast clear to work without FMASK. */ 3098 if (sctx->b.chip_class >= CIK) 3099 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 3100 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 3101 cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 3102 } 3103 3104 radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 3105 sctx->b.chip_class >= VI ? 
14 : 13); 3106 radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */ 3107 radeon_emit(cs, cb_color_pitch); /* CB_COLOR0_PITCH */ 3108 radeon_emit(cs, cb_color_slice); /* CB_COLOR0_SLICE */ 3109 radeon_emit(cs, cb->cb_color_view); /* CB_COLOR0_VIEW */ 3110 radeon_emit(cs, cb_color_info); /* CB_COLOR0_INFO */ 3111 radeon_emit(cs, cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3112 radeon_emit(cs, cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3113 radeon_emit(cs, tex->cmask.base_address_reg); /* CB_COLOR0_CMASK */ 3114 radeon_emit(cs, tex->cmask.slice_tile_max); /* CB_COLOR0_CMASK_SLICE */ 3115 radeon_emit(cs, cb_color_fmask); /* CB_COLOR0_FMASK */ 3116 radeon_emit(cs, cb_color_fmask_slice); /* CB_COLOR0_FMASK_SLICE */ 3117 radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3118 radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3119 3120 if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */ 3121 radeon_emit(cs, cb_dcc_base); 3122 } 3123 } 3124 for (; i < 8 ; i++) 3125 if (sctx->framebuffer.dirty_cbufs & (1 << i)) 3126 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 3127 3128 /* ZS buffer. */ 3129 if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 3130 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 3131 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 3132 3133 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 3134 &rtex->resource, RADEON_USAGE_READWRITE, 3135 zb->base.texture->nr_samples > 1 ? 
3136 RADEON_PRIO_DEPTH_BUFFER_MSAA : 3137 RADEON_PRIO_DEPTH_BUFFER); 3138 3139 if (sctx->b.chip_class >= GFX9) { 3140 radeon_set_context_reg_seq(cs, R_028014_DB_HTILE_DATA_BASE, 3); 3141 radeon_emit(cs, zb->db_htile_data_base); /* DB_HTILE_DATA_BASE */ 3142 radeon_emit(cs, zb->db_htile_data_base >> 32); /* DB_HTILE_DATA_BASE_HI */ 3143 radeon_emit(cs, zb->db_depth_size); /* DB_DEPTH_SIZE */ 3144 3145 radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10); 3146 radeon_emit(cs, zb->db_z_info | /* DB_Z_INFO */ 3147 S_028038_ZRANGE_PRECISION(rtex->depth_clear_value != 0)); 3148 radeon_emit(cs, zb->db_stencil_info); /* DB_STENCIL_INFO */ 3149 radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */ 3150 radeon_emit(cs, zb->db_depth_base >> 32); /* DB_Z_READ_BASE_HI */ 3151 radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3152 radeon_emit(cs, zb->db_stencil_base >> 32); /* DB_STENCIL_READ_BASE_HI */ 3153 radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3154 radeon_emit(cs, zb->db_depth_base >> 32); /* DB_Z_WRITE_BASE_HI */ 3155 radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3156 radeon_emit(cs, zb->db_stencil_base >> 32); /* DB_STENCIL_WRITE_BASE_HI */ 3157 3158 radeon_set_context_reg_seq(cs, R_028068_DB_Z_INFO2, 2); 3159 radeon_emit(cs, zb->db_z_info2); /* DB_Z_INFO2 */ 3160 radeon_emit(cs, zb->db_stencil_info2); /* DB_STENCIL_INFO2 */ 3161 } else { 3162 radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 3163 3164 radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 3165 radeon_emit(cs, zb->db_depth_info); /* DB_DEPTH_INFO */ 3166 radeon_emit(cs, zb->db_z_info | /* DB_Z_INFO */ 3167 S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0)); 3168 radeon_emit(cs, zb->db_stencil_info); /* DB_STENCIL_INFO */ 3169 radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */ 3170 radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3171 radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE 
*/ 3172 radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3173 radeon_emit(cs, zb->db_depth_size); /* DB_DEPTH_SIZE */ 3174 radeon_emit(cs, zb->db_depth_slice); /* DB_DEPTH_SLICE */ 3175 } 3176 3177 radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); 3178 radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */ 3179 radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */ 3180 3181 radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 3182 radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 3183 } else if (sctx->framebuffer.dirty_zsbuf) { 3184 if (sctx->b.chip_class >= GFX9) 3185 radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 2); 3186 else 3187 radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 3188 3189 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */ 3190 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ 3191 } 3192 3193 /* Framebuffer dimensions. */ 3194 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 3195 radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 3196 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 3197 3198 if (sctx->screen->dfsm_allowed) { 3199 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 3200 radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 3201 } 3202 3203 sctx->framebuffer.dirty_cbufs = 0; 3204 sctx->framebuffer.dirty_zsbuf = false; 3205 } 3206 3207 static void si_emit_msaa_sample_locs(struct si_context *sctx, 3208 struct r600_atom *atom) 3209 { 3210 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 3211 unsigned nr_samples = sctx->framebuffer.nr_samples; 3212 bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug; 3213 3214 /* Smoothing (only possible with nr_samples == 1) uses the same 3215 * sample locations as the MSAA it simulates. 
3216 */ 3217 if (nr_samples <= 1 && sctx->smoothing_enabled) 3218 nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; 3219 3220 /* On Polaris, the small primitive filter uses the sample locations 3221 * even when MSAA is off, so we need to make sure they're set to 0. 3222 */ 3223 if (has_msaa_sample_loc_bug) 3224 nr_samples = MAX2(nr_samples, 1); 3225 3226 if (nr_samples != sctx->msaa_sample_locs.nr_samples) { 3227 sctx->msaa_sample_locs.nr_samples = nr_samples; 3228 si_emit_sample_locations(cs, nr_samples); 3229 } 3230 3231 if (sctx->b.family >= CHIP_POLARIS10) { 3232 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 3233 unsigned small_prim_filter_cntl = 3234 S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 3235 /* line bug */ 3236 S_028830_LINE_FILTER_DISABLE(sctx->b.family <= CHIP_POLARIS12); 3237 3238 /* The alternative of setting sample locations to 0 would 3239 * require a DB flush to avoid Z errors, see 3240 * https://bugs.freedesktop.org/show_bug.cgi?id=96908 3241 */ 3242 if (has_msaa_sample_loc_bug && 3243 sctx->framebuffer.nr_samples > 1 && 3244 rs && !rs->multisample_enable) 3245 small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; 3246 3247 radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 3248 small_prim_filter_cntl); 3249 } 3250 } 3251 3252 static bool si_out_of_order_rasterization(struct si_context *sctx) 3253 { 3254 struct si_state_blend *blend = sctx->queued.named.blend; 3255 struct si_state_dsa *dsa = sctx->queued.named.dsa; 3256 3257 if (!sctx->screen->has_out_of_order_rast) 3258 return false; 3259 3260 unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit; 3261 3262 if (blend) { 3263 colormask &= blend->cb_target_enabled_4bit; 3264 } else { 3265 colormask = 0; 3266 } 3267 3268 /* Conservative: No logic op. 
*/ 3269 if (colormask && blend->logicop_enable) 3270 return false; 3271 3272 struct si_dsa_order_invariance dsa_order_invariant = { 3273 .zs = true, .pass_set = true, .pass_last = false 3274 }; 3275 3276 if (sctx->framebuffer.state.zsbuf) { 3277 struct r600_texture *zstex = 3278 (struct r600_texture*)sctx->framebuffer.state.zsbuf->texture; 3279 bool has_stencil = zstex->surface.has_stencil; 3280 dsa_order_invariant = dsa->order_invariance[has_stencil]; 3281 if (!dsa_order_invariant.zs) 3282 return false; 3283 3284 /* The set of PS invocations is always order invariant, 3285 * except when early Z/S tests are requested. */ 3286 if (sctx->ps_shader.cso && 3287 sctx->ps_shader.cso->info.writes_memory && 3288 sctx->ps_shader.cso->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] && 3289 !dsa_order_invariant.pass_set) 3290 return false; 3291 3292 if (sctx->b.num_perfect_occlusion_queries != 0 && 3293 !dsa_order_invariant.pass_set) 3294 return false; 3295 } 3296 3297 if (!colormask) 3298 return true; 3299 3300 unsigned blendmask = colormask & blend->blend_enable_4bit; 3301 3302 if (blendmask) { 3303 /* Only commutative blending. */ 3304 if (blendmask & ~blend->commutative_4bit) 3305 return false; 3306 3307 if (!dsa_order_invariant.pass_set) 3308 return false; 3309 } 3310 3311 if (colormask & ~blendmask) { 3312 if (!dsa_order_invariant.pass_last) 3313 return false; 3314 } 3315 3316 return true; 3317 } 3318 3319 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) 3320 { 3321 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 3322 unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes; 3323 /* 33% faster rendering to linear color buffers */ 3324 bool dst_is_linear = sctx->framebuffer.any_dst_linear; 3325 bool out_of_order_rast = si_out_of_order_rasterization(sctx); 3326 unsigned sc_mode_cntl_1 = 3327 S_028A4C_WALK_SIZE(dst_is_linear) | 3328 S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | 3329 S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 
2 : 3) | 3330 S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) | 3331 S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) | 3332 /* always 1: */ 3333 S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | 3334 S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 3335 S_028A4C_TILE_WALK_ORDER_ENABLE(1) | 3336 S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 3337 S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 3338 S_028A4C_FORCE_EOV_REZ_ENABLE(1); 3339 3340 int setup_samples = sctx->framebuffer.nr_samples > 1 ? sctx->framebuffer.nr_samples : 3341 sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0; 3342 3343 /* Required by OpenGL line rasterization. 3344 * 3345 * TODO: We should also enable perpendicular endcaps for AA lines, 3346 * but that requires implementing line stippling in the pixel 3347 * shader. SC can only do line stippling with axis-aligned 3348 * endcaps. 3349 */ 3350 unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1); 3351 3352 if (setup_samples > 1) { 3353 /* distance from the pixel center, indexed by log2(nr_samples) */ 3354 static unsigned max_dist[] = { 3355 0, /* unused */ 3356 4, /* 2x MSAA */ 3357 6, /* 4x MSAA */ 3358 7, /* 8x MSAA */ 3359 8, /* 16x MSAA */ 3360 }; 3361 unsigned log_samples = util_logbase2(setup_samples); 3362 unsigned log_ps_iter_samples = 3363 util_logbase2(util_next_power_of_two(sctx->ps_iter_samples)); 3364 3365 radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2); 3366 radeon_emit(cs, sc_line_cntl | 3367 S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */ 3368 radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 3369 S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | 3370 S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */ 3371 3372 if (sctx->framebuffer.nr_samples > 1) { 3373 radeon_set_context_reg(cs, R_028804_DB_EQAA, 3374 S_028804_MAX_ANCHOR_SAMPLES(log_samples) | 3375 S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | 3376 S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 3377 
S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) | 3378 S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 3379 S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); 3380 radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, 3381 S_028A4C_PS_ITER_SAMPLE(sctx->ps_iter_samples > 1) | 3382 sc_mode_cntl_1); 3383 } else if (sctx->smoothing_enabled) { 3384 radeon_set_context_reg(cs, R_028804_DB_EQAA, 3385 S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 3386 S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) | 3387 S_028804_OVERRASTERIZATION_AMOUNT(log_samples)); 3388 radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, 3389 sc_mode_cntl_1); 3390 } 3391 } else { 3392 radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2); 3393 radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */ 3394 radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */ 3395 3396 radeon_set_context_reg(cs, R_028804_DB_EQAA, 3397 S_028804_HIGH_QUALITY_INTERSECTIONS(1) | 3398 S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); 3399 radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, 3400 sc_mode_cntl_1); 3401 } 3402 3403 /* GFX9: Flush DFSM when the AA mode changes. */ 3404 if (sctx->screen->dfsm_allowed) { 3405 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 3406 radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); 3407 } 3408 } 3409 3410 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 3411 { 3412 struct si_context *sctx = (struct si_context *)ctx; 3413 3414 if (sctx->ps_iter_samples == min_samples) 3415 return; 3416 3417 sctx->ps_iter_samples = min_samples; 3418 sctx->do_update_shaders = true; 3419 3420 if (sctx->framebuffer.nr_samples > 1) 3421 si_mark_atom_dirty(sctx, &sctx->msaa_config); 3422 if (sctx->screen->dpbb_allowed) 3423 si_mark_atom_dirty(sctx, &sctx->dpbb_state); 3424 } 3425 3426 /* 3427 * Samplers 3428 */ 3429 3430 /** 3431 * Build the sampler view descriptor for a buffer texture. 
3432 * @param state 256-bit descriptor; only the high 128 bits are filled in 3433 */ 3434 void 3435 si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, 3436 enum pipe_format format, 3437 unsigned offset, unsigned size, 3438 uint32_t *state) 3439 { 3440 const struct util_format_description *desc; 3441 int first_non_void; 3442 unsigned stride; 3443 unsigned num_records; 3444 unsigned num_format, data_format; 3445 3446 desc = util_format_description(format); 3447 first_non_void = util_format_get_first_non_void_channel(format); 3448 stride = desc->block.bits / 8; 3449 num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void); 3450 data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void); 3451 3452 num_records = size / stride; 3453 num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); 3454 3455 /* The NUM_RECORDS field has a different meaning depending on the chip, 3456 * instruction type, STRIDE, and SWIZZLE_ENABLE. 3457 * 3458 * SI-CIK: 3459 * - If STRIDE == 0, it's in byte units. 3460 * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN. 3461 * 3462 * VI: 3463 * - For SMEM and STRIDE == 0, it's in byte units. 3464 * - For SMEM and STRIDE != 0, it's in units of STRIDE. 3465 * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units. 3466 * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE. 3467 * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_- 3468 * ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when 3469 * using SMEM. This can be done in the shader by clearing STRIDE with s_and. 3470 * That way the same descriptor can be used by both SMEM and VMEM. 3471 * 3472 * GFX9: 3473 * - For SMEM and STRIDE == 0, it's in byte units. 3474 * - For SMEM and STRIDE != 0, it's in units of STRIDE. 3475 * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. 
3476 * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. 3477 */ 3478 if (screen->info.chip_class >= GFX9) 3479 /* When vindex == 0, LLVM sets IDXEN = 0, thus changing units 3480 * from STRIDE to bytes. This works around it by setting 3481 * NUM_RECORDS to at least the size of one element, so that 3482 * the first element is readable when IDXEN == 0. 3483 * 3484 * TODO: Fix this in LLVM, but do we need a new intrinsic where 3485 * IDXEN is enforced? 3486 */ 3487 num_records = num_records ? MAX2(num_records, stride) : 0; 3488 else if (screen->info.chip_class == VI) 3489 num_records *= stride; 3490 3491 state[4] = 0; 3492 state[5] = S_008F04_STRIDE(stride); 3493 state[6] = num_records; 3494 state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3495 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3496 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3497 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 3498 S_008F0C_NUM_FORMAT(num_format) | 3499 S_008F0C_DATA_FORMAT(data_format); 3500 } 3501 3502 static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4]) 3503 { 3504 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 3505 3506 if (swizzle[3] == PIPE_SWIZZLE_X) { 3507 /* For the pre-defined border color values (white, opaque 3508 * black, transparent black), the only thing that matters is 3509 * that the alpha channel winds up in the correct place 3510 * (because the RGB channels are all the same) so either of 3511 * these enumerations will work. 
3512 */ 3513 if (swizzle[2] == PIPE_SWIZZLE_Y) 3514 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX; 3515 else 3516 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ; 3517 } else if (swizzle[0] == PIPE_SWIZZLE_X) { 3518 if (swizzle[1] == PIPE_SWIZZLE_Y) 3519 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 3520 else 3521 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ; 3522 } else if (swizzle[1] == PIPE_SWIZZLE_X) { 3523 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ; 3524 } else if (swizzle[2] == PIPE_SWIZZLE_X) { 3525 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW; 3526 } 3527 3528 return bc_swizzle; 3529 } 3530 3531 /** 3532 * Build the sampler view descriptor for a texture. 3533 */ 3534 void 3535 si_make_texture_descriptor(struct si_screen *screen, 3536 struct r600_texture *tex, 3537 bool sampler, 3538 enum pipe_texture_target target, 3539 enum pipe_format pipe_format, 3540 const unsigned char state_swizzle[4], 3541 unsigned first_level, unsigned last_level, 3542 unsigned first_layer, unsigned last_layer, 3543 unsigned width, unsigned height, unsigned depth, 3544 uint32_t *state, 3545 uint32_t *fmask_state) 3546 { 3547 struct pipe_resource *res = &tex->resource.b.b; 3548 const struct util_format_description *desc; 3549 unsigned char swizzle[4]; 3550 int first_non_void; 3551 unsigned num_format, data_format, type; 3552 uint64_t va; 3553 3554 desc = util_format_description(pipe_format); 3555 3556 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 3557 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 3558 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 3559 const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 3560 3561 switch (pipe_format) { 3562 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3563 case PIPE_FORMAT_X32_S8X24_UINT: 3564 case PIPE_FORMAT_X8Z24_UNORM: 3565 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 3566 break; 3567 case PIPE_FORMAT_X24S8_UINT: 3568 /* 3569 * X24S8 is implemented as an 8_8_8_8 data format, to 3570 * fix texture gathers. 
This affects at least 3571 * GL45-CTS.texture_cube_map_array.sampling on VI. 3572 */ 3573 util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 3574 break; 3575 default: 3576 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 3577 } 3578 } else { 3579 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 3580 } 3581 3582 first_non_void = util_format_get_first_non_void_channel(pipe_format); 3583 3584 switch (pipe_format) { 3585 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3586 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 3587 break; 3588 default: 3589 if (first_non_void < 0) { 3590 if (util_format_is_compressed(pipe_format)) { 3591 switch (pipe_format) { 3592 case PIPE_FORMAT_DXT1_SRGB: 3593 case PIPE_FORMAT_DXT1_SRGBA: 3594 case PIPE_FORMAT_DXT3_SRGBA: 3595 case PIPE_FORMAT_DXT5_SRGBA: 3596 case PIPE_FORMAT_BPTC_SRGBA: 3597 case PIPE_FORMAT_ETC2_SRGB8: 3598 case PIPE_FORMAT_ETC2_SRGB8A1: 3599 case PIPE_FORMAT_ETC2_SRGBA8: 3600 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 3601 break; 3602 case PIPE_FORMAT_RGTC1_SNORM: 3603 case PIPE_FORMAT_LATC1_SNORM: 3604 case PIPE_FORMAT_RGTC2_SNORM: 3605 case PIPE_FORMAT_LATC2_SNORM: 3606 case PIPE_FORMAT_ETC2_R11_SNORM: 3607 case PIPE_FORMAT_ETC2_RG11_SNORM: 3608 /* implies float, so use SNORM/UNORM to determine 3609 whether data is signed or not */ 3610 case PIPE_FORMAT_BPTC_RGB_FLOAT: 3611 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 3612 break; 3613 default: 3614 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 3615 break; 3616 } 3617 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 3618 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 3619 } else { 3620 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 3621 } 3622 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 3623 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 3624 } else { 3625 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 3626 3627 switch (desc->channel[first_non_void].type) { 3628 case UTIL_FORMAT_TYPE_FLOAT: 3629 num_format = 
V_008F14_IMG_NUM_FORMAT_FLOAT; 3630 break; 3631 case UTIL_FORMAT_TYPE_SIGNED: 3632 if (desc->channel[first_non_void].normalized) 3633 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 3634 else if (desc->channel[first_non_void].pure_integer) 3635 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 3636 else 3637 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 3638 break; 3639 case UTIL_FORMAT_TYPE_UNSIGNED: 3640 if (desc->channel[first_non_void].normalized) 3641 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 3642 else if (desc->channel[first_non_void].pure_integer) 3643 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 3644 else 3645 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 3646 } 3647 } 3648 } 3649 3650 data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void); 3651 if (data_format == ~0) { 3652 data_format = 0; 3653 } 3654 3655 /* S8 with Z32 HTILE needs a special format. */ 3656 if (screen->info.chip_class >= GFX9 && 3657 pipe_format == PIPE_FORMAT_S8_UINT && 3658 tex->tc_compatible_htile) 3659 data_format = V_008F14_IMG_DATA_FORMAT_S8_32; 3660 3661 if (!sampler && 3662 (res->target == PIPE_TEXTURE_CUBE || 3663 res->target == PIPE_TEXTURE_CUBE_ARRAY || 3664 (screen->info.chip_class <= VI && 3665 res->target == PIPE_TEXTURE_3D))) { 3666 /* For the purpose of shader images, treat cube maps and 3D 3667 * textures as 2D arrays. For 3D textures, the address 3668 * calculations for mipmaps are different, so we rely on the 3669 * caller to effectively disable mipmaps. 
3670 */ 3671 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 3672 3673 assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); 3674 } else { 3675 type = si_tex_dim(screen, tex, target, res->nr_samples); 3676 } 3677 3678 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 3679 height = 1; 3680 depth = res->array_size; 3681 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || 3682 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 3683 if (sampler || res->target != PIPE_TEXTURE_3D) 3684 depth = res->array_size; 3685 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 3686 depth = res->array_size / 6; 3687 3688 state[0] = 0; 3689 state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) | 3690 S_008F14_NUM_FORMAT_GFX6(num_format)); 3691 state[2] = (S_008F18_WIDTH(width - 1) | 3692 S_008F18_HEIGHT(height - 1) | 3693 S_008F18_PERF_MOD(4)); 3694 state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 3695 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 3696 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 3697 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 3698 S_008F1C_BASE_LEVEL(res->nr_samples > 1 ? 3699 0 : first_level) | 3700 S_008F1C_LAST_LEVEL(res->nr_samples > 1 ? 3701 util_logbase2(res->nr_samples) : 3702 last_level) | 3703 S_008F1C_TYPE(type)); 3704 state[4] = 0; 3705 state[5] = S_008F24_BASE_ARRAY(first_layer); 3706 state[6] = 0; 3707 state[7] = 0; 3708 3709 if (screen->info.chip_class >= GFX9) { 3710 unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle); 3711 3712 /* Depth is the last accessible layer on Gfx9. 3713 * The hw doesn't need to know the total number of layers. 3714 */ 3715 if (type == V_008F1C_SQ_RSRC_IMG_3D) 3716 state[4] |= S_008F20_DEPTH(depth - 1); 3717 else 3718 state[4] |= S_008F20_DEPTH(last_layer); 3719 3720 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle); 3721 state[5] |= S_008F24_MAX_MIP(res->nr_samples > 1 ?
3722 util_logbase2(res->nr_samples) : 3723 tex->resource.b.b.last_level); 3724 } else { 3725 state[3] |= S_008F1C_POW2_PAD(res->last_level > 0); 3726 state[4] |= S_008F20_DEPTH(depth - 1); 3727 state[5] |= S_008F24_LAST_ARRAY(last_layer); 3728 } 3729 3730 if (tex->dcc_offset) { 3731 unsigned swap = si_translate_colorswap(pipe_format, false); 3732 3733 state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1); 3734 } else { 3735 /* The last dword is unused by hw. The shader uses it to clear 3736 * bits in the first dword of sampler state. 3737 */ 3738 if (screen->info.chip_class <= CIK && res->nr_samples <= 1) { 3739 if (first_level == last_level) 3740 state[7] = C_008F30_MAX_ANISO_RATIO; 3741 else 3742 state[7] = 0xffffffff; 3743 } 3744 } 3745 3746 /* Initialize the sampler view for FMASK. */ 3747 if (tex->fmask.size) { 3748 uint32_t data_format, num_format; 3749 3750 va = tex->resource.gpu_address + tex->fmask.offset; 3751 3752 if (screen->info.chip_class >= GFX9) { 3753 data_format = V_008F14_IMG_DATA_FORMAT_FMASK; 3754 switch (res->nr_samples) { 3755 case 2: 3756 num_format = V_008F14_IMG_FMASK_8_2_2; 3757 break; 3758 case 4: 3759 num_format = V_008F14_IMG_FMASK_8_4_4; 3760 break; 3761 case 8: 3762 num_format = V_008F14_IMG_FMASK_32_8_8; 3763 break; 3764 default: 3765 unreachable("invalid nr_samples"); 3766 } 3767 } else { 3768 switch (res->nr_samples) { 3769 case 2: 3770 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 3771 break; 3772 case 4: 3773 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 3774 break; 3775 case 8: 3776 data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 3777 break; 3778 default: 3779 unreachable("invalid nr_samples"); 3780 } 3781 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 3782 } 3783 3784 fmask_state[0] = (va >> 8) | tex->fmask.tile_swizzle; 3785 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 3786 S_008F14_DATA_FORMAT_GFX6(data_format) | 3787 S_008F14_NUM_FORMAT_GFX6(num_format); 3788 fmask_state[2] = S_008F18_WIDTH(width - 1) | 
3789 S_008F18_HEIGHT(height - 1); 3790 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 3791 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 3792 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 3793 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 3794 S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0)); 3795 fmask_state[4] = 0; 3796 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); 3797 fmask_state[6] = 0; 3798 fmask_state[7] = 0; 3799 3800 if (screen->info.chip_class >= GFX9) { 3801 fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode); 3802 fmask_state[4] |= S_008F20_DEPTH(last_layer) | 3803 S_008F20_PITCH_GFX9(tex->surface.u.gfx9.fmask.epitch); 3804 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned) | 3805 S_008F24_META_RB_ALIGNED(tex->surface.u.gfx9.cmask.rb_aligned); 3806 } else { 3807 fmask_state[3] |= S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index); 3808 fmask_state[4] |= S_008F20_DEPTH(depth - 1) | 3809 S_008F20_PITCH_GFX6(tex->fmask.pitch_in_pixels - 1); 3810 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); 3811 } 3812 } 3813 } 3814 3815 /** 3816 * Create a sampler view. 
3817 * 3818 * @param ctx context 3819 * @param texture texture 3820 * @param state sampler view template 3821 * @param width0 width0 override (for compressed textures as int) 3822 * @param height0 height0 override (for compressed textures as int) 3823 * @param force_level set the base address to the level (for compressed textures) 3824 */ 3825 struct pipe_sampler_view * 3826 si_create_sampler_view_custom(struct pipe_context *ctx, 3827 struct pipe_resource *texture, 3828 const struct pipe_sampler_view *state, 3829 unsigned width0, unsigned height0, 3830 unsigned force_level) 3831 { 3832 struct si_context *sctx = (struct si_context*)ctx; 3833 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 3834 struct r600_texture *tmp = (struct r600_texture*)texture; 3835 unsigned base_level, first_level, last_level; 3836 unsigned char state_swizzle[4]; 3837 unsigned height, depth, width; 3838 unsigned last_layer = state->u.tex.last_layer; 3839 enum pipe_format pipe_format; 3840 const struct legacy_surf_level *surflevel; 3841 3842 if (!view) 3843 return NULL; 3844 3845 /* initialize base object */ 3846 view->base = *state; 3847 view->base.texture = NULL; 3848 view->base.reference.count = 1; 3849 view->base.context = ctx; 3850 3851 assert(texture); 3852 pipe_resource_reference(&view->base.texture, texture); 3853 3854 if (state->format == PIPE_FORMAT_X24S8_UINT || 3855 state->format == PIPE_FORMAT_S8X24_UINT || 3856 state->format == PIPE_FORMAT_X32_S8X24_UINT || 3857 state->format == PIPE_FORMAT_S8_UINT) 3858 view->is_stencil_sampler = true; 3859 3860 /* Buffer resource. 
*/ 3861 if (texture->target == PIPE_BUFFER) { 3862 si_make_buffer_descriptor(sctx->screen, 3863 (struct r600_resource *)texture, 3864 state->format, 3865 state->u.buf.offset, 3866 state->u.buf.size, 3867 view->state); 3868 return &view->base; 3869 } 3870 3871 state_swizzle[0] = state->swizzle_r; 3872 state_swizzle[1] = state->swizzle_g; 3873 state_swizzle[2] = state->swizzle_b; 3874 state_swizzle[3] = state->swizzle_a; 3875 3876 base_level = 0; 3877 first_level = state->u.tex.first_level; 3878 last_level = state->u.tex.last_level; 3879 width = width0; 3880 height = height0; 3881 depth = texture->depth0; 3882 3883 if (sctx->b.chip_class <= VI && force_level) { 3884 assert(force_level == first_level && 3885 force_level == last_level); 3886 base_level = force_level; 3887 first_level = 0; 3888 last_level = 0; 3889 width = u_minify(width, force_level); 3890 height = u_minify(height, force_level); 3891 depth = u_minify(depth, force_level); 3892 } 3893 3894 /* This is not needed if state trackers set last_layer correctly. */ 3895 if (state->target == PIPE_TEXTURE_1D || 3896 state->target == PIPE_TEXTURE_2D || 3897 state->target == PIPE_TEXTURE_RECT || 3898 state->target == PIPE_TEXTURE_CUBE) 3899 last_layer = state->u.tex.first_layer; 3900 3901 /* Texturing with separate depth and stencil. */ 3902 pipe_format = state->format; 3903 3904 /* Depth/stencil texturing sometimes needs separate texture. */ 3905 if (tmp->is_depth && !si_can_sample_zs(tmp, view->is_stencil_sampler)) { 3906 if (!tmp->flushed_depth_texture && 3907 !si_init_flushed_depth_texture(ctx, texture, NULL)) { 3908 pipe_resource_reference(&view->base.texture, NULL); 3909 FREE(view); 3910 return NULL; 3911 } 3912 3913 assert(tmp->flushed_depth_texture); 3914 3915 /* Override format for the case where the flushed texture 3916 * contains only Z or only S. 
3917 */ 3918 if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format) 3919 pipe_format = tmp->flushed_depth_texture->resource.b.b.format; 3920 3921 tmp = tmp->flushed_depth_texture; 3922 } 3923 3924 surflevel = tmp->surface.u.legacy.level; 3925 3926 if (tmp->db_compatible) { 3927 if (!view->is_stencil_sampler) 3928 pipe_format = tmp->db_render_format; 3929 3930 switch (pipe_format) { 3931 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 3932 pipe_format = PIPE_FORMAT_Z32_FLOAT; 3933 break; 3934 case PIPE_FORMAT_X8Z24_UNORM: 3935 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3936 /* Z24 is always stored like this for DB 3937 * compatibility. 3938 */ 3939 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 3940 break; 3941 case PIPE_FORMAT_X24S8_UINT: 3942 case PIPE_FORMAT_S8X24_UINT: 3943 case PIPE_FORMAT_X32_S8X24_UINT: 3944 pipe_format = PIPE_FORMAT_S8_UINT; 3945 surflevel = tmp->surface.u.legacy.stencil_level; 3946 break; 3947 default:; 3948 } 3949 } 3950 3951 view->dcc_incompatible = 3952 vi_dcc_formats_are_incompatible(texture, 3953 state->u.tex.first_level, 3954 state->format); 3955 3956 si_make_texture_descriptor(sctx->screen, tmp, true, 3957 state->target, pipe_format, state_swizzle, 3958 first_level, last_level, 3959 state->u.tex.first_layer, last_layer, 3960 width, height, depth, 3961 view->state, view->fmask_state); 3962 3963 unsigned num_format = G_008F14_NUM_FORMAT_GFX6(view->state[1]); 3964 view->is_integer = 3965 num_format == V_008F14_IMG_NUM_FORMAT_USCALED || 3966 num_format == V_008F14_IMG_NUM_FORMAT_SSCALED || 3967 num_format == V_008F14_IMG_NUM_FORMAT_UINT || 3968 num_format == V_008F14_IMG_NUM_FORMAT_SINT; 3969 view->base_level_info = &surflevel[base_level]; 3970 view->base_level = base_level; 3971 view->block_width = util_format_get_blockwidth(pipe_format); 3972 return &view->base; 3973 } 3974 3975 static struct pipe_sampler_view * 3976 si_create_sampler_view(struct pipe_context *ctx, 3977 struct pipe_resource *texture, 3978 const struct 
pipe_sampler_view *state) 3979 { 3980 return si_create_sampler_view_custom(ctx, texture, state, 3981 texture ? texture->width0 : 0, 3982 texture ? texture->height0 : 0, 0); 3983 } 3984 3985 static void si_sampler_view_destroy(struct pipe_context *ctx, 3986 struct pipe_sampler_view *state) 3987 { 3988 struct si_sampler_view *view = (struct si_sampler_view *)state; 3989 3990 pipe_resource_reference(&state->texture, NULL); 3991 FREE(view); 3992 } 3993 3994 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 3995 { 3996 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 3997 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 3998 (linear_filter && 3999 (wrap == PIPE_TEX_WRAP_CLAMP || 4000 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 4001 } 4002 4003 static uint32_t si_translate_border_color(struct si_context *sctx, 4004 const struct pipe_sampler_state *state, 4005 const union pipe_color_union *color, 4006 bool is_integer) 4007 { 4008 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 4009 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 4010 4011 if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) && 4012 !wrap_mode_uses_border_color(state->wrap_t, linear_filter) && 4013 !wrap_mode_uses_border_color(state->wrap_r, linear_filter)) 4014 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 4015 4016 #define simple_border_types(elt) \ 4017 do { \ 4018 if (color->elt[0] == 0 && color->elt[1] == 0 && \ 4019 color->elt[2] == 0 && color->elt[3] == 0) \ 4020 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \ 4021 if (color->elt[0] == 0 && color->elt[1] == 0 && \ 4022 color->elt[2] == 0 && color->elt[3] == 1) \ 4023 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \ 4024 if (color->elt[0] == 1 && color->elt[1] == 1 && \ 4025 color->elt[2] == 1 && color->elt[3] == 1) \ 4026 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \ 4027 
} while (false) 4028 4029 if (is_integer) 4030 simple_border_types(ui); 4031 else 4032 simple_border_types(f); 4033 4034 #undef simple_border_types 4035 4036 int i; 4037 4038 /* Check if the border has been uploaded already. */ 4039 for (i = 0; i < sctx->border_color_count; i++) 4040 if (memcmp(&sctx->border_color_table[i], color, 4041 sizeof(*color)) == 0) 4042 break; 4043 4044 if (i >= SI_MAX_BORDER_COLORS) { 4045 /* Getting 4096 unique border colors is very unlikely. */ 4046 fprintf(stderr, "radeonsi: The border color table is full. " 4047 "Any new border colors will be just black. " 4048 "Please file a bug.\n"); 4049 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 4050 } 4051 4052 if (i == sctx->border_color_count) { 4053 /* Upload a new border color. */ 4054 memcpy(&sctx->border_color_table[i], color, 4055 sizeof(*color)); 4056 util_memcpy_cpu_to_le32(&sctx->border_color_map[i], 4057 color, sizeof(*color)); 4058 sctx->border_color_count++; 4059 } 4060 4061 return S_008F3C_BORDER_COLOR_PTR(i) | 4062 S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER); 4063 } 4064 4065 static inline int S_FIXED(float value, unsigned frac_bits) 4066 { 4067 return value * (1 << frac_bits); 4068 } 4069 4070 static inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso) 4071 { 4072 if (filter == PIPE_TEX_FILTER_LINEAR) 4073 return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR 4074 : V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 4075 else 4076 return max_aniso > 1 ? 
V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT 4077 : V_008F38_SQ_TEX_XY_FILTER_POINT; 4078 } 4079 4080 static inline unsigned si_tex_aniso_filter(unsigned filter) 4081 { 4082 if (filter < 2) 4083 return 0; 4084 if (filter < 4) 4085 return 1; 4086 if (filter < 8) 4087 return 2; 4088 if (filter < 16) 4089 return 3; 4090 return 4; 4091 } 4092 4093 static void *si_create_sampler_state(struct pipe_context *ctx, 4094 const struct pipe_sampler_state *state) 4095 { 4096 struct si_context *sctx = (struct si_context *)ctx; 4097 struct si_screen *sscreen = sctx->screen; 4098 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 4099 unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso 4100 : state->max_anisotropy; 4101 unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso); 4102 union pipe_color_union clamped_border_color; 4103 4104 if (!rstate) { 4105 return NULL; 4106 } 4107 4108 #ifdef DEBUG 4109 rstate->magic = SI_SAMPLER_STATE_MAGIC; 4110 #endif 4111 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 4112 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 4113 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 4114 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 4115 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 4116 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 4117 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | 4118 S_008F30_ANISO_BIAS(max_aniso_ratio) | 4119 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 4120 S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI)); 4121 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 4122 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | 4123 S_008F34_PERF_MIP(max_aniso_ratio ? 
max_aniso_ratio + 6 : 0)); 4124 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 4125 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) | 4126 S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) | 4127 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | 4128 S_008F38_MIP_POINT_PRECLAMP(0) | 4129 S_008F38_DISABLE_LSB_CEIL(sctx->b.chip_class <= VI) | 4130 S_008F38_FILTER_PREC_FIX(1) | 4131 S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI)); 4132 rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false); 4133 4134 /* Create sampler resource for integer textures. */ 4135 memcpy(rstate->integer_val, rstate->val, sizeof(rstate->val)); 4136 rstate->integer_val[3] = si_translate_border_color(sctx, state, &state->border_color, true); 4137 4138 /* Create sampler resource for upgraded depth textures. */ 4139 memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val)); 4140 4141 for (unsigned i = 0; i < 4; ++i) { 4142 /* Use channel 0 on purpose, so that we can use OPAQUE_WHITE 4143 * when the border color is 1.0. 
*/ 4144 clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1); 4145 } 4146 4147 if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0) 4148 rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1); 4149 else 4150 rstate->upgraded_depth_val[3] = 4151 si_translate_border_color(sctx, state, &clamped_border_color, false) | 4152 S_008F3C_UPGRADED_DEPTH(1); 4153 4154 return rstate; 4155 } 4156 4157 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 4158 { 4159 struct si_context *sctx = (struct si_context *)ctx; 4160 4161 if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask) 4162 return; 4163 4164 sctx->sample_mask.sample_mask = sample_mask; 4165 si_mark_atom_dirty(sctx, &sctx->sample_mask.atom); 4166 } 4167 4168 static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom) 4169 { 4170 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 4171 unsigned mask = sctx->sample_mask.sample_mask; 4172 4173 /* Needed for line and polygon smoothing as well as for the Polaris 4174 * small primitive filter. We expect the state tracker to take care of 4175 * this for us. 
4176 */ 4177 assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 4178 (mask & 1 && sctx->blitter->running)); 4179 4180 radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 4181 radeon_emit(cs, mask | (mask << 16)); 4182 radeon_emit(cs, mask | (mask << 16)); 4183 } 4184 4185 static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 4186 { 4187 #ifdef DEBUG 4188 struct si_sampler_state *s = state; 4189 4190 assert(s->magic == SI_SAMPLER_STATE_MAGIC); 4191 s->magic = 0; 4192 #endif 4193 free(state); 4194 } 4195 4196 /* 4197 * Vertex elements & buffers 4198 */ 4199 4200 static void *si_create_vertex_elements(struct pipe_context *ctx, 4201 unsigned count, 4202 const struct pipe_vertex_element *elements) 4203 { 4204 struct si_screen *sscreen = (struct si_screen*)ctx->screen; 4205 struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements); 4206 bool used[SI_NUM_VERTEX_BUFFERS] = {}; 4207 int i; 4208 4209 assert(count <= SI_MAX_ATTRIBS); 4210 if (!v) 4211 return NULL; 4212 4213 v->count = count; 4214 v->desc_list_byte_size = align(count * 16, SI_CPDMA_ALIGNMENT); 4215 4216 for (i = 0; i < count; ++i) { 4217 const struct util_format_description *desc; 4218 const struct util_format_channel_description *channel; 4219 unsigned data_format, num_format; 4220 int first_non_void; 4221 unsigned vbo_index = elements[i].vertex_buffer_index; 4222 unsigned char swizzle[4]; 4223 4224 if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { 4225 FREE(v); 4226 return NULL; 4227 } 4228 4229 if (elements[i].instance_divisor) { 4230 v->uses_instance_divisors = true; 4231 v->instance_divisors[i] = elements[i].instance_divisor; 4232 4233 if (v->instance_divisors[i] == 1) 4234 v->instance_divisor_is_one |= 1u << i; 4235 else 4236 v->instance_divisor_is_fetched |= 1u << i; 4237 } 4238 4239 if (!used[vbo_index]) { 4240 v->first_vb_use_mask |= 1 << i; 4241 used[vbo_index] = true; 4242 } 4243 4244 desc = util_format_description(elements[i].src_format); 4245 
first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 4246 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 4247 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 4248 channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL; 4249 memcpy(swizzle, desc->swizzle, sizeof(swizzle)); 4250 4251 v->format_size[i] = desc->block.bits / 8; 4252 v->src_offset[i] = elements[i].src_offset; 4253 v->vertex_buffer_index[i] = vbo_index; 4254 4255 /* The hardware always treats the 2-bit alpha channel as 4256 * unsigned, so a shader workaround is needed. The affected 4257 * chips are VI and older except Stoney (GFX8.1). 4258 */ 4259 if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10 && 4260 sscreen->info.chip_class <= VI && 4261 sscreen->info.family != CHIP_STONEY) { 4262 if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { 4263 v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM; 4264 } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) { 4265 v->fix_fetch[i] = SI_FIX_FETCH_A2_SSCALED; 4266 } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) { 4267 /* This isn't actually used in OpenGL. 
*/ 4268 v->fix_fetch[i] = SI_FIX_FETCH_A2_SINT; 4269 } 4270 } else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) { 4271 if (desc->swizzle[3] == PIPE_SWIZZLE_1) 4272 v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_FIXED; 4273 else 4274 v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_FIXED; 4275 } else if (channel && channel->size == 32 && !channel->pure_integer) { 4276 if (channel->type == UTIL_FORMAT_TYPE_SIGNED) { 4277 if (channel->normalized) { 4278 if (desc->swizzle[3] == PIPE_SWIZZLE_1) 4279 v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_SNORM; 4280 else 4281 v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SNORM; 4282 } else { 4283 v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SSCALED; 4284 } 4285 } else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) { 4286 if (channel->normalized) { 4287 if (desc->swizzle[3] == PIPE_SWIZZLE_1) 4288 v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_UNORM; 4289 else 4290 v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_UNORM; 4291 } else { 4292 v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_USCALED; 4293 } 4294 } 4295 } else if (channel && channel->size == 64 && 4296 channel->type == UTIL_FORMAT_TYPE_FLOAT) { 4297 switch (desc->nr_channels) { 4298 case 1: 4299 case 2: 4300 v->fix_fetch[i] = SI_FIX_FETCH_RG_64_FLOAT; 4301 swizzle[0] = PIPE_SWIZZLE_X; 4302 swizzle[1] = PIPE_SWIZZLE_Y; 4303 swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0; 4304 swizzle[3] = desc->nr_channels == 2 ? 
PIPE_SWIZZLE_W : PIPE_SWIZZLE_0; 4305 break; 4306 case 3: 4307 v->fix_fetch[i] = SI_FIX_FETCH_RGB_64_FLOAT; 4308 swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */ 4309 swizzle[1] = PIPE_SWIZZLE_Y; 4310 swizzle[2] = PIPE_SWIZZLE_0; 4311 swizzle[3] = PIPE_SWIZZLE_0; 4312 break; 4313 case 4: 4314 v->fix_fetch[i] = SI_FIX_FETCH_RGBA_64_FLOAT; 4315 swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */ 4316 swizzle[1] = PIPE_SWIZZLE_Y; 4317 swizzle[2] = PIPE_SWIZZLE_Z; 4318 swizzle[3] = PIPE_SWIZZLE_W; 4319 break; 4320 default: 4321 assert(0); 4322 } 4323 } else if (channel && desc->nr_channels == 3) { 4324 assert(desc->swizzle[0] == PIPE_SWIZZLE_X); 4325 4326 if (channel->size == 8) { 4327 if (channel->pure_integer) 4328 v->fix_fetch[i] = SI_FIX_FETCH_RGB_8_INT; 4329 else 4330 v->fix_fetch[i] = SI_FIX_FETCH_RGB_8; 4331 } else if (channel->size == 16) { 4332 if (channel->pure_integer) 4333 v->fix_fetch[i] = SI_FIX_FETCH_RGB_16_INT; 4334 else 4335 v->fix_fetch[i] = SI_FIX_FETCH_RGB_16; 4336 } 4337 } 4338 4339 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 4340 S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 4341 S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 4342 S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 4343 S_008F0C_NUM_FORMAT(num_format) | 4344 S_008F0C_DATA_FORMAT(data_format); 4345 } 4346 return v; 4347 } 4348 4349 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 4350 { 4351 struct si_context *sctx = (struct si_context *)ctx; 4352 struct si_vertex_elements *old = sctx->vertex_elements; 4353 struct si_vertex_elements *v = (struct si_vertex_elements*)state; 4354 4355 sctx->vertex_elements = v; 4356 sctx->vertex_buffers_dirty = true; 4357 4358 if (v && 4359 (!old || 4360 old->count != v->count || 4361 old->uses_instance_divisors != v->uses_instance_divisors || 4362 v->uses_instance_divisors || /* we don't check which divisors changed */ 4363 memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * v->count))) 4364 
sctx->do_update_shaders = true; 4365 4366 if (v && v->instance_divisor_is_fetched) { 4367 struct pipe_constant_buffer cb; 4368 4369 cb.buffer = NULL; 4370 cb.user_buffer = v->instance_divisors; 4371 cb.buffer_offset = 0; 4372 cb.buffer_size = sizeof(uint32_t) * v->count; 4373 si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb); 4374 } 4375 } 4376 4377 static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 4378 { 4379 struct si_context *sctx = (struct si_context *)ctx; 4380 4381 if (sctx->vertex_elements == state) 4382 sctx->vertex_elements = NULL; 4383 FREE(state); 4384 } 4385 4386 static void si_set_vertex_buffers(struct pipe_context *ctx, 4387 unsigned start_slot, unsigned count, 4388 const struct pipe_vertex_buffer *buffers) 4389 { 4390 struct si_context *sctx = (struct si_context *)ctx; 4391 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 4392 int i; 4393 4394 assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer)); 4395 4396 if (buffers) { 4397 for (i = 0; i < count; i++) { 4398 const struct pipe_vertex_buffer *src = buffers + i; 4399 struct pipe_vertex_buffer *dsti = dst + i; 4400 struct pipe_resource *buf = src->buffer.resource; 4401 4402 pipe_resource_reference(&dsti->buffer.resource, buf); 4403 dsti->buffer_offset = src->buffer_offset; 4404 dsti->stride = src->stride; 4405 si_context_add_resource_size(ctx, buf); 4406 if (buf) 4407 r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; 4408 } 4409 } else { 4410 for (i = 0; i < count; i++) { 4411 pipe_resource_reference(&dst[i].buffer.resource, NULL); 4412 } 4413 } 4414 sctx->vertex_buffers_dirty = true; 4415 } 4416 4417 /* 4418 * Misc 4419 */ 4420 4421 static void si_set_tess_state(struct pipe_context *ctx, 4422 const float default_outer_level[4], 4423 const float default_inner_level[2]) 4424 { 4425 struct si_context *sctx = (struct si_context *)ctx; 4426 struct pipe_constant_buffer cb; 4427 float array[8]; 4428 4429 memcpy(array, 
default_outer_level, sizeof(float) * 4); 4430 memcpy(array+4, default_inner_level, sizeof(float) * 2); 4431 4432 cb.buffer = NULL; 4433 cb.user_buffer = NULL; 4434 cb.buffer_size = sizeof(array); 4435 4436 si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer, 4437 (void*)array, sizeof(array), 4438 &cb.buffer_offset); 4439 4440 si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 4441 pipe_resource_reference(&cb.buffer, NULL); 4442 } 4443 4444 static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) 4445 { 4446 struct si_context *sctx = (struct si_context *)ctx; 4447 4448 si_update_fb_dirtiness_after_rendering(sctx); 4449 4450 /* Multisample surfaces are flushed in si_decompress_textures. */ 4451 if (sctx->framebuffer.nr_samples <= 1 && 4452 sctx->framebuffer.state.nr_cbufs) 4453 si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 4454 sctx->framebuffer.CB_has_shader_readable_metadata); 4455 } 4456 4457 /* This only ensures coherency for shader image/buffer stores. */ 4458 static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 4459 { 4460 struct si_context *sctx = (struct si_context *)ctx; 4461 4462 /* Subsequent commands must wait for all shader invocations to 4463 * complete. */ 4464 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | 4465 SI_CONTEXT_CS_PARTIAL_FLUSH; 4466 4467 if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 4468 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | 4469 SI_CONTEXT_INV_VMEM_L1; 4470 4471 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | 4472 PIPE_BARRIER_SHADER_BUFFER | 4473 PIPE_BARRIER_TEXTURE | 4474 PIPE_BARRIER_IMAGE | 4475 PIPE_BARRIER_STREAMOUT_BUFFER | 4476 PIPE_BARRIER_GLOBAL_BUFFER)) { 4477 /* As far as I can tell, L1 contents are written back to L2 4478 * automatically at end of shader, but the contents of other 4479 * L1 caches might still be stale. 
*/ 4480 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; 4481 } 4482 4483 if (flags & PIPE_BARRIER_INDEX_BUFFER) { 4484 /* Indices are read through TC L2 since VI. 4485 * L1 isn't used. 4486 */ 4487 if (sctx->screen->info.chip_class <= CIK) 4488 sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 4489 } 4490 4491 /* MSAA color, any depth and any stencil are flushed in 4492 * si_decompress_textures when needed. 4493 */ 4494 if (flags & PIPE_BARRIER_FRAMEBUFFER && 4495 sctx->framebuffer.nr_samples <= 1 && 4496 sctx->framebuffer.state.nr_cbufs) { 4497 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 4498 4499 if (sctx->b.chip_class <= VI) 4500 sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 4501 } 4502 4503 /* Indirect buffers use TC L2 on GFX9, but not older hw. */ 4504 if (sctx->screen->info.chip_class <= VI && 4505 flags & PIPE_BARRIER_INDIRECT_BUFFER) 4506 sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 4507 } 4508 4509 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 4510 { 4511 struct pipe_blend_state blend; 4512 4513 memset(&blend, 0, sizeof(blend)); 4514 blend.independent_blend_enable = true; 4515 blend.rt[0].colormask = 0xf; 4516 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 4517 } 4518 4519 static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 4520 bool include_draw_vbo) 4521 { 4522 si_need_cs_space((struct si_context*)ctx); 4523 } 4524 4525 static void si_init_config(struct si_context *sctx); 4526 4527 void si_init_state_functions(struct si_context *sctx) 4528 { 4529 si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond); 4530 si_init_external_atom(sctx, &sctx->streamout.begin_atom, &sctx->atoms.s.streamout_begin); 4531 si_init_external_atom(sctx, &sctx->streamout.enable_atom, &sctx->atoms.s.streamout_enable); 4532 si_init_external_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors); 4533 si_init_external_atom(sctx, &sctx->viewports.atom, &sctx->atoms.s.viewports); 4534 
4535 si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state); 4536 si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); 4537 si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state); 4538 si_init_atom(sctx, &sctx->dpbb_state, &sctx->atoms.s.dpbb_state, si_emit_dpbb_state); 4539 si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config); 4540 si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask); 4541 si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state); 4542 si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color); 4543 si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs); 4544 si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state); 4545 si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref); 4546 4547 sctx->b.b.create_blend_state = si_create_blend_state; 4548 sctx->b.b.bind_blend_state = si_bind_blend_state; 4549 sctx->b.b.delete_blend_state = si_delete_blend_state; 4550 sctx->b.b.set_blend_color = si_set_blend_color; 4551 4552 sctx->b.b.create_rasterizer_state = si_create_rs_state; 4553 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 4554 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 4555 4556 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 4557 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 4558 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 4559 4560 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 4561 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 4562 sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 4563 
sctx->custom_blend_eliminate_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 4564 sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); 4565 4566 sctx->b.b.set_clip_state = si_set_clip_state; 4567 sctx->b.b.set_stencil_ref = si_set_stencil_ref; 4568 4569 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 4570 4571 sctx->b.b.create_sampler_state = si_create_sampler_state; 4572 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 4573 4574 sctx->b.b.create_sampler_view = si_create_sampler_view; 4575 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 4576 4577 sctx->b.b.set_sample_mask = si_set_sample_mask; 4578 4579 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 4580 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 4581 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 4582 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 4583 4584 sctx->b.b.texture_barrier = si_texture_barrier; 4585 sctx->b.b.memory_barrier = si_memory_barrier; 4586 sctx->b.b.set_min_samples = si_set_min_samples; 4587 sctx->b.b.set_tess_state = si_set_tess_state; 4588 4589 sctx->b.b.set_active_query_state = si_set_active_query_state; 4590 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 4591 sctx->b.save_qbo_state = si_save_qbo_state; 4592 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 4593 4594 sctx->b.b.draw_vbo = si_draw_vbo; 4595 4596 si_init_config(sctx); 4597 } 4598 4599 void si_init_screen_state_functions(struct si_screen *sscreen) 4600 { 4601 sscreen->b.is_format_supported = si_is_format_supported; 4602 } 4603 4604 static void si_set_grbm_gfx_index(struct si_context *sctx, 4605 struct si_pm4_state *pm4, unsigned value) 4606 { 4607 unsigned reg = sctx->b.chip_class >= CIK ? 
R_030800_GRBM_GFX_INDEX : 4608 R_00802C_GRBM_GFX_INDEX; 4609 si_pm4_set_reg(pm4, reg, value); 4610 } 4611 4612 static void si_set_grbm_gfx_index_se(struct si_context *sctx, 4613 struct si_pm4_state *pm4, unsigned se) 4614 { 4615 assert(se == ~0 || se < sctx->screen->info.max_se); 4616 si_set_grbm_gfx_index(sctx, pm4, 4617 (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : 4618 S_030800_SE_INDEX(se)) | 4619 S_030800_SH_BROADCAST_WRITES(1) | 4620 S_030800_INSTANCE_BROADCAST_WRITES(1)); 4621 } 4622 4623 static void 4624 si_write_harvested_raster_configs(struct si_context *sctx, 4625 struct si_pm4_state *pm4, 4626 unsigned raster_config, 4627 unsigned raster_config_1) 4628 { 4629 unsigned sh_per_se = MAX2(sctx->screen->info.max_sh_per_se, 1); 4630 unsigned num_se = MAX2(sctx->screen->info.max_se, 1); 4631 unsigned rb_mask = sctx->screen->info.enabled_rb_mask; 4632 unsigned num_rb = MIN2(sctx->screen->info.num_render_backends, 16); 4633 unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); 4634 unsigned rb_per_se = num_rb / num_se; 4635 unsigned se_mask[4]; 4636 unsigned se; 4637 4638 se_mask[0] = ((1 << rb_per_se) - 1); 4639 se_mask[1] = (se_mask[0] << rb_per_se); 4640 se_mask[2] = (se_mask[1] << rb_per_se); 4641 se_mask[3] = (se_mask[2] << rb_per_se); 4642 4643 se_mask[0] &= rb_mask; 4644 se_mask[1] &= rb_mask; 4645 se_mask[2] &= rb_mask; 4646 se_mask[3] &= rb_mask; 4647 4648 assert(num_se == 1 || num_se == 2 || num_se == 4); 4649 assert(sh_per_se == 1 || sh_per_se == 2); 4650 assert(rb_per_pkr == 1 || rb_per_pkr == 2); 4651 4652 /* XXX: I can't figure out what the *_XSEL and *_YSEL 4653 * fields are for, so I'm leaving them as their default 4654 * values. 
*/ 4655 4656 for (se = 0; se < num_se; se++) { 4657 unsigned raster_config_se = raster_config; 4658 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 4659 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 4660 int idx = (se / 2) * 2; 4661 4662 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 4663 raster_config_se &= C_028350_SE_MAP; 4664 4665 if (!se_mask[idx]) { 4666 raster_config_se |= 4667 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); 4668 } else { 4669 raster_config_se |= 4670 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); 4671 } 4672 } 4673 4674 pkr0_mask &= rb_mask; 4675 pkr1_mask &= rb_mask; 4676 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 4677 raster_config_se &= C_028350_PKR_MAP; 4678 4679 if (!pkr0_mask) { 4680 raster_config_se |= 4681 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); 4682 } else { 4683 raster_config_se |= 4684 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); 4685 } 4686 } 4687 4688 if (rb_per_se >= 2) { 4689 unsigned rb0_mask = 1 << (se * rb_per_se); 4690 unsigned rb1_mask = rb0_mask << 1; 4691 4692 rb0_mask &= rb_mask; 4693 rb1_mask &= rb_mask; 4694 if (!rb0_mask || !rb1_mask) { 4695 raster_config_se &= C_028350_RB_MAP_PKR0; 4696 4697 if (!rb0_mask) { 4698 raster_config_se |= 4699 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); 4700 } else { 4701 raster_config_se |= 4702 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); 4703 } 4704 } 4705 4706 if (rb_per_se > 2) { 4707 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 4708 rb1_mask = rb0_mask << 1; 4709 rb0_mask &= rb_mask; 4710 rb1_mask &= rb_mask; 4711 if (!rb0_mask || !rb1_mask) { 4712 raster_config_se &= C_028350_RB_MAP_PKR1; 4713 4714 if (!rb0_mask) { 4715 raster_config_se |= 4716 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); 4717 } else { 4718 raster_config_se |= 4719 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); 4720 } 4721 } 4722 } 4723 } 4724 4725 si_set_grbm_gfx_index_se(sctx, pm4, se); 4726 
si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); 4727 } 4728 si_set_grbm_gfx_index(sctx, pm4, ~0); 4729 4730 if (sctx->b.chip_class >= CIK) { 4731 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 4732 (!se_mask[2] && !se_mask[3]))) { 4733 raster_config_1 &= C_028354_SE_PAIR_MAP; 4734 4735 if (!se_mask[0] && !se_mask[1]) { 4736 raster_config_1 |= 4737 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3); 4738 } else { 4739 raster_config_1 |= 4740 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0); 4741 } 4742 } 4743 4744 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 4745 } 4746 } 4747 4748 static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4) 4749 { 4750 struct si_screen *sscreen = sctx->screen; 4751 unsigned num_rb = MIN2(sctx->screen->info.num_render_backends, 16); 4752 unsigned rb_mask = sctx->screen->info.enabled_rb_mask; 4753 unsigned raster_config, raster_config_1; 4754 4755 switch (sctx->b.family) { 4756 case CHIP_TAHITI: 4757 case CHIP_PITCAIRN: 4758 raster_config = 0x2a00126a; 4759 raster_config_1 = 0x00000000; 4760 break; 4761 case CHIP_VERDE: 4762 raster_config = 0x0000124a; 4763 raster_config_1 = 0x00000000; 4764 break; 4765 case CHIP_OLAND: 4766 raster_config = 0x00000082; 4767 raster_config_1 = 0x00000000; 4768 break; 4769 case CHIP_HAINAN: 4770 raster_config = 0x00000000; 4771 raster_config_1 = 0x00000000; 4772 break; 4773 case CHIP_BONAIRE: 4774 raster_config = 0x16000012; 4775 raster_config_1 = 0x00000000; 4776 break; 4777 case CHIP_HAWAII: 4778 raster_config = 0x3a00161a; 4779 raster_config_1 = 0x0000002e; 4780 break; 4781 case CHIP_FIJI: 4782 if (sscreen->info.cik_macrotile_mode_array[0] == 0x000000e8) { 4783 /* old kernels with old tiling config */ 4784 raster_config = 0x16000012; 4785 raster_config_1 = 0x0000002a; 4786 } else { 4787 raster_config = 0x3a00161a; 4788 raster_config_1 = 0x0000002e; 4789 } 4790 break; 4791 case CHIP_POLARIS10: 4792 
raster_config = 0x16000012; 4793 raster_config_1 = 0x0000002a; 4794 break; 4795 case CHIP_POLARIS11: 4796 case CHIP_POLARIS12: 4797 raster_config = 0x16000012; 4798 raster_config_1 = 0x00000000; 4799 break; 4800 case CHIP_TONGA: 4801 raster_config = 0x16000012; 4802 raster_config_1 = 0x0000002a; 4803 break; 4804 case CHIP_ICELAND: 4805 if (num_rb == 1) 4806 raster_config = 0x00000000; 4807 else 4808 raster_config = 0x00000002; 4809 raster_config_1 = 0x00000000; 4810 break; 4811 case CHIP_CARRIZO: 4812 raster_config = 0x00000002; 4813 raster_config_1 = 0x00000000; 4814 break; 4815 case CHIP_KAVERI: 4816 /* KV should be 0x00000002, but that causes problems with radeon */ 4817 raster_config = 0x00000000; /* 0x00000002 */ 4818 raster_config_1 = 0x00000000; 4819 break; 4820 case CHIP_KABINI: 4821 case CHIP_MULLINS: 4822 case CHIP_STONEY: 4823 raster_config = 0x00000000; 4824 raster_config_1 = 0x00000000; 4825 break; 4826 default: 4827 fprintf(stderr, 4828 "radeonsi: Unknown GPU, using 0 for raster_config\n"); 4829 raster_config = 0x00000000; 4830 raster_config_1 = 0x00000000; 4831 } 4832 4833 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 4834 /* Always use the default config when all backends are enabled 4835 * (or when we failed to determine the enabled backends). 4836 */ 4837 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 4838 raster_config); 4839 if (sctx->b.chip_class >= CIK) 4840 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 4841 raster_config_1); 4842 } else { 4843 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 4844 } 4845 } 4846 4847 static void si_init_config(struct si_context *sctx) 4848 { 4849 struct si_screen *sscreen = sctx->screen; 4850 uint64_t border_color_va = sctx->border_color_buffer->gpu_address; 4851 bool has_clear_state = sscreen->has_clear_state; 4852 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 4853 4854 /* Only SI can disable CLEAR_STATE for now. 
*/ 4855 assert(has_clear_state || sscreen->info.chip_class == SI); 4856 4857 if (!pm4) 4858 return; 4859 4860 si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL); 4861 si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1)); 4862 si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); 4863 si_pm4_cmd_end(pm4, false); 4864 4865 if (has_clear_state) { 4866 si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE); 4867 si_pm4_cmd_add(pm4, 0); 4868 si_pm4_cmd_end(pm4, false); 4869 } 4870 4871 if (sctx->b.chip_class <= VI) 4872 si_set_raster_config(sctx, pm4); 4873 4874 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 4875 if (!has_clear_state) 4876 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 4877 4878 /* FIXME calculate these values somehow ??? */ 4879 if (sctx->b.chip_class <= VI) { 4880 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 4881 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 4882 } 4883 4884 if (!has_clear_state) { 4885 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 4886 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 4887 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 4888 } 4889 4890 si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); 4891 if (!has_clear_state) 4892 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 4893 if (sctx->b.chip_class < CIK) 4894 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 4895 S_008A14_CLIP_VTX_REORDER_ENA(1)); 4896 4897 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 4898 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 4899 4900 if (!has_clear_state) 4901 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 4902 4903 /* CLEAR_STATE doesn't clear these correctly on certain generations. 4904 * I don't know why. Deduced by trial and error. 
4905 */ 4906 if (sctx->b.chip_class <= CIK) { 4907 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 4908 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 4909 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 4910 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 4911 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 4912 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 4913 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 4914 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 4915 } 4916 4917 if (!has_clear_state) { 4918 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 4919 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 4920 S_028230_ER_TRI(0xA) | 4921 S_028230_ER_POINT(0xA) | 4922 S_028230_ER_RECT(0xA) | 4923 /* Required by DX10_DIAMOND_TEST_ENA: */ 4924 S_028230_ER_LINE_LR(0x1A) | 4925 S_028230_ER_LINE_RL(0x26) | 4926 S_028230_ER_LINE_TB(0xA) | 4927 S_028230_ER_LINE_BT(0xA)); 4928 /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ 4929 si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 4930 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 4931 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 4932 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 4933 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 4934 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); 4935 } 4936 4937 if (sctx->b.chip_class >= GFX9) { 4938 si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0); 4939 si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0); 4940 si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0); 4941 } else { 4942 /* These registers, when written, also overwrite the CLEAR_STATE 4943 * context, so we can't rely on CLEAR_STATE setting them. 4944 * It would be an issue if there was another UMD changing them. 
4945 */ 4946 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 4947 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 4948 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 4949 } 4950 4951 if (sctx->b.chip_class >= CIK) { 4952 if (sctx->b.chip_class >= GFX9) { 4953 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 4954 S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); 4955 } else { 4956 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 4957 S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F)); 4958 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 4959 S_00B41C_WAVE_LIMIT(0x3F)); 4960 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 4961 S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F)); 4962 4963 /* If this is 0, Bonaire can hang even if GS isn't being used. 4964 * Other chips are unaffected. These are suboptimal values, 4965 * but we don't use on-chip GS. 4966 */ 4967 si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, 4968 S_028A44_ES_VERTS_PER_SUBGRP(64) | 4969 S_028A44_GS_PRIMS_PER_SUBGRP(4)); 4970 } 4971 si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 4972 S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F)); 4973 4974 /* Compute LATE_ALLOC_VS.LIMIT. */ 4975 unsigned num_cu_per_sh = sscreen->info.num_good_compute_units / 4976 (sscreen->info.max_se * 4977 sscreen->info.max_sh_per_se); 4978 unsigned late_alloc_limit; /* The limit is per SH. */ 4979 4980 if (sctx->b.family == CHIP_KABINI) { 4981 late_alloc_limit = 0; /* Potential hang on Kabini. */ 4982 } else if (num_cu_per_sh <= 4) { 4983 /* Too few available compute units per SH. Disallowing 4984 * VS to run on one CU could hurt us more than late VS 4985 * allocation would help. 4986 * 4987 * 2 is the highest safe number that allows us to keep 4988 * all CUs enabled. 4989 */ 4990 late_alloc_limit = 2; 4991 } else { 4992 /* This is a good initial value, allowing 1 late_alloc 4993 * wave per SIMD on num_cu - 2. 
4994 */ 4995 late_alloc_limit = (num_cu_per_sh - 2) * 4; 4996 4997 /* The limit is 0-based, so 0 means 1. */ 4998 assert(late_alloc_limit > 0 && late_alloc_limit <= 64); 4999 late_alloc_limit -= 1; 5000 } 5001 5002 /* VS can't execute on one CU if the limit is > 2. */ 5003 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 5004 S_00B118_CU_EN(late_alloc_limit > 2 ? 0xfffe : 0xffff) | 5005 S_00B118_WAVE_LIMIT(0x3F)); 5006 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 5007 S_00B11C_LIMIT(late_alloc_limit)); 5008 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 5009 S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F)); 5010 } 5011 5012 if (sctx->b.chip_class >= VI) { 5013 unsigned vgt_tess_distribution; 5014 5015 vgt_tess_distribution = 5016 S_028B50_ACCUM_ISOLINE(32) | 5017 S_028B50_ACCUM_TRI(11) | 5018 S_028B50_ACCUM_QUAD(11) | 5019 S_028B50_DONUT_SPLIT(16); 5020 5021 /* Testing with Unigine Heaven extreme tesselation yielded best results 5022 * with TRAP_SPLIT = 3. 5023 */ 5024 if (sctx->b.family == CHIP_FIJI || 5025 sctx->b.family >= CHIP_POLARIS10) 5026 vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 5027 5028 si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 5029 } else if (!has_clear_state) { 5030 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 5031 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 5032 } 5033 5034 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 5035 if (sctx->b.chip_class >= CIK) 5036 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); 5037 si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, 5038 RADEON_PRIO_BORDER_COLORS); 5039 5040 if (sctx->b.chip_class >= GFX9) { 5041 unsigned num_se = sscreen->info.max_se; 5042 unsigned pc_lines = 0; 5043 5044 switch (sctx->b.family) { 5045 case CHIP_VEGA10: 5046 pc_lines = 4096; 5047 break; 5048 case CHIP_RAVEN: 5049 pc_lines = 1024; 5050 break; 5051 default: 5052 assert(0); 5053 } 5054 
5055 si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, 5056 S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) | 5057 S_028C48_MAX_PRIM_PER_BATCH(1023)); 5058 si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 5059 S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); 5060 si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); 5061 } 5062 5063 si_pm4_upload_indirect_buffer(sctx, pm4); 5064 sctx->init_config = pm4; 5065 } 5066