1 /* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Christian Knig <christian.koenig (at) amd.com> 25 */ 26 27 #include "si_pipe.h" 28 #include "sid.h" 29 #include "radeon/r600_cs.h" 30 #include "radeon/r600_query.h" 31 32 #include "util/u_dual_blend.h" 33 #include "util/u_format.h" 34 #include "util/u_format_s3tc.h" 35 #include "util/u_memory.h" 36 #include "util/u_resource.h" 37 38 /* Initialize an external atom (owned by ../radeon). */ 39 static void 40 si_init_external_atom(struct si_context *sctx, struct r600_atom *atom, 41 struct r600_atom **list_elem) 42 { 43 atom->id = list_elem - sctx->atoms.array + 1; 44 *list_elem = atom; 45 } 46 47 /* Initialize an atom owned by radeonsi. */ 48 void si_init_atom(struct si_context *sctx, struct r600_atom *atom, 49 struct r600_atom **list_elem, 50 void (*emit_func)(struct si_context *ctx, struct r600_atom *state)) 51 { 52 atom->emit = (void*)emit_func; 53 atom->id = list_elem - sctx->atoms.array + 1; /* index+1 in the atom array */ 54 *list_elem = atom; 55 } 56 57 static unsigned si_map_swizzle(unsigned swizzle) 58 { 59 switch (swizzle) { 60 case PIPE_SWIZZLE_Y: 61 return V_008F0C_SQ_SEL_Y; 62 case PIPE_SWIZZLE_Z: 63 return V_008F0C_SQ_SEL_Z; 64 case PIPE_SWIZZLE_W: 65 return V_008F0C_SQ_SEL_W; 66 case PIPE_SWIZZLE_0: 67 return V_008F0C_SQ_SEL_0; 68 case PIPE_SWIZZLE_1: 69 return V_008F0C_SQ_SEL_1; 70 default: /* PIPE_SWIZZLE_X */ 71 return V_008F0C_SQ_SEL_X; 72 } 73 } 74 75 static uint32_t S_FIXED(float value, uint32_t frac_bits) 76 { 77 return value * (1 << frac_bits); 78 } 79 80 /* 12.4 fixed-point */ 81 static unsigned si_pack_float_12p4(float x) 82 { 83 return x <= 0 ? 0 : 84 x >= 4096 ? 0xffff : x * 16; 85 } 86 87 /* 88 * Inferred framebuffer and blender state. 89 * 90 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending 91 * if there is not enough PS outputs. 92 */ 93 static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom) 94 { 95 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 96 struct si_state_blend *blend = sctx->queued.named.blend; 97 /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers, 98 * but you never know. */ 99 uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit; 100 unsigned i; 101 102 if (blend) 103 cb_target_mask &= blend->cb_target_mask; 104 105 /* Avoid a hang that happens when dual source blending is enabled 106 * but there is not enough color outputs. This is undefined behavior, 107 * so disable color writes completely. 108 * 109 * Reproducible with Unigine Heaven 4.0 and drirc missing. 110 */ 111 if (blend && blend->dual_src_blend && 112 sctx->ps_shader.cso && 113 (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3) 114 cb_target_mask = 0; 115 116 radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask); 117 118 /* STONEY-specific register settings. */ 119 if (sctx->b.family == CHIP_STONEY) { 120 unsigned spi_shader_col_format = 121 sctx->ps_shader.cso ? 122 sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0; 123 unsigned sx_ps_downconvert = 0; 124 unsigned sx_blend_opt_epsilon = 0; 125 unsigned sx_blend_opt_control = 0; 126 127 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 128 struct r600_surface *surf = 129 (struct r600_surface*)sctx->framebuffer.state.cbufs[i]; 130 unsigned format, swap, spi_format, colormask; 131 bool has_alpha, has_rgb; 132 133 if (!surf) 134 continue; 135 136 format = G_028C70_FORMAT(surf->cb_color_info); 137 swap = G_028C70_COMP_SWAP(surf->cb_color_info); 138 spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 139 colormask = (cb_target_mask >> (i * 4)) & 0xf; 140 141 /* Set if RGB and A are present. */ 142 has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib); 143 144 if (format == V_028C70_COLOR_8 || 145 format == V_028C70_COLOR_16 || 146 format == V_028C70_COLOR_32) 147 has_rgb = !has_alpha; 148 else 149 has_rgb = true; 150 151 /* Check the colormask and export format. */ 152 if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 153 has_rgb = false; 154 if (!(colormask & PIPE_MASK_A)) 155 has_alpha = false; 156 157 if (spi_format == V_028714_SPI_SHADER_ZERO) { 158 has_rgb = false; 159 has_alpha = false; 160 } 161 162 /* Disable value checking for disabled channels. */ 163 if (!has_rgb) 164 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 165 if (!has_alpha) 166 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 167 168 /* Enable down-conversion for 32bpp and smaller formats. */ 169 switch (format) { 170 case V_028C70_COLOR_8: 171 case V_028C70_COLOR_8_8: 172 case V_028C70_COLOR_8_8_8_8: 173 /* For 1 and 2-channel formats, use the superset thereof. */ 174 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 175 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 176 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 177 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 178 sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 179 } 180 break; 181 182 case V_028C70_COLOR_5_6_5: 183 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 184 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 185 sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 186 } 187 break; 188 189 case V_028C70_COLOR_1_5_5_5: 190 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 191 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 192 sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 193 } 194 break; 195 196 case V_028C70_COLOR_4_4_4_4: 197 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 198 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 199 sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 200 } 201 break; 202 203 case V_028C70_COLOR_32: 204 if (swap == V_0280A0_SWAP_STD && 205 spi_format == V_028714_SPI_SHADER_32_R) 206 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 207 else if (swap == V_0280A0_SWAP_ALT_REV && 208 spi_format == V_028714_SPI_SHADER_32_AR) 209 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 210 break; 211 212 case V_028C70_COLOR_16: 213 case V_028C70_COLOR_16_16: 214 /* For 1-channel formats, use the superset thereof. */ 215 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 216 spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 217 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 218 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 219 if (swap == V_0280A0_SWAP_STD || 220 swap == V_0280A0_SWAP_STD_REV) 221 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 222 else 223 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 224 } 225 break; 226 227 case V_028C70_COLOR_10_11_11: 228 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 229 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 230 sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4); 231 } 232 break; 233 234 case V_028C70_COLOR_2_10_10_10: 235 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 236 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 237 sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 238 } 239 break; 240 } 241 } 242 243 if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) { 244 sx_ps_downconvert = 0; 245 sx_blend_opt_epsilon = 0; 246 sx_blend_opt_control = 0; 247 } 248 249 radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); 250 radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */ 251 radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */ 252 radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */ 253 } 254 } 255 256 /* 257 * Blender functions 258 */ 259 260 static uint32_t si_translate_blend_function(int blend_func) 261 { 262 switch (blend_func) { 263 case PIPE_BLEND_ADD: 264 return V_028780_COMB_DST_PLUS_SRC; 265 case PIPE_BLEND_SUBTRACT: 266 return V_028780_COMB_SRC_MINUS_DST; 267 case PIPE_BLEND_REVERSE_SUBTRACT: 268 return V_028780_COMB_DST_MINUS_SRC; 269 case PIPE_BLEND_MIN: 270 return V_028780_COMB_MIN_DST_SRC; 271 case PIPE_BLEND_MAX: 272 return V_028780_COMB_MAX_DST_SRC; 273 default: 274 R600_ERR("Unknown blend function %d\n", blend_func); 275 assert(0); 276 break; 277 } 278 return 0; 279 } 280 281 static uint32_t si_translate_blend_factor(int blend_fact) 282 { 283 switch (blend_fact) { 284 case PIPE_BLENDFACTOR_ONE: 285 return V_028780_BLEND_ONE; 286 case PIPE_BLENDFACTOR_SRC_COLOR: 287 return V_028780_BLEND_SRC_COLOR; 288 case PIPE_BLENDFACTOR_SRC_ALPHA: 289 return V_028780_BLEND_SRC_ALPHA; 290 case PIPE_BLENDFACTOR_DST_ALPHA: 291 return V_028780_BLEND_DST_ALPHA; 292 case PIPE_BLENDFACTOR_DST_COLOR: 293 return V_028780_BLEND_DST_COLOR; 294 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 295 return V_028780_BLEND_SRC_ALPHA_SATURATE; 296 case PIPE_BLENDFACTOR_CONST_COLOR: 297 return V_028780_BLEND_CONSTANT_COLOR; 298 case PIPE_BLENDFACTOR_CONST_ALPHA: 299 return V_028780_BLEND_CONSTANT_ALPHA; 300 case PIPE_BLENDFACTOR_ZERO: 301 return V_028780_BLEND_ZERO; 302 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 303 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 304 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 305 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 306 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 307 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 308 case PIPE_BLENDFACTOR_INV_DST_COLOR: 309 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 310 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 311 return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR; 312 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 313 return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA; 314 case PIPE_BLENDFACTOR_SRC1_COLOR: 315 return V_028780_BLEND_SRC1_COLOR; 316 case PIPE_BLENDFACTOR_SRC1_ALPHA: 317 return V_028780_BLEND_SRC1_ALPHA; 318 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 319 return V_028780_BLEND_INV_SRC1_COLOR; 320 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 321 return V_028780_BLEND_INV_SRC1_ALPHA; 322 default: 323 R600_ERR("Bad blend factor %d not supported!\n", blend_fact); 324 assert(0); 325 break; 326 } 327 return 0; 328 } 329 330 static uint32_t si_translate_blend_opt_function(int blend_func) 331 { 332 switch (blend_func) { 333 case PIPE_BLEND_ADD: 334 return V_028760_OPT_COMB_ADD; 335 case PIPE_BLEND_SUBTRACT: 336 return V_028760_OPT_COMB_SUBTRACT; 337 case PIPE_BLEND_REVERSE_SUBTRACT: 338 return V_028760_OPT_COMB_REVSUBTRACT; 339 case PIPE_BLEND_MIN: 340 return V_028760_OPT_COMB_MIN; 341 case PIPE_BLEND_MAX: 342 return V_028760_OPT_COMB_MAX; 343 default: 344 return V_028760_OPT_COMB_BLEND_DISABLED; 345 } 346 } 347 348 static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 349 { 350 switch (blend_fact) { 351 case PIPE_BLENDFACTOR_ZERO: 352 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 353 case PIPE_BLENDFACTOR_ONE: 354 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 355 case PIPE_BLENDFACTOR_SRC_COLOR: 356 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 357 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 358 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 359 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 360 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 361 case PIPE_BLENDFACTOR_SRC_ALPHA: 362 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 363 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 364 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 365 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 366 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 367 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 368 default: 369 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 370 } 371 } 372 373 /** 374 * Get rid of DST in the blend factors by commuting the operands: 375 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 376 */ 377 static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, 378 unsigned *dst_factor, unsigned expected_dst, 379 unsigned replacement_src) 380 { 381 if (*src_factor == expected_dst && 382 *dst_factor == PIPE_BLENDFACTOR_ZERO) { 383 *src_factor = PIPE_BLENDFACTOR_ZERO; 384 *dst_factor = replacement_src; 385 386 /* Commuting the operands requires reversing subtractions. */ 387 if (*func == PIPE_BLEND_SUBTRACT) 388 *func = PIPE_BLEND_REVERSE_SUBTRACT; 389 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 390 *func = PIPE_BLEND_SUBTRACT; 391 } 392 } 393 394 static bool si_blend_factor_uses_dst(unsigned factor) 395 { 396 return factor == PIPE_BLENDFACTOR_DST_COLOR || 397 factor == PIPE_BLENDFACTOR_DST_ALPHA || 398 factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 399 factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || 400 factor == PIPE_BLENDFACTOR_INV_DST_COLOR; 401 } 402 403 static void *si_create_blend_state_mode(struct pipe_context *ctx, 404 const struct pipe_blend_state *state, 405 unsigned mode) 406 { 407 struct si_context *sctx = (struct si_context*)ctx; 408 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 409 struct si_pm4_state *pm4 = &blend->pm4; 410 uint32_t sx_mrt_blend_opt[8] = {0}; 411 uint32_t color_control = 0; 412 413 if (!blend) 414 return NULL; 415 416 blend->alpha_to_coverage = state->alpha_to_coverage; 417 blend->alpha_to_one = state->alpha_to_one; 418 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 419 420 if (state->logicop_enable) { 421 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 422 } else { 423 color_control |= S_028808_ROP3(0xcc); 424 } 425 426 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 427 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 428 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | 429 S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 430 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | 431 S_028B70_ALPHA_TO_MASK_OFFSET3(2)); 432 433 if (state->alpha_to_coverage) 434 blend->need_src_alpha_4bit |= 0xf; 435 436 blend->cb_target_mask = 0; 437 for (int i = 0; i < 8; i++) { 438 /* state->rt entries > 0 only written if independent blending */ 439 const int j = state->independent_blend_enable ? i : 0; 440 441 unsigned eqRGB = state->rt[j].rgb_func; 442 unsigned srcRGB = state->rt[j].rgb_src_factor; 443 unsigned dstRGB = state->rt[j].rgb_dst_factor; 444 unsigned eqA = state->rt[j].alpha_func; 445 unsigned srcA = state->rt[j].alpha_src_factor; 446 unsigned dstA = state->rt[j].alpha_dst_factor; 447 448 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 449 unsigned blend_cntl = 0; 450 451 sx_mrt_blend_opt[i] = 452 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 453 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 454 455 /* Only set dual source blending for MRT0 to avoid a hang. */ 456 if (i >= 1 && blend->dual_src_blend) { 457 /* Vulkan does this for dual source blending. */ 458 if (i == 1) 459 blend_cntl |= S_028780_ENABLE(1); 460 461 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 462 continue; 463 } 464 465 /* Only addition and subtraction equations are supported with 466 * dual source blending. 467 */ 468 if (blend->dual_src_blend && 469 (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 470 eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 471 assert(!"Unsupported equation for dual source blending"); 472 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 473 continue; 474 } 475 476 /* cb_render_state will disable unused ones */ 477 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 478 479 if (!state->rt[j].colormask || !state->rt[j].blend_enable) { 480 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 481 continue; 482 } 483 484 /* Blending optimizations for Stoney. 485 * These transformations don't change the behavior. 486 * 487 * First, get rid of DST in the blend factors: 488 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 489 */ 490 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, 491 PIPE_BLENDFACTOR_DST_COLOR, 492 PIPE_BLENDFACTOR_SRC_COLOR); 493 si_blend_remove_dst(&eqA, &srcA, &dstA, 494 PIPE_BLENDFACTOR_DST_COLOR, 495 PIPE_BLENDFACTOR_SRC_COLOR); 496 si_blend_remove_dst(&eqA, &srcA, &dstA, 497 PIPE_BLENDFACTOR_DST_ALPHA, 498 PIPE_BLENDFACTOR_SRC_ALPHA); 499 500 /* Look up the ideal settings from tables. */ 501 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 502 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 503 srcA_opt = si_translate_blend_opt_factor(srcA, true); 504 dstA_opt = si_translate_blend_opt_factor(dstA, true); 505 506 /* Handle interdependencies. */ 507 if (si_blend_factor_uses_dst(srcRGB)) 508 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 509 if (si_blend_factor_uses_dst(srcA)) 510 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 511 512 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 513 (dstRGB == PIPE_BLENDFACTOR_ZERO || 514 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 515 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 516 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 517 518 /* Set the final value. */ 519 sx_mrt_blend_opt[i] = 520 S_028760_COLOR_SRC_OPT(srcRGB_opt) | 521 S_028760_COLOR_DST_OPT(dstRGB_opt) | 522 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 523 S_028760_ALPHA_SRC_OPT(srcA_opt) | 524 S_028760_ALPHA_DST_OPT(dstA_opt) | 525 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 526 527 /* Set blend state. */ 528 blend_cntl |= S_028780_ENABLE(1); 529 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 530 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB)); 531 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB)); 532 533 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 534 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 535 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 536 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA)); 537 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA)); 538 } 539 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 540 541 blend->blend_enable_4bit |= 0xfu << (i * 4); 542 543 /* This is only important for formats without alpha. */ 544 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 545 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 546 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 547 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 548 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || 549 dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 550 blend->need_src_alpha_4bit |= 0xfu << (i * 4); 551 } 552 553 if (blend->cb_target_mask) { 554 color_control |= S_028808_MODE(mode); 555 } else { 556 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 557 } 558 559 if (sctx->b.family == CHIP_STONEY) { 560 /* Disable RB+ blend optimizations for dual source blending. 561 * Vulkan does this. 562 */ 563 if (blend->dual_src_blend) { 564 for (int i = 0; i < 8; i++) { 565 sx_mrt_blend_opt[i] = 566 S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 567 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 568 } 569 } 570 571 for (int i = 0; i < 8; i++) 572 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, 573 sx_mrt_blend_opt[i]); 574 575 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ 576 if (blend->dual_src_blend || state->logicop_enable || 577 mode == V_028808_CB_RESOLVE) 578 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 579 } 580 581 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 582 return blend; 583 } 584 585 static void *si_create_blend_state(struct pipe_context *ctx, 586 const struct pipe_blend_state *state) 587 { 588 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 589 } 590 591 static void si_bind_blend_state(struct pipe_context *ctx, void *state) 592 { 593 struct si_context *sctx = (struct si_context *)ctx; 594 si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state); 595 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 596 sctx->do_update_shaders = true; 597 } 598 599 static void si_delete_blend_state(struct pipe_context *ctx, void *state) 600 { 601 struct si_context *sctx = (struct si_context *)ctx; 602 si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); 603 } 604 605 static void si_set_blend_color(struct pipe_context *ctx, 606 const struct pipe_blend_color *state) 607 { 608 struct si_context *sctx = (struct si_context *)ctx; 609 610 if (memcmp(&sctx->blend_color.state, state, sizeof(*state)) == 0) 611 return; 612 613 sctx->blend_color.state = *state; 614 si_mark_atom_dirty(sctx, &sctx->blend_color.atom); 615 } 616 617 static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom) 618 { 619 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 620 621 radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); 622 radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4); 623 } 624 625 /* 626 * Clipping 627 */ 628 629 static void si_set_clip_state(struct pipe_context *ctx, 630 const struct pipe_clip_state *state) 631 { 632 struct si_context *sctx = (struct si_context *)ctx; 633 struct pipe_constant_buffer cb; 634 635 if (memcmp(&sctx->clip_state.state, state, sizeof(*state)) == 0) 636 return; 637 638 sctx->clip_state.state = *state; 639 si_mark_atom_dirty(sctx, &sctx->clip_state.atom); 640 641 cb.buffer = NULL; 642 cb.user_buffer = state->ucp; 643 cb.buffer_offset = 0; 644 cb.buffer_size = 4*4*8; 645 si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); 646 pipe_resource_reference(&cb.buffer, NULL); 647 } 648 649 static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom) 650 { 651 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 652 653 radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4); 654 radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4); 655 } 656 657 #define SIX_BITS 0x3F 658 659 static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom) 660 { 661 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 662 struct si_shader *vs = si_get_vs_state(sctx); 663 struct tgsi_shader_info *info = si_get_vs_info(sctx); 664 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 665 unsigned window_space = 666 info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 667 unsigned clipdist_mask = 668 info->writes_clipvertex ? SIX_BITS : info->clipdist_writemask; 669 unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS; 670 unsigned culldist_mask = info->culldist_writemask << info->num_written_clipdistance; 671 unsigned total_mask; 672 bool misc_vec_ena; 673 674 if (vs->key.opt.hw_vs.clip_disable) { 675 assert(!info->culldist_writemask); 676 clipdist_mask = 0; 677 culldist_mask = 0; 678 } 679 total_mask = clipdist_mask | culldist_mask; 680 681 /* Clip distances on points have no effect, so need to be implemented 682 * as cull distances. This applies for the clipvertex case as well. 683 * 684 * Setting this for primitives other than points should have no adverse 685 * effects. 686 */ 687 clipdist_mask &= rs->clip_plane_enable; 688 culldist_mask |= clipdist_mask; 689 690 misc_vec_ena = info->writes_psize || info->writes_edgeflag || 691 info->writes_layer || info->writes_viewport_index; 692 693 radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, 694 S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) | 695 S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) | 696 S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) | 697 S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) | 698 S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | 699 S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | 700 S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | 701 S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | 702 clipdist_mask | (culldist_mask << 8)); 703 radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, 704 rs->pa_cl_clip_cntl | 705 ucp_mask | 706 S_028810_CLIP_DISABLE(window_space)); 707 708 /* reuse needs to be set off if we write oViewport */ 709 radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 710 S_028AB4_REUSE_OFF(info->writes_viewport_index)); 711 } 712 713 /* 714 * inferred state between framebuffer and rasterizer 715 */ 716 static void si_update_poly_offset_state(struct si_context *sctx) 717 { 718 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 719 720 if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) { 721 si_pm4_bind_state(sctx, poly_offset, NULL); 722 return; 723 } 724 725 /* Use the user format, not db_render_format, so that the polygon 726 * offset behaves as expected by applications. 727 */ 728 switch (sctx->framebuffer.state.zsbuf->texture->format) { 729 case PIPE_FORMAT_Z16_UNORM: 730 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 731 break; 732 default: /* 24-bit */ 733 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 734 break; 735 case PIPE_FORMAT_Z32_FLOAT: 736 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 737 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 738 break; 739 } 740 } 741 742 /* 743 * Rasterizer 744 */ 745 746 static uint32_t si_translate_fill(uint32_t func) 747 { 748 switch(func) { 749 case PIPE_POLYGON_MODE_FILL: 750 return V_028814_X_DRAW_TRIANGLES; 751 case PIPE_POLYGON_MODE_LINE: 752 return V_028814_X_DRAW_LINES; 753 case PIPE_POLYGON_MODE_POINT: 754 return V_028814_X_DRAW_POINTS; 755 default: 756 assert(0); 757 return V_028814_X_DRAW_POINTS; 758 } 759 } 760 761 static void *si_create_rs_state(struct pipe_context *ctx, 762 const struct pipe_rasterizer_state *state) 763 { 764 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 765 struct si_pm4_state *pm4 = &rs->pm4; 766 unsigned tmp, i; 767 float psize_min, psize_max; 768 769 if (!rs) { 770 return NULL; 771 } 772 773 rs->scissor_enable = state->scissor; 774 rs->clip_halfz = state->clip_halfz; 775 rs->two_side = state->light_twoside; 776 rs->multisample_enable = state->multisample; 777 rs->force_persample_interp = state->force_persample_interp; 778 rs->clip_plane_enable = state->clip_plane_enable; 779 rs->line_stipple_enable = state->line_stipple_enable; 780 rs->poly_stipple_enable = state->poly_stipple_enable; 781 rs->line_smooth = state->line_smooth; 782 rs->poly_smooth = state->poly_smooth; 783 rs->uses_poly_offset = state->offset_point || state->offset_line || 784 state->offset_tri; 785 rs->clamp_fragment_color = state->clamp_fragment_color; 786 rs->flatshade = state->flatshade; 787 rs->sprite_coord_enable = state->sprite_coord_enable; 788 rs->rasterizer_discard = state->rasterizer_discard; 789 rs->pa_sc_line_stipple = state->line_stipple_enable ? 790 S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 791 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; 792 rs->pa_cl_clip_cntl = 793 S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 794 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | 795 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | 796 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 797 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 798 799 si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, 800 S_0286D4_FLAT_SHADE_ENA(1) | 801 S_0286D4_PNT_SPRITE_ENA(1) | 802 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 803 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 804 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 805 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 806 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 807 808 /* point size 12.4 fixed point */ 809 tmp = (unsigned)(state->point_size * 8.0); 810 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 811 812 if (state->point_size_per_vertex) { 813 psize_min = util_get_min_point_size(state); 814 psize_max = 8192; 815 } else { 816 /* Force the point size to be as if the vertex output was disabled. */ 817 psize_min = state->point_size; 818 psize_max = state->point_size; 819 } 820 /* Divide by two, because 0.5 = 1 pixel. */ 821 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 822 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min/2)) | 823 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max/2))); 824 825 tmp = (unsigned)state->line_width * 8; 826 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp)); 827 si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0, 828 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 829 S_028A48_MSAA_ENABLE(state->multisample || 830 state->poly_smooth || 831 state->line_smooth) | 832 S_028A48_VPORT_SCISSOR_ENABLE(1)); 833 834 si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, 835 S_028BE4_PIX_CENTER(state->half_pixel_center) | 836 S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); 837 838 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 839 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 840 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 841 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 842 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 843 S_028814_FACE(!state->front_ccw) | 844 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 845 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 846 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 847 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || 848 state->fill_back != PIPE_POLYGON_MODE_FILL) | 849 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 850 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); 851 si_pm4_set_reg(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0 + 852 SI_SGPR_VS_STATE_BITS * 4, state->clamp_vertex_color); 853 854 /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. */ 855 for (i = 0; i < 3; i++) { 856 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 857 float offset_units = state->offset_units; 858 float offset_scale = state->offset_scale * 16.0f; 859 uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 860 861 if (!state->offset_units_unscaled) { 862 switch (i) { 863 case 0: /* 16-bit zbuffer */ 864 offset_units *= 4.0f; 865 pa_su_poly_offset_db_fmt_cntl = 866 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 867 break; 868 case 1: /* 24-bit zbuffer */ 869 offset_units *= 2.0f; 870 pa_su_poly_offset_db_fmt_cntl = 871 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 872 break; 873 case 2: /* 32-bit zbuffer */ 874 offset_units *= 1.0f; 875 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | 876 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 877 break; 878 } 879 } 880 881 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 882 fui(offset_scale)); 883 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 884 fui(offset_units)); 885 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 886 fui(offset_scale)); 887 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 888 fui(offset_units)); 889 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 890 pa_su_poly_offset_db_fmt_cntl); 891 } 892 893 return rs; 894 } 895 896 static void si_bind_rs_state(struct pipe_context *ctx, void *state) 897 { 898 struct si_context *sctx = (struct si_context *)ctx; 899 struct si_state_rasterizer *old_rs = 900 (struct si_state_rasterizer*)sctx->queued.named.rasterizer; 901 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 902 903 if (!state) 904 return; 905 906 if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) { 907 si_mark_atom_dirty(sctx, &sctx->db_render_state); 908 909 /* Update the small primitive filter workaround if necessary. */ 910 if (sctx->b.family >= CHIP_POLARIS10 && 911 sctx->framebuffer.nr_samples > 1) 912 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 913 } 914 915 r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz); 916 917 si_pm4_bind_state(sctx, rasterizer, rs); 918 si_update_poly_offset_state(sctx); 919 920 si_mark_atom_dirty(sctx, &sctx->clip_regs); 921 sctx->do_update_shaders = true; 922 } 923 924 static void si_delete_rs_state(struct pipe_context *ctx, void *state) 925 { 926 struct si_context *sctx = (struct si_context *)ctx; 927 928 if (sctx->queued.named.rasterizer == state) 929 si_pm4_bind_state(sctx, poly_offset, NULL); 930 si_pm4_delete_state(sctx, rasterizer, (struct si_state_rasterizer *)state); 931 } 932 933 /* 934 * infeered state between dsa and stencil ref 935 */ 936 static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom) 937 { 938 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 939 struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 940 struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 941 942 radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); 943 radeon_emit(cs, S_028430_STENCILTESTVAL(ref->ref_value[0]) | 944 S_028430_STENCILMASK(dsa->valuemask[0]) | 945 S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 946 S_028430_STENCILOPVAL(1)); 947 radeon_emit(cs, S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 948 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 949 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 950 S_028434_STENCILOPVAL_BF(1)); 951 } 952 953 static void si_set_stencil_ref(struct pipe_context *ctx, 954 const struct pipe_stencil_ref *state) 955 { 956 struct si_context *sctx = (struct si_context *)ctx; 957 958 if (memcmp(&sctx->stencil_ref.state, state, sizeof(*state)) == 0) 959 return; 960 961 sctx->stencil_ref.state = *state; 962 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 963 } 964 965 966 /* 967 * DSA 968 */ 969 970 static uint32_t si_translate_stencil_op(int s_op) 971 { 972 switch (s_op) { 973 case PIPE_STENCIL_OP_KEEP: 974 return V_02842C_STENCIL_KEEP; 975 case PIPE_STENCIL_OP_ZERO: 976 return V_02842C_STENCIL_ZERO; 977 case PIPE_STENCIL_OP_REPLACE: 978 return V_02842C_STENCIL_REPLACE_TEST; 979 case PIPE_STENCIL_OP_INCR: 980 return V_02842C_STENCIL_ADD_CLAMP; 981 case PIPE_STENCIL_OP_DECR: 982 return V_02842C_STENCIL_SUB_CLAMP; 983 case PIPE_STENCIL_OP_INCR_WRAP: 984 return V_02842C_STENCIL_ADD_WRAP; 985 case PIPE_STENCIL_OP_DECR_WRAP: 986 return V_02842C_STENCIL_SUB_WRAP; 987 case PIPE_STENCIL_OP_INVERT: 988 return V_02842C_STENCIL_INVERT; 989 default: 990 R600_ERR("Unknown stencil op %d", s_op); 991 assert(0); 992 break; 993 } 994 return 0; 995 } 996 997 static void *si_create_dsa_state(struct pipe_context *ctx, 998 const struct pipe_depth_stencil_alpha_state *state) 999 { 1000 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 1001 struct si_pm4_state *pm4 = &dsa->pm4; 1002 unsigned db_depth_control; 1003 uint32_t db_stencil_control = 0; 1004 1005 if (!dsa) { 1006 return NULL; 1007 } 1008 1009 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 1010 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 1011 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 1012 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 1013 1014 db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | 1015 S_028800_Z_WRITE_ENABLE(state->depth.writemask) | 1016 S_028800_ZFUNC(state->depth.func) | 1017 S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test); 1018 1019 /* stencil */ 1020 if (state->stencil[0].enabled) { 1021 db_depth_control |= S_028800_STENCIL_ENABLE(1); 1022 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 1023 db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 1024 db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 1025 db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 1026 1027 if (state->stencil[1].enabled) { 1028 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 1029 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 1030 db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 1031 db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 1032 db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 1033 } 1034 } 1035 1036 /* alpha */ 1037 if (state->alpha.enabled) { 1038 dsa->alpha_func = state->alpha.func; 1039 1040 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + 1041 SI_SGPR_ALPHA_REF * 4, fui(state->alpha.ref_value)); 1042 } else { 1043 dsa->alpha_func = PIPE_FUNC_ALWAYS; 1044 } 1045 1046 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1047 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1048 if (state->depth.bounds_test) { 1049 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min)); 1050 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max)); 1051 } 1052 1053 return dsa; 1054 } 1055 1056 static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1057 { 1058 struct si_context *sctx = (struct si_context *)ctx; 1059 struct si_state_dsa *dsa = state; 1060 1061 if (!state) 1062 return; 1063 1064 si_pm4_bind_state(sctx, dsa, dsa); 1065 1066 if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1067 sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1068 sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1069 si_mark_atom_dirty(sctx, &sctx->stencil_ref.atom); 1070 } 1071 sctx->do_update_shaders = true; 1072 } 1073 1074 static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1075 { 1076 struct si_context *sctx = (struct si_context *)ctx; 1077 si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); 1078 } 1079 1080 static void *si_create_db_flush_dsa(struct si_context *sctx) 1081 { 1082 struct pipe_depth_stencil_alpha_state dsa = {}; 1083 1084 return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa); 1085 } 1086 1087 /* DB RENDER STATE */ 1088 1089 static void si_set_active_query_state(struct pipe_context *ctx, boolean enable) 1090 { 1091 struct si_context *sctx = (struct si_context*)ctx; 1092 1093 /* Pipeline stat & streamout queries. */ 1094 if (enable) { 1095 sctx->b.flags &= ~R600_CONTEXT_STOP_PIPELINE_STATS; 1096 sctx->b.flags |= R600_CONTEXT_START_PIPELINE_STATS; 1097 } else { 1098 sctx->b.flags &= ~R600_CONTEXT_START_PIPELINE_STATS; 1099 sctx->b.flags |= R600_CONTEXT_STOP_PIPELINE_STATS; 1100 } 1101 1102 /* Occlusion queries. */ 1103 if (sctx->occlusion_queries_disabled != !enable) { 1104 sctx->occlusion_queries_disabled = !enable; 1105 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1106 } 1107 } 1108 1109 static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable) 1110 { 1111 struct si_context *sctx = (struct si_context*)ctx; 1112 1113 si_mark_atom_dirty(sctx, &sctx->db_render_state); 1114 } 1115 1116 static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st) 1117 { 1118 struct si_context *sctx = (struct si_context*)ctx; 1119 1120 st->saved_compute = sctx->cs_shader_state.program; 1121 1122 si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); 1123 si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo); 1124 } 1125 1126 static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state) 1127 { 1128 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 1129 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1130 unsigned db_shader_control; 1131 1132 radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2); 1133 1134 /* DB_RENDER_CONTROL */ 1135 if (sctx->dbcb_depth_copy_enabled || 1136 sctx->dbcb_stencil_copy_enabled) { 1137 radeon_emit(cs, 1138 S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 1139 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 1140 S_028000_COPY_CENTROID(1) | 1141 S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample)); 1142 } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 1143 radeon_emit(cs, 1144 S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 1145 S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace)); 1146 } else { 1147 radeon_emit(cs, 1148 S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 1149 S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear)); 1150 } 1151 1152 /* DB_COUNT_CONTROL (occlusion queries) */ 1153 if (sctx->b.num_occlusion_queries > 0 && 1154 !sctx->occlusion_queries_disabled) { 1155 bool perfect = sctx->b.num_perfect_occlusion_queries > 0; 1156 1157 if (sctx->b.chip_class >= CIK) { 1158 radeon_emit(cs, 1159 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1160 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | 1161 S_028004_ZPASS_ENABLE(1) | 1162 S_028004_SLICE_EVEN_ENABLE(1) | 1163 S_028004_SLICE_ODD_ENABLE(1)); 1164 } else { 1165 radeon_emit(cs, 1166 S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1167 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); 1168 } 1169 } else { 1170 /* Disable occlusion queries. */ 1171 if (sctx->b.chip_class >= CIK) { 1172 radeon_emit(cs, 0); 1173 } else { 1174 radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1)); 1175 } 1176 } 1177 1178 /* DB_RENDER_OVERRIDE2 */ 1179 radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 1180 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 1181 S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 1182 S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); 1183 1184 db_shader_control = sctx->ps_db_shader_control; 1185 1186 /* Bug workaround for smoothing (overrasterization) on SI. */ 1187 if (sctx->b.chip_class == SI && sctx->smoothing_enabled) { 1188 db_shader_control &= C_02880C_Z_ORDER; 1189 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 1190 } 1191 1192 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ 1193 if (!rs || !rs->multisample_enable) 1194 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 1195 1196 if (sctx->b.family == CHIP_STONEY && 1197 sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) 1198 db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 1199 1200 radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, 1201 db_shader_control); 1202 } 1203 1204 /* 1205 * format translation 1206 */ 1207 static uint32_t si_translate_colorformat(enum pipe_format format) 1208 { 1209 const struct util_format_description *desc = util_format_description(format); 1210 1211 #define HAS_SIZE(x,y,z,w) \ 1212 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1213 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1214 1215 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1216 return V_028C70_COLOR_10_11_11; 1217 1218 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1219 return V_028C70_COLOR_INVALID; 1220 1221 /* hw cannot support mixed formats (except depth/stencil, since 1222 * stencil is not written to). */ 1223 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1224 return V_028C70_COLOR_INVALID; 1225 1226 switch (desc->nr_channels) { 1227 case 1: 1228 switch (desc->channel[0].size) { 1229 case 8: 1230 return V_028C70_COLOR_8; 1231 case 16: 1232 return V_028C70_COLOR_16; 1233 case 32: 1234 return V_028C70_COLOR_32; 1235 } 1236 break; 1237 case 2: 1238 if (desc->channel[0].size == desc->channel[1].size) { 1239 switch (desc->channel[0].size) { 1240 case 8: 1241 return V_028C70_COLOR_8_8; 1242 case 16: 1243 return V_028C70_COLOR_16_16; 1244 case 32: 1245 return V_028C70_COLOR_32_32; 1246 } 1247 } else if (HAS_SIZE(8,24,0,0)) { 1248 return V_028C70_COLOR_24_8; 1249 } else if (HAS_SIZE(24,8,0,0)) { 1250 return V_028C70_COLOR_8_24; 1251 } 1252 break; 1253 case 3: 1254 if (HAS_SIZE(5,6,5,0)) { 1255 return V_028C70_COLOR_5_6_5; 1256 } else if (HAS_SIZE(32,8,24,0)) { 1257 return V_028C70_COLOR_X24_8_32_FLOAT; 1258 } 1259 break; 1260 case 4: 1261 if (desc->channel[0].size == desc->channel[1].size && 1262 desc->channel[0].size == desc->channel[2].size && 1263 desc->channel[0].size == desc->channel[3].size) { 1264 switch (desc->channel[0].size) { 1265 case 4: 1266 return V_028C70_COLOR_4_4_4_4; 1267 case 8: 1268 return V_028C70_COLOR_8_8_8_8; 1269 case 16: 1270 return V_028C70_COLOR_16_16_16_16; 1271 case 32: 1272 return V_028C70_COLOR_32_32_32_32; 1273 } 1274 } else if (HAS_SIZE(5,5,5,1)) { 1275 return V_028C70_COLOR_1_5_5_5; 1276 } else if (HAS_SIZE(10,10,10,2)) { 1277 return V_028C70_COLOR_2_10_10_10; 1278 } 1279 break; 1280 } 1281 return V_028C70_COLOR_INVALID; 1282 } 1283 1284 static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1285 { 1286 if (SI_BIG_ENDIAN) { 1287 switch(colorformat) { 1288 /* 8-bit buffers. */ 1289 case V_028C70_COLOR_8: 1290 return V_028C70_ENDIAN_NONE; 1291 1292 /* 16-bit buffers. */ 1293 case V_028C70_COLOR_5_6_5: 1294 case V_028C70_COLOR_1_5_5_5: 1295 case V_028C70_COLOR_4_4_4_4: 1296 case V_028C70_COLOR_16: 1297 case V_028C70_COLOR_8_8: 1298 return V_028C70_ENDIAN_8IN16; 1299 1300 /* 32-bit buffers. */ 1301 case V_028C70_COLOR_8_8_8_8: 1302 case V_028C70_COLOR_2_10_10_10: 1303 case V_028C70_COLOR_8_24: 1304 case V_028C70_COLOR_24_8: 1305 case V_028C70_COLOR_16_16: 1306 return V_028C70_ENDIAN_8IN32; 1307 1308 /* 64-bit buffers. */ 1309 case V_028C70_COLOR_16_16_16_16: 1310 return V_028C70_ENDIAN_8IN16; 1311 1312 case V_028C70_COLOR_32_32: 1313 return V_028C70_ENDIAN_8IN32; 1314 1315 /* 128-bit buffers. */ 1316 case V_028C70_COLOR_32_32_32_32: 1317 return V_028C70_ENDIAN_8IN32; 1318 default: 1319 return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1320 } 1321 } else { 1322 return V_028C70_ENDIAN_NONE; 1323 } 1324 } 1325 1326 static uint32_t si_translate_dbformat(enum pipe_format format) 1327 { 1328 switch (format) { 1329 case PIPE_FORMAT_Z16_UNORM: 1330 return V_028040_Z_16; 1331 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1332 case PIPE_FORMAT_X8Z24_UNORM: 1333 case PIPE_FORMAT_Z24X8_UNORM: 1334 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1335 return V_028040_Z_24; /* deprecated on SI */ 1336 case PIPE_FORMAT_Z32_FLOAT: 1337 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1338 return V_028040_Z_32_FLOAT; 1339 default: 1340 return V_028040_Z_INVALID; 1341 } 1342 } 1343 1344 /* 1345 * Texture translation 1346 */ 1347 1348 static uint32_t si_translate_texformat(struct pipe_screen *screen, 1349 enum pipe_format format, 1350 const struct util_format_description *desc, 1351 int first_non_void) 1352 { 1353 struct si_screen *sscreen = (struct si_screen*)screen; 1354 bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 && 1355 sscreen->b.info.drm_minor >= 31) || 1356 sscreen->b.info.drm_major == 3; 1357 bool uniform = true; 1358 int i; 1359 1360 /* Colorspace (return non-RGB formats directly). */ 1361 switch (desc->colorspace) { 1362 /* Depth stencil formats */ 1363 case UTIL_FORMAT_COLORSPACE_ZS: 1364 switch (format) { 1365 case PIPE_FORMAT_Z16_UNORM: 1366 return V_008F14_IMG_DATA_FORMAT_16; 1367 case PIPE_FORMAT_X24S8_UINT: 1368 case PIPE_FORMAT_S8X24_UINT: 1369 /* 1370 * Implemented as an 8_8_8_8 data format to fix texture 1371 * gathers in stencil sampling. This affects at least 1372 * GL45-CTS.texture_cube_map_array.sampling on VI. 1373 */ 1374 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1375 case PIPE_FORMAT_Z24X8_UNORM: 1376 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1377 return V_008F14_IMG_DATA_FORMAT_8_24; 1378 case PIPE_FORMAT_X8Z24_UNORM: 1379 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1380 return V_008F14_IMG_DATA_FORMAT_24_8; 1381 case PIPE_FORMAT_S8_UINT: 1382 return V_008F14_IMG_DATA_FORMAT_8; 1383 case PIPE_FORMAT_Z32_FLOAT: 1384 return V_008F14_IMG_DATA_FORMAT_32; 1385 case PIPE_FORMAT_X32_S8X24_UINT: 1386 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1387 return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1388 default: 1389 goto out_unknown; 1390 } 1391 1392 case UTIL_FORMAT_COLORSPACE_YUV: 1393 goto out_unknown; /* TODO */ 1394 1395 case UTIL_FORMAT_COLORSPACE_SRGB: 1396 if (desc->nr_channels != 4 && desc->nr_channels != 1) 1397 goto out_unknown; 1398 break; 1399 1400 default: 1401 break; 1402 } 1403 1404 if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1405 if (!enable_compressed_formats) 1406 goto out_unknown; 1407 1408 switch (format) { 1409 case PIPE_FORMAT_RGTC1_SNORM: 1410 case PIPE_FORMAT_LATC1_SNORM: 1411 case PIPE_FORMAT_RGTC1_UNORM: 1412 case PIPE_FORMAT_LATC1_UNORM: 1413 return V_008F14_IMG_DATA_FORMAT_BC4; 1414 case PIPE_FORMAT_RGTC2_SNORM: 1415 case PIPE_FORMAT_LATC2_SNORM: 1416 case PIPE_FORMAT_RGTC2_UNORM: 1417 case PIPE_FORMAT_LATC2_UNORM: 1418 return V_008F14_IMG_DATA_FORMAT_BC5; 1419 default: 1420 goto out_unknown; 1421 } 1422 } 1423 1424 if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 1425 sscreen->b.family == CHIP_STONEY) { 1426 switch (format) { 1427 case PIPE_FORMAT_ETC1_RGB8: 1428 case PIPE_FORMAT_ETC2_RGB8: 1429 case PIPE_FORMAT_ETC2_SRGB8: 1430 return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 1431 case PIPE_FORMAT_ETC2_RGB8A1: 1432 case PIPE_FORMAT_ETC2_SRGB8A1: 1433 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 1434 case PIPE_FORMAT_ETC2_RGBA8: 1435 case PIPE_FORMAT_ETC2_SRGBA8: 1436 return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 1437 case PIPE_FORMAT_ETC2_R11_UNORM: 1438 case PIPE_FORMAT_ETC2_R11_SNORM: 1439 return V_008F14_IMG_DATA_FORMAT_ETC2_R; 1440 case PIPE_FORMAT_ETC2_RG11_UNORM: 1441 case PIPE_FORMAT_ETC2_RG11_SNORM: 1442 return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 1443 default: 1444 goto out_unknown; 1445 } 1446 } 1447 1448 if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1449 if (!enable_compressed_formats) 1450 goto out_unknown; 1451 1452 switch (format) { 1453 case PIPE_FORMAT_BPTC_RGBA_UNORM: 1454 case PIPE_FORMAT_BPTC_SRGBA: 1455 return V_008F14_IMG_DATA_FORMAT_BC7; 1456 case PIPE_FORMAT_BPTC_RGB_FLOAT: 1457 case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1458 return V_008F14_IMG_DATA_FORMAT_BC6; 1459 default: 1460 goto out_unknown; 1461 } 1462 } 1463 1464 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 1465 switch (format) { 1466 case PIPE_FORMAT_R8G8_B8G8_UNORM: 1467 case PIPE_FORMAT_G8R8_B8R8_UNORM: 1468 return V_008F14_IMG_DATA_FORMAT_GB_GR; 1469 case PIPE_FORMAT_G8R8_G8B8_UNORM: 1470 case PIPE_FORMAT_R8G8_R8B8_UNORM: 1471 return V_008F14_IMG_DATA_FORMAT_BG_RG; 1472 default: 1473 goto out_unknown; 1474 } 1475 } 1476 1477 if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1478 if (!enable_compressed_formats) 1479 goto out_unknown; 1480 1481 if (!util_format_s3tc_enabled) { 1482 goto out_unknown; 1483 } 1484 1485 switch (format) { 1486 case PIPE_FORMAT_DXT1_RGB: 1487 case PIPE_FORMAT_DXT1_RGBA: 1488 case PIPE_FORMAT_DXT1_SRGB: 1489 case PIPE_FORMAT_DXT1_SRGBA: 1490 return V_008F14_IMG_DATA_FORMAT_BC1; 1491 case PIPE_FORMAT_DXT3_RGBA: 1492 case PIPE_FORMAT_DXT3_SRGBA: 1493 return V_008F14_IMG_DATA_FORMAT_BC2; 1494 case PIPE_FORMAT_DXT5_RGBA: 1495 case PIPE_FORMAT_DXT5_SRGBA: 1496 return V_008F14_IMG_DATA_FORMAT_BC3; 1497 default: 1498 goto out_unknown; 1499 } 1500 } 1501 1502 if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1503 return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1504 } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1505 return V_008F14_IMG_DATA_FORMAT_10_11_11; 1506 } 1507 1508 /* R8G8Bx_SNORM - TODO CxV8U8 */ 1509 1510 /* hw cannot support mixed formats (except depth/stencil, since only 1511 * depth is read).*/ 1512 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1513 goto out_unknown; 1514 1515 /* See whether the components are of the same size. */ 1516 for (i = 1; i < desc->nr_channels; i++) { 1517 uniform = uniform && desc->channel[0].size == desc->channel[i].size; 1518 } 1519 1520 /* Non-uniform formats. */ 1521 if (!uniform) { 1522 switch(desc->nr_channels) { 1523 case 3: 1524 if (desc->channel[0].size == 5 && 1525 desc->channel[1].size == 6 && 1526 desc->channel[2].size == 5) { 1527 return V_008F14_IMG_DATA_FORMAT_5_6_5; 1528 } 1529 goto out_unknown; 1530 case 4: 1531 if (desc->channel[0].size == 5 && 1532 desc->channel[1].size == 5 && 1533 desc->channel[2].size == 5 && 1534 desc->channel[3].size == 1) { 1535 return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 1536 } 1537 if (desc->channel[0].size == 10 && 1538 desc->channel[1].size == 10 && 1539 desc->channel[2].size == 10 && 1540 desc->channel[3].size == 2) { 1541 return V_008F14_IMG_DATA_FORMAT_2_10_10_10; 1542 } 1543 goto out_unknown; 1544 } 1545 goto out_unknown; 1546 } 1547 1548 if (first_non_void < 0 || first_non_void > 3) 1549 goto out_unknown; 1550 1551 /* uniform formats */ 1552 switch (desc->channel[first_non_void].size) { 1553 case 4: 1554 switch (desc->nr_channels) { 1555 #if 0 /* Not supported for render targets */ 1556 case 2: 1557 return V_008F14_IMG_DATA_FORMAT_4_4; 1558 #endif 1559 case 4: 1560 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 1561 } 1562 break; 1563 case 8: 1564 switch (desc->nr_channels) { 1565 case 1: 1566 return V_008F14_IMG_DATA_FORMAT_8; 1567 case 2: 1568 return V_008F14_IMG_DATA_FORMAT_8_8; 1569 case 4: 1570 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1571 } 1572 break; 1573 case 16: 1574 switch (desc->nr_channels) { 1575 case 1: 1576 return V_008F14_IMG_DATA_FORMAT_16; 1577 case 2: 1578 return V_008F14_IMG_DATA_FORMAT_16_16; 1579 case 4: 1580 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 1581 } 1582 break; 1583 case 32: 1584 switch (desc->nr_channels) { 1585 case 1: 1586 return V_008F14_IMG_DATA_FORMAT_32; 1587 case 2: 1588 return V_008F14_IMG_DATA_FORMAT_32_32; 1589 #if 0 /* Not supported for render targets */ 1590 case 3: 1591 return V_008F14_IMG_DATA_FORMAT_32_32_32; 1592 #endif 1593 case 4: 1594 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 1595 } 1596 } 1597 1598 out_unknown: 1599 /* R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); */ 1600 return ~0; 1601 } 1602 1603 static unsigned si_tex_wrap(unsigned wrap) 1604 { 1605 switch (wrap) { 1606 default: 1607 case PIPE_TEX_WRAP_REPEAT: 1608 return V_008F30_SQ_TEX_WRAP; 1609 case PIPE_TEX_WRAP_CLAMP: 1610 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 1611 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 1612 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 1613 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 1614 return V_008F30_SQ_TEX_CLAMP_BORDER; 1615 case PIPE_TEX_WRAP_MIRROR_REPEAT: 1616 return V_008F30_SQ_TEX_MIRROR; 1617 case PIPE_TEX_WRAP_MIRROR_CLAMP: 1618 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 1619 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 1620 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 1621 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 1622 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 1623 } 1624 } 1625 1626 static unsigned si_tex_mipfilter(unsigned filter) 1627 { 1628 switch (filter) { 1629 case PIPE_TEX_MIPFILTER_NEAREST: 1630 return V_008F38_SQ_TEX_Z_FILTER_POINT; 1631 case PIPE_TEX_MIPFILTER_LINEAR: 1632 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 1633 default: 1634 case PIPE_TEX_MIPFILTER_NONE: 1635 return V_008F38_SQ_TEX_Z_FILTER_NONE; 1636 } 1637 } 1638 1639 static unsigned si_tex_compare(unsigned compare) 1640 { 1641 switch (compare) { 1642 default: 1643 case PIPE_FUNC_NEVER: 1644 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 1645 case PIPE_FUNC_LESS: 1646 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 1647 case PIPE_FUNC_EQUAL: 1648 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 1649 case PIPE_FUNC_LEQUAL: 1650 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 1651 case PIPE_FUNC_GREATER: 1652 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 1653 case PIPE_FUNC_NOTEQUAL: 1654 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 1655 case PIPE_FUNC_GEQUAL: 1656 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 1657 case PIPE_FUNC_ALWAYS: 1658 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 1659 } 1660 } 1661 1662 static unsigned si_tex_dim(unsigned res_target, unsigned view_target, 1663 unsigned nr_samples) 1664 { 1665 if (view_target == PIPE_TEXTURE_CUBE || 1666 view_target == PIPE_TEXTURE_CUBE_ARRAY) 1667 res_target = view_target; 1668 /* If interpreting cubemaps as something else, set 2D_ARRAY. */ 1669 else if (res_target == PIPE_TEXTURE_CUBE || 1670 res_target == PIPE_TEXTURE_CUBE_ARRAY) 1671 res_target = PIPE_TEXTURE_2D_ARRAY; 1672 1673 switch (res_target) { 1674 default: 1675 case PIPE_TEXTURE_1D: 1676 return V_008F1C_SQ_RSRC_IMG_1D; 1677 case PIPE_TEXTURE_1D_ARRAY: 1678 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 1679 case PIPE_TEXTURE_2D: 1680 case PIPE_TEXTURE_RECT: 1681 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : 1682 V_008F1C_SQ_RSRC_IMG_2D; 1683 case PIPE_TEXTURE_2D_ARRAY: 1684 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : 1685 V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 1686 case PIPE_TEXTURE_3D: 1687 return V_008F1C_SQ_RSRC_IMG_3D; 1688 case PIPE_TEXTURE_CUBE: 1689 case PIPE_TEXTURE_CUBE_ARRAY: 1690 return V_008F1C_SQ_RSRC_IMG_CUBE; 1691 } 1692 } 1693 1694 /* 1695 * Format support testing 1696 */ 1697 1698 static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 1699 { 1700 return si_translate_texformat(screen, format, util_format_description(format), 1701 util_format_get_first_non_void_channel(format)) != ~0U; 1702 } 1703 1704 static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 1705 const struct util_format_description *desc, 1706 int first_non_void) 1707 { 1708 int i; 1709 1710 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1711 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 1712 1713 assert(first_non_void >= 0); 1714 1715 if (desc->nr_channels == 4 && 1716 desc->channel[0].size == 10 && 1717 desc->channel[1].size == 10 && 1718 desc->channel[2].size == 10 && 1719 desc->channel[3].size == 2) 1720 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 1721 1722 /* See whether the components are of the same size. */ 1723 for (i = 0; i < desc->nr_channels; i++) { 1724 if (desc->channel[first_non_void].size != desc->channel[i].size) 1725 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1726 } 1727 1728 switch (desc->channel[first_non_void].size) { 1729 case 8: 1730 switch (desc->nr_channels) { 1731 case 1: 1732 return V_008F0C_BUF_DATA_FORMAT_8; 1733 case 2: 1734 return V_008F0C_BUF_DATA_FORMAT_8_8; 1735 case 3: 1736 case 4: 1737 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 1738 } 1739 break; 1740 case 16: 1741 switch (desc->nr_channels) { 1742 case 1: 1743 return V_008F0C_BUF_DATA_FORMAT_16; 1744 case 2: 1745 return V_008F0C_BUF_DATA_FORMAT_16_16; 1746 case 3: 1747 case 4: 1748 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 1749 } 1750 break; 1751 case 32: 1752 switch (desc->nr_channels) { 1753 case 1: 1754 return V_008F0C_BUF_DATA_FORMAT_32; 1755 case 2: 1756 return V_008F0C_BUF_DATA_FORMAT_32_32; 1757 case 3: 1758 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 1759 case 4: 1760 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 1761 } 1762 break; 1763 } 1764 1765 return V_008F0C_BUF_DATA_FORMAT_INVALID; 1766 } 1767 1768 static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 1769 const struct util_format_description *desc, 1770 int first_non_void) 1771 { 1772 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 1773 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1774 1775 assert(first_non_void >= 0); 1776 1777 switch (desc->channel[first_non_void].type) { 1778 case UTIL_FORMAT_TYPE_SIGNED: 1779 case UTIL_FORMAT_TYPE_FIXED: 1780 if (desc->channel[first_non_void].size >= 32 || 1781 desc->channel[first_non_void].pure_integer) 1782 return V_008F0C_BUF_NUM_FORMAT_SINT; 1783 else if (desc->channel[first_non_void].normalized) 1784 return V_008F0C_BUF_NUM_FORMAT_SNORM; 1785 else 1786 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 1787 break; 1788 case UTIL_FORMAT_TYPE_UNSIGNED: 1789 if (desc->channel[first_non_void].size >= 32 || 1790 desc->channel[first_non_void].pure_integer) 1791 return V_008F0C_BUF_NUM_FORMAT_UINT; 1792 else if (desc->channel[first_non_void].normalized) 1793 return V_008F0C_BUF_NUM_FORMAT_UNORM; 1794 else 1795 return V_008F0C_BUF_NUM_FORMAT_USCALED; 1796 break; 1797 case UTIL_FORMAT_TYPE_FLOAT: 1798 default: 1799 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 1800 } 1801 } 1802 1803 static unsigned si_is_vertex_format_supported(struct pipe_screen *screen, 1804 enum pipe_format format, 1805 unsigned usage) 1806 { 1807 const struct util_format_description *desc; 1808 int first_non_void; 1809 unsigned data_format; 1810 1811 assert((usage & ~(PIPE_BIND_SHADER_IMAGE | 1812 PIPE_BIND_SAMPLER_VIEW | 1813 PIPE_BIND_VERTEX_BUFFER)) == 0); 1814 1815 desc = util_format_description(format); 1816 1817 /* There are no native 8_8_8 or 16_16_16 data formats, and we currently 1818 * select 8_8_8_8 and 16_16_16_16 instead. This works reasonably well 1819 * for read-only access (with caveats surrounding bounds checks), but 1820 * obviously fails for write access which we have to implement for 1821 * shader images. Luckily, OpenGL doesn't expect this to be supported 1822 * anyway, and so the only impact is on PBO uploads / downloads, which 1823 * shouldn't be expected to be fast for GL_RGB anyway. 1824 */ 1825 if (desc->block.bits == 3 * 8 || 1826 desc->block.bits == 3 * 16) { 1827 if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) { 1828 usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW); 1829 if (!usage) 1830 return 0; 1831 } 1832 } 1833 1834 first_non_void = util_format_get_first_non_void_channel(format); 1835 data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 1836 if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID) 1837 return 0; 1838 1839 return usage; 1840 } 1841 1842 static bool si_is_colorbuffer_format_supported(enum pipe_format format) 1843 { 1844 return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && 1845 r600_translate_colorswap(format, false) != ~0U; 1846 } 1847 1848 static bool si_is_zs_format_supported(enum pipe_format format) 1849 { 1850 return si_translate_dbformat(format) != V_028040_Z_INVALID; 1851 } 1852 1853 static boolean si_is_format_supported(struct pipe_screen *screen, 1854 enum pipe_format format, 1855 enum pipe_texture_target target, 1856 unsigned sample_count, 1857 unsigned usage) 1858 { 1859 unsigned retval = 0; 1860 1861 if (target >= PIPE_MAX_TEXTURE_TYPES) { 1862 R600_ERR("r600: unsupported texture type %d\n", target); 1863 return false; 1864 } 1865 1866 if (!util_format_is_supported(format, usage)) 1867 return false; 1868 1869 if (sample_count > 1) { 1870 if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 1871 return false; 1872 1873 if (usage & PIPE_BIND_SHADER_IMAGE) 1874 return false; 1875 1876 switch (sample_count) { 1877 case 2: 1878 case 4: 1879 case 8: 1880 break; 1881 case 16: 1882 if (format == PIPE_FORMAT_NONE) 1883 return true; 1884 else 1885 return false; 1886 default: 1887 return false; 1888 } 1889 } 1890 1891 if (usage & (PIPE_BIND_SAMPLER_VIEW | 1892 PIPE_BIND_SHADER_IMAGE)) { 1893 if (target == PIPE_BUFFER) { 1894 retval |= si_is_vertex_format_supported( 1895 screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | 1896 PIPE_BIND_SHADER_IMAGE)); 1897 } else { 1898 if (si_is_sampler_format_supported(screen, format)) 1899 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | 1900 PIPE_BIND_SHADER_IMAGE); 1901 } 1902 } 1903 1904 if ((usage & (PIPE_BIND_RENDER_TARGET | 1905 PIPE_BIND_DISPLAY_TARGET | 1906 PIPE_BIND_SCANOUT | 1907 PIPE_BIND_SHARED | 1908 PIPE_BIND_BLENDABLE)) && 1909 si_is_colorbuffer_format_supported(format)) { 1910 retval |= usage & 1911 (PIPE_BIND_RENDER_TARGET | 1912 PIPE_BIND_DISPLAY_TARGET | 1913 PIPE_BIND_SCANOUT | 1914 PIPE_BIND_SHARED); 1915 if (!util_format_is_pure_integer(format) && 1916 !util_format_is_depth_or_stencil(format)) 1917 retval |= usage & PIPE_BIND_BLENDABLE; 1918 } 1919 1920 if ((usage & PIPE_BIND_DEPTH_STENCIL) && 1921 si_is_zs_format_supported(format)) { 1922 retval |= PIPE_BIND_DEPTH_STENCIL; 1923 } 1924 1925 if (usage & PIPE_BIND_VERTEX_BUFFER) { 1926 retval |= si_is_vertex_format_supported(screen, format, 1927 PIPE_BIND_VERTEX_BUFFER); 1928 } 1929 1930 if ((usage & PIPE_BIND_LINEAR) && 1931 !util_format_is_compressed(format) && 1932 !(usage & PIPE_BIND_DEPTH_STENCIL)) 1933 retval |= PIPE_BIND_LINEAR; 1934 1935 return retval == usage; 1936 } 1937 1938 /* 1939 * framebuffer handling 1940 */ 1941 1942 static void si_choose_spi_color_formats(struct r600_surface *surf, 1943 unsigned format, unsigned swap, 1944 unsigned ntype, bool is_depth) 1945 { 1946 /* Alpha is needed for alpha-to-coverage. 1947 * Blending may be with or without alpha. 1948 */ 1949 unsigned normal = 0; /* most optimal, may not support blending or export alpha */ 1950 unsigned alpha = 0; /* exports alpha, but may not support blending */ 1951 unsigned blend = 0; /* supports blending, but may not export alpha */ 1952 unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */ 1953 1954 /* Choose the SPI color formats. These are required values for Stoney/RB+. 1955 * Other chips have multiple choices, though they are not necessarily better. 1956 */ 1957 switch (format) { 1958 case V_028C70_COLOR_5_6_5: 1959 case V_028C70_COLOR_1_5_5_5: 1960 case V_028C70_COLOR_5_5_5_1: 1961 case V_028C70_COLOR_4_4_4_4: 1962 case V_028C70_COLOR_10_11_11: 1963 case V_028C70_COLOR_11_11_10: 1964 case V_028C70_COLOR_8: 1965 case V_028C70_COLOR_8_8: 1966 case V_028C70_COLOR_8_8_8_8: 1967 case V_028C70_COLOR_10_10_10_2: 1968 case V_028C70_COLOR_2_10_10_10: 1969 if (ntype == V_028C70_NUMBER_UINT) 1970 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 1971 else if (ntype == V_028C70_NUMBER_SINT) 1972 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 1973 else 1974 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 1975 break; 1976 1977 case V_028C70_COLOR_16: 1978 case V_028C70_COLOR_16_16: 1979 case V_028C70_COLOR_16_16_16_16: 1980 if (ntype == V_028C70_NUMBER_UNORM || 1981 ntype == V_028C70_NUMBER_SNORM) { 1982 /* UNORM16 and SNORM16 don't support blending */ 1983 if (ntype == V_028C70_NUMBER_UNORM) 1984 normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR; 1985 else 1986 normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR; 1987 1988 /* Use 32 bits per channel for blending. */ 1989 if (format == V_028C70_COLOR_16) { 1990 if (swap == V_028C70_SWAP_STD) { /* R */ 1991 blend = V_028714_SPI_SHADER_32_R; 1992 blend_alpha = V_028714_SPI_SHADER_32_AR; 1993 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 1994 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 1995 else 1996 assert(0); 1997 } else if (format == V_028C70_COLOR_16_16) { 1998 if (swap == V_028C70_SWAP_STD) { /* RG */ 1999 blend = V_028714_SPI_SHADER_32_GR; 2000 blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2001 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 2002 blend = blend_alpha = V_028714_SPI_SHADER_32_AR; 2003 else 2004 assert(0); 2005 } else /* 16_16_16_16 */ 2006 blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2007 } else if (ntype == V_028C70_NUMBER_UINT) 2008 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR; 2009 else if (ntype == V_028C70_NUMBER_SINT) 2010 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; 2011 else if (ntype == V_028C70_NUMBER_FLOAT) 2012 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; 2013 else 2014 assert(0); 2015 break; 2016 2017 case V_028C70_COLOR_32: 2018 if (swap == V_028C70_SWAP_STD) { /* R */ 2019 blend = normal = V_028714_SPI_SHADER_32_R; 2020 alpha = blend_alpha = V_028714_SPI_SHADER_32_AR; 2021 } else if (swap == V_028C70_SWAP_ALT_REV) /* A */ 2022 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 2023 else 2024 assert(0); 2025 break; 2026 2027 case V_028C70_COLOR_32_32: 2028 if (swap == V_028C70_SWAP_STD) { /* RG */ 2029 blend = normal = V_028714_SPI_SHADER_32_GR; 2030 alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR; 2031 } else if (swap == V_028C70_SWAP_ALT) /* RA */ 2032 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR; 2033 else 2034 assert(0); 2035 break; 2036 2037 case V_028C70_COLOR_32_32_32_32: 2038 case V_028C70_COLOR_8_24: 2039 case V_028C70_COLOR_24_8: 2040 case V_028C70_COLOR_X24_8_32_FLOAT: 2041 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; 2042 break; 2043 2044 default: 2045 assert(0); 2046 return; 2047 } 2048 2049 /* The DB->CB copy needs 32_ABGR. */ 2050 if (is_depth) 2051 alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR; 2052 2053 surf->spi_shader_col_format = normal; 2054 surf->spi_shader_col_format_alpha = alpha; 2055 surf->spi_shader_col_format_blend = blend; 2056 surf->spi_shader_col_format_blend_alpha = blend_alpha; 2057 } 2058 2059 static void si_initialize_color_surface(struct si_context *sctx, 2060 struct r600_surface *surf) 2061 { 2062 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 2063 unsigned color_info, color_attrib, color_view; 2064 unsigned format, swap, ntype, endian; 2065 const struct util_format_description *desc; 2066 int i; 2067 unsigned blend_clamp = 0, blend_bypass = 0; 2068 2069 color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 2070 S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer); 2071 2072 desc = util_format_description(surf->base.format); 2073 for (i = 0; i < 4; i++) { 2074 if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { 2075 break; 2076 } 2077 } 2078 if (i == 4 || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) { 2079 ntype = V_028C70_NUMBER_FLOAT; 2080 } else { 2081 ntype = V_028C70_NUMBER_UNORM; 2082 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 2083 ntype = V_028C70_NUMBER_SRGB; 2084 else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 2085 if (desc->channel[i].pure_integer) { 2086 ntype = V_028C70_NUMBER_SINT; 2087 } else { 2088 assert(desc->channel[i].normalized); 2089 ntype = V_028C70_NUMBER_SNORM; 2090 } 2091 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 2092 if (desc->channel[i].pure_integer) { 2093 ntype = V_028C70_NUMBER_UINT; 2094 } else { 2095 assert(desc->channel[i].normalized); 2096 ntype = V_028C70_NUMBER_UNORM; 2097 } 2098 } 2099 } 2100 2101 format = si_translate_colorformat(surf->base.format); 2102 if (format == V_028C70_COLOR_INVALID) { 2103 R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 2104 } 2105 assert(format != V_028C70_COLOR_INVALID); 2106 swap = r600_translate_colorswap(surf->base.format, false); 2107 endian = si_colorformat_endian_swap(format); 2108 2109 /* blend clamp should be set for all NORM/SRGB types */ 2110 if (ntype == V_028C70_NUMBER_UNORM || 2111 ntype == V_028C70_NUMBER_SNORM || 2112 ntype == V_028C70_NUMBER_SRGB) 2113 blend_clamp = 1; 2114 2115 /* set blend bypass according to docs if SINT/UINT or 2116 8/24 COLOR variants */ 2117 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 2118 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 2119 format == V_028C70_COLOR_X24_8_32_FLOAT) { 2120 blend_clamp = 0; 2121 blend_bypass = 1; 2122 } 2123 2124 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) { 2125 if (format == V_028C70_COLOR_8 || 2126 format == V_028C70_COLOR_8_8 || 2127 format == V_028C70_COLOR_8_8_8_8) 2128 surf->color_is_int8 = true; 2129 else if (format == V_028C70_COLOR_10_10_10_2 || 2130 format == V_028C70_COLOR_2_10_10_10) 2131 surf->color_is_int10 = true; 2132 } 2133 2134 color_info = S_028C70_FORMAT(format) | 2135 S_028C70_COMP_SWAP(swap) | 2136 S_028C70_BLEND_CLAMP(blend_clamp) | 2137 S_028C70_BLEND_BYPASS(blend_bypass) | 2138 S_028C70_SIMPLE_FLOAT(1) | 2139 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && 2140 ntype != V_028C70_NUMBER_SNORM && 2141 ntype != V_028C70_NUMBER_SRGB && 2142 format != V_028C70_COLOR_8_24 && 2143 format != V_028C70_COLOR_24_8) | 2144 S_028C70_NUMBER_TYPE(ntype) | 2145 S_028C70_ENDIAN(endian); 2146 2147 /* Intensity is implemented as Red, so treat it that way. */ 2148 color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 || 2149 util_format_is_intensity(surf->base.format)); 2150 2151 if (rtex->resource.b.b.nr_samples > 1) { 2152 unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); 2153 2154 color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | 2155 S_028C74_NUM_FRAGMENTS(log_samples); 2156 2157 if (rtex->fmask.size) { 2158 color_info |= S_028C70_COMPRESSION(1); 2159 unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height); 2160 2161 if (sctx->b.chip_class == SI) { 2162 /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ 2163 color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 2164 } 2165 } 2166 } 2167 2168 surf->cb_color_view = color_view; 2169 surf->cb_color_info = color_info; 2170 surf->cb_color_attrib = color_attrib; 2171 2172 if (sctx->b.chip_class >= VI) { 2173 unsigned max_uncompressed_block_size = 2; 2174 2175 if (rtex->resource.b.b.nr_samples > 1) { 2176 if (rtex->surface.bpe == 1) 2177 max_uncompressed_block_size = 0; 2178 else if (rtex->surface.bpe == 2) 2179 max_uncompressed_block_size = 1; 2180 } 2181 2182 surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | 2183 S_028C78_INDEPENDENT_64B_BLOCKS(1); 2184 } 2185 2186 /* This must be set for fast clear to work without FMASK. */ 2187 if (!rtex->fmask.size && sctx->b.chip_class == SI) { 2188 unsigned bankh = util_logbase2(rtex->surface.bankh); 2189 surf->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 2190 } 2191 2192 /* Determine pixel shader export format */ 2193 si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth); 2194 2195 surf->color_initialized = true; 2196 } 2197 2198 static void si_init_depth_surface(struct si_context *sctx, 2199 struct r600_surface *surf) 2200 { 2201 struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; 2202 unsigned level = surf->base.u.tex.level; 2203 struct radeon_surf_level *levelinfo = &rtex->surface.level[level]; 2204 unsigned format; 2205 uint32_t z_info, s_info, db_depth_info; 2206 uint64_t z_offs, s_offs; 2207 uint32_t db_htile_data_base, db_htile_surface; 2208 2209 format = si_translate_dbformat(rtex->db_render_format); 2210 2211 if (format == V_028040_Z_INVALID) { 2212 R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); 2213 } 2214 assert(format != V_028040_Z_INVALID); 2215 2216 s_offs = z_offs = rtex->resource.gpu_address; 2217 z_offs += rtex->surface.level[level].offset; 2218 s_offs += rtex->surface.stencil_level[level].offset; 2219 2220 db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile); 2221 2222 z_info = S_028040_FORMAT(format); 2223 if (rtex->resource.b.b.nr_samples > 1) { 2224 z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples)); 2225 } 2226 2227 if (rtex->surface.flags & RADEON_SURF_SBUFFER) 2228 s_info = S_028044_FORMAT(V_028044_STENCIL_8); 2229 else 2230 s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID); 2231 2232 if (sctx->b.chip_class >= CIK) { 2233 struct radeon_info *info = &sctx->screen->b.info; 2234 unsigned index = rtex->surface.tiling_index[level]; 2235 unsigned stencil_index = rtex->surface.stencil_tiling_index[level]; 2236 unsigned macro_index = rtex->surface.macro_tile_index; 2237 unsigned tile_mode = info->si_tile_mode_array[index]; 2238 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; 2239 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index]; 2240 2241 db_depth_info |= 2242 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | 2243 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | 2244 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) | 2245 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) | 2246 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) | 2247 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode)); 2248 z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode)); 2249 s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode)); 2250 } else { 2251 unsigned tile_mode_index = si_tile_mode_index(rtex, level, false); 2252 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 2253 tile_mode_index = si_tile_mode_index(rtex, level, true); 2254 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 2255 } 2256 2257 /* HiZ aka depth buffer htile */ 2258 /* use htile only for first level */ 2259 if (rtex->htile_buffer && !level) { 2260 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | 2261 S_028040_ALLOW_EXPCLEAR(1); 2262 2263 if (rtex->surface.flags & RADEON_SURF_SBUFFER) { 2264 /* Workaround: For a not yet understood reason, the 2265 * combination of MSAA, fast stencil clear and stencil 2266 * decompress messes with subsequent stencil buffer 2267 * uses. Problem was reproduced on Verde, Bonaire, 2268 * Tonga, and Carrizo. 2269 * 2270 * Disabling EXPCLEAR works around the problem. 2271 * 2272 * Check piglit's arb_texture_multisample-stencil-clear 2273 * test if you want to try changing this. 2274 */ 2275 if (rtex->resource.b.b.nr_samples <= 1) 2276 s_info |= S_028044_ALLOW_EXPCLEAR(1); 2277 } else if (!rtex->tc_compatible_htile) { 2278 /* Use all of the htile_buffer for depth if there's no stencil. 2279 * This must not be set when TC-compatible HTILE is enabled 2280 * due to a hw bug. 2281 */ 2282 s_info |= S_028044_TILE_STENCIL_DISABLE(1); 2283 } 2284 2285 uint64_t va = rtex->htile_buffer->gpu_address; 2286 db_htile_data_base = va >> 8; 2287 db_htile_surface = S_028ABC_FULL_CACHE(1); 2288 2289 if (rtex->tc_compatible_htile) { 2290 db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); 2291 2292 switch (rtex->resource.b.b.nr_samples) { 2293 case 0: 2294 case 1: 2295 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); 2296 break; 2297 case 2: 2298 case 4: 2299 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); 2300 break; 2301 case 8: 2302 z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); 2303 break; 2304 default: 2305 assert(0); 2306 } 2307 } 2308 } else { 2309 db_htile_data_base = 0; 2310 db_htile_surface = 0; 2311 } 2312 2313 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 2314 2315 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 2316 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 2317 surf->db_htile_data_base = db_htile_data_base; 2318 surf->db_depth_info = db_depth_info; 2319 surf->db_z_info = z_info; 2320 surf->db_stencil_info = s_info; 2321 surf->db_depth_base = z_offs >> 8; 2322 surf->db_stencil_base = s_offs >> 8; 2323 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 2324 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 2325 surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * 2326 levelinfo->nblk_y) / 64 - 1); 2327 surf->db_htile_surface = db_htile_surface; 2328 2329 surf->depth_initialized = true; 2330 } 2331 2332 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 2333 { 2334 for (int i = 0; i < state->nr_cbufs; ++i) { 2335 struct r600_surface *surf = NULL; 2336 struct r600_texture *rtex; 2337 2338 if (!state->cbufs[i]) 2339 continue; 2340 surf = (struct r600_surface*)state->cbufs[i]; 2341 rtex = (struct r600_texture*)surf->base.texture; 2342 2343 p_atomic_dec(&rtex->framebuffers_bound); 2344 } 2345 } 2346 2347 static void si_set_framebuffer_state(struct pipe_context *ctx, 2348 const struct pipe_framebuffer_state *state) 2349 { 2350 struct si_context *sctx = (struct si_context *)ctx; 2351 struct pipe_constant_buffer constbuf = {0}; 2352 struct r600_surface *surf = NULL; 2353 struct r600_texture *rtex; 2354 bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 2355 unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2356 int i; 2357 2358 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 2359 if (!sctx->framebuffer.state.cbufs[i]) 2360 continue; 2361 2362 rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture; 2363 if (rtex->dcc_gather_statistics) 2364 vi_separate_dcc_stop_query(ctx, rtex); 2365 } 2366 2367 /* Only flush TC when changing the framebuffer state, because 2368 * the only client not using TC that can change textures is 2369 * the framebuffer. 2370 * 2371 * Flush all CB and DB caches here because all buffers can be used 2372 * for write by both TC (with shader image stores) and CB/DB. 2373 */ 2374 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | 2375 SI_CONTEXT_INV_GLOBAL_L2 | 2376 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | 2377 SI_CONTEXT_CS_PARTIAL_FLUSH; 2378 2379 /* Take the maximum of the old and new count. If the new count is lower, 2380 * dirtying is needed to disable the unbound colorbuffers. 2381 */ 2382 sctx->framebuffer.dirty_cbufs |= 2383 (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 2384 sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 2385 2386 si_dec_framebuffer_counters(&sctx->framebuffer.state); 2387 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 2388 2389 sctx->framebuffer.colorbuf_enabled_4bit = 0; 2390 sctx->framebuffer.spi_shader_col_format = 0; 2391 sctx->framebuffer.spi_shader_col_format_alpha = 0; 2392 sctx->framebuffer.spi_shader_col_format_blend = 0; 2393 sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 2394 sctx->framebuffer.color_is_int8 = 0; 2395 sctx->framebuffer.color_is_int10 = 0; 2396 2397 sctx->framebuffer.compressed_cb_mask = 0; 2398 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 2399 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 2400 sctx->framebuffer.any_dst_linear = false; 2401 2402 for (i = 0; i < state->nr_cbufs; i++) { 2403 if (!state->cbufs[i]) 2404 continue; 2405 2406 surf = (struct r600_surface*)state->cbufs[i]; 2407 rtex = (struct r600_texture*)surf->base.texture; 2408 2409 if (!surf->color_initialized) { 2410 si_initialize_color_surface(sctx, surf); 2411 } 2412 2413 sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4); 2414 sctx->framebuffer.spi_shader_col_format |= 2415 surf->spi_shader_col_format << (i * 4); 2416 sctx->framebuffer.spi_shader_col_format_alpha |= 2417 surf->spi_shader_col_format_alpha << (i * 4); 2418 sctx->framebuffer.spi_shader_col_format_blend |= 2419 surf->spi_shader_col_format_blend << (i * 4); 2420 sctx->framebuffer.spi_shader_col_format_blend_alpha |= 2421 surf->spi_shader_col_format_blend_alpha << (i * 4); 2422 2423 if (surf->color_is_int8) 2424 sctx->framebuffer.color_is_int8 |= 1 << i; 2425 if (surf->color_is_int10) 2426 sctx->framebuffer.color_is_int10 |= 1 << i; 2427 2428 if (rtex->fmask.size) { 2429 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2430 } 2431 2432 if (rtex->surface.is_linear) 2433 sctx->framebuffer.any_dst_linear = true; 2434 2435 r600_context_add_resource_size(ctx, surf->base.texture); 2436 2437 p_atomic_inc(&rtex->framebuffers_bound); 2438 2439 if (rtex->dcc_gather_statistics) { 2440 /* Dirty tracking must be enabled for DCC usage analysis. */ 2441 sctx->framebuffer.compressed_cb_mask |= 1 << i; 2442 vi_separate_dcc_start_query(ctx, rtex); 2443 } 2444 } 2445 2446 if (state->zsbuf) { 2447 surf = (struct r600_surface*)state->zsbuf; 2448 rtex = (struct r600_texture*)surf->base.texture; 2449 2450 if (!surf->depth_initialized) { 2451 si_init_depth_surface(sctx, surf); 2452 } 2453 r600_context_add_resource_size(ctx, surf->base.texture); 2454 } 2455 2456 si_update_poly_offset_state(sctx); 2457 si_mark_atom_dirty(sctx, &sctx->cb_render_state); 2458 si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); 2459 2460 if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 2461 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2462 2463 if (sctx->framebuffer.nr_samples != old_nr_samples) { 2464 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2465 si_mark_atom_dirty(sctx, &sctx->db_render_state); 2466 2467 /* Set sample locations as fragment shader constants. */ 2468 switch (sctx->framebuffer.nr_samples) { 2469 case 1: 2470 constbuf.user_buffer = sctx->b.sample_locations_1x; 2471 break; 2472 case 2: 2473 constbuf.user_buffer = sctx->b.sample_locations_2x; 2474 break; 2475 case 4: 2476 constbuf.user_buffer = sctx->b.sample_locations_4x; 2477 break; 2478 case 8: 2479 constbuf.user_buffer = sctx->b.sample_locations_8x; 2480 break; 2481 case 16: 2482 constbuf.user_buffer = sctx->b.sample_locations_16x; 2483 break; 2484 default: 2485 R600_ERR("Requested an invalid number of samples %i.\n", 2486 sctx->framebuffer.nr_samples); 2487 assert(0); 2488 } 2489 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 2490 si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 2491 2492 si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); 2493 } 2494 2495 sctx->need_check_render_feedback = true; 2496 sctx->do_update_shaders = true; 2497 } 2498 2499 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) 2500 { 2501 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2502 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2503 unsigned i, nr_cbufs = state->nr_cbufs; 2504 struct r600_texture *tex = NULL; 2505 struct r600_surface *cb = NULL; 2506 unsigned cb_color_info = 0; 2507 2508 /* Colorbuffers. */ 2509 for (i = 0; i < nr_cbufs; i++) { 2510 const struct radeon_surf_level *level_info; 2511 unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 2512 unsigned cb_color_base, cb_color_fmask, cb_color_attrib; 2513 unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 2514 2515 if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 2516 continue; 2517 2518 cb = (struct r600_surface*)state->cbufs[i]; 2519 if (!cb) { 2520 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 2521 S_028C70_FORMAT(V_028C70_COLOR_INVALID)); 2522 continue; 2523 } 2524 2525 tex = (struct r600_texture *)cb->base.texture; 2526 level_info = &tex->surface.level[cb->base.u.tex.level]; 2527 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2528 &tex->resource, RADEON_USAGE_READWRITE, 2529 tex->resource.b.b.nr_samples > 1 ? 2530 RADEON_PRIO_COLOR_BUFFER_MSAA : 2531 RADEON_PRIO_COLOR_BUFFER); 2532 2533 if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) { 2534 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2535 tex->cmask_buffer, RADEON_USAGE_READWRITE, 2536 RADEON_PRIO_CMASK); 2537 } 2538 2539 if (tex->dcc_separate_buffer) 2540 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2541 tex->dcc_separate_buffer, 2542 RADEON_USAGE_READWRITE, 2543 RADEON_PRIO_DCC); 2544 2545 /* Compute mutable surface parameters. */ 2546 pitch_tile_max = level_info->nblk_x / 8 - 1; 2547 slice_tile_max = level_info->nblk_x * 2548 level_info->nblk_y / 64 - 1; 2549 tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 2550 2551 cb_color_base = (tex->resource.gpu_address + level_info->offset) >> 8; 2552 cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 2553 cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 2554 cb_color_attrib = cb->cb_color_attrib | 2555 S_028C74_TILE_MODE_INDEX(tile_mode_index); 2556 2557 if (tex->fmask.size) { 2558 if (sctx->b.chip_class >= CIK) 2559 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1); 2560 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index); 2561 cb_color_fmask = (tex->resource.gpu_address + tex->fmask.offset) >> 8; 2562 cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max); 2563 } else { 2564 /* This must be set for fast clear to work without FMASK. */ 2565 if (sctx->b.chip_class >= CIK) 2566 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 2567 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 2568 cb_color_fmask = cb_color_base; 2569 cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 2570 } 2571 2572 cb_color_info = cb->cb_color_info | tex->cb_color_info; 2573 2574 if (tex->dcc_offset && cb->base.u.tex.level < tex->surface.num_dcc_levels) { 2575 bool is_msaa_resolve_dst = state->cbufs[0] && 2576 state->cbufs[0]->texture->nr_samples > 1 && 2577 state->cbufs[1] == &cb->base && 2578 state->cbufs[1]->texture->nr_samples <= 1; 2579 2580 if (!is_msaa_resolve_dst) 2581 cb_color_info |= S_028C70_DCC_ENABLE(1); 2582 } 2583 2584 radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 2585 sctx->b.chip_class >= VI ? 14 : 13); 2586 radeon_emit(cs, cb_color_base); /* R_028C60_CB_COLOR0_BASE */ 2587 radeon_emit(cs, cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ 2588 radeon_emit(cs, cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ 2589 radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ 2590 radeon_emit(cs, cb_color_info); /* R_028C70_CB_COLOR0_INFO */ 2591 radeon_emit(cs, cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ 2592 radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */ 2593 radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ 2594 radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ 2595 radeon_emit(cs, cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ 2596 radeon_emit(cs, cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ 2597 radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ 2598 radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ 2599 2600 if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */ 2601 radeon_emit(cs, ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) + 2602 tex->dcc_offset + 2603 tex->surface.level[cb->base.u.tex.level].dcc_offset) >> 8); 2604 } 2605 for (; i < 8 ; i++) 2606 if (sctx->framebuffer.dirty_cbufs & (1 << i)) 2607 radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 2608 2609 /* ZS buffer. */ 2610 if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 2611 struct r600_surface *zb = (struct r600_surface*)state->zsbuf; 2612 struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; 2613 2614 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2615 &rtex->resource, RADEON_USAGE_READWRITE, 2616 zb->base.texture->nr_samples > 1 ? 2617 RADEON_PRIO_DEPTH_BUFFER_MSAA : 2618 RADEON_PRIO_DEPTH_BUFFER); 2619 2620 if (zb->db_htile_data_base) { 2621 radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, 2622 rtex->htile_buffer, RADEON_USAGE_READWRITE, 2623 RADEON_PRIO_HTILE); 2624 } 2625 2626 radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 2627 radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 2628 2629 radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); 2630 radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2631 radeon_emit(cs, zb->db_z_info | /* R_028040_DB_Z_INFO */ 2632 S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0)); 2633 radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2634 radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ 2635 radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2636 radeon_emit(cs, zb->db_depth_base); /* R_028050_DB_Z_WRITE_BASE */ 2637 radeon_emit(cs, zb->db_stencil_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2638 radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 2639 radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2640 2641 radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); 2642 radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */ 2643 radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */ 2644 2645 radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); 2646 } else if (sctx->framebuffer.dirty_zsbuf) { 2647 radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); 2648 radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ 2649 radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ 2650 } 2651 2652 /* Framebuffer dimensions. */ 2653 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ 2654 radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2655 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 2656 2657 sctx->framebuffer.dirty_cbufs = 0; 2658 sctx->framebuffer.dirty_zsbuf = false; 2659 } 2660 2661 static void si_emit_msaa_sample_locs(struct si_context *sctx, 2662 struct r600_atom *atom) 2663 { 2664 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2665 unsigned nr_samples = sctx->framebuffer.nr_samples; 2666 2667 /* Smoothing (only possible with nr_samples == 1) uses the same 2668 * sample locations as the MSAA it simulates. 2669 */ 2670 if (nr_samples <= 1 && sctx->smoothing_enabled) 2671 nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; 2672 2673 /* On Polaris, the small primitive filter uses the sample locations 2674 * even when MSAA is off, so we need to make sure they're set to 0. 2675 */ 2676 if (sctx->b.family >= CHIP_POLARIS10) 2677 nr_samples = MAX2(nr_samples, 1); 2678 2679 if (nr_samples >= 1 && 2680 (nr_samples != sctx->msaa_sample_locs.nr_samples)) { 2681 sctx->msaa_sample_locs.nr_samples = nr_samples; 2682 cayman_emit_msaa_sample_locs(cs, nr_samples); 2683 } 2684 2685 if (sctx->b.family >= CHIP_POLARIS10) { 2686 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 2687 unsigned small_prim_filter_cntl = 2688 S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 2689 S_028830_LINE_FILTER_DISABLE(1); /* line bug */ 2690 2691 /* The alternative of setting sample locations to 0 would 2692 * require a DB flush to avoid Z errors, see 2693 * https://bugs.freedesktop.org/show_bug.cgi?id=96908 2694 */ 2695 if (sctx->framebuffer.nr_samples > 1 && rs && !rs->multisample_enable) 2696 small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; 2697 2698 radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 2699 small_prim_filter_cntl); 2700 } 2701 } 2702 2703 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) 2704 { 2705 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 2706 unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes; 2707 /* 33% faster rendering to linear color buffers */ 2708 bool dst_is_linear = sctx->framebuffer.any_dst_linear; 2709 unsigned sc_mode_cntl_1 = 2710 S_028A4C_WALK_SIZE(dst_is_linear) | 2711 S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | 2712 S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) | 2713 /* always 1: */ 2714 S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | 2715 S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 2716 S_028A4C_TILE_WALK_ORDER_ENABLE(1) | 2717 S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 2718 S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | 2719 S_028A4C_FORCE_EOV_REZ_ENABLE(1); 2720 2721 cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples, 2722 sctx->ps_iter_samples, 2723 sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0, 2724 sc_mode_cntl_1); 2725 } 2726 2727 static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 2728 { 2729 struct si_context *sctx = (struct si_context *)ctx; 2730 2731 if (sctx->ps_iter_samples == min_samples) 2732 return; 2733 2734 sctx->ps_iter_samples = min_samples; 2735 sctx->do_update_shaders = true; 2736 2737 if (sctx->framebuffer.nr_samples > 1) 2738 si_mark_atom_dirty(sctx, &sctx->msaa_config); 2739 } 2740 2741 /* 2742 * Samplers 2743 */ 2744 2745 /** 2746 * Build the sampler view descriptor for a buffer texture. 2747 * @param state 256-bit descriptor; only the high 128 bits are filled in 2748 */ 2749 void 2750 si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, 2751 enum pipe_format format, 2752 unsigned offset, unsigned size, 2753 uint32_t *state) 2754 { 2755 const struct util_format_description *desc; 2756 int first_non_void; 2757 unsigned stride; 2758 unsigned num_records; 2759 unsigned num_format, data_format; 2760 2761 desc = util_format_description(format); 2762 first_non_void = util_format_get_first_non_void_channel(format); 2763 stride = desc->block.bits / 8; 2764 num_format = si_translate_buffer_numformat(&screen->b.b, desc, first_non_void); 2765 data_format = si_translate_buffer_dataformat(&screen->b.b, desc, first_non_void); 2766 2767 num_records = size / stride; 2768 num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); 2769 2770 if (screen->b.chip_class >= VI) 2771 num_records *= stride; 2772 2773 state[4] = 0; 2774 state[5] = S_008F04_STRIDE(stride); 2775 state[6] = num_records; 2776 state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 2777 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 2778 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 2779 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 2780 S_008F0C_NUM_FORMAT(num_format) | 2781 S_008F0C_DATA_FORMAT(data_format); 2782 } 2783 2784 /** 2785 * Build the sampler view descriptor for a texture. 2786 */ 2787 void 2788 si_make_texture_descriptor(struct si_screen *screen, 2789 struct r600_texture *tex, 2790 bool sampler, 2791 enum pipe_texture_target target, 2792 enum pipe_format pipe_format, 2793 const unsigned char state_swizzle[4], 2794 unsigned first_level, unsigned last_level, 2795 unsigned first_layer, unsigned last_layer, 2796 unsigned width, unsigned height, unsigned depth, 2797 uint32_t *state, 2798 uint32_t *fmask_state) 2799 { 2800 struct pipe_resource *res = &tex->resource.b.b; 2801 const struct util_format_description *desc; 2802 unsigned char swizzle[4]; 2803 int first_non_void; 2804 unsigned num_format, data_format, type; 2805 uint64_t va; 2806 2807 desc = util_format_description(pipe_format); 2808 2809 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 2810 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 2811 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 2812 const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 2813 2814 switch (pipe_format) { 2815 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2816 case PIPE_FORMAT_X32_S8X24_UINT: 2817 case PIPE_FORMAT_X8Z24_UNORM: 2818 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 2819 break; 2820 case PIPE_FORMAT_X24S8_UINT: 2821 /* 2822 * X24S8 is implemented as an 8_8_8_8 data format, to 2823 * fix texture gathers. This affects at least 2824 * GL45-CTS.texture_cube_map_array.sampling on VI. 2825 */ 2826 util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 2827 break; 2828 default: 2829 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 2830 } 2831 } else { 2832 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 2833 } 2834 2835 first_non_void = util_format_get_first_non_void_channel(pipe_format); 2836 2837 switch (pipe_format) { 2838 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 2839 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2840 break; 2841 default: 2842 if (first_non_void < 0) { 2843 if (util_format_is_compressed(pipe_format)) { 2844 switch (pipe_format) { 2845 case PIPE_FORMAT_DXT1_SRGB: 2846 case PIPE_FORMAT_DXT1_SRGBA: 2847 case PIPE_FORMAT_DXT3_SRGBA: 2848 case PIPE_FORMAT_DXT5_SRGBA: 2849 case PIPE_FORMAT_BPTC_SRGBA: 2850 case PIPE_FORMAT_ETC2_SRGB8: 2851 case PIPE_FORMAT_ETC2_SRGB8A1: 2852 case PIPE_FORMAT_ETC2_SRGBA8: 2853 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2854 break; 2855 case PIPE_FORMAT_RGTC1_SNORM: 2856 case PIPE_FORMAT_LATC1_SNORM: 2857 case PIPE_FORMAT_RGTC2_SNORM: 2858 case PIPE_FORMAT_LATC2_SNORM: 2859 case PIPE_FORMAT_ETC2_R11_SNORM: 2860 case PIPE_FORMAT_ETC2_RG11_SNORM: 2861 /* implies float, so use SNORM/UNORM to determine 2862 whether data is signed or not */ 2863 case PIPE_FORMAT_BPTC_RGB_FLOAT: 2864 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2865 break; 2866 default: 2867 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2868 break; 2869 } 2870 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 2871 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2872 } else { 2873 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2874 } 2875 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 2876 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 2877 } else { 2878 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2879 2880 switch (desc->channel[first_non_void].type) { 2881 case UTIL_FORMAT_TYPE_FLOAT: 2882 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 2883 break; 2884 case UTIL_FORMAT_TYPE_SIGNED: 2885 if (desc->channel[first_non_void].normalized) 2886 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 2887 else if (desc->channel[first_non_void].pure_integer) 2888 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 2889 else 2890 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 2891 break; 2892 case UTIL_FORMAT_TYPE_UNSIGNED: 2893 if (desc->channel[first_non_void].normalized) 2894 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 2895 else if (desc->channel[first_non_void].pure_integer) 2896 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 2897 else 2898 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 2899 } 2900 } 2901 } 2902 2903 data_format = si_translate_texformat(&screen->b.b, pipe_format, desc, first_non_void); 2904 if (data_format == ~0) { 2905 data_format = 0; 2906 } 2907 2908 if (!sampler && 2909 (res->target == PIPE_TEXTURE_CUBE || 2910 res->target == PIPE_TEXTURE_CUBE_ARRAY || 2911 res->target == PIPE_TEXTURE_3D)) { 2912 /* For the purpose of shader images, treat cube maps and 3D 2913 * textures as 2D arrays. For 3D textures, the address 2914 * calculations for mipmaps are different, so we rely on the 2915 * caller to effectively disable mipmaps. 2916 */ 2917 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 2918 2919 assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); 2920 } else { 2921 type = si_tex_dim(res->target, target, res->nr_samples); 2922 } 2923 2924 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 2925 height = 1; 2926 depth = res->array_size; 2927 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || 2928 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 2929 if (sampler || res->target != PIPE_TEXTURE_3D) 2930 depth = res->array_size; 2931 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 2932 depth = res->array_size / 6; 2933 2934 state[0] = 0; 2935 state[1] = (S_008F14_DATA_FORMAT(data_format) | 2936 S_008F14_NUM_FORMAT(num_format)); 2937 state[2] = (S_008F18_WIDTH(width - 1) | 2938 S_008F18_HEIGHT(height - 1) | 2939 S_008F18_PERF_MOD(4)); 2940 state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 2941 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 2942 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 2943 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 2944 S_008F1C_BASE_LEVEL(res->nr_samples > 1 ? 2945 0 : first_level) | 2946 S_008F1C_LAST_LEVEL(res->nr_samples > 1 ? 2947 util_logbase2(res->nr_samples) : 2948 last_level) | 2949 S_008F1C_POW2_PAD(res->last_level > 0) | 2950 S_008F1C_TYPE(type)); 2951 state[4] = S_008F20_DEPTH(depth - 1); 2952 state[5] = (S_008F24_BASE_ARRAY(first_layer) | 2953 S_008F24_LAST_ARRAY(last_layer)); 2954 state[6] = 0; 2955 state[7] = 0; 2956 2957 if (tex->dcc_offset) { 2958 unsigned swap = r600_translate_colorswap(pipe_format, false); 2959 2960 state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1); 2961 } else { 2962 /* The last dword is unused by hw. The shader uses it to clear 2963 * bits in the first dword of sampler state. 2964 */ 2965 if (screen->b.chip_class <= CIK && res->nr_samples <= 1) { 2966 if (first_level == last_level) 2967 state[7] = C_008F30_MAX_ANISO_RATIO; 2968 else 2969 state[7] = 0xffffffff; 2970 } 2971 } 2972 2973 /* Initialize the sampler view for FMASK. */ 2974 if (tex->fmask.size) { 2975 uint32_t fmask_format; 2976 2977 va = tex->resource.gpu_address + tex->fmask.offset; 2978 2979 switch (res->nr_samples) { 2980 case 2: 2981 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 2982 break; 2983 case 4: 2984 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 2985 break; 2986 case 8: 2987 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 2988 break; 2989 default: 2990 assert(0); 2991 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID; 2992 } 2993 2994 fmask_state[0] = va >> 8; 2995 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | 2996 S_008F14_DATA_FORMAT(fmask_format) | 2997 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_UINT); 2998 fmask_state[2] = S_008F18_WIDTH(width - 1) | 2999 S_008F18_HEIGHT(height - 1); 3000 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | 3001 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 3002 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | 3003 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 3004 S_008F1C_TILING_INDEX(tex->fmask.tile_mode_index) | 3005 S_008F1C_TYPE(si_tex_dim(res->target, target, 0)); 3006 fmask_state[4] = S_008F20_DEPTH(depth - 1) | 3007 S_008F20_PITCH(tex->fmask.pitch_in_pixels - 1); 3008 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) | 3009 S_008F24_LAST_ARRAY(last_layer); 3010 fmask_state[6] = 0; 3011 fmask_state[7] = 0; 3012 } 3013 } 3014 3015 /** 3016 * Create a sampler view. 3017 * 3018 * @param ctx context 3019 * @param texture texture 3020 * @param state sampler view template 3021 * @param width0 width0 override (for compressed textures as int) 3022 * @param height0 height0 override (for compressed textures as int) 3023 * @param force_level set the base address to the level (for compressed textures) 3024 */ 3025 struct pipe_sampler_view * 3026 si_create_sampler_view_custom(struct pipe_context *ctx, 3027 struct pipe_resource *texture, 3028 const struct pipe_sampler_view *state, 3029 unsigned width0, unsigned height0, 3030 unsigned force_level) 3031 { 3032 struct si_context *sctx = (struct si_context*)ctx; 3033 struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view); 3034 struct r600_texture *tmp = (struct r600_texture*)texture; 3035 unsigned base_level, first_level, last_level; 3036 unsigned char state_swizzle[4]; 3037 unsigned height, depth, width; 3038 unsigned last_layer = state->u.tex.last_layer; 3039 enum pipe_format pipe_format; 3040 const struct radeon_surf_level *surflevel; 3041 3042 if (!view) 3043 return NULL; 3044 3045 /* initialize base object */ 3046 view->base = *state; 3047 view->base.texture = NULL; 3048 view->base.reference.count = 1; 3049 view->base.context = ctx; 3050 3051 assert(texture); 3052 pipe_resource_reference(&view->base.texture, texture); 3053 3054 if (state->format == PIPE_FORMAT_X24S8_UINT || 3055 state->format == PIPE_FORMAT_S8X24_UINT || 3056 state->format == PIPE_FORMAT_X32_S8X24_UINT || 3057 state->format == PIPE_FORMAT_S8_UINT) 3058 view->is_stencil_sampler = true; 3059 3060 /* Buffer resource. */ 3061 if (texture->target == PIPE_BUFFER) { 3062 si_make_buffer_descriptor(sctx->screen, 3063 (struct r600_resource *)texture, 3064 state->format, 3065 state->u.buf.offset, 3066 state->u.buf.size, 3067 view->state); 3068 return &view->base; 3069 } 3070 3071 state_swizzle[0] = state->swizzle_r; 3072 state_swizzle[1] = state->swizzle_g; 3073 state_swizzle[2] = state->swizzle_b; 3074 state_swizzle[3] = state->swizzle_a; 3075 3076 base_level = 0; 3077 first_level = state->u.tex.first_level; 3078 last_level = state->u.tex.last_level; 3079 width = width0; 3080 height = height0; 3081 depth = texture->depth0; 3082 3083 if (force_level) { 3084 assert(force_level == first_level && 3085 force_level == last_level); 3086 base_level = force_level; 3087 first_level = 0; 3088 last_level = 0; 3089 width = u_minify(width, force_level); 3090 height = u_minify(height, force_level); 3091 depth = u_minify(depth, force_level); 3092 } 3093 3094 /* This is not needed if state trackers set last_layer correctly. */ 3095 if (state->target == PIPE_TEXTURE_1D || 3096 state->target == PIPE_TEXTURE_2D || 3097 state->target == PIPE_TEXTURE_RECT || 3098 state->target == PIPE_TEXTURE_CUBE) 3099 last_layer = state->u.tex.first_layer; 3100 3101 /* Texturing with separate depth and stencil. */ 3102 pipe_format = state->format; 3103 3104 /* Depth/stencil texturing sometimes needs separate texture. */ 3105 if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) { 3106 if (!tmp->flushed_depth_texture && 3107 !r600_init_flushed_depth_texture(ctx, texture, NULL)) { 3108 pipe_resource_reference(&view->base.texture, NULL); 3109 FREE(view); 3110 return NULL; 3111 } 3112 3113 assert(tmp->flushed_depth_texture); 3114 3115 /* Override format for the case where the flushed texture 3116 * contains only Z or only S. 3117 */ 3118 if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format) 3119 pipe_format = tmp->flushed_depth_texture->resource.b.b.format; 3120 3121 tmp = tmp->flushed_depth_texture; 3122 } 3123 3124 surflevel = tmp->surface.level; 3125 3126 if (tmp->db_compatible) { 3127 if (!view->is_stencil_sampler) 3128 pipe_format = tmp->db_render_format; 3129 3130 switch (pipe_format) { 3131 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 3132 pipe_format = PIPE_FORMAT_Z32_FLOAT; 3133 break; 3134 case PIPE_FORMAT_X8Z24_UNORM: 3135 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 3136 /* Z24 is always stored like this for DB 3137 * compatibility. 3138 */ 3139 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 3140 break; 3141 case PIPE_FORMAT_X24S8_UINT: 3142 case PIPE_FORMAT_S8X24_UINT: 3143 case PIPE_FORMAT_X32_S8X24_UINT: 3144 pipe_format = PIPE_FORMAT_S8_UINT; 3145 surflevel = tmp->surface.stencil_level; 3146 break; 3147 default:; 3148 } 3149 } 3150 3151 vi_dcc_disable_if_incompatible_format(&sctx->b, texture, 3152 state->u.tex.first_level, 3153 state->format); 3154 3155 si_make_texture_descriptor(sctx->screen, tmp, true, 3156 state->target, pipe_format, state_swizzle, 3157 first_level, last_level, 3158 state->u.tex.first_layer, last_layer, 3159 width, height, depth, 3160 view->state, view->fmask_state); 3161 3162 view->base_level_info = &surflevel[base_level]; 3163 view->base_level = base_level; 3164 view->block_width = util_format_get_blockwidth(pipe_format); 3165 return &view->base; 3166 } 3167 3168 static struct pipe_sampler_view * 3169 si_create_sampler_view(struct pipe_context *ctx, 3170 struct pipe_resource *texture, 3171 const struct pipe_sampler_view *state) 3172 { 3173 return si_create_sampler_view_custom(ctx, texture, state, 3174 texture ? texture->width0 : 0, 3175 texture ? texture->height0 : 0, 0); 3176 } 3177 3178 static void si_sampler_view_destroy(struct pipe_context *ctx, 3179 struct pipe_sampler_view *state) 3180 { 3181 struct si_sampler_view *view = (struct si_sampler_view *)state; 3182 3183 pipe_resource_reference(&state->texture, NULL); 3184 FREE(view); 3185 } 3186 3187 static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 3188 { 3189 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || 3190 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 3191 (linear_filter && 3192 (wrap == PIPE_TEX_WRAP_CLAMP || 3193 wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 3194 } 3195 3196 static bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) 3197 { 3198 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 3199 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 3200 3201 return (state->border_color.ui[0] || state->border_color.ui[1] || 3202 state->border_color.ui[2] || state->border_color.ui[3]) && 3203 (wrap_mode_uses_border_color(state->wrap_s, linear_filter) || 3204 wrap_mode_uses_border_color(state->wrap_t, linear_filter) || 3205 wrap_mode_uses_border_color(state->wrap_r, linear_filter)); 3206 } 3207 3208 static void *si_create_sampler_state(struct pipe_context *ctx, 3209 const struct pipe_sampler_state *state) 3210 { 3211 struct si_context *sctx = (struct si_context *)ctx; 3212 struct r600_common_screen *rscreen = sctx->b.screen; 3213 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 3214 unsigned border_color_type, border_color_index = 0; 3215 unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso 3216 : state->max_anisotropy; 3217 unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso); 3218 3219 if (!rstate) { 3220 return NULL; 3221 } 3222 3223 if (!sampler_state_needs_border_color(state)) 3224 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3225 else if (state->border_color.f[0] == 0 && 3226 state->border_color.f[1] == 0 && 3227 state->border_color.f[2] == 0 && 3228 state->border_color.f[3] == 0) 3229 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3230 else if (state->border_color.f[0] == 0 && 3231 state->border_color.f[1] == 0 && 3232 state->border_color.f[2] == 0 && 3233 state->border_color.f[3] == 1) 3234 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK; 3235 else if (state->border_color.f[0] == 1 && 3236 state->border_color.f[1] == 1 && 3237 state->border_color.f[2] == 1 && 3238 state->border_color.f[3] == 1) 3239 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE; 3240 else { 3241 int i; 3242 3243 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER; 3244 3245 /* Check if the border has been uploaded already. */ 3246 for (i = 0; i < sctx->border_color_count; i++) 3247 if (memcmp(&sctx->border_color_table[i], &state->border_color, 3248 sizeof(state->border_color)) == 0) 3249 break; 3250 3251 if (i >= SI_MAX_BORDER_COLORS) { 3252 /* Getting 4096 unique border colors is very unlikely. */ 3253 fprintf(stderr, "radeonsi: The border color table is full. " 3254 "Any new border colors will be just black. " 3255 "Please file a bug.\n"); 3256 border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK; 3257 } else { 3258 if (i == sctx->border_color_count) { 3259 /* Upload a new border color. */ 3260 memcpy(&sctx->border_color_table[i], &state->border_color, 3261 sizeof(state->border_color)); 3262 util_memcpy_cpu_to_le32(&sctx->border_color_map[i], 3263 &state->border_color, 3264 sizeof(state->border_color)); 3265 sctx->border_color_count++; 3266 } 3267 3268 border_color_index = i; 3269 } 3270 } 3271 3272 #ifdef DEBUG 3273 rstate->magic = SI_SAMPLER_STATE_MAGIC; 3274 #endif 3275 rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | 3276 S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 3277 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | 3278 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 3279 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 3280 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 3281 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | 3282 S_008F30_ANISO_BIAS(max_aniso_ratio) | 3283 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 3284 S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI)); 3285 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 3286 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | 3287 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0)); 3288 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 3289 S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) | 3290 S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) | 3291 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | 3292 S_008F38_MIP_POINT_PRECLAMP(1) | 3293 S_008F38_DISABLE_LSB_CEIL(1) | 3294 S_008F38_FILTER_PREC_FIX(1) | 3295 S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI)); 3296 rstate->val[3] = S_008F3C_BORDER_COLOR_PTR(border_color_index) | 3297 S_008F3C_BORDER_COLOR_TYPE(border_color_type); 3298 return rstate; 3299 } 3300 3301 static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 3302 { 3303 struct si_context *sctx = (struct si_context *)ctx; 3304 3305 if (sctx->sample_mask.sample_mask == (uint16_t)sample_mask) 3306 return; 3307 3308 sctx->sample_mask.sample_mask = sample_mask; 3309 si_mark_atom_dirty(sctx, &sctx->sample_mask.atom); 3310 } 3311 3312 static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom) 3313 { 3314 struct radeon_winsys_cs *cs = sctx->b.gfx.cs; 3315 unsigned mask = sctx->sample_mask.sample_mask; 3316 3317 /* Needed for line and polygon smoothing as well as for the Polaris 3318 * small primitive filter. We expect the state tracker to take care of 3319 * this for us. 3320 */ 3321 assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 3322 (mask & 1 && sctx->blitter->running)); 3323 3324 radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 3325 radeon_emit(cs, mask | (mask << 16)); 3326 radeon_emit(cs, mask | (mask << 16)); 3327 } 3328 3329 static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 3330 { 3331 #ifdef DEBUG 3332 struct si_sampler_state *s = state; 3333 3334 assert(s->magic == SI_SAMPLER_STATE_MAGIC); 3335 s->magic = 0; 3336 #endif 3337 free(state); 3338 } 3339 3340 /* 3341 * Vertex elements & buffers 3342 */ 3343 3344 static void *si_create_vertex_elements(struct pipe_context *ctx, 3345 unsigned count, 3346 const struct pipe_vertex_element *elements) 3347 { 3348 struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); 3349 bool used[SI_NUM_VERTEX_BUFFERS] = {}; 3350 int i; 3351 3352 assert(count <= SI_MAX_ATTRIBS); 3353 if (!v) 3354 return NULL; 3355 3356 v->count = count; 3357 for (i = 0; i < count; ++i) { 3358 const struct util_format_description *desc; 3359 const struct util_format_channel_description *channel; 3360 unsigned data_format, num_format; 3361 int first_non_void; 3362 unsigned vbo_index = elements[i].vertex_buffer_index; 3363 3364 if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { 3365 FREE(v); 3366 return NULL; 3367 } 3368 3369 if (!used[vbo_index]) { 3370 v->first_vb_use_mask |= 1 << i; 3371 used[vbo_index] = true; 3372 } 3373 3374 desc = util_format_description(elements[i].src_format); 3375 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 3376 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 3377 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 3378 channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL; 3379 3380 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3381 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3382 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3383 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | 3384 S_008F0C_NUM_FORMAT(num_format) | 3385 S_008F0C_DATA_FORMAT(data_format); 3386 v->format_size[i] = desc->block.bits / 8; 3387 3388 /* The hardware always treats the 2-bit alpha channel as 3389 * unsigned, so a shader workaround is needed. 3390 */ 3391 if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) { 3392 if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { 3393 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i); 3394 } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) { 3395 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i); 3396 } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) { 3397 /* This isn't actually used in OpenGL. */ 3398 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i); 3399 } 3400 } else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) { 3401 if (desc->swizzle[3] == PIPE_SWIZZLE_1) 3402 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i); 3403 else 3404 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i); 3405 } else if (channel && channel->size == 32 && !channel->pure_integer) { 3406 if (channel->type == UTIL_FORMAT_TYPE_SIGNED) { 3407 if (channel->normalized) { 3408 if (desc->swizzle[3] == PIPE_SWIZZLE_1) 3409 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i); 3410 else 3411 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i); 3412 } else { 3413 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i); 3414 } 3415 } else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) { 3416 if (channel->normalized) { 3417 if (desc->swizzle[3] == PIPE_SWIZZLE_1) 3418 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i); 3419 else 3420 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i); 3421 } else { 3422 v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i); 3423 } 3424 } 3425 } 3426 3427 /* We work around the fact that 8_8_8 and 16_16_16 data formats 3428 * do not exist by using the corresponding 4-component formats. 3429 * This requires a fixup of the descriptor for bounds checks. 3430 */ 3431 if (desc->block.bits == 3 * 8 || 3432 desc->block.bits == 3 * 16) { 3433 v->fix_size3 |= (desc->block.bits / 24) << (2 * i); 3434 } 3435 } 3436 memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count); 3437 3438 return v; 3439 } 3440 3441 static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 3442 { 3443 struct si_context *sctx = (struct si_context *)ctx; 3444 struct si_vertex_element *v = (struct si_vertex_element*)state; 3445 3446 sctx->vertex_elements = v; 3447 sctx->vertex_buffers_dirty = true; 3448 sctx->do_update_shaders = true; 3449 } 3450 3451 static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 3452 { 3453 struct si_context *sctx = (struct si_context *)ctx; 3454 3455 if (sctx->vertex_elements == state) 3456 sctx->vertex_elements = NULL; 3457 FREE(state); 3458 } 3459 3460 static void si_set_vertex_buffers(struct pipe_context *ctx, 3461 unsigned start_slot, unsigned count, 3462 const struct pipe_vertex_buffer *buffers) 3463 { 3464 struct si_context *sctx = (struct si_context *)ctx; 3465 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 3466 int i; 3467 3468 assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer)); 3469 3470 if (buffers) { 3471 for (i = 0; i < count; i++) { 3472 const struct pipe_vertex_buffer *src = buffers + i; 3473 struct pipe_vertex_buffer *dsti = dst + i; 3474 struct pipe_resource *buf = src->buffer; 3475 3476 pipe_resource_reference(&dsti->buffer, buf); 3477 dsti->buffer_offset = src->buffer_offset; 3478 dsti->stride = src->stride; 3479 r600_context_add_resource_size(ctx, buf); 3480 if (buf) 3481 r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; 3482 } 3483 } else { 3484 for (i = 0; i < count; i++) { 3485 pipe_resource_reference(&dst[i].buffer, NULL); 3486 } 3487 } 3488 sctx->vertex_buffers_dirty = true; 3489 } 3490 3491 static void si_set_index_buffer(struct pipe_context *ctx, 3492 const struct pipe_index_buffer *ib) 3493 { 3494 struct si_context *sctx = (struct si_context *)ctx; 3495 3496 if (ib) { 3497 struct pipe_resource *buf = ib->buffer; 3498 3499 pipe_resource_reference(&sctx->index_buffer.buffer, buf); 3500 memcpy(&sctx->index_buffer, ib, sizeof(*ib)); 3501 r600_context_add_resource_size(ctx, buf); 3502 if (buf) 3503 r600_resource(buf)->bind_history |= PIPE_BIND_INDEX_BUFFER; 3504 } else { 3505 pipe_resource_reference(&sctx->index_buffer.buffer, NULL); 3506 } 3507 } 3508 3509 /* 3510 * Misc 3511 */ 3512 3513 static void si_set_tess_state(struct pipe_context *ctx, 3514 const float default_outer_level[4], 3515 const float default_inner_level[2]) 3516 { 3517 struct si_context *sctx = (struct si_context *)ctx; 3518 struct pipe_constant_buffer cb; 3519 float array[8]; 3520 3521 memcpy(array, default_outer_level, sizeof(float) * 4); 3522 memcpy(array+4, default_inner_level, sizeof(float) * 2); 3523 3524 cb.buffer = NULL; 3525 cb.user_buffer = NULL; 3526 cb.buffer_size = sizeof(array); 3527 3528 si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer, 3529 (void*)array, sizeof(array), 3530 &cb.buffer_offset); 3531 3532 si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 3533 pipe_resource_reference(&cb.buffer, NULL); 3534 } 3535 3536 static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) 3537 { 3538 struct si_context *sctx = (struct si_context *)ctx; 3539 3540 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | 3541 SI_CONTEXT_INV_GLOBAL_L2 | 3542 SI_CONTEXT_FLUSH_AND_INV_CB; 3543 } 3544 3545 /* This only ensures coherency for shader image/buffer stores. */ 3546 static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 3547 { 3548 struct si_context *sctx = (struct si_context *)ctx; 3549 3550 /* Subsequent commands must wait for all shader invocations to 3551 * complete. */ 3552 sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | 3553 SI_CONTEXT_CS_PARTIAL_FLUSH; 3554 3555 if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 3556 sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | 3557 SI_CONTEXT_INV_VMEM_L1; 3558 3559 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | 3560 PIPE_BARRIER_SHADER_BUFFER | 3561 PIPE_BARRIER_TEXTURE | 3562 PIPE_BARRIER_IMAGE | 3563 PIPE_BARRIER_STREAMOUT_BUFFER | 3564 PIPE_BARRIER_GLOBAL_BUFFER)) { 3565 /* As far as I can tell, L1 contents are written back to L2 3566 * automatically at end of shader, but the contents of other 3567 * L1 caches might still be stale. */ 3568 sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; 3569 } 3570 3571 if (flags & PIPE_BARRIER_INDEX_BUFFER) { 3572 /* Indices are read through TC L2 since VI. 3573 * L1 isn't used. 3574 */ 3575 if (sctx->screen->b.chip_class <= CIK) 3576 sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 3577 } 3578 3579 if (flags & PIPE_BARRIER_FRAMEBUFFER) 3580 sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; 3581 3582 if (flags & (PIPE_BARRIER_FRAMEBUFFER | 3583 PIPE_BARRIER_INDIRECT_BUFFER)) 3584 sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 3585 } 3586 3587 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 3588 { 3589 struct pipe_blend_state blend; 3590 3591 memset(&blend, 0, sizeof(blend)); 3592 blend.independent_blend_enable = true; 3593 blend.rt[0].colormask = 0xf; 3594 return si_create_blend_state_mode(&sctx->b.b, &blend, mode); 3595 } 3596 3597 static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, 3598 bool include_draw_vbo) 3599 { 3600 si_need_cs_space((struct si_context*)ctx); 3601 } 3602 3603 static void si_init_config(struct si_context *sctx); 3604 3605 void si_init_state_functions(struct si_context *sctx) 3606 { 3607 si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond); 3608 si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin); 3609 si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable); 3610 si_init_external_atom(sctx, &sctx->b.scissors.atom, &sctx->atoms.s.scissors); 3611 si_init_external_atom(sctx, &sctx->b.viewports.atom, &sctx->atoms.s.viewports); 3612 3613 si_init_atom(sctx, &sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state); 3614 si_init_atom(sctx, &sctx->msaa_sample_locs.atom, &sctx->atoms.s.msaa_sample_locs, si_emit_msaa_sample_locs); 3615 si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state); 3616 si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config); 3617 si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask); 3618 si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state); 3619 si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color); 3620 si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs); 3621 si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state); 3622 si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref); 3623 3624 sctx->b.b.create_blend_state = si_create_blend_state; 3625 sctx->b.b.bind_blend_state = si_bind_blend_state; 3626 sctx->b.b.delete_blend_state = si_delete_blend_state; 3627 sctx->b.b.set_blend_color = si_set_blend_color; 3628 3629 sctx->b.b.create_rasterizer_state = si_create_rs_state; 3630 sctx->b.b.bind_rasterizer_state = si_bind_rs_state; 3631 sctx->b.b.delete_rasterizer_state = si_delete_rs_state; 3632 3633 sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state; 3634 sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 3635 sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 3636 3637 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 3638 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 3639 sctx->custom_blend_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 3640 sctx->custom_blend_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 3641 sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS); 3642 3643 sctx->b.b.set_clip_state = si_set_clip_state; 3644 sctx->b.b.set_stencil_ref = si_set_stencil_ref; 3645 3646 sctx->b.b.set_framebuffer_state = si_set_framebuffer_state; 3647 sctx->b.b.get_sample_position = cayman_get_sample_position; 3648 3649 sctx->b.b.create_sampler_state = si_create_sampler_state; 3650 sctx->b.b.delete_sampler_state = si_delete_sampler_state; 3651 3652 sctx->b.b.create_sampler_view = si_create_sampler_view; 3653 sctx->b.b.sampler_view_destroy = si_sampler_view_destroy; 3654 3655 sctx->b.b.set_sample_mask = si_set_sample_mask; 3656 3657 sctx->b.b.create_vertex_elements_state = si_create_vertex_elements; 3658 sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; 3659 sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; 3660 sctx->b.b.set_vertex_buffers = si_set_vertex_buffers; 3661 sctx->b.b.set_index_buffer = si_set_index_buffer; 3662 3663 sctx->b.b.texture_barrier = si_texture_barrier; 3664 sctx->b.b.memory_barrier = si_memory_barrier; 3665 sctx->b.b.set_min_samples = si_set_min_samples; 3666 sctx->b.b.set_tess_state = si_set_tess_state; 3667 3668 sctx->b.b.set_active_query_state = si_set_active_query_state; 3669 sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; 3670 sctx->b.save_qbo_state = si_save_qbo_state; 3671 sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; 3672 3673 sctx->b.b.draw_vbo = si_draw_vbo; 3674 3675 si_init_config(sctx); 3676 } 3677 3678 static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen) 3679 { 3680 return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id; 3681 } 3682 3683 static void si_query_opaque_metadata(struct r600_common_screen *rscreen, 3684 struct r600_texture *rtex, 3685 struct radeon_bo_metadata *md) 3686 { 3687 struct si_screen *sscreen = (struct si_screen*)rscreen; 3688 struct pipe_resource *res = &rtex->resource.b.b; 3689 static const unsigned char swizzle[] = { 3690 PIPE_SWIZZLE_X, 3691 PIPE_SWIZZLE_Y, 3692 PIPE_SWIZZLE_Z, 3693 PIPE_SWIZZLE_W 3694 }; 3695 uint32_t desc[8], i; 3696 bool is_array = util_resource_is_array_texture(res); 3697 3698 /* DRM 2.x.x doesn't support this. */ 3699 if (rscreen->info.drm_major != 3) 3700 return; 3701 3702 assert(rtex->dcc_separate_buffer == NULL); 3703 assert(rtex->fmask.size == 0); 3704 3705 /* Metadata image format format version 1: 3706 * [0] = 1 (metadata format identifier) 3707 * [1] = (VENDOR_ID << 16) | PCI_ID 3708 * [2:9] = image descriptor for the whole resource 3709 * [2] is always 0, because the base address is cleared 3710 * [9] is the DCC offset bits [39:8] from the beginning of 3711 * the buffer 3712 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level 3713 */ 3714 3715 md->metadata[0] = 1; /* metadata image format version 1 */ 3716 3717 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ 3718 md->metadata[1] = si_get_bo_metadata_word1(rscreen); 3719 3720 si_make_texture_descriptor(sscreen, rtex, true, 3721 res->target, res->format, 3722 swizzle, 0, res->last_level, 0, 3723 is_array ? res->array_size - 1 : 0, 3724 res->width0, res->height0, res->depth0, 3725 desc, NULL); 3726 3727 si_set_mutable_tex_desc_fields(rtex, &rtex->surface.level[0], 0, 0, 3728 rtex->surface.blk_w, false, desc); 3729 3730 /* Clear the base address and set the relative DCC offset. */ 3731 desc[0] = 0; 3732 desc[1] &= C_008F14_BASE_ADDRESS_HI; 3733 desc[7] = rtex->dcc_offset >> 8; 3734 3735 /* Dwords [2:9] contain the image descriptor. */ 3736 memcpy(&md->metadata[2], desc, sizeof(desc)); 3737 3738 /* Dwords [10:..] contain the mipmap level offsets. */ 3739 for (i = 0; i <= res->last_level; i++) 3740 md->metadata[10+i] = rtex->surface.level[i].offset >> 8; 3741 3742 md->size_metadata = (11 + res->last_level) * 4; 3743 } 3744 3745 static void si_apply_opaque_metadata(struct r600_common_screen *rscreen, 3746 struct r600_texture *rtex, 3747 struct radeon_bo_metadata *md) 3748 { 3749 uint32_t *desc = &md->metadata[2]; 3750 3751 if (rscreen->chip_class < VI) 3752 return; 3753 3754 /* Return if DCC is enabled. The texture should be set up with it 3755 * already. 3756 */ 3757 if (md->size_metadata >= 11 * 4 && 3758 md->metadata[0] != 0 && 3759 md->metadata[1] == si_get_bo_metadata_word1(rscreen) && 3760 G_008F28_COMPRESSION_EN(desc[6])) { 3761 assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8)); 3762 return; 3763 } 3764 3765 /* Disable DCC. These are always set by texture_from_handle and must 3766 * be cleared here. 3767 */ 3768 rtex->dcc_offset = 0; 3769 } 3770 3771 void si_init_screen_state_functions(struct si_screen *sscreen) 3772 { 3773 sscreen->b.b.is_format_supported = si_is_format_supported; 3774 sscreen->b.query_opaque_metadata = si_query_opaque_metadata; 3775 sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata; 3776 } 3777 3778 static void 3779 si_write_harvested_raster_configs(struct si_context *sctx, 3780 struct si_pm4_state *pm4, 3781 unsigned raster_config, 3782 unsigned raster_config_1) 3783 { 3784 unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); 3785 unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); 3786 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3787 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3788 unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); 3789 unsigned rb_per_se = num_rb / num_se; 3790 unsigned se_mask[4]; 3791 unsigned se; 3792 3793 se_mask[0] = ((1 << rb_per_se) - 1); 3794 se_mask[1] = (se_mask[0] << rb_per_se); 3795 se_mask[2] = (se_mask[1] << rb_per_se); 3796 se_mask[3] = (se_mask[2] << rb_per_se); 3797 3798 se_mask[0] &= rb_mask; 3799 se_mask[1] &= rb_mask; 3800 se_mask[2] &= rb_mask; 3801 se_mask[3] &= rb_mask; 3802 3803 assert(num_se == 1 || num_se == 2 || num_se == 4); 3804 assert(sh_per_se == 1 || sh_per_se == 2); 3805 assert(rb_per_pkr == 1 || rb_per_pkr == 2); 3806 3807 /* XXX: I can't figure out what the *_XSEL and *_YSEL 3808 * fields are for, so I'm leaving them as their default 3809 * values. */ 3810 3811 for (se = 0; se < num_se; se++) { 3812 unsigned raster_config_se = raster_config; 3813 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3814 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3815 int idx = (se / 2) * 2; 3816 3817 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3818 raster_config_se &= C_028350_SE_MAP; 3819 3820 if (!se_mask[idx]) { 3821 raster_config_se |= 3822 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); 3823 } else { 3824 raster_config_se |= 3825 S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); 3826 } 3827 } 3828 3829 pkr0_mask &= rb_mask; 3830 pkr1_mask &= rb_mask; 3831 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3832 raster_config_se &= C_028350_PKR_MAP; 3833 3834 if (!pkr0_mask) { 3835 raster_config_se |= 3836 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); 3837 } else { 3838 raster_config_se |= 3839 S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); 3840 } 3841 } 3842 3843 if (rb_per_se >= 2) { 3844 unsigned rb0_mask = 1 << (se * rb_per_se); 3845 unsigned rb1_mask = rb0_mask << 1; 3846 3847 rb0_mask &= rb_mask; 3848 rb1_mask &= rb_mask; 3849 if (!rb0_mask || !rb1_mask) { 3850 raster_config_se &= C_028350_RB_MAP_PKR0; 3851 3852 if (!rb0_mask) { 3853 raster_config_se |= 3854 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); 3855 } else { 3856 raster_config_se |= 3857 S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); 3858 } 3859 } 3860 3861 if (rb_per_se > 2) { 3862 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3863 rb1_mask = rb0_mask << 1; 3864 rb0_mask &= rb_mask; 3865 rb1_mask &= rb_mask; 3866 if (!rb0_mask || !rb1_mask) { 3867 raster_config_se &= C_028350_RB_MAP_PKR1; 3868 3869 if (!rb0_mask) { 3870 raster_config_se |= 3871 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); 3872 } else { 3873 raster_config_se |= 3874 S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); 3875 } 3876 } 3877 } 3878 } 3879 3880 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ 3881 if (sctx->b.chip_class < CIK) 3882 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3883 SE_INDEX(se) | SH_BROADCAST_WRITES | 3884 INSTANCE_BROADCAST_WRITES); 3885 else 3886 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, 3887 S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | 3888 S_030800_INSTANCE_BROADCAST_WRITES(1)); 3889 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); 3890 } 3891 3892 /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ 3893 if (sctx->b.chip_class < CIK) 3894 si_pm4_set_reg(pm4, GRBM_GFX_INDEX, 3895 SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | 3896 INSTANCE_BROADCAST_WRITES); 3897 else { 3898 si_pm4_set_reg(pm4, R_030800_GRBM_GFX_INDEX, 3899 S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | 3900 S_030800_INSTANCE_BROADCAST_WRITES(1)); 3901 3902 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3903 (!se_mask[2] && !se_mask[3]))) { 3904 raster_config_1 &= C_028354_SE_PAIR_MAP; 3905 3906 if (!se_mask[0] && !se_mask[1]) { 3907 raster_config_1 |= 3908 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3); 3909 } else { 3910 raster_config_1 |= 3911 S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0); 3912 } 3913 } 3914 3915 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 3916 } 3917 } 3918 3919 static void si_init_config(struct si_context *sctx) 3920 { 3921 struct si_screen *sscreen = sctx->screen; 3922 unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16); 3923 unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask; 3924 unsigned raster_config, raster_config_1; 3925 uint64_t border_color_va = sctx->border_color_buffer->gpu_address; 3926 struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); 3927 3928 if (!pm4) 3929 return; 3930 3931 si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL); 3932 si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1)); 3933 si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); 3934 si_pm4_cmd_end(pm4, false); 3935 3936 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 3937 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 3938 3939 /* FIXME calculate these values somehow ??? */ 3940 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 3941 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 3942 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 3943 3944 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 3945 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 3946 3947 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 3948 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 3949 if (sctx->b.chip_class < CIK) 3950 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | 3951 S_008A14_CLIP_VTX_REORDER_ENA(1)); 3952 3953 si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); 3954 si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); 3955 3956 si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); 3957 3958 switch (sctx->screen->b.family) { 3959 case CHIP_TAHITI: 3960 case CHIP_PITCAIRN: 3961 raster_config = 0x2a00126a; 3962 raster_config_1 = 0x00000000; 3963 break; 3964 case CHIP_VERDE: 3965 raster_config = 0x0000124a; 3966 raster_config_1 = 0x00000000; 3967 break; 3968 case CHIP_OLAND: 3969 raster_config = 0x00000082; 3970 raster_config_1 = 0x00000000; 3971 break; 3972 case CHIP_HAINAN: 3973 raster_config = 0x00000000; 3974 raster_config_1 = 0x00000000; 3975 break; 3976 case CHIP_BONAIRE: 3977 raster_config = 0x16000012; 3978 raster_config_1 = 0x00000000; 3979 break; 3980 case CHIP_HAWAII: 3981 raster_config = 0x3a00161a; 3982 raster_config_1 = 0x0000002e; 3983 break; 3984 case CHIP_FIJI: 3985 if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) { 3986 /* old kernels with old tiling config */ 3987 raster_config = 0x16000012; 3988 raster_config_1 = 0x0000002a; 3989 } else { 3990 raster_config = 0x3a00161a; 3991 raster_config_1 = 0x0000002e; 3992 } 3993 break; 3994 case CHIP_POLARIS10: 3995 raster_config = 0x16000012; 3996 raster_config_1 = 0x0000002a; 3997 break; 3998 case CHIP_POLARIS11: 3999 case CHIP_POLARIS12: 4000 raster_config = 0x16000012; 4001 raster_config_1 = 0x00000000; 4002 break; 4003 case CHIP_TONGA: 4004 raster_config = 0x16000012; 4005 raster_config_1 = 0x0000002a; 4006 break; 4007 case CHIP_ICELAND: 4008 if (num_rb == 1) 4009 raster_config = 0x00000000; 4010 else 4011 raster_config = 0x00000002; 4012 raster_config_1 = 0x00000000; 4013 break; 4014 case CHIP_CARRIZO: 4015 raster_config = 0x00000002; 4016 raster_config_1 = 0x00000000; 4017 break; 4018 case CHIP_KAVERI: 4019 /* KV should be 0x00000002, but that causes problems with radeon */ 4020 raster_config = 0x00000000; /* 0x00000002 */ 4021 raster_config_1 = 0x00000000; 4022 break; 4023 case CHIP_KABINI: 4024 case CHIP_MULLINS: 4025 case CHIP_STONEY: 4026 raster_config = 0x00000000; 4027 raster_config_1 = 0x00000000; 4028 break; 4029 default: 4030 fprintf(stderr, 4031 "radeonsi: Unknown GPU, using 0 for raster_config\n"); 4032 raster_config = 0x00000000; 4033 raster_config_1 = 0x00000000; 4034 break; 4035 } 4036 4037 /* Always use the default config when all backends are enabled 4038 * (or when we failed to determine the enabled backends). 4039 */ 4040 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 4041 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 4042 raster_config); 4043 if (sctx->b.chip_class >= CIK) 4044 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 4045 raster_config_1); 4046 } else { 4047 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 4048 } 4049 4050 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 4051 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 4052 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 4053 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 4054 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 4055 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 4056 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 4057 4058 si_pm4_set_reg(pm4, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); 4059 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 4060 S_028230_ER_TRI(0xA) | 4061 S_028230_ER_POINT(0xA) | 4062 S_028230_ER_RECT(0xA) | 4063 /* Required by DX10_DIAMOND_TEST_ENA: */ 4064 S_028230_ER_LINE_LR(0x1A) | 4065 S_028230_ER_LINE_RL(0x26) | 4066 S_028230_ER_LINE_TB(0xA) | 4067 S_028230_ER_LINE_BT(0xA)); 4068 /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ 4069 si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); 4070 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 4071 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 4072 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 4073 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 4074 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); 4075 4076 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 4077 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 4078 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 4079 4080 if (sctx->b.chip_class >= CIK) { 4081 /* If this is 0, Bonaire can hang even if GS isn't being used. 4082 * Other chips are unaffected. These are suboptimal values, 4083 * but we don't use on-chip GS. 4084 */ 4085 si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, 4086 S_028A44_ES_VERTS_PER_SUBGRP(64) | 4087 S_028A44_GS_PRIMS_PER_SUBGRP(4)); 4088 4089 si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); 4090 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); 4091 si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff)); 4092 si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); 4093 4094 if (sscreen->b.info.num_good_compute_units / 4095 (sscreen->b.info.max_se * sscreen->b.info.max_sh_per_se) <= 4) { 4096 /* Too few available compute units per SH. Disallowing 4097 * VS to run on CU0 could hurt us more than late VS 4098 * allocation would help. 4099 * 4100 * LATE_ALLOC_VS = 2 is the highest safe number. 4101 */ 4102 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); 4103 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); 4104 } else { 4105 /* Set LATE_ALLOC_VS == 31. It should be less than 4106 * the number of scratch waves. Limitations: 4107 * - VS can't execute on CU0. 4108 * - If HS writes outputs to LDS, LS can't execute on CU0. 4109 */ 4110 si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe)); 4111 si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); 4112 } 4113 4114 si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); 4115 } 4116 4117 if (sctx->b.chip_class >= VI) { 4118 unsigned vgt_tess_distribution; 4119 4120 si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, 4121 S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | 4122 S_028424_OVERWRITE_COMBINER_WATERMARK(4)); 4123 if (sctx->b.family < CHIP_POLARIS10) 4124 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); 4125 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); 4126 4127 vgt_tess_distribution = 4128 S_028B50_ACCUM_ISOLINE(32) | 4129 S_028B50_ACCUM_TRI(11) | 4130 S_028B50_ACCUM_QUAD(11) | 4131 S_028B50_DONUT_SPLIT(16); 4132 4133 /* Testing with Unigine Heaven extreme tesselation yielded best results 4134 * with TRAP_SPLIT = 3. 4135 */ 4136 if (sctx->b.family == CHIP_FIJI || 4137 sctx->b.family >= CHIP_POLARIS10) 4138 vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 4139 4140 si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 4141 } else { 4142 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 4143 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 4144 } 4145 4146 if (sctx->b.family == CHIP_STONEY) 4147 si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0); 4148 4149 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 4150 if (sctx->b.chip_class >= CIK) 4151 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40); 4152 si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, 4153 RADEON_PRIO_BORDER_COLORS); 4154 4155 si_pm4_upload_indirect_buffer(sctx, pm4); 4156 sctx->init_config = pm4; 4157 } 4158