1 /* 2 * Copyright 2015 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 /** 25 * Implements most of the fixed function fragment pipeline in shader code. 26 * 27 * VC4 doesn't have any hardware support for blending, alpha test, logic ops, 28 * or color mask. Instead, you read the current contents of the destination 29 * from the tile buffer after having waited for the scoreboard (which is 30 * handled by vc4_qpu_emit.c), then do math using your output color and that 31 * destination value, and update the output color appropriately. 32 * 33 * Once this pass is done, the color write will either have one component (for 34 * single sample) with packed argb8888, or 4 components with the per-sample 35 * argb8888 result. 36 */ 37 38 /** 39 * Lowers fixed-function blending to a load of the destination color and a 40 * series of ALU operations before the store of the output. 41 */ 42 #include "util/u_format.h" 43 #include "vc4_qir.h" 44 #include "compiler/nir/nir_builder.h" 45 #include "vc4_context.h" 46 47 static bool 48 blend_depends_on_dst_color(struct vc4_compile *c) 49 { 50 return (c->fs_key->blend.blend_enable || 51 c->fs_key->blend.colormask != 0xf || 52 c->fs_key->logicop_func != PIPE_LOGICOP_COPY); 53 } 54 55 /** Emits a load of the previous fragment color from the tile buffer. */ 56 static nir_ssa_def * 57 vc4_nir_get_dst_color(nir_builder *b, int sample) 58 { 59 nir_intrinsic_instr *load = 60 nir_intrinsic_instr_create(b->shader, 61 nir_intrinsic_load_input); 62 load->num_components = 1; 63 nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample); 64 load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); 65 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); 66 nir_builder_instr_insert(b, &load->instr); 67 return &load->dest.ssa; 68 } 69 70 static nir_ssa_def * 71 vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb) 72 { 73 nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045)); 74 nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92)); 75 nir_ssa_def *high = nir_fpow(b, 76 nir_fmul(b, 77 nir_fadd(b, srgb, 78 nir_imm_float(b, 0.055)), 79 nir_imm_float(b, 1.0 / 1.055)), 80 nir_imm_float(b, 2.4)); 81 82 return nir_bcsel(b, is_low, low, high); 83 } 84 85 static nir_ssa_def * 86 vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear) 87 { 88 nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308)); 89 nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92)); 90 nir_ssa_def *high = nir_fsub(b, 91 nir_fmul(b, 92 nir_imm_float(b, 1.055), 93 nir_fpow(b, 94 linear, 95 nir_imm_float(b, 0.41666))), 96 nir_imm_float(b, 0.055)); 97 98 return nir_bcsel(b, is_low, low, high); 99 } 100 101 static nir_ssa_def * 102 vc4_blend_channel_f(nir_builder *b, 103 nir_ssa_def **src, 104 nir_ssa_def **dst, 105 unsigned factor, 106 int channel) 107 { 108 switch(factor) { 109 case PIPE_BLENDFACTOR_ONE: 110 return nir_imm_float(b, 1.0); 111 case PIPE_BLENDFACTOR_SRC_COLOR: 112 return src[channel]; 113 case PIPE_BLENDFACTOR_SRC_ALPHA: 114 return src[3]; 115 case PIPE_BLENDFACTOR_DST_ALPHA: 116 return dst[3]; 117 case PIPE_BLENDFACTOR_DST_COLOR: 118 return dst[channel]; 119 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 120 if (channel != 3) { 121 return nir_fmin(b, 122 src[3], 123 nir_fsub(b, 124 nir_imm_float(b, 1.0), 125 dst[3])); 126 } else { 127 return nir_imm_float(b, 1.0); 128 } 129 case PIPE_BLENDFACTOR_CONST_COLOR: 130 return nir_load_system_value(b, 131 nir_intrinsic_load_blend_const_color_r_float + 132 channel, 133 0); 134 case PIPE_BLENDFACTOR_CONST_ALPHA: 135 return nir_load_blend_const_color_a_float(b); 136 case PIPE_BLENDFACTOR_ZERO: 137 return nir_imm_float(b, 0.0); 138 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 139 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]); 140 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 141 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]); 142 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 143 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]); 144 case PIPE_BLENDFACTOR_INV_DST_COLOR: 145 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]); 146 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 147 return nir_fsub(b, nir_imm_float(b, 1.0), 148 nir_load_system_value(b, 149 nir_intrinsic_load_blend_const_color_r_float + 150 channel, 151 0)); 152 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 153 return nir_fsub(b, nir_imm_float(b, 1.0), 154 nir_load_blend_const_color_a_float(b)); 155 156 default: 157 case PIPE_BLENDFACTOR_SRC1_COLOR: 158 case PIPE_BLENDFACTOR_SRC1_ALPHA: 159 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 160 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 161 /* Unsupported. */ 162 fprintf(stderr, "Unknown blend factor %d\n", factor); 163 return nir_imm_float(b, 1.0); 164 } 165 } 166 167 static nir_ssa_def * 168 vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1, 169 int chan) 170 { 171 unsigned chan_mask = 0xff << (chan * 8); 172 return nir_ior(b, 173 nir_iand(b, src0, nir_imm_int(b, ~chan_mask)), 174 nir_iand(b, src1, nir_imm_int(b, chan_mask))); 175 } 176 177 static nir_ssa_def * 178 vc4_blend_channel_i(nir_builder *b, 179 nir_ssa_def *src, 180 nir_ssa_def *dst, 181 nir_ssa_def *src_a, 182 nir_ssa_def *dst_a, 183 unsigned factor, 184 int a_chan) 185 { 186 switch (factor) { 187 case PIPE_BLENDFACTOR_ONE: 188 return nir_imm_int(b, ~0); 189 case PIPE_BLENDFACTOR_SRC_COLOR: 190 return src; 191 case PIPE_BLENDFACTOR_SRC_ALPHA: 192 return src_a; 193 case PIPE_BLENDFACTOR_DST_ALPHA: 194 return dst_a; 195 case PIPE_BLENDFACTOR_DST_COLOR: 196 return dst; 197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 198 return vc4_nir_set_packed_chan(b, 199 nir_umin_4x8(b, 200 src_a, 201 nir_inot(b, dst_a)), 202 nir_imm_int(b, ~0), 203 a_chan); 204 case PIPE_BLENDFACTOR_CONST_COLOR: 205 return nir_load_blend_const_color_rgba8888_unorm(b); 206 case PIPE_BLENDFACTOR_CONST_ALPHA: 207 return nir_load_blend_const_color_aaaa8888_unorm(b); 208 case PIPE_BLENDFACTOR_ZERO: 209 return nir_imm_int(b, 0); 210 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 211 return nir_inot(b, src); 212 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 213 return nir_inot(b, src_a); 214 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 215 return nir_inot(b, dst_a); 216 case PIPE_BLENDFACTOR_INV_DST_COLOR: 217 return nir_inot(b, dst); 218 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 219 return nir_inot(b, 220 nir_load_blend_const_color_rgba8888_unorm(b)); 221 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 222 return nir_inot(b, 223 nir_load_blend_const_color_aaaa8888_unorm(b)); 224 225 default: 226 case PIPE_BLENDFACTOR_SRC1_COLOR: 227 case PIPE_BLENDFACTOR_SRC1_ALPHA: 228 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 229 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 230 /* Unsupported. */ 231 fprintf(stderr, "Unknown blend factor %d\n", factor); 232 return nir_imm_int(b, ~0); 233 } 234 } 235 236 static nir_ssa_def * 237 vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, 238 unsigned func) 239 { 240 switch (func) { 241 case PIPE_BLEND_ADD: 242 return nir_fadd(b, src, dst); 243 case PIPE_BLEND_SUBTRACT: 244 return nir_fsub(b, src, dst); 245 case PIPE_BLEND_REVERSE_SUBTRACT: 246 return nir_fsub(b, dst, src); 247 case PIPE_BLEND_MIN: 248 return nir_fmin(b, src, dst); 249 case PIPE_BLEND_MAX: 250 return nir_fmax(b, src, dst); 251 252 default: 253 /* Unsupported. */ 254 fprintf(stderr, "Unknown blend func %d\n", func); 255 return src; 256 257 } 258 } 259 260 static nir_ssa_def * 261 vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst, 262 unsigned func) 263 { 264 switch (func) { 265 case PIPE_BLEND_ADD: 266 return nir_usadd_4x8(b, src, dst); 267 case PIPE_BLEND_SUBTRACT: 268 return nir_ussub_4x8(b, src, dst); 269 case PIPE_BLEND_REVERSE_SUBTRACT: 270 return nir_ussub_4x8(b, dst, src); 271 case PIPE_BLEND_MIN: 272 return nir_umin_4x8(b, src, dst); 273 case PIPE_BLEND_MAX: 274 return nir_umax_4x8(b, src, dst); 275 276 default: 277 /* Unsupported. */ 278 fprintf(stderr, "Unknown blend func %d\n", func); 279 return src; 280 281 } 282 } 283 284 static void 285 vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result, 286 nir_ssa_def **src_color, nir_ssa_def **dst_color) 287 { 288 struct pipe_rt_blend_state *blend = &c->fs_key->blend; 289 290 if (!blend->blend_enable) { 291 for (int i = 0; i < 4; i++) 292 result[i] = src_color[i]; 293 return; 294 } 295 296 /* Clamp the src color to [0, 1]. Dest is already clamped. */ 297 for (int i = 0; i < 4; i++) 298 src_color[i] = nir_fsat(b, src_color[i]); 299 300 nir_ssa_def *src_blend[4], *dst_blend[4]; 301 for (int i = 0; i < 4; i++) { 302 int src_factor = ((i != 3) ? blend->rgb_src_factor : 303 blend->alpha_src_factor); 304 int dst_factor = ((i != 3) ? blend->rgb_dst_factor : 305 blend->alpha_dst_factor); 306 src_blend[i] = nir_fmul(b, src_color[i], 307 vc4_blend_channel_f(b, 308 src_color, dst_color, 309 src_factor, i)); 310 dst_blend[i] = nir_fmul(b, dst_color[i], 311 vc4_blend_channel_f(b, 312 src_color, dst_color, 313 dst_factor, i)); 314 } 315 316 for (int i = 0; i < 4; i++) { 317 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i], 318 ((i != 3) ? blend->rgb_func : 319 blend->alpha_func)); 320 } 321 } 322 323 static nir_ssa_def * 324 vc4_nir_splat(nir_builder *b, nir_ssa_def *src) 325 { 326 nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8))); 327 return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16))); 328 } 329 330 static nir_ssa_def * 331 vc4_do_blending_i(struct vc4_compile *c, nir_builder *b, 332 nir_ssa_def *src_color, nir_ssa_def *dst_color, 333 nir_ssa_def *src_float_a) 334 { 335 struct pipe_rt_blend_state *blend = &c->fs_key->blend; 336 337 if (!blend->blend_enable) 338 return src_color; 339 340 enum pipe_format color_format = c->fs_key->color_format; 341 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); 342 nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff); 343 nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a); 344 nir_ssa_def *dst_a; 345 int alpha_chan; 346 for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) { 347 if (format_swiz[alpha_chan] == 3) 348 break; 349 } 350 if (alpha_chan != 4) { 351 nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8); 352 dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color, 353 shift), imm_0xff)); 354 } else { 355 dst_a = nir_imm_int(b, ~0); 356 } 357 358 nir_ssa_def *src_factor = vc4_blend_channel_i(b, 359 src_color, dst_color, 360 src_a, dst_a, 361 blend->rgb_src_factor, 362 alpha_chan); 363 nir_ssa_def *dst_factor = vc4_blend_channel_i(b, 364 src_color, dst_color, 365 src_a, dst_a, 366 blend->rgb_dst_factor, 367 alpha_chan); 368 369 if (alpha_chan != 4 && 370 blend->alpha_src_factor != blend->rgb_src_factor) { 371 nir_ssa_def *src_alpha_factor = 372 vc4_blend_channel_i(b, 373 src_color, dst_color, 374 src_a, dst_a, 375 blend->alpha_src_factor, 376 alpha_chan); 377 src_factor = vc4_nir_set_packed_chan(b, src_factor, 378 src_alpha_factor, 379 alpha_chan); 380 } 381 if (alpha_chan != 4 && 382 blend->alpha_dst_factor != blend->rgb_dst_factor) { 383 nir_ssa_def *dst_alpha_factor = 384 vc4_blend_channel_i(b, 385 src_color, dst_color, 386 src_a, dst_a, 387 blend->alpha_dst_factor, 388 alpha_chan); 389 dst_factor = vc4_nir_set_packed_chan(b, dst_factor, 390 dst_alpha_factor, 391 alpha_chan); 392 } 393 nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor); 394 nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor); 395 396 nir_ssa_def *result = 397 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func); 398 if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) { 399 nir_ssa_def *result_a = vc4_blend_func_i(b, 400 src_blend, 401 dst_blend, 402 blend->alpha_func); 403 result = vc4_nir_set_packed_chan(b, result, result_a, 404 alpha_chan); 405 } 406 return result; 407 } 408 409 static nir_ssa_def * 410 vc4_logicop(nir_builder *b, int logicop_func, 411 nir_ssa_def *src, nir_ssa_def *dst) 412 { 413 switch (logicop_func) { 414 case PIPE_LOGICOP_CLEAR: 415 return nir_imm_int(b, 0); 416 case PIPE_LOGICOP_NOR: 417 return nir_inot(b, nir_ior(b, src, dst)); 418 case PIPE_LOGICOP_AND_INVERTED: 419 return nir_iand(b, nir_inot(b, src), dst); 420 case PIPE_LOGICOP_COPY_INVERTED: 421 return nir_inot(b, src); 422 case PIPE_LOGICOP_AND_REVERSE: 423 return nir_iand(b, src, nir_inot(b, dst)); 424 case PIPE_LOGICOP_INVERT: 425 return nir_inot(b, dst); 426 case PIPE_LOGICOP_XOR: 427 return nir_ixor(b, src, dst); 428 case PIPE_LOGICOP_NAND: 429 return nir_inot(b, nir_iand(b, src, dst)); 430 case PIPE_LOGICOP_AND: 431 return nir_iand(b, src, dst); 432 case PIPE_LOGICOP_EQUIV: 433 return nir_inot(b, nir_ixor(b, src, dst)); 434 case PIPE_LOGICOP_NOOP: 435 return dst; 436 case PIPE_LOGICOP_OR_INVERTED: 437 return nir_ior(b, nir_inot(b, src), dst); 438 case PIPE_LOGICOP_OR_REVERSE: 439 return nir_ior(b, src, nir_inot(b, dst)); 440 case PIPE_LOGICOP_OR: 441 return nir_ior(b, src, dst); 442 case PIPE_LOGICOP_SET: 443 return nir_imm_int(b, ~0); 444 default: 445 fprintf(stderr, "Unknown logic op %d\n", logicop_func); 446 /* FALLTHROUGH */ 447 case PIPE_LOGICOP_COPY: 448 return src; 449 } 450 } 451 452 static nir_ssa_def * 453 vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b, 454 nir_ssa_def **colors) 455 { 456 enum pipe_format color_format = c->fs_key->color_format; 457 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); 458 459 nir_ssa_def *swizzled[4]; 460 for (int i = 0; i < 4; i++) { 461 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors, 462 format_swiz[i]); 463 } 464 465 return nir_pack_unorm_4x8(b, 466 nir_vec4(b, 467 swizzled[0], swizzled[1], 468 swizzled[2], swizzled[3])); 469 470 } 471 472 static nir_ssa_def * 473 vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src, 474 int sample) 475 { 476 enum pipe_format color_format = c->fs_key->color_format; 477 const uint8_t *format_swiz = vc4_get_format_swizzle(color_format); 478 bool srgb = util_format_is_srgb(color_format); 479 480 /* Pull out the float src/dst color components. */ 481 nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample); 482 nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color); 483 nir_ssa_def *src_color[4], *unpacked_dst_color[4]; 484 for (unsigned i = 0; i < 4; i++) { 485 src_color[i] = nir_channel(b, src, i); 486 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i); 487 } 488 489 if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa) 490 src_color[3] = nir_imm_float(b, 1.0); 491 492 nir_ssa_def *packed_color; 493 if (srgb) { 494 /* Unswizzle the destination color. */ 495 nir_ssa_def *dst_color[4]; 496 for (unsigned i = 0; i < 4; i++) { 497 dst_color[i] = vc4_nir_get_swizzled_channel(b, 498 unpacked_dst_color, 499 format_swiz[i]); 500 } 501 502 /* Turn dst color to linear. */ 503 for (int i = 0; i < 3; i++) 504 dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]); 505 506 nir_ssa_def *blend_color[4]; 507 vc4_do_blending_f(c, b, blend_color, src_color, dst_color); 508 509 /* sRGB encode the output color */ 510 for (int i = 0; i < 3; i++) 511 blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]); 512 513 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color); 514 } else { 515 nir_ssa_def *packed_src_color = 516 vc4_nir_swizzle_and_pack(c, b, src_color); 517 518 packed_color = 519 vc4_do_blending_i(c, b, 520 packed_src_color, packed_dst_color, 521 src_color[3]); 522 } 523 524 packed_color = vc4_logicop(b, c->fs_key->logicop_func, 525 packed_color, packed_dst_color); 526 527 /* If the bit isn't set in the color mask, then just return the 528 * original dst color, instead. 529 */ 530 uint32_t colormask = 0xffffffff; 531 for (int i = 0; i < 4; i++) { 532 if (format_swiz[i] < 4 && 533 !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) { 534 colormask &= ~(0xff << (i * 8)); 535 } 536 } 537 538 return nir_ior(b, 539 nir_iand(b, packed_color, 540 nir_imm_int(b, colormask)), 541 nir_iand(b, packed_dst_color, 542 nir_imm_int(b, ~colormask))); 543 } 544 545 static int 546 vc4_nir_next_output_driver_location(nir_shader *s) 547 { 548 int maxloc = -1; 549 550 nir_foreach_variable(var, &s->outputs) 551 maxloc = MAX2(maxloc, (int)var->data.driver_location); 552 553 return maxloc + 1; 554 } 555 556 static void 557 vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b, 558 nir_ssa_def *val) 559 { 560 nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out, 561 glsl_uint_type(), 562 "sample_mask"); 563 sample_mask->data.driver_location = 564 vc4_nir_next_output_driver_location(c->s); 565 sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK; 566 567 nir_intrinsic_instr *intr = 568 nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output); 569 intr->num_components = 1; 570 nir_intrinsic_set_base(intr, sample_mask->data.driver_location); 571 572 intr->src[0] = nir_src_for_ssa(val); 573 intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); 574 nir_builder_instr_insert(b, &intr->instr); 575 } 576 577 static void 578 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, 579 nir_intrinsic_instr *intr) 580 { 581 nir_ssa_def *frag_color = intr->src[0].ssa; 582 583 if (c->fs_key->sample_alpha_to_coverage) { 584 nir_ssa_def *a = nir_channel(b, frag_color, 3); 585 586 /* XXX: We should do a nice dither based on the fragment 587 * coordinate, instead. 588 */ 589 nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES); 590 nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples)); 591 nir_ssa_def *bitmask = nir_isub(b, 592 nir_ishl(b, 593 nir_imm_int(b, 1), 594 num_bits), 595 nir_imm_int(b, 1)); 596 vc4_nir_store_sample_mask(c, b, bitmask); 597 } 598 599 /* The TLB color read returns each sample in turn, so if our blending 600 * depends on the destination color, we're going to have to run the 601 * blending function separately for each destination sample value, and 602 * then output the per-sample color using TLB_COLOR_MS. 603 */ 604 nir_ssa_def *blend_output; 605 if (c->fs_key->msaa && blend_depends_on_dst_color(c)) { 606 c->msaa_per_sample_output = true; 607 608 nir_ssa_def *samples[4]; 609 for (int i = 0; i < VC4_MAX_SAMPLES; i++) 610 samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i); 611 blend_output = nir_vec4(b, 612 samples[0], samples[1], 613 samples[2], samples[3]); 614 } else { 615 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0); 616 } 617 618 nir_instr_rewrite_src(&intr->instr, &intr->src[0], 619 nir_src_for_ssa(blend_output)); 620 intr->num_components = blend_output->num_components; 621 } 622 623 static bool 624 vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c) 625 { 626 nir_foreach_instr_safe(instr, block) { 627 if (instr->type != nir_instr_type_intrinsic) 628 continue; 629 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 630 if (intr->intrinsic != nir_intrinsic_store_output) 631 continue; 632 633 nir_variable *output_var = NULL; 634 nir_foreach_variable(var, &c->s->outputs) { 635 if (var->data.driver_location == 636 nir_intrinsic_base(intr)) { 637 output_var = var; 638 break; 639 } 640 } 641 assert(output_var); 642 643 if (output_var->data.location != FRAG_RESULT_COLOR && 644 output_var->data.location != FRAG_RESULT_DATA0) { 645 continue; 646 } 647 648 nir_function_impl *impl = 649 nir_cf_node_get_function(&block->cf_node); 650 nir_builder b; 651 nir_builder_init(&b, impl); 652 b.cursor = nir_before_instr(&intr->instr); 653 vc4_nir_lower_blend_instr(c, &b, intr); 654 } 655 return true; 656 } 657 658 void 659 vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c) 660 { 661 nir_foreach_function(function, s) { 662 if (function->impl) { 663 nir_foreach_block(block, function->impl) { 664 vc4_nir_lower_blend_block(block, c); 665 } 666 667 nir_metadata_preserve(function->impl, 668 nir_metadata_block_index | 669 nir_metadata_dominance); 670 } 671 } 672 673 /* If we didn't do alpha-to-coverage on the output color, we still 674 * need to pass glSampleMask() through. 675 */ 676 if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) { 677 nir_function_impl *impl = nir_shader_get_entrypoint(s); 678 nir_builder b; 679 nir_builder_init(&b, impl); 680 b.cursor = nir_after_block(nir_impl_last_block(impl)); 681 682 vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b)); 683 } 684 } 685