Home | History | Annotate | Download | only in vc4
      1 /*
 * Copyright © 2015 Broadcom
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * Implements most of the fixed function fragment pipeline in shader code.
     26  *
     27  * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
     28  * or color mask.  Instead, you read the current contents of the destination
     29  * from the tile buffer after having waited for the scoreboard (which is
     30  * handled by vc4_qpu_emit.c), then do math using your output color and that
     31  * destination value, and update the output color appropriately.
     32  *
     33  * Once this pass is done, the color write will either have one component (for
     34  * single sample) with packed argb8888, or 4 components with the per-sample
     35  * argb8888 result.
     36  */
     37 
     38 /**
     39  * Lowers fixed-function blending to a load of the destination color and a
     40  * series of ALU operations before the store of the output.
     41  */
     42 #include "util/u_format.h"
     43 #include "vc4_qir.h"
     44 #include "compiler/nir/nir_builder.h"
     45 #include "vc4_context.h"
     46 
     47 static bool
     48 blend_depends_on_dst_color(struct vc4_compile *c)
     49 {
     50         return (c->fs_key->blend.blend_enable ||
     51                 c->fs_key->blend.colormask != 0xf ||
     52                 c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
     53 }
     54 
     55 /** Emits a load of the previous fragment color from the tile buffer. */
     56 static nir_ssa_def *
     57 vc4_nir_get_dst_color(nir_builder *b, int sample)
     58 {
     59         nir_intrinsic_instr *load =
     60                 nir_intrinsic_instr_create(b->shader,
     61                                            nir_intrinsic_load_input);
     62         load->num_components = 1;
     63         nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
     64         load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
     65         nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
     66         nir_builder_instr_insert(b, &load->instr);
     67         return &load->dest.ssa;
     68 }
     69 
     70 static  nir_ssa_def *
     71 vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
     72 {
     73         nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
     74         nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
     75         nir_ssa_def *high = nir_fpow(b,
     76                                      nir_fmul(b,
     77                                               nir_fadd(b, srgb,
     78                                                        nir_imm_float(b, 0.055)),
     79                                               nir_imm_float(b, 1.0 / 1.055)),
     80                                      nir_imm_float(b, 2.4));
     81 
     82         return nir_bcsel(b, is_low, low, high);
     83 }
     84 
     85 static  nir_ssa_def *
     86 vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
     87 {
     88         nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308));
     89         nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92));
     90         nir_ssa_def *high = nir_fsub(b,
     91                                      nir_fmul(b,
     92                                               nir_imm_float(b, 1.055),
     93                                               nir_fpow(b,
     94                                                        linear,
     95                                                        nir_imm_float(b, 0.41666))),
     96                                      nir_imm_float(b, 0.055));
     97 
     98         return nir_bcsel(b, is_low, low, high);
     99 }
    100 
    101 static nir_ssa_def *
    102 vc4_blend_channel_f(nir_builder *b,
    103                     nir_ssa_def **src,
    104                     nir_ssa_def **dst,
    105                     unsigned factor,
    106                     int channel)
    107 {
    108         switch(factor) {
    109         case PIPE_BLENDFACTOR_ONE:
    110                 return nir_imm_float(b, 1.0);
    111         case PIPE_BLENDFACTOR_SRC_COLOR:
    112                 return src[channel];
    113         case PIPE_BLENDFACTOR_SRC_ALPHA:
    114                 return src[3];
    115         case PIPE_BLENDFACTOR_DST_ALPHA:
    116                 return dst[3];
    117         case PIPE_BLENDFACTOR_DST_COLOR:
    118                 return dst[channel];
    119         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    120                 if (channel != 3) {
    121                         return nir_fmin(b,
    122                                         src[3],
    123                                         nir_fsub(b,
    124                                                  nir_imm_float(b, 1.0),
    125                                                  dst[3]));
    126                 } else {
    127                         return nir_imm_float(b, 1.0);
    128                 }
    129         case PIPE_BLENDFACTOR_CONST_COLOR:
    130                 return nir_load_system_value(b,
    131                                              nir_intrinsic_load_blend_const_color_r_float +
    132                                              channel,
    133                                              0);
    134         case PIPE_BLENDFACTOR_CONST_ALPHA:
    135                 return nir_load_blend_const_color_a_float(b);
    136         case PIPE_BLENDFACTOR_ZERO:
    137                 return nir_imm_float(b, 0.0);
    138         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    139                 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
    140         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    141                 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
    142         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
    143                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
    144         case PIPE_BLENDFACTOR_INV_DST_COLOR:
    145                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
    146         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
    147                 return nir_fsub(b, nir_imm_float(b, 1.0),
    148                                 nir_load_system_value(b,
    149                                                       nir_intrinsic_load_blend_const_color_r_float +
    150                                                       channel,
    151                                                       0));
    152         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    153                 return nir_fsub(b, nir_imm_float(b, 1.0),
    154                                 nir_load_blend_const_color_a_float(b));
    155 
    156         default:
    157         case PIPE_BLENDFACTOR_SRC1_COLOR:
    158         case PIPE_BLENDFACTOR_SRC1_ALPHA:
    159         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
    160         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
    161                 /* Unsupported. */
    162                 fprintf(stderr, "Unknown blend factor %d\n", factor);
    163                 return nir_imm_float(b, 1.0);
    164         }
    165 }
    166 
    167 static nir_ssa_def *
    168 vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
    169                         int chan)
    170 {
    171         unsigned chan_mask = 0xff << (chan * 8);
    172         return nir_ior(b,
    173                        nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
    174                        nir_iand(b, src1, nir_imm_int(b, chan_mask)));
    175 }
    176 
    177 static nir_ssa_def *
    178 vc4_blend_channel_i(nir_builder *b,
    179                     nir_ssa_def *src,
    180                     nir_ssa_def *dst,
    181                     nir_ssa_def *src_a,
    182                     nir_ssa_def *dst_a,
    183                     unsigned factor,
    184                     int a_chan)
    185 {
    186         switch (factor) {
    187         case PIPE_BLENDFACTOR_ONE:
    188                 return nir_imm_int(b, ~0);
    189         case PIPE_BLENDFACTOR_SRC_COLOR:
    190                 return src;
    191         case PIPE_BLENDFACTOR_SRC_ALPHA:
    192                 return src_a;
    193         case PIPE_BLENDFACTOR_DST_ALPHA:
    194                 return dst_a;
    195         case PIPE_BLENDFACTOR_DST_COLOR:
    196                 return dst;
    197         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    198                 return vc4_nir_set_packed_chan(b,
    199                                                nir_umin_4x8(b,
    200                                                             src_a,
    201                                                             nir_inot(b, dst_a)),
    202                                                nir_imm_int(b, ~0),
    203                                                a_chan);
    204         case PIPE_BLENDFACTOR_CONST_COLOR:
    205                 return nir_load_blend_const_color_rgba8888_unorm(b);
    206         case PIPE_BLENDFACTOR_CONST_ALPHA:
    207                 return nir_load_blend_const_color_aaaa8888_unorm(b);
    208         case PIPE_BLENDFACTOR_ZERO:
    209                 return nir_imm_int(b, 0);
    210         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    211                 return nir_inot(b, src);
    212         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    213                 return nir_inot(b, src_a);
    214         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
    215                 return nir_inot(b, dst_a);
    216         case PIPE_BLENDFACTOR_INV_DST_COLOR:
    217                 return nir_inot(b, dst);
    218         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
    219                 return nir_inot(b,
    220                                 nir_load_blend_const_color_rgba8888_unorm(b));
    221         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    222                 return nir_inot(b,
    223                                 nir_load_blend_const_color_aaaa8888_unorm(b));
    224 
    225         default:
    226         case PIPE_BLENDFACTOR_SRC1_COLOR:
    227         case PIPE_BLENDFACTOR_SRC1_ALPHA:
    228         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
    229         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
    230                 /* Unsupported. */
    231                 fprintf(stderr, "Unknown blend factor %d\n", factor);
    232                 return nir_imm_int(b, ~0);
    233         }
    234 }
    235 
    236 static nir_ssa_def *
    237 vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
    238                  unsigned func)
    239 {
    240         switch (func) {
    241         case PIPE_BLEND_ADD:
    242                 return nir_fadd(b, src, dst);
    243         case PIPE_BLEND_SUBTRACT:
    244                 return nir_fsub(b, src, dst);
    245         case PIPE_BLEND_REVERSE_SUBTRACT:
    246                 return nir_fsub(b, dst, src);
    247         case PIPE_BLEND_MIN:
    248                 return nir_fmin(b, src, dst);
    249         case PIPE_BLEND_MAX:
    250                 return nir_fmax(b, src, dst);
    251 
    252         default:
    253                 /* Unsupported. */
    254                 fprintf(stderr, "Unknown blend func %d\n", func);
    255                 return src;
    256 
    257         }
    258 }
    259 
    260 static nir_ssa_def *
    261 vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
    262                  unsigned func)
    263 {
    264         switch (func) {
    265         case PIPE_BLEND_ADD:
    266                 return nir_usadd_4x8(b, src, dst);
    267         case PIPE_BLEND_SUBTRACT:
    268                 return nir_ussub_4x8(b, src, dst);
    269         case PIPE_BLEND_REVERSE_SUBTRACT:
    270                 return nir_ussub_4x8(b, dst, src);
    271         case PIPE_BLEND_MIN:
    272                 return nir_umin_4x8(b, src, dst);
    273         case PIPE_BLEND_MAX:
    274                 return nir_umax_4x8(b, src, dst);
    275 
    276         default:
    277                 /* Unsupported. */
    278                 fprintf(stderr, "Unknown blend func %d\n", func);
    279                 return src;
    280 
    281         }
    282 }
    283 
    284 static void
    285 vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
    286                   nir_ssa_def **src_color, nir_ssa_def **dst_color)
    287 {
    288         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
    289 
    290         if (!blend->blend_enable) {
    291                 for (int i = 0; i < 4; i++)
    292                         result[i] = src_color[i];
    293                 return;
    294         }
    295 
    296         /* Clamp the src color to [0, 1].  Dest is already clamped. */
    297         for (int i = 0; i < 4; i++)
    298                 src_color[i] = nir_fsat(b, src_color[i]);
    299 
    300         nir_ssa_def *src_blend[4], *dst_blend[4];
    301         for (int i = 0; i < 4; i++) {
    302                 int src_factor = ((i != 3) ? blend->rgb_src_factor :
    303                                   blend->alpha_src_factor);
    304                 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
    305                                   blend->alpha_dst_factor);
    306                 src_blend[i] = nir_fmul(b, src_color[i],
    307                                         vc4_blend_channel_f(b,
    308                                                             src_color, dst_color,
    309                                                             src_factor, i));
    310                 dst_blend[i] = nir_fmul(b, dst_color[i],
    311                                         vc4_blend_channel_f(b,
    312                                                             src_color, dst_color,
    313                                                             dst_factor, i));
    314         }
    315 
    316         for (int i = 0; i < 4; i++) {
    317                 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
    318                                              ((i != 3) ? blend->rgb_func :
    319                                               blend->alpha_func));
    320         }
    321 }
    322 
    323 static nir_ssa_def *
    324 vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
    325 {
    326         nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
    327         return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
    328 }
    329 
    330 static nir_ssa_def *
    331 vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
    332                   nir_ssa_def *src_color, nir_ssa_def *dst_color,
    333                   nir_ssa_def *src_float_a)
    334 {
    335         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
    336 
    337         if (!blend->blend_enable)
    338                 return src_color;
    339 
    340         enum pipe_format color_format = c->fs_key->color_format;
    341         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
    342         nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
    343         nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
    344         nir_ssa_def *dst_a;
    345         int alpha_chan;
    346         for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
    347                 if (format_swiz[alpha_chan] == 3)
    348                         break;
    349         }
    350         if (alpha_chan != 4) {
    351                 nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
    352                 dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
    353                                                               shift), imm_0xff));
    354         } else {
    355                 dst_a = nir_imm_int(b, ~0);
    356         }
    357 
    358         nir_ssa_def *src_factor = vc4_blend_channel_i(b,
    359                                                       src_color, dst_color,
    360                                                       src_a, dst_a,
    361                                                       blend->rgb_src_factor,
    362                                                       alpha_chan);
    363         nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
    364                                                       src_color, dst_color,
    365                                                       src_a, dst_a,
    366                                                       blend->rgb_dst_factor,
    367                                                       alpha_chan);
    368 
    369         if (alpha_chan != 4 &&
    370             blend->alpha_src_factor != blend->rgb_src_factor) {
    371                 nir_ssa_def *src_alpha_factor =
    372                         vc4_blend_channel_i(b,
    373                                             src_color, dst_color,
    374                                             src_a, dst_a,
    375                                             blend->alpha_src_factor,
    376                                             alpha_chan);
    377                 src_factor = vc4_nir_set_packed_chan(b, src_factor,
    378                                                      src_alpha_factor,
    379                                                      alpha_chan);
    380         }
    381         if (alpha_chan != 4 &&
    382             blend->alpha_dst_factor != blend->rgb_dst_factor) {
    383                 nir_ssa_def *dst_alpha_factor =
    384                         vc4_blend_channel_i(b,
    385                                             src_color, dst_color,
    386                                             src_a, dst_a,
    387                                             blend->alpha_dst_factor,
    388                                             alpha_chan);
    389                 dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
    390                                                      dst_alpha_factor,
    391                                                      alpha_chan);
    392         }
    393         nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
    394         nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
    395 
    396         nir_ssa_def *result =
    397                 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
    398         if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
    399                 nir_ssa_def *result_a = vc4_blend_func_i(b,
    400                                                          src_blend,
    401                                                          dst_blend,
    402                                                          blend->alpha_func);
    403                 result = vc4_nir_set_packed_chan(b, result, result_a,
    404                                                  alpha_chan);
    405         }
    406         return result;
    407 }
    408 
    409 static nir_ssa_def *
    410 vc4_logicop(nir_builder *b, int logicop_func,
    411             nir_ssa_def *src, nir_ssa_def *dst)
    412 {
    413         switch (logicop_func) {
    414         case PIPE_LOGICOP_CLEAR:
    415                 return nir_imm_int(b, 0);
    416         case PIPE_LOGICOP_NOR:
    417                 return nir_inot(b, nir_ior(b, src, dst));
    418         case PIPE_LOGICOP_AND_INVERTED:
    419                 return nir_iand(b, nir_inot(b, src), dst);
    420         case PIPE_LOGICOP_COPY_INVERTED:
    421                 return nir_inot(b, src);
    422         case PIPE_LOGICOP_AND_REVERSE:
    423                 return nir_iand(b, src, nir_inot(b, dst));
    424         case PIPE_LOGICOP_INVERT:
    425                 return nir_inot(b, dst);
    426         case PIPE_LOGICOP_XOR:
    427                 return nir_ixor(b, src, dst);
    428         case PIPE_LOGICOP_NAND:
    429                 return nir_inot(b, nir_iand(b, src, dst));
    430         case PIPE_LOGICOP_AND:
    431                 return nir_iand(b, src, dst);
    432         case PIPE_LOGICOP_EQUIV:
    433                 return nir_inot(b, nir_ixor(b, src, dst));
    434         case PIPE_LOGICOP_NOOP:
    435                 return dst;
    436         case PIPE_LOGICOP_OR_INVERTED:
    437                 return nir_ior(b, nir_inot(b, src), dst);
    438         case PIPE_LOGICOP_OR_REVERSE:
    439                 return nir_ior(b, src, nir_inot(b, dst));
    440         case PIPE_LOGICOP_OR:
    441                 return nir_ior(b, src, dst);
    442         case PIPE_LOGICOP_SET:
    443                 return nir_imm_int(b, ~0);
    444         default:
    445                 fprintf(stderr, "Unknown logic op %d\n", logicop_func);
    446                 /* FALLTHROUGH */
    447         case PIPE_LOGICOP_COPY:
    448                 return src;
    449         }
    450 }
    451 
    452 static nir_ssa_def *
    453 vc4_nir_pipe_compare_func(nir_builder *b, int func,
    454                           nir_ssa_def *src0, nir_ssa_def *src1)
    455 {
    456         switch (func) {
    457         default:
    458                 fprintf(stderr, "Unknown compare func %d\n", func);
    459                 /* FALLTHROUGH */
    460         case PIPE_FUNC_NEVER:
    461                 return nir_imm_int(b, 0);
    462         case PIPE_FUNC_ALWAYS:
    463                 return nir_imm_int(b, ~0);
    464         case PIPE_FUNC_EQUAL:
    465                 return nir_feq(b, src0, src1);
    466         case PIPE_FUNC_NOTEQUAL:
    467                 return nir_fne(b, src0, src1);
    468         case PIPE_FUNC_GREATER:
    469                 return nir_flt(b, src1, src0);
    470         case PIPE_FUNC_GEQUAL:
    471                 return nir_fge(b, src0, src1);
    472         case PIPE_FUNC_LESS:
    473                 return nir_flt(b, src0, src1);
    474         case PIPE_FUNC_LEQUAL:
    475                 return nir_fge(b, src1, src0);
    476         }
    477 }
    478 
    479 static void
    480 vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
    481                                 nir_ssa_def *alpha)
    482 {
    483         if (!c->fs_key->alpha_test)
    484                 return;
    485 
    486         nir_ssa_def *condition =
    487                 vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
    488                                           alpha,
    489                                           nir_load_alpha_ref_float(b));
    490 
    491         nir_intrinsic_instr *discard =
    492                 nir_intrinsic_instr_create(b->shader,
    493                                            nir_intrinsic_discard_if);
    494         discard->num_components = 1;
    495         discard->src[0] = nir_src_for_ssa(nir_inot(b, condition));
    496         nir_builder_instr_insert(b, &discard->instr);
    497         c->s->info->fs.uses_discard = true;
    498 }
    499 
    500 static nir_ssa_def *
    501 vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
    502                          nir_ssa_def **colors)
    503 {
    504         enum pipe_format color_format = c->fs_key->color_format;
    505         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
    506 
    507         nir_ssa_def *swizzled[4];
    508         for (int i = 0; i < 4; i++) {
    509                 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
    510                                                            format_swiz[i]);
    511         }
    512 
    513         return nir_pack_unorm_4x8(b,
    514                                   nir_vec4(b,
    515                                            swizzled[0], swizzled[1],
    516                                            swizzled[2], swizzled[3]));
    517 
    518 }
    519 
    520 static nir_ssa_def *
    521 vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
    522                        int sample)
    523 {
    524         enum pipe_format color_format = c->fs_key->color_format;
    525         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
    526         bool srgb = util_format_is_srgb(color_format);
    527 
    528         /* Pull out the float src/dst color components. */
    529         nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
    530         nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
    531         nir_ssa_def *src_color[4], *unpacked_dst_color[4];
    532         for (unsigned i = 0; i < 4; i++) {
    533                 src_color[i] = nir_channel(b, src, i);
    534                 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
    535         }
    536 
    537         if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
    538                 src_color[3] = nir_imm_float(b, 1.0);
    539 
    540         vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
    541 
    542         nir_ssa_def *packed_color;
    543         if (srgb) {
    544                 /* Unswizzle the destination color. */
    545                 nir_ssa_def *dst_color[4];
    546                 for (unsigned i = 0; i < 4; i++) {
    547                         dst_color[i] = vc4_nir_get_swizzled_channel(b,
    548                                                                     unpacked_dst_color,
    549                                                                     format_swiz[i]);
    550                 }
    551 
    552                 /* Turn dst color to linear. */
    553                 for (int i = 0; i < 3; i++)
    554                         dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
    555 
    556                 nir_ssa_def *blend_color[4];
    557                 vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
    558 
    559                 /* sRGB encode the output color */
    560                 for (int i = 0; i < 3; i++)
    561                         blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
    562 
    563                 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
    564         } else {
    565                 nir_ssa_def *packed_src_color =
    566                         vc4_nir_swizzle_and_pack(c, b, src_color);
    567 
    568                 packed_color =
    569                         vc4_do_blending_i(c, b,
    570                                           packed_src_color, packed_dst_color,
    571                                           src_color[3]);
    572         }
    573 
    574         packed_color = vc4_logicop(b, c->fs_key->logicop_func,
    575                                    packed_color, packed_dst_color);
    576 
    577         /* If the bit isn't set in the color mask, then just return the
    578          * original dst color, instead.
    579          */
    580         uint32_t colormask = 0xffffffff;
    581         for (int i = 0; i < 4; i++) {
    582                 if (format_swiz[i] < 4 &&
    583                     !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
    584                         colormask &= ~(0xff << (i * 8));
    585                 }
    586         }
    587 
    588         return nir_ior(b,
    589                        nir_iand(b, packed_color,
    590                                 nir_imm_int(b, colormask)),
    591                        nir_iand(b, packed_dst_color,
    592                                 nir_imm_int(b, ~colormask)));
    593 }
    594 
    595 static int
    596 vc4_nir_next_output_driver_location(nir_shader *s)
    597 {
    598         int maxloc = -1;
    599 
    600         nir_foreach_variable(var, &s->outputs)
    601                 maxloc = MAX2(maxloc, (int)var->data.driver_location);
    602 
    603         return maxloc + 1;
    604 }
    605 
    606 static void
    607 vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
    608                           nir_ssa_def *val)
    609 {
    610         nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
    611                                                         glsl_uint_type(),
    612                                                         "sample_mask");
    613         sample_mask->data.driver_location =
    614                 vc4_nir_next_output_driver_location(c->s);
    615         sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
    616 
    617         nir_intrinsic_instr *intr =
    618                 nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
    619         intr->num_components = 1;
    620         nir_intrinsic_set_base(intr, sample_mask->data.driver_location);
    621 
    622         intr->src[0] = nir_src_for_ssa(val);
    623         intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
    624         nir_builder_instr_insert(b, &intr->instr);
    625 }
    626 
    627 static void
    628 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
    629                           nir_intrinsic_instr *intr)
    630 {
    631         nir_ssa_def *frag_color = intr->src[0].ssa;
    632 
    633         if (c->fs_key->sample_coverage) {
    634                 nir_intrinsic_instr *load =
    635                         nir_intrinsic_instr_create(b->shader,
    636                                                    nir_intrinsic_load_sample_mask_in);
    637                 load->num_components = 1;
    638                 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
    639                 nir_builder_instr_insert(b, &load->instr);
    640 
    641                 nir_ssa_def *bitmask = &load->dest.ssa;
    642 
    643                 vc4_nir_store_sample_mask(c, b, bitmask);
    644         } else if (c->fs_key->sample_alpha_to_coverage) {
    645                 nir_ssa_def *a = nir_channel(b, frag_color, 3);
    646 
    647                 /* XXX: We should do a nice dither based on the fragment
    648                  * coordinate, instead.
    649                  */
    650                 nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
    651                 nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples));
    652                 nir_ssa_def *bitmask = nir_isub(b,
    653                                                 nir_ishl(b,
    654                                                          nir_imm_int(b, 1),
    655                                                          num_bits),
    656                                                 nir_imm_int(b, 1));
    657                 vc4_nir_store_sample_mask(c, b, bitmask);
    658         }
    659 
    660         /* The TLB color read returns each sample in turn, so if our blending
    661          * depends on the destination color, we're going to have to run the
    662          * blending function separately for each destination sample value, and
    663          * then output the per-sample color using TLB_COLOR_MS.
    664          */
    665         nir_ssa_def *blend_output;
    666         if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
    667                 c->msaa_per_sample_output = true;
    668 
    669                 nir_ssa_def *samples[4];
    670                 for (int i = 0; i < VC4_MAX_SAMPLES; i++)
    671                         samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
    672                 blend_output = nir_vec4(b,
    673                                         samples[0], samples[1],
    674                                         samples[2], samples[3]);
    675         } else {
    676                 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
    677         }
    678 
    679         nir_instr_rewrite_src(&intr->instr, &intr->src[0],
    680                               nir_src_for_ssa(blend_output));
    681         intr->num_components = blend_output->num_components;
    682 }
    683 
    684 static bool
    685 vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
    686 {
    687         nir_foreach_instr_safe(instr, block) {
    688                 if (instr->type != nir_instr_type_intrinsic)
    689                         continue;
    690                 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    691                 if (intr->intrinsic != nir_intrinsic_store_output)
    692                         continue;
    693 
    694                 nir_variable *output_var = NULL;
    695                 nir_foreach_variable(var, &c->s->outputs) {
    696                         if (var->data.driver_location ==
    697                             nir_intrinsic_base(intr)) {
    698                                 output_var = var;
    699                                 break;
    700                         }
    701                 }
    702                 assert(output_var);
    703 
    704                 if (output_var->data.location != FRAG_RESULT_COLOR &&
    705                     output_var->data.location != FRAG_RESULT_DATA0) {
    706                         continue;
    707                 }
    708 
    709                 nir_function_impl *impl =
    710                         nir_cf_node_get_function(&block->cf_node);
    711                 nir_builder b;
    712                 nir_builder_init(&b, impl);
    713                 b.cursor = nir_before_instr(&intr->instr);
    714                 vc4_nir_lower_blend_instr(c, &b, intr);
    715         }
    716         return true;
    717 }
    718 
    719 void
    720 vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
    721 {
    722         nir_foreach_function(function, s) {
    723                 if (function->impl) {
    724                         nir_foreach_block(block, function->impl) {
    725                                 vc4_nir_lower_blend_block(block, c);
    726                         }
    727 
    728                         nir_metadata_preserve(function->impl,
    729                                               nir_metadata_block_index |
    730                                               nir_metadata_dominance);
    731                 }
    732         }
    733 }
    734