Home | History | Annotate | Download | only in softpipe
      1 /**************************************************************************
      2  *
      3  * Copyright 2007 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * quad blending
     30  * \author Brian Paul
     31  */
     32 
     33 #include "pipe/p_defines.h"
     34 #include "util/u_math.h"
     35 #include "util/u_memory.h"
     36 #include "util/u_format.h"
     37 #include "util/u_dual_blend.h"
     38 #include "sp_context.h"
     39 #include "sp_state.h"
     40 #include "sp_quad.h"
     41 #include "sp_tile_cache.h"
     42 #include "sp_quad_pipe.h"
     43 
     44 
     45 enum format
     46 {
     47    RGBA,
     48    RGB,
     49    LUMINANCE,
     50    LUMINANCE_ALPHA,
     51    INTENSITY
     52 };
     53 
     54 
     55 /** Subclass of quad_stage */
     56 struct blend_quad_stage
     57 {
     58    struct quad_stage base;
     59    boolean clamp[PIPE_MAX_COLOR_BUFS];  /**< clamp colors to [0,1]? */
     60    enum format base_format[PIPE_MAX_COLOR_BUFS];
     61    enum util_format_type format_type[PIPE_MAX_COLOR_BUFS];
     62 };
     63 
     64 
     65 /** cast wrapper */
     66 static inline struct blend_quad_stage *
     67 blend_quad_stage(struct quad_stage *stage)
     68 {
     69    return (struct blend_quad_stage *) stage;
     70 }
     71 
     72 
     73 #define VEC4_COPY(DST, SRC) \
     74 do { \
     75     DST[0] = SRC[0]; \
     76     DST[1] = SRC[1]; \
     77     DST[2] = SRC[2]; \
     78     DST[3] = SRC[3]; \
     79 } while(0)
     80 
     81 #define VEC4_SCALAR(DST, SRC) \
     82 do { \
     83     DST[0] = SRC; \
     84     DST[1] = SRC; \
     85     DST[2] = SRC; \
     86     DST[3] = SRC; \
     87 } while(0)
     88 
     89 #define VEC4_ADD(R, A, B) \
     90 do { \
     91    R[0] = A[0] + B[0]; \
     92    R[1] = A[1] + B[1]; \
     93    R[2] = A[2] + B[2]; \
     94    R[3] = A[3] + B[3]; \
     95 } while (0)
     96 
     97 #define VEC4_SUB(R, A, B) \
     98 do { \
     99    R[0] = A[0] - B[0]; \
    100    R[1] = A[1] - B[1]; \
    101    R[2] = A[2] - B[2]; \
    102    R[3] = A[3] - B[3]; \
    103 } while (0)
    104 
    105 /** Add and limit result to ceiling of 1.0 */
    106 #define VEC4_ADD_SAT(R, A, B) \
    107 do { \
    108    R[0] = A[0] + B[0];  if (R[0] > 1.0f) R[0] = 1.0f; \
    109    R[1] = A[1] + B[1];  if (R[1] > 1.0f) R[1] = 1.0f; \
    110    R[2] = A[2] + B[2];  if (R[2] > 1.0f) R[2] = 1.0f; \
    111    R[3] = A[3] + B[3];  if (R[3] > 1.0f) R[3] = 1.0f; \
    112 } while (0)
    113 
    114 /** Subtract and limit result to floor of 0.0 */
    115 #define VEC4_SUB_SAT(R, A, B) \
    116 do { \
    117    R[0] = A[0] - B[0];  if (R[0] < 0.0f) R[0] = 0.0f; \
    118    R[1] = A[1] - B[1];  if (R[1] < 0.0f) R[1] = 0.0f; \
    119    R[2] = A[2] - B[2];  if (R[2] < 0.0f) R[2] = 0.0f; \
    120    R[3] = A[3] - B[3];  if (R[3] < 0.0f) R[3] = 0.0f; \
    121 } while (0)
    122 
    123 #define VEC4_MUL(R, A, B) \
    124 do { \
    125    R[0] = A[0] * B[0]; \
    126    R[1] = A[1] * B[1]; \
    127    R[2] = A[2] * B[2]; \
    128    R[3] = A[3] * B[3]; \
    129 } while (0)
    130 
    131 #define VEC4_MIN(R, A, B) \
    132 do { \
    133    R[0] = (A[0] < B[0]) ? A[0] : B[0]; \
    134    R[1] = (A[1] < B[1]) ? A[1] : B[1]; \
    135    R[2] = (A[2] < B[2]) ? A[2] : B[2]; \
    136    R[3] = (A[3] < B[3]) ? A[3] : B[3]; \
    137 } while (0)
    138 
    139 #define VEC4_MAX(R, A, B) \
    140 do { \
    141    R[0] = (A[0] > B[0]) ? A[0] : B[0]; \
    142    R[1] = (A[1] > B[1]) ? A[1] : B[1]; \
    143    R[2] = (A[2] > B[2]) ? A[2] : B[2]; \
    144    R[3] = (A[3] > B[3]) ? A[3] : B[3]; \
    145 } while (0)
    146 
    147 
    148 
    149 static void
    150 logicop_quad(struct quad_stage *qs,
    151              float (*quadColor)[4],
    152              float (*dest)[4])
    153 {
    154    struct softpipe_context *softpipe = qs->softpipe;
    155    ubyte src[4][4], dst[4][4], res[4][4];
    156    uint *src4 = (uint *) src;
    157    uint *dst4 = (uint *) dst;
    158    uint *res4 = (uint *) res;
    159    uint j;
    160 
    161 
    162    /* convert to ubyte */
    163    for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
    164       dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
    165       dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
    166       dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
    167       dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
    168 
    169       src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
    170       src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
    171       src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
    172       src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
    173    }
    174 
    175    switch (softpipe->blend->logicop_func) {
    176    case PIPE_LOGICOP_CLEAR:
    177       for (j = 0; j < 4; j++)
    178          res4[j] = 0;
    179       break;
    180    case PIPE_LOGICOP_NOR:
    181       for (j = 0; j < 4; j++)
    182          res4[j] = ~(src4[j] | dst4[j]);
    183       break;
    184    case PIPE_LOGICOP_AND_INVERTED:
    185       for (j = 0; j < 4; j++)
    186          res4[j] = ~src4[j] & dst4[j];
    187       break;
    188    case PIPE_LOGICOP_COPY_INVERTED:
    189       for (j = 0; j < 4; j++)
    190          res4[j] = ~src4[j];
    191       break;
    192    case PIPE_LOGICOP_AND_REVERSE:
    193       for (j = 0; j < 4; j++)
    194          res4[j] = src4[j] & ~dst4[j];
    195       break;
    196    case PIPE_LOGICOP_INVERT:
    197       for (j = 0; j < 4; j++)
    198          res4[j] = ~dst4[j];
    199       break;
    200    case PIPE_LOGICOP_XOR:
    201       for (j = 0; j < 4; j++)
    202          res4[j] = dst4[j] ^ src4[j];
    203       break;
    204    case PIPE_LOGICOP_NAND:
    205       for (j = 0; j < 4; j++)
    206          res4[j] = ~(src4[j] & dst4[j]);
    207       break;
    208    case PIPE_LOGICOP_AND:
    209       for (j = 0; j < 4; j++)
    210          res4[j] = src4[j] & dst4[j];
    211       break;
    212    case PIPE_LOGICOP_EQUIV:
    213       for (j = 0; j < 4; j++)
    214          res4[j] = ~(src4[j] ^ dst4[j]);
    215       break;
    216    case PIPE_LOGICOP_NOOP:
    217       for (j = 0; j < 4; j++)
    218          res4[j] = dst4[j];
    219       break;
    220    case PIPE_LOGICOP_OR_INVERTED:
    221       for (j = 0; j < 4; j++)
    222          res4[j] = ~src4[j] | dst4[j];
    223       break;
    224    case PIPE_LOGICOP_COPY:
    225       for (j = 0; j < 4; j++)
    226          res4[j] = src4[j];
    227       break;
    228    case PIPE_LOGICOP_OR_REVERSE:
    229       for (j = 0; j < 4; j++)
    230          res4[j] = src4[j] | ~dst4[j];
    231       break;
    232    case PIPE_LOGICOP_OR:
    233       for (j = 0; j < 4; j++)
    234          res4[j] = src4[j] | dst4[j];
    235       break;
    236    case PIPE_LOGICOP_SET:
    237       for (j = 0; j < 4; j++)
    238          res4[j] = ~0;
    239       break;
    240    default:
    241       assert(0 && "invalid logicop mode");
    242    }
    243 
    244    for (j = 0; j < 4; j++) {
    245       quadColor[j][0] = ubyte_to_float(res[j][0]);
    246       quadColor[j][1] = ubyte_to_float(res[j][1]);
    247       quadColor[j][2] = ubyte_to_float(res[j][2]);
    248       quadColor[j][3] = ubyte_to_float(res[j][3]);
    249    }
    250 }
    251 
    252 
    253 
/**
 * Do blending for a 2x2 quad for one color buffer.
 * \param qs  the quad stage; its softpipe context supplies the blend state
 * \param quadColor  the incoming quad colors
 * \param quadColor2  the second set of incoming quad colors, used by the
 *                    SRC1 blend factors
 * \param dest  the destination/framebuffer quad colors
 * \param const_blend_color  the constant blend color
 * \param blend_index  which set of blending terms to use
 */
    261 static void
    262 blend_quad(struct quad_stage *qs,
    263            float (*quadColor)[4],
    264            float (*quadColor2)[4],
    265            float (*dest)[4],
    266            const float const_blend_color[4],
    267            unsigned blend_index)
    268 {
    269    static const float zero[4] = { 0, 0, 0, 0 };
    270    static const float one[4] = { 1, 1, 1, 1 };
    271    struct softpipe_context *softpipe = qs->softpipe;
    272    float source[4][TGSI_QUAD_SIZE] = { { 0 } };
    273    float blend_dest[4][TGSI_QUAD_SIZE];
    274 
    275    /*
    276     * Compute src/first term RGB
    277     */
    278    switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
    279    case PIPE_BLENDFACTOR_ONE:
    280       VEC4_COPY(source[0], quadColor[0]); /* R */
    281       VEC4_COPY(source[1], quadColor[1]); /* G */
    282       VEC4_COPY(source[2], quadColor[2]); /* B */
    283       break;
    284    case PIPE_BLENDFACTOR_SRC_COLOR:
    285       VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
    286       VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
    287       VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
    288       break;
    289    case PIPE_BLENDFACTOR_SRC_ALPHA:
    290       {
    291          const float *alpha = quadColor[3];
    292          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
    293          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
    294          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
    295       }
    296       break;
    297    case PIPE_BLENDFACTOR_DST_COLOR:
    298       VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
    299       VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
    300       VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
    301       break;
    302    case PIPE_BLENDFACTOR_DST_ALPHA:
    303       {
    304          const float *alpha = dest[3];
    305          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
    306          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
    307          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
    308       }
    309       break;
    310    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    311       {
    312          const float *alpha = quadColor[3];
    313          float diff[4], temp[4];
    314          VEC4_SUB(diff, one, dest[3]);
    315          VEC4_MIN(temp, alpha, diff);
    316          VEC4_MUL(source[0], quadColor[0], temp); /* R */
    317          VEC4_MUL(source[1], quadColor[1], temp); /* G */
    318          VEC4_MUL(source[2], quadColor[2], temp); /* B */
    319       }
    320       break;
    321    case PIPE_BLENDFACTOR_CONST_COLOR:
    322       {
    323          float comp[4];
    324          VEC4_SCALAR(comp, const_blend_color[0]); /* R */
    325          VEC4_MUL(source[0], quadColor[0], comp); /* R */
    326          VEC4_SCALAR(comp, const_blend_color[1]); /* G */
    327          VEC4_MUL(source[1], quadColor[1], comp); /* G */
    328          VEC4_SCALAR(comp, const_blend_color[2]); /* B */
    329          VEC4_MUL(source[2], quadColor[2], comp); /* B */
    330       }
    331       break;
    332    case PIPE_BLENDFACTOR_CONST_ALPHA:
    333       {
    334          float alpha[4];
    335          VEC4_SCALAR(alpha, const_blend_color[3]);
    336          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
    337          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
    338          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
    339       }
    340       break;
    341    case PIPE_BLENDFACTOR_SRC1_COLOR:
    342       VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */
    343       VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */
    344       VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */
    345       break;
    346    case PIPE_BLENDFACTOR_SRC1_ALPHA:
    347       {
    348          const float *alpha = quadColor2[3];
    349          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
    350          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
    351          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
    352       }
    353       break;
    354    case PIPE_BLENDFACTOR_ZERO:
    355       VEC4_COPY(source[0], zero); /* R */
    356       VEC4_COPY(source[1], zero); /* G */
    357       VEC4_COPY(source[2], zero); /* B */
    358       break;
    359    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    360       {
    361          float inv_comp[4];
    362          VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
    363          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
    364          VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
    365          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
    366          VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
    367          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
    368       }
    369       break;
    370    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    371       {
    372          float inv_alpha[4];
    373          VEC4_SUB(inv_alpha, one, quadColor[3]);
    374          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
    375          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
    376          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
    377       }
    378       break;
    379    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
    380       {
    381          float inv_alpha[4];
    382          VEC4_SUB(inv_alpha, one, dest[3]);
    383          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
    384          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
    385          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
    386       }
    387       break;
    388    case PIPE_BLENDFACTOR_INV_DST_COLOR:
    389       {
    390          float inv_comp[4];
    391          VEC4_SUB(inv_comp, one, dest[0]); /* R */
    392          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
    393          VEC4_SUB(inv_comp, one, dest[1]); /* G */
    394          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
    395          VEC4_SUB(inv_comp, one, dest[2]); /* B */
    396          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
    397       }
    398       break;
    399    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
    400       {
    401          float inv_comp[4];
    402          /* R */
    403          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
    404          VEC4_MUL(source[0], quadColor[0], inv_comp);
    405          /* G */
    406          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
    407          VEC4_MUL(source[1], quadColor[1], inv_comp);
    408          /* B */
    409          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
    410          VEC4_MUL(source[2], quadColor[2], inv_comp);
    411       }
    412       break;
    413    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    414       {
    415          float inv_alpha[4];
    416          VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
    417          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
    418          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
    419          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
    420       }
    421       break;
    422    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
    423       {
    424          float inv_comp[4];
    425          VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
    426          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
    427          VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
    428          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
    429          VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
    430          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
    431       }
    432       break;
    433    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
    434       {
    435          float inv_alpha[4];
    436          VEC4_SUB(inv_alpha, one, quadColor2[3]);
    437          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
    438          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
    439          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
    440       }
    441       break;
    442    default:
    443       assert(0 && "invalid rgb src factor");
    444    }
    445 
    446    /*
    447     * Compute src/first term A
    448     */
    449    switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
    450    case PIPE_BLENDFACTOR_ONE:
    451       VEC4_COPY(source[3], quadColor[3]); /* A */
    452       break;
    453    case PIPE_BLENDFACTOR_SRC_COLOR:
    454       /* fall-through */
    455    case PIPE_BLENDFACTOR_SRC_ALPHA:
    456       {
    457          const float *alpha = quadColor[3];
    458          VEC4_MUL(source[3], quadColor[3], alpha); /* A */
    459       }
    460       break;
    461    case PIPE_BLENDFACTOR_DST_COLOR:
    462       /* fall-through */
    463    case PIPE_BLENDFACTOR_DST_ALPHA:
    464       VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
    465       break;
    466    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    467       /* multiply alpha by 1.0 */
    468       VEC4_COPY(source[3], quadColor[3]); /* A */
    469       break;
    470    case PIPE_BLENDFACTOR_CONST_COLOR:
    471       /* fall-through */
    472    case PIPE_BLENDFACTOR_CONST_ALPHA:
    473       {
    474          float comp[4];
    475          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
    476          VEC4_MUL(source[3], quadColor[3], comp); /* A */
    477       }
    478       break;
    479    case PIPE_BLENDFACTOR_ZERO:
    480       VEC4_COPY(source[3], zero); /* A */
    481       break;
    482    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    483       /* fall-through */
    484    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    485       {
    486          float inv_alpha[4];
    487          VEC4_SUB(inv_alpha, one, quadColor[3]);
    488          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
    489       }
    490       break;
    491    case PIPE_BLENDFACTOR_INV_DST_COLOR:
    492       /* fall-through */
    493    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
    494       {
    495          float inv_alpha[4];
    496          VEC4_SUB(inv_alpha, one, dest[3]);
    497          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
    498       }
    499       break;
    500    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
    501       /* fall-through */
    502    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    503       {
    504          float inv_comp[4];
    505          /* A */
    506          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
    507          VEC4_MUL(source[3], quadColor[3], inv_comp);
    508       }
    509       break;
    510    case PIPE_BLENDFACTOR_SRC1_COLOR:
    511       /* fall-through */
    512    case PIPE_BLENDFACTOR_SRC1_ALPHA:
    513       {
    514          const float *alpha = quadColor2[3];
    515          VEC4_MUL(source[3], quadColor[3], alpha); /* A */
    516       }
    517       break;
    518    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
    519       /* fall-through */
    520    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
    521       {
    522          float inv_alpha[4];
    523          VEC4_SUB(inv_alpha, one, quadColor2[3]);
    524          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
    525       }
    526       break;
    527    default:
    528       assert(0 && "invalid alpha src factor");
    529    }
    530 
    531    /* Save the original dest for use in masking */
    532    VEC4_COPY(blend_dest[0], dest[0]);
    533    VEC4_COPY(blend_dest[1], dest[1]);
    534    VEC4_COPY(blend_dest[2], dest[2]);
    535    VEC4_COPY(blend_dest[3], dest[3]);
    536 
    537 
    538    /*
    539     * Compute blend_dest/second term RGB
    540     */
    541    switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
    542    case PIPE_BLENDFACTOR_ONE:
    543       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
    544       break;
    545    case PIPE_BLENDFACTOR_SRC_COLOR:
    546       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
    547       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
    548       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
    549       break;
    550    case PIPE_BLENDFACTOR_SRC_ALPHA:
    551       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
    552       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
    553       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
    554       break;
    555    case PIPE_BLENDFACTOR_DST_ALPHA:
    556       VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
    557       VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
    558       VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
    559       break;
    560    case PIPE_BLENDFACTOR_DST_COLOR:
    561       VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
    562       VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
    563       VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
    564       break;
    565    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    566       {
    567          const float *alpha = quadColor[3];
    568          float diff[4], temp[4];
    569          VEC4_SUB(diff, one, blend_dest[3]);
    570          VEC4_MIN(temp, alpha, diff);
    571          VEC4_MUL(blend_dest[0], blend_dest[0], temp); /* R */
    572          VEC4_MUL(blend_dest[1], blend_dest[1], temp); /* G */
    573          VEC4_MUL(blend_dest[2], blend_dest[2], temp); /* B */
    574       }
    575       break;
    576    case PIPE_BLENDFACTOR_CONST_COLOR:
    577       {
    578          float comp[4];
    579          VEC4_SCALAR(comp, const_blend_color[0]); /* R */
    580          VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
    581          VEC4_SCALAR(comp, const_blend_color[1]); /* G */
    582          VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
    583          VEC4_SCALAR(comp, const_blend_color[2]); /* B */
    584          VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
    585       }
    586       break;
    587    case PIPE_BLENDFACTOR_CONST_ALPHA:
    588       {
    589          float comp[4];
    590          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
    591          VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
    592          VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
    593          VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
    594       }
    595       break;
    596    case PIPE_BLENDFACTOR_ZERO:
    597       VEC4_COPY(blend_dest[0], zero); /* R */
    598       VEC4_COPY(blend_dest[1], zero); /* G */
    599       VEC4_COPY(blend_dest[2], zero); /* B */
    600       break;
    601    case PIPE_BLENDFACTOR_SRC1_COLOR:
    602       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */
    603       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */
    604       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */
    605       break;
    606    case PIPE_BLENDFACTOR_SRC1_ALPHA:
    607       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */
    608       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */
    609       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */
    610       break;
    611    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    612       {
    613          float inv_comp[4];
    614          VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
    615          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
    616          VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
    617          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
    618          VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
    619          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
    620       }
    621       break;
    622    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    623       {
    624          float one_minus_alpha[TGSI_QUAD_SIZE];
    625          VEC4_SUB(one_minus_alpha, one, quadColor[3]);
    626          VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
    627          VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
    628          VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
    629       }
    630       break;
    631    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
    632       {
    633          float inv_comp[4];
    634          VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
    635          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
    636          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
    637          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
    638       }
    639       break;
    640    case PIPE_BLENDFACTOR_INV_DST_COLOR:
    641       {
    642          float inv_comp[4];
    643          VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
    644          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
    645          VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
    646          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
    647          VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
    648          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
    649       }
    650       break;
    651    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
    652       {
    653          float inv_comp[4];
    654          /* R */
    655          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
    656          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
    657          /* G */
    658          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
    659          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
    660          /* B */
    661          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
    662          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
    663       }
    664       break;
    665    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    666       {
    667          float inv_comp[4];
    668          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
    669          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
    670          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
    671          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
    672       }
    673       break;
    674    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
    675       {
    676          float inv_comp[4];
    677          VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
    678          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
    679          VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
    680          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
    681          VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
    682          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
    683       }
    684       break;
    685    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
    686       {
    687          float one_minus_alpha[TGSI_QUAD_SIZE];
    688          VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
    689          VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
    690          VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
    691          VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
    692       }
    693       break;
    694    default:
    695       assert(0 && "invalid rgb dst factor");
    696    }
    697 
    698    /*
    699     * Compute blend_dest/second term A
    700     */
    701    switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
    702    case PIPE_BLENDFACTOR_ONE:
    703       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
    704       break;
    705    case PIPE_BLENDFACTOR_SRC_COLOR:
    706       /* fall-through */
    707    case PIPE_BLENDFACTOR_SRC_ALPHA:
    708       VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
    709       break;
    710    case PIPE_BLENDFACTOR_DST_COLOR:
    711       /* fall-through */
    712    case PIPE_BLENDFACTOR_DST_ALPHA:
    713       VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
    714       break;
    715    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
    716       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
    717       break;
    718    case PIPE_BLENDFACTOR_CONST_COLOR:
    719       /* fall-through */
    720    case PIPE_BLENDFACTOR_CONST_ALPHA:
    721       {
    722          float comp[4];
    723          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
    724          VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
    725       }
    726       break;
    727    case PIPE_BLENDFACTOR_ZERO:
    728       VEC4_COPY(blend_dest[3], zero); /* A */
    729       break;
    730    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
    731       /* fall-through */
    732    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    733       {
    734          float one_minus_alpha[TGSI_QUAD_SIZE];
    735          VEC4_SUB(one_minus_alpha, one, quadColor[3]);
    736          VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
    737       }
    738       break;
    739    case PIPE_BLENDFACTOR_INV_DST_COLOR:
    740       /* fall-through */
    741    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
    742       {
    743          float inv_comp[4];
    744          VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
    745          VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
    746       }
    747       break;
    748    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
    749       /* fall-through */
    750    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    751       {
    752          float inv_comp[4];
    753          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
    754          VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
    755       }
    756       break;
    757    case PIPE_BLENDFACTOR_SRC1_COLOR:
    758       /* fall-through */
    759    case PIPE_BLENDFACTOR_SRC1_ALPHA:
    760       VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */
    761       break;
    762    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
    763       /* fall-through */
    764    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
    765       {
    766          float one_minus_alpha[TGSI_QUAD_SIZE];
    767          VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
    768          VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
    769       }
    770       break;
    771    default:
    772       assert(0 && "invalid alpha dst factor");
    773    }
    774 
    775    /*
    776     * Combine RGB terms
    777     */
    778    switch (softpipe->blend->rt[blend_index].rgb_func) {
    779    case PIPE_BLEND_ADD:
    780       VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */
    781       VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */
    782       VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */
    783       break;
    784    case PIPE_BLEND_SUBTRACT:
    785       VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */
    786       VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */
    787       VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */
    788       break;
    789    case PIPE_BLEND_REVERSE_SUBTRACT:
    790       VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */
    791       VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */
    792       VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */
    793       break;
    794    case PIPE_BLEND_MIN:
    795       VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
    796       VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
    797       VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
    798       break;
    799    case PIPE_BLEND_MAX:
    800       VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
    801       VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
    802       VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
    803       break;
    804    default:
    805       assert(0 && "invalid rgb blend func");
    806    }
    807 
    808    /*
    809     * Combine A terms
    810     */
    811    switch (softpipe->blend->rt[blend_index].alpha_func) {
    812    case PIPE_BLEND_ADD:
    813       VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */
    814       break;
    815    case PIPE_BLEND_SUBTRACT:
    816       VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */
    817       break;
    818    case PIPE_BLEND_REVERSE_SUBTRACT:
    819       VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */
    820       break;
    821    case PIPE_BLEND_MIN:
    822       VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
    823       break;
    824    case PIPE_BLEND_MAX:
    825       VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
    826       break;
    827    default:
    828       assert(0 && "invalid alpha blend func");
    829    }
    830 }
    831 
    832 static void
    833 colormask_quad(unsigned colormask,
    834                float (*quadColor)[4],
    835                float (*dest)[4])
    836 {
    837    /* R */
    838    if (!(colormask & PIPE_MASK_R))
    839       COPY_4V(quadColor[0], dest[0]);
    840 
    841    /* G */
    842    if (!(colormask & PIPE_MASK_G))
    843       COPY_4V(quadColor[1], dest[1]);
    844 
    845    /* B */
    846    if (!(colormask & PIPE_MASK_B))
    847       COPY_4V(quadColor[2], dest[2]);
    848 
    849    /* A */
    850    if (!(colormask & PIPE_MASK_A))
    851       COPY_4V(quadColor[3], dest[3]);
    852 }
    853 
    854 
    855 /**
    856  * Clamp all colors in a quad to [0, 1]
    857  */
    858 static void
    859 clamp_colors(float (*quadColor)[4])
    860 {
    861    unsigned i, j;
    862 
    863    for (i = 0; i < 4; i++) {
    864       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    865          quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
    866       }
    867    }
    868 }
    869 
    870 
    871 /**
    872  * If we're drawing to a luminance, luminance/alpha or intensity surface
    873  * we have to adjust (rebase) the fragment/quad colors before writing them
    874  * to the tile cache.  The tile cache always stores RGBA colors but if
    875  * we're caching a L/A surface (for example) we need to be sure that R=G=B
    876  * so that subsequent reads from the surface cache appear to return L/A
    877  * values.
    878  * The piglit fbo-blending-formats test will exercise this.
    879  */
    880 static void
    881 rebase_colors(enum format base_format, float (*quadColor)[4])
    882 {
    883    unsigned i;
    884 
    885    switch (base_format) {
    886    case RGB:
    887       for (i = 0; i < 4; i++) {
    888          /* A = 1 */
    889          quadColor[3][i] = 1.0F;
    890       }
    891       break;
    892    case LUMINANCE:
    893       for (i = 0; i < 4; i++) {
    894          /* B = G = R */
    895          quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
    896          /* A = 1 */
    897          quadColor[3][i] = 1.0F;
    898       }
    899       break;
    900    case LUMINANCE_ALPHA:
    901       for (i = 0; i < 4; i++) {
    902          /* B = G = R */
    903          quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
    904       }
    905       break;
    906    case INTENSITY:
    907       for (i = 0; i < 4; i++) {
    908          /* A = B = G = R */
    909          quadColor[3][i] = quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
    910       }
    911       break;
    912    default:
    913       ; /* nothing */
    914    }
    915 }
    916 
    917 static void
    918 blend_fallback(struct quad_stage *qs,
    919                struct quad_header *quads[],
    920                unsigned nr)
    921 {
    922    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
    923    struct softpipe_context *softpipe = qs->softpipe;
    924    const struct pipe_blend_state *blend = softpipe->blend;
    925    unsigned cbuf;
    926    boolean write_all =
    927       softpipe->fs_variant->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
    928 
    929    for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
    930       if (softpipe->framebuffer.cbufs[cbuf]) {
    931          /* which blend/mask state index to use: */
    932          const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
    933          float dest[4][TGSI_QUAD_SIZE];
    934          struct softpipe_cached_tile *tile
    935             = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
    936                                  quads[0]->input.x0,
    937                                  quads[0]->input.y0, quads[0]->input.layer);
    938          const boolean clamp = bqs->clamp[cbuf];
    939          const float *blend_color;
    940          const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf);
    941          uint q, i, j;
    942 
    943          if (clamp)
    944             blend_color = softpipe->blend_color_clamped.color;
    945          else
    946             blend_color = softpipe->blend_color.color;
    947 
    948          for (q = 0; q < nr; q++) {
    949             struct quad_header *quad = quads[q];
    950             float (*quadColor)[4];
    951             float (*quadColor2)[4] = NULL;
    952             float temp_quad_color[TGSI_QUAD_SIZE][4];
    953             const int itx = (quad->input.x0 & (TILE_SIZE-1));
    954             const int ity = (quad->input.y0 & (TILE_SIZE-1));
    955 
    956             if (write_all) {
    957                for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    958                   for (i = 0; i < 4; i++) {
    959                      temp_quad_color[i][j] = quad->output.color[0][i][j];
    960                   }
    961                }
    962                quadColor = temp_quad_color;
    963             } else {
    964                quadColor = quad->output.color[cbuf];
    965                if (dual_source_blend)
    966                   quadColor2 = quad->output.color[cbuf + 1];
    967             }
    968 
    969             /* If fixed-point dest color buffer, need to clamp the incoming
    970              * fragment colors now.
    971              */
    972             if (clamp || softpipe->rasterizer->clamp_fragment_color) {
    973                clamp_colors(quadColor);
    974             }
    975 
    976             /* get/swizzle dest colors
    977              */
    978             for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    979                int x = itx + (j & 1);
    980                int y = ity + (j >> 1);
    981                for (i = 0; i < 4; i++) {
    982                   dest[i][j] = tile->data.color[y][x][i];
    983                }
    984             }
    985 
    986 
    987             if (blend->logicop_enable) {
    988                if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) {
    989                   logicop_quad( qs, quadColor, dest );
    990                }
    991             }
    992             else if (blend->rt[blend_buf].blend_enable) {
    993                blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf);
    994 
    995                /* If fixed-point dest color buffer, need to clamp the outgoing
    996                 * fragment colors now.
    997                 */
    998                if (clamp) {
    999                   clamp_colors(quadColor);
   1000                }
   1001             }
   1002 
   1003             rebase_colors(bqs->base_format[cbuf], quadColor);
   1004 
   1005             if (blend->rt[blend_buf].colormask != 0xf)
   1006                colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
   1007 
   1008             /* Output color values
   1009              */
   1010             for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1011                if (quad->inout.mask & (1 << j)) {
   1012                   int x = itx + (j & 1);
   1013                   int y = ity + (j >> 1);
   1014                   for (i = 0; i < 4; i++) { /* loop over color chans */
   1015                      tile->data.color[y][x][i] = quadColor[i][j];
   1016                   }
   1017                }
   1018             }
   1019          }
   1020       }
   1021    }
   1022 }
   1023 
   1024 
   1025 static void
   1026 blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
   1027                                          struct quad_header *quads[],
   1028                                          unsigned nr)
   1029 {
   1030    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
   1031    static const float one[4] = { 1, 1, 1, 1 };
   1032    float one_minus_alpha[TGSI_QUAD_SIZE];
   1033    float dest[4][TGSI_QUAD_SIZE];
   1034    float source[4][TGSI_QUAD_SIZE];
   1035    uint i, j, q;
   1036 
   1037    struct softpipe_cached_tile *tile
   1038       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
   1039                            quads[0]->input.x0,
   1040                            quads[0]->input.y0, quads[0]->input.layer);
   1041 
   1042    for (q = 0; q < nr; q++) {
   1043       struct quad_header *quad = quads[q];
   1044       float (*quadColor)[4] = quad->output.color[0];
   1045       const float *alpha = quadColor[3];
   1046       const int itx = (quad->input.x0 & (TILE_SIZE-1));
   1047       const int ity = (quad->input.y0 & (TILE_SIZE-1));
   1048 
   1049       /* get/swizzle dest colors */
   1050       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1051          int x = itx + (j & 1);
   1052          int y = ity + (j >> 1);
   1053          for (i = 0; i < 4; i++) {
   1054             dest[i][j] = tile->data.color[y][x][i];
   1055          }
   1056       }
   1057 
   1058       /* If fixed-point dest color buffer, need to clamp the incoming
   1059        * fragment colors now.
   1060        */
   1061       if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
   1062          clamp_colors(quadColor);
   1063       }
   1064 
   1065       VEC4_MUL(source[0], quadColor[0], alpha); /* R */
   1066       VEC4_MUL(source[1], quadColor[1], alpha); /* G */
   1067       VEC4_MUL(source[2], quadColor[2], alpha); /* B */
   1068       VEC4_MUL(source[3], quadColor[3], alpha); /* A */
   1069 
   1070       VEC4_SUB(one_minus_alpha, one, alpha);
   1071       VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
   1072       VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
   1073       VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
   1074       VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
   1075 
   1076       VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
   1077       VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
   1078       VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
   1079       VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
   1080 
   1081       /* If fixed-point dest color buffer, need to clamp the outgoing
   1082        * fragment colors now.
   1083        */
   1084       if (bqs->clamp[0]) {
   1085          clamp_colors(quadColor);
   1086       }
   1087 
   1088       rebase_colors(bqs->base_format[0], quadColor);
   1089 
   1090       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1091          if (quad->inout.mask & (1 << j)) {
   1092             int x = itx + (j & 1);
   1093             int y = ity + (j >> 1);
   1094             for (i = 0; i < 4; i++) { /* loop over color chans */
   1095                tile->data.color[y][x][i] = quadColor[i][j];
   1096             }
   1097          }
   1098       }
   1099    }
   1100 }
   1101 
   1102 static void
   1103 blend_single_add_one_one(struct quad_stage *qs,
   1104                          struct quad_header *quads[],
   1105                          unsigned nr)
   1106 {
   1107    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
   1108    float dest[4][TGSI_QUAD_SIZE];
   1109    uint i, j, q;
   1110 
   1111    struct softpipe_cached_tile *tile
   1112       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
   1113                            quads[0]->input.x0,
   1114                            quads[0]->input.y0, quads[0]->input.layer);
   1115 
   1116    for (q = 0; q < nr; q++) {
   1117       struct quad_header *quad = quads[q];
   1118       float (*quadColor)[4] = quad->output.color[0];
   1119       const int itx = (quad->input.x0 & (TILE_SIZE-1));
   1120       const int ity = (quad->input.y0 & (TILE_SIZE-1));
   1121 
   1122       /* get/swizzle dest colors */
   1123       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1124          int x = itx + (j & 1);
   1125          int y = ity + (j >> 1);
   1126          for (i = 0; i < 4; i++) {
   1127             dest[i][j] = tile->data.color[y][x][i];
   1128          }
   1129       }
   1130 
   1131       /* If fixed-point dest color buffer, need to clamp the incoming
   1132        * fragment colors now.
   1133        */
   1134       if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
   1135          clamp_colors(quadColor);
   1136       }
   1137 
   1138       VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */
   1139       VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */
   1140       VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */
   1141       VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */
   1142 
   1143       /* If fixed-point dest color buffer, need to clamp the outgoing
   1144        * fragment colors now.
   1145        */
   1146       if (bqs->clamp[0]) {
   1147          clamp_colors(quadColor);
   1148       }
   1149 
   1150       rebase_colors(bqs->base_format[0], quadColor);
   1151 
   1152       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1153          if (quad->inout.mask & (1 << j)) {
   1154             int x = itx + (j & 1);
   1155             int y = ity + (j >> 1);
   1156             for (i = 0; i < 4; i++) { /* loop over color chans */
   1157                tile->data.color[y][x][i] = quadColor[i][j];
   1158             }
   1159          }
   1160       }
   1161    }
   1162 }
   1163 
   1164 
   1165 /**
   1166  * Just copy the quad color to the framebuffer tile (respecting the writemask),
   1167  * for one color buffer.
   1168  * Clamping will be done, if needed (depending on the color buffer's
   1169  * datatype) when we write/pack the colors later.
   1170  */
   1171 static void
   1172 single_output_color(struct quad_stage *qs,
   1173                     struct quad_header *quads[],
   1174                     unsigned nr)
   1175 {
   1176    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
   1177    uint i, j, q;
   1178 
   1179    struct softpipe_cached_tile *tile
   1180       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
   1181                            quads[0]->input.x0,
   1182                            quads[0]->input.y0, quads[0]->input.layer);
   1183 
   1184    for (q = 0; q < nr; q++) {
   1185       struct quad_header *quad = quads[q];
   1186       float (*quadColor)[4] = quad->output.color[0];
   1187       const int itx = (quad->input.x0 & (TILE_SIZE-1));
   1188       const int ity = (quad->input.y0 & (TILE_SIZE-1));
   1189 
   1190       if (qs->softpipe->rasterizer->clamp_fragment_color)
   1191          clamp_colors(quadColor);
   1192 
   1193       rebase_colors(bqs->base_format[0], quadColor);
   1194 
   1195       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
   1196          if (quad->inout.mask & (1 << j)) {
   1197             int x = itx + (j & 1);
   1198             int y = ity + (j >> 1);
   1199             for (i = 0; i < 4; i++) { /* loop over color chans */
   1200                tile->data.color[y][x][i] = quadColor[i][j];
   1201             }
   1202          }
   1203       }
   1204    }
   1205 }
   1206 
   1207 static void
   1208 blend_noop(struct quad_stage *qs,
   1209            struct quad_header *quads[],
   1210            unsigned nr)
   1211 {
   1212 }
   1213 
   1214 
   1215 static void
   1216 choose_blend_quad(struct quad_stage *qs,
   1217                   struct quad_header *quads[],
   1218                   unsigned nr)
   1219 {
   1220    struct blend_quad_stage *bqs = blend_quad_stage(qs);
   1221    struct softpipe_context *softpipe = qs->softpipe;
   1222    const struct pipe_blend_state *blend = softpipe->blend;
   1223    unsigned i;
   1224 
   1225    qs->run = blend_fallback;
   1226 
   1227    if (softpipe->framebuffer.nr_cbufs == 0) {
   1228       qs->run = blend_noop;
   1229    }
   1230    else if (!softpipe->blend->logicop_enable &&
   1231             softpipe->blend->rt[0].colormask == 0xf &&
   1232             softpipe->framebuffer.nr_cbufs == 1)
   1233    {
   1234       if (softpipe->framebuffer.cbufs[0] == NULL) {
   1235          qs->run = blend_noop;
   1236       }
   1237       else if (!blend->rt[0].blend_enable) {
   1238          qs->run = single_output_color;
   1239       }
   1240       else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
   1241                blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
   1242                blend->rt[0].rgb_func == blend->rt[0].alpha_func)
   1243       {
   1244          if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
   1245             if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
   1246                 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
   1247                qs->run = blend_single_add_one_one;
   1248             }
   1249             else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
   1250                 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
   1251                qs->run = blend_single_add_src_alpha_inv_src_alpha;
   1252 
   1253          }
   1254       }
   1255    }
   1256 
   1257    /* For each color buffer, determine if the buffer has destination alpha and
   1258     * whether color clamping is needed.
   1259     */
   1260    for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
   1261       if (softpipe->framebuffer.cbufs[i]) {
   1262          const enum pipe_format format = softpipe->framebuffer.cbufs[i]->format;
   1263          const struct util_format_description *desc =
   1264             util_format_description(format);
   1265          /* assuming all or no color channels are normalized: */
   1266          bqs->clamp[i] = desc->channel[0].normalized;
   1267          bqs->format_type[i] = desc->channel[0].type;
   1268 
   1269          if (util_format_is_intensity(format))
   1270             bqs->base_format[i] = INTENSITY;
   1271          else if (util_format_is_luminance(format))
   1272             bqs->base_format[i] = LUMINANCE;
   1273          else if (util_format_is_luminance_alpha(format))
   1274             bqs->base_format[i] = LUMINANCE_ALPHA;
   1275          else if (!util_format_has_alpha(format))
   1276             bqs->base_format[i] = RGB;
   1277          else
   1278             bqs->base_format[i] = RGBA;
   1279       }
   1280    }
   1281 
   1282    qs->run(qs, quads, nr);
   1283 }
   1284 
   1285 
   1286 static void blend_begin(struct quad_stage *qs)
   1287 {
   1288    qs->run = choose_blend_quad;
   1289 }
   1290 
   1291 
   1292 static void blend_destroy(struct quad_stage *qs)
   1293 {
   1294    FREE( qs );
   1295 }
   1296 
   1297 
   1298 struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
   1299 {
   1300    struct blend_quad_stage *stage = CALLOC_STRUCT(blend_quad_stage);
   1301 
   1302    if (!stage)
   1303       return NULL;
   1304 
   1305    stage->base.softpipe = softpipe;
   1306    stage->base.begin = blend_begin;
   1307    stage->base.run = choose_blend_quad;
   1308    stage->base.destroy = blend_destroy;
   1309 
   1310    return &stage->base;
   1311 }
   1312