1 /************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * quad blending 30 * \author Brian Paul 31 */ 32 33 #include "pipe/p_defines.h" 34 #include "util/u_math.h" 35 #include "util/u_memory.h" 36 #include "util/u_format.h" 37 #include "util/u_dual_blend.h" 38 #include "sp_context.h" 39 #include "sp_state.h" 40 #include "sp_quad.h" 41 #include "sp_tile_cache.h" 42 #include "sp_quad_pipe.h" 43 44 45 enum format 46 { 47 RGBA, 48 RGB, 49 LUMINANCE, 50 LUMINANCE_ALPHA, 51 INTENSITY 52 }; 53 54 55 /** Subclass of quad_stage */ 56 struct blend_quad_stage 57 { 58 struct quad_stage base; 59 boolean clamp[PIPE_MAX_COLOR_BUFS]; /**< clamp colors to [0,1]? */ 60 enum format base_format[PIPE_MAX_COLOR_BUFS]; 61 enum util_format_type format_type[PIPE_MAX_COLOR_BUFS]; 62 }; 63 64 65 /** cast wrapper */ 66 static INLINE struct blend_quad_stage * 67 blend_quad_stage(struct quad_stage *stage) 68 { 69 return (struct blend_quad_stage *) stage; 70 } 71 72 73 #define VEC4_COPY(DST, SRC) \ 74 do { \ 75 DST[0] = SRC[0]; \ 76 DST[1] = SRC[1]; \ 77 DST[2] = SRC[2]; \ 78 DST[3] = SRC[3]; \ 79 } while(0) 80 81 #define VEC4_SCALAR(DST, SRC) \ 82 do { \ 83 DST[0] = SRC; \ 84 DST[1] = SRC; \ 85 DST[2] = SRC; \ 86 DST[3] = SRC; \ 87 } while(0) 88 89 #define VEC4_ADD(R, A, B) \ 90 do { \ 91 R[0] = A[0] + B[0]; \ 92 R[1] = A[1] + B[1]; \ 93 R[2] = A[2] + B[2]; \ 94 R[3] = A[3] + B[3]; \ 95 } while (0) 96 97 #define VEC4_SUB(R, A, B) \ 98 do { \ 99 R[0] = A[0] - B[0]; \ 100 R[1] = A[1] - B[1]; \ 101 R[2] = A[2] - B[2]; \ 102 R[3] = A[3] - B[3]; \ 103 } while (0) 104 105 /** Add and limit result to ceiling of 1.0 */ 106 #define VEC4_ADD_SAT(R, A, B) \ 107 do { \ 108 R[0] = A[0] + B[0]; if (R[0] > 1.0f) R[0] = 1.0f; \ 109 R[1] = A[1] + B[1]; if (R[1] > 1.0f) R[1] = 1.0f; \ 110 R[2] = A[2] + B[2]; if (R[2] > 1.0f) R[2] = 1.0f; \ 111 R[3] = A[3] + B[3]; if (R[3] > 1.0f) R[3] = 1.0f; \ 112 } while (0) 113 114 /** Subtract and limit result to floor of 0.0 */ 115 #define VEC4_SUB_SAT(R, A, B) \ 116 do { \ 117 R[0] = A[0] - B[0]; if (R[0] < 0.0f) R[0] = 0.0f; \ 118 R[1] = A[1] - B[1]; if (R[1] < 0.0f) R[1] = 0.0f; \ 119 R[2] = A[2] - B[2]; if (R[2] < 0.0f) R[2] = 0.0f; \ 120 R[3] = A[3] - B[3]; if (R[3] < 0.0f) R[3] = 0.0f; \ 121 } while (0) 122 123 #define VEC4_MUL(R, A, B) \ 124 do { \ 125 R[0] = A[0] * B[0]; \ 126 R[1] = A[1] * B[1]; \ 127 R[2] = A[2] * B[2]; \ 128 R[3] = A[3] * B[3]; \ 129 } while (0) 130 131 #define VEC4_MIN(R, A, B) \ 132 do { \ 133 R[0] = (A[0] < B[0]) ? A[0] : B[0]; \ 134 R[1] = (A[1] < B[1]) ? A[1] : B[1]; \ 135 R[2] = (A[2] < B[2]) ? A[2] : B[2]; \ 136 R[3] = (A[3] < B[3]) ? A[3] : B[3]; \ 137 } while (0) 138 139 #define VEC4_MAX(R, A, B) \ 140 do { \ 141 R[0] = (A[0] > B[0]) ? A[0] : B[0]; \ 142 R[1] = (A[1] > B[1]) ? A[1] : B[1]; \ 143 R[2] = (A[2] > B[2]) ? A[2] : B[2]; \ 144 R[3] = (A[3] > B[3]) ? A[3] : B[3]; \ 145 } while (0) 146 147 148 149 static void 150 logicop_quad(struct quad_stage *qs, 151 float (*quadColor)[4], 152 float (*dest)[4]) 153 { 154 struct softpipe_context *softpipe = qs->softpipe; 155 ubyte src[4][4], dst[4][4], res[4][4]; 156 uint *src4 = (uint *) src; 157 uint *dst4 = (uint *) dst; 158 uint *res4 = (uint *) res; 159 uint j; 160 161 162 /* convert to ubyte */ 163 for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ 164 dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ 165 dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ 166 dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ 167 dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ 168 169 src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ 170 src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ 171 src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ 172 src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ 173 } 174 175 switch (softpipe->blend->logicop_func) { 176 case PIPE_LOGICOP_CLEAR: 177 for (j = 0; j < 4; j++) 178 res4[j] = 0; 179 break; 180 case PIPE_LOGICOP_NOR: 181 for (j = 0; j < 4; j++) 182 res4[j] = ~(src4[j] | dst4[j]); 183 break; 184 case PIPE_LOGICOP_AND_INVERTED: 185 for (j = 0; j < 4; j++) 186 res4[j] = ~src4[j] & dst4[j]; 187 break; 188 case PIPE_LOGICOP_COPY_INVERTED: 189 for (j = 0; j < 4; j++) 190 res4[j] = ~src4[j]; 191 break; 192 case PIPE_LOGICOP_AND_REVERSE: 193 for (j = 0; j < 4; j++) 194 res4[j] = src4[j] & ~dst4[j]; 195 break; 196 case PIPE_LOGICOP_INVERT: 197 for (j = 0; j < 4; j++) 198 res4[j] = ~dst4[j]; 199 break; 200 case PIPE_LOGICOP_XOR: 201 for (j = 0; j < 4; j++) 202 res4[j] = dst4[j] ^ src4[j]; 203 break; 204 case PIPE_LOGICOP_NAND: 205 for (j = 0; j < 4; j++) 206 res4[j] = ~(src4[j] & dst4[j]); 207 break; 208 case PIPE_LOGICOP_AND: 209 for (j = 0; j < 4; j++) 210 res4[j] = src4[j] & dst4[j]; 211 break; 212 case PIPE_LOGICOP_EQUIV: 213 for (j = 0; j < 4; j++) 214 res4[j] = ~(src4[j] ^ dst4[j]); 215 break; 216 case PIPE_LOGICOP_NOOP: 217 for (j = 0; j < 4; j++) 218 res4[j] = dst4[j]; 219 break; 220 case PIPE_LOGICOP_OR_INVERTED: 221 for (j = 0; j < 4; j++) 222 res4[j] = ~src4[j] | dst4[j]; 223 break; 224 case PIPE_LOGICOP_COPY: 225 for (j = 0; j < 4; j++) 226 res4[j] = src4[j]; 227 break; 228 case PIPE_LOGICOP_OR_REVERSE: 229 for (j = 0; j < 4; j++) 230 res4[j] = src4[j] | ~dst4[j]; 231 break; 232 case PIPE_LOGICOP_OR: 233 for (j = 0; j < 4; j++) 234 res4[j] = src4[j] | dst4[j]; 235 break; 236 case PIPE_LOGICOP_SET: 237 for (j = 0; j < 4; j++) 238 res4[j] = ~0; 239 break; 240 default: 241 assert(0 && "invalid logicop mode"); 242 } 243 244 for (j = 0; j < 4; j++) { 245 quadColor[j][0] = ubyte_to_float(res[j][0]); 246 quadColor[j][1] = ubyte_to_float(res[j][1]); 247 quadColor[j][2] = ubyte_to_float(res[j][2]); 248 quadColor[j][3] = ubyte_to_float(res[j][3]); 249 } 250 } 251 252 253 254 /** 255 * Do blending for a 2x2 quad for one color buffer. 256 * \param quadColor the incoming quad colors 257 * \param dest the destination/framebuffer quad colors 258 * \param const_blend_color the constant blend color 259 * \param blend_index which set of blending terms to use 260 */ 261 static void 262 blend_quad(struct quad_stage *qs, 263 float (*quadColor)[4], 264 float (*quadColor2)[4], 265 float (*dest)[4], 266 const float const_blend_color[4], 267 unsigned blend_index) 268 { 269 static const float zero[4] = { 0, 0, 0, 0 }; 270 static const float one[4] = { 1, 1, 1, 1 }; 271 struct softpipe_context *softpipe = qs->softpipe; 272 float source[4][TGSI_QUAD_SIZE] = { { 0 } }; 273 float blend_dest[4][TGSI_QUAD_SIZE]; 274 275 /* 276 * Compute src/first term RGB 277 */ 278 switch (softpipe->blend->rt[blend_index].rgb_src_factor) { 279 case PIPE_BLENDFACTOR_ONE: 280 VEC4_COPY(source[0], quadColor[0]); /* R */ 281 VEC4_COPY(source[1], quadColor[1]); /* G */ 282 VEC4_COPY(source[2], quadColor[2]); /* B */ 283 break; 284 case PIPE_BLENDFACTOR_SRC_COLOR: 285 VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ 286 VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ 287 VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ 288 break; 289 case PIPE_BLENDFACTOR_SRC_ALPHA: 290 { 291 const float *alpha = quadColor[3]; 292 VEC4_MUL(source[0], quadColor[0], alpha); /* R */ 293 VEC4_MUL(source[1], quadColor[1], alpha); /* G */ 294 VEC4_MUL(source[2], quadColor[2], alpha); /* B */ 295 } 296 break; 297 case PIPE_BLENDFACTOR_DST_COLOR: 298 VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ 299 VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ 300 VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ 301 break; 302 case PIPE_BLENDFACTOR_DST_ALPHA: 303 { 304 const float *alpha = dest[3]; 305 VEC4_MUL(source[0], quadColor[0], alpha); /* R */ 306 VEC4_MUL(source[1], quadColor[1], alpha); /* G */ 307 VEC4_MUL(source[2], quadColor[2], alpha); /* B */ 308 } 309 break; 310 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 311 { 312 const float *alpha = quadColor[3]; 313 float diff[4], temp[4]; 314 VEC4_SUB(diff, one, dest[3]); 315 VEC4_MIN(temp, alpha, diff); 316 VEC4_MUL(source[0], quadColor[0], temp); /* R */ 317 VEC4_MUL(source[1], quadColor[1], temp); /* G */ 318 VEC4_MUL(source[2], quadColor[2], temp); /* B */ 319 } 320 break; 321 case PIPE_BLENDFACTOR_CONST_COLOR: 322 { 323 float comp[4]; 324 VEC4_SCALAR(comp, const_blend_color[0]); /* R */ 325 VEC4_MUL(source[0], quadColor[0], comp); /* R */ 326 VEC4_SCALAR(comp, const_blend_color[1]); /* G */ 327 VEC4_MUL(source[1], quadColor[1], comp); /* G */ 328 VEC4_SCALAR(comp, const_blend_color[2]); /* B */ 329 VEC4_MUL(source[2], quadColor[2], comp); /* B */ 330 } 331 break; 332 case PIPE_BLENDFACTOR_CONST_ALPHA: 333 { 334 float alpha[4]; 335 VEC4_SCALAR(alpha, const_blend_color[3]); 336 VEC4_MUL(source[0], quadColor[0], alpha); /* R */ 337 VEC4_MUL(source[1], quadColor[1], alpha); /* G */ 338 VEC4_MUL(source[2], quadColor[2], alpha); /* B */ 339 } 340 break; 341 case PIPE_BLENDFACTOR_SRC1_COLOR: 342 VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */ 343 VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */ 344 VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */ 345 break; 346 case PIPE_BLENDFACTOR_SRC1_ALPHA: 347 { 348 const float *alpha = quadColor2[3]; 349 VEC4_MUL(source[0], quadColor[0], alpha); /* R */ 350 VEC4_MUL(source[1], quadColor[1], alpha); /* G */ 351 VEC4_MUL(source[2], quadColor[2], alpha); /* B */ 352 } 353 break; 354 case PIPE_BLENDFACTOR_ZERO: 355 VEC4_COPY(source[0], zero); /* R */ 356 VEC4_COPY(source[1], zero); /* G */ 357 VEC4_COPY(source[2], zero); /* B */ 358 break; 359 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 360 { 361 float inv_comp[4]; 362 VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ 363 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ 364 VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ 365 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ 366 VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ 367 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ 368 } 369 break; 370 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 371 { 372 float inv_alpha[4]; 373 VEC4_SUB(inv_alpha, one, quadColor[3]); 374 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ 375 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ 376 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ 377 } 378 break; 379 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 380 { 381 float inv_alpha[4]; 382 VEC4_SUB(inv_alpha, one, dest[3]); 383 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ 384 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ 385 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ 386 } 387 break; 388 case PIPE_BLENDFACTOR_INV_DST_COLOR: 389 { 390 float inv_comp[4]; 391 VEC4_SUB(inv_comp, one, dest[0]); /* R */ 392 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ 393 VEC4_SUB(inv_comp, one, dest[1]); /* G */ 394 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ 395 VEC4_SUB(inv_comp, one, dest[2]); /* B */ 396 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ 397 } 398 break; 399 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 400 { 401 float inv_comp[4]; 402 /* R */ 403 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]); 404 VEC4_MUL(source[0], quadColor[0], inv_comp); 405 /* G */ 406 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]); 407 VEC4_MUL(source[1], quadColor[1], inv_comp); 408 /* B */ 409 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]); 410 VEC4_MUL(source[2], quadColor[2], inv_comp); 411 } 412 break; 413 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 414 { 415 float inv_alpha[4]; 416 VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]); 417 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ 418 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ 419 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ 420 } 421 break; 422 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 423 { 424 float inv_comp[4]; 425 VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */ 426 VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ 427 VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */ 428 VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ 429 VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */ 430 VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ 431 } 432 break; 433 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 434 { 435 float inv_alpha[4]; 436 VEC4_SUB(inv_alpha, one, quadColor2[3]); 437 VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ 438 VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ 439 VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ 440 } 441 break; 442 default: 443 assert(0 && "invalid rgb src factor"); 444 } 445 446 /* 447 * Compute src/first term A 448 */ 449 switch (softpipe->blend->rt[blend_index].alpha_src_factor) { 450 case PIPE_BLENDFACTOR_ONE: 451 VEC4_COPY(source[3], quadColor[3]); /* A */ 452 break; 453 case PIPE_BLENDFACTOR_SRC_COLOR: 454 /* fall-through */ 455 case PIPE_BLENDFACTOR_SRC_ALPHA: 456 { 457 const float *alpha = quadColor[3]; 458 VEC4_MUL(source[3], quadColor[3], alpha); /* A */ 459 } 460 break; 461 case PIPE_BLENDFACTOR_DST_COLOR: 462 /* fall-through */ 463 case PIPE_BLENDFACTOR_DST_ALPHA: 464 VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ 465 break; 466 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 467 /* multiply alpha by 1.0 */ 468 VEC4_COPY(source[3], quadColor[3]); /* A */ 469 break; 470 case PIPE_BLENDFACTOR_CONST_COLOR: 471 /* fall-through */ 472 case PIPE_BLENDFACTOR_CONST_ALPHA: 473 { 474 float comp[4]; 475 VEC4_SCALAR(comp, const_blend_color[3]); /* A */ 476 VEC4_MUL(source[3], quadColor[3], comp); /* A */ 477 } 478 break; 479 case PIPE_BLENDFACTOR_ZERO: 480 VEC4_COPY(source[3], zero); /* A */ 481 break; 482 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 483 /* fall-through */ 484 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 485 { 486 float inv_alpha[4]; 487 VEC4_SUB(inv_alpha, one, quadColor[3]); 488 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ 489 } 490 break; 491 case PIPE_BLENDFACTOR_INV_DST_COLOR: 492 /* fall-through */ 493 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 494 { 495 float inv_alpha[4]; 496 VEC4_SUB(inv_alpha, one, dest[3]); 497 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ 498 } 499 break; 500 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 501 /* fall-through */ 502 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 503 { 504 float inv_comp[4]; 505 /* A */ 506 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]); 507 VEC4_MUL(source[3], quadColor[3], inv_comp); 508 } 509 break; 510 case PIPE_BLENDFACTOR_SRC1_COLOR: 511 /* fall-through */ 512 case PIPE_BLENDFACTOR_SRC1_ALPHA: 513 { 514 const float *alpha = quadColor2[3]; 515 VEC4_MUL(source[3], quadColor[3], alpha); /* A */ 516 } 517 break; 518 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 519 /* fall-through */ 520 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 521 { 522 float inv_alpha[4]; 523 VEC4_SUB(inv_alpha, one, quadColor2[3]); 524 VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ 525 } 526 break; 527 default: 528 assert(0 && "invalid alpha src factor"); 529 } 530 531 /* Save the original dest for use in masking */ 532 VEC4_COPY(blend_dest[0], dest[0]); 533 VEC4_COPY(blend_dest[1], dest[1]); 534 VEC4_COPY(blend_dest[2], dest[2]); 535 VEC4_COPY(blend_dest[3], dest[3]); 536 537 538 /* 539 * Compute blend_dest/second term RGB 540 */ 541 switch (softpipe->blend->rt[blend_index].rgb_dst_factor) { 542 case PIPE_BLENDFACTOR_ONE: 543 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ 544 break; 545 case PIPE_BLENDFACTOR_SRC_COLOR: 546 VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */ 547 VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */ 548 VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */ 549 break; 550 case PIPE_BLENDFACTOR_SRC_ALPHA: 551 VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */ 552 VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */ 553 VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */ 554 break; 555 case PIPE_BLENDFACTOR_DST_ALPHA: 556 VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */ 557 VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */ 558 VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */ 559 break; 560 case PIPE_BLENDFACTOR_DST_COLOR: 561 VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */ 562 VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */ 563 VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */ 564 break; 565 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 566 { 567 const float *alpha = quadColor[3]; 568 float diff[4], temp[4]; 569 VEC4_SUB(diff, one, blend_dest[3]); 570 VEC4_MIN(temp, alpha, diff); 571 VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */ 572 VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */ 573 VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */ 574 } 575 break; 576 case PIPE_BLENDFACTOR_CONST_COLOR: 577 { 578 float comp[4]; 579 VEC4_SCALAR(comp, const_blend_color[0]); /* R */ 580 VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */ 581 VEC4_SCALAR(comp, const_blend_color[1]); /* G */ 582 VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */ 583 VEC4_SCALAR(comp, const_blend_color[2]); /* B */ 584 VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */ 585 } 586 break; 587 case PIPE_BLENDFACTOR_CONST_ALPHA: 588 { 589 float comp[4]; 590 VEC4_SCALAR(comp, const_blend_color[3]); /* A */ 591 VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */ 592 VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */ 593 VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */ 594 } 595 break; 596 case PIPE_BLENDFACTOR_ZERO: 597 VEC4_COPY(blend_dest[0], zero); /* R */ 598 VEC4_COPY(blend_dest[1], zero); /* G */ 599 VEC4_COPY(blend_dest[2], zero); /* B */ 600 break; 601 case PIPE_BLENDFACTOR_SRC1_COLOR: 602 VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */ 603 VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */ 604 VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */ 605 break; 606 case PIPE_BLENDFACTOR_SRC1_ALPHA: 607 VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */ 608 VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */ 609 VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */ 610 break; 611 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 612 { 613 float inv_comp[4]; 614 VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ 615 VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ 616 VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ 617 VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ 618 VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ 619 VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ 620 } 621 break; 622 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 623 { 624 float one_minus_alpha[TGSI_QUAD_SIZE]; 625 VEC4_SUB(one_minus_alpha, one, quadColor[3]); 626 VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */ 627 VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */ 628 VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */ 629 } 630 break; 631 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 632 { 633 float inv_comp[4]; 634 VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */ 635 VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ 636 VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ 637 VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ 638 } 639 break; 640 case PIPE_BLENDFACTOR_INV_DST_COLOR: 641 { 642 float inv_comp[4]; 643 VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */ 644 VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */ 645 VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */ 646 VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */ 647 VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */ 648 VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */ 649 } 650 break; 651 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 652 { 653 float inv_comp[4]; 654 /* R */ 655 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]); 656 VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); 657 /* G */ 658 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]); 659 VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); 660 /* B */ 661 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]); 662 VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); 663 } 664 break; 665 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 666 { 667 float inv_comp[4]; 668 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]); 669 VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); 670 VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); 671 VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); 672 } 673 break; 674 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 675 { 676 float inv_comp[4]; 677 VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */ 678 VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */ 679 VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */ 680 VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */ 681 VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */ 682 VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */ 683 } 684 break; 685 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 686 { 687 float one_minus_alpha[TGSI_QUAD_SIZE]; 688 VEC4_SUB(one_minus_alpha, one, quadColor2[3]); 689 VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */ 690 VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */ 691 VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */ 692 } 693 break; 694 default: 695 assert(0 && "invalid rgb dst factor"); 696 } 697 698 /* 699 * Compute blend_dest/second term A 700 */ 701 switch (softpipe->blend->rt[blend_index].alpha_dst_factor) { 702 case PIPE_BLENDFACTOR_ONE: 703 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ 704 break; 705 case PIPE_BLENDFACTOR_SRC_COLOR: 706 /* fall-through */ 707 case PIPE_BLENDFACTOR_SRC_ALPHA: 708 VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */ 709 break; 710 case PIPE_BLENDFACTOR_DST_COLOR: 711 /* fall-through */ 712 case PIPE_BLENDFACTOR_DST_ALPHA: 713 VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */ 714 break; 715 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 716 /* blend_dest = blend_dest * 1 NO-OP, leave blend_dest as-is */ 717 break; 718 case PIPE_BLENDFACTOR_CONST_COLOR: 719 /* fall-through */ 720 case PIPE_BLENDFACTOR_CONST_ALPHA: 721 { 722 float comp[4]; 723 VEC4_SCALAR(comp, const_blend_color[3]); /* A */ 724 VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */ 725 } 726 break; 727 case PIPE_BLENDFACTOR_ZERO: 728 VEC4_COPY(blend_dest[3], zero); /* A */ 729 break; 730 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 731 /* fall-through */ 732 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 733 { 734 float one_minus_alpha[TGSI_QUAD_SIZE]; 735 VEC4_SUB(one_minus_alpha, one, quadColor[3]); 736 VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */ 737 } 738 break; 739 case PIPE_BLENDFACTOR_INV_DST_COLOR: 740 /* fall-through */ 741 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 742 { 743 float inv_comp[4]; 744 VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */ 745 VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */ 746 } 747 break; 748 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 749 /* fall-through */ 750 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 751 { 752 float inv_comp[4]; 753 VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]); 754 VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp); 755 } 756 break; 757 case PIPE_BLENDFACTOR_SRC1_COLOR: 758 /* fall-through */ 759 case PIPE_BLENDFACTOR_SRC1_ALPHA: 760 VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */ 761 break; 762 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 763 /* fall-through */ 764 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 765 { 766 float one_minus_alpha[TGSI_QUAD_SIZE]; 767 VEC4_SUB(one_minus_alpha, one, quadColor2[3]); 768 VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */ 769 } 770 break; 771 default: 772 assert(0 && "invalid alpha dst factor"); 773 } 774 775 /* 776 * Combine RGB terms 777 */ 778 switch (softpipe->blend->rt[blend_index].rgb_func) { 779 case PIPE_BLEND_ADD: 780 VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */ 781 VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */ 782 VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */ 783 break; 784 case PIPE_BLEND_SUBTRACT: 785 VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */ 786 VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */ 787 VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */ 788 break; 789 case PIPE_BLEND_REVERSE_SUBTRACT: 790 VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */ 791 VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */ 792 VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */ 793 break; 794 case PIPE_BLEND_MIN: 795 VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */ 796 VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */ 797 VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */ 798 break; 799 case PIPE_BLEND_MAX: 800 VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */ 801 VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */ 802 VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */ 803 break; 804 default: 805 assert(0 && "invalid rgb blend func"); 806 } 807 808 /* 809 * Combine A terms 810 */ 811 switch (softpipe->blend->rt[blend_index].alpha_func) { 812 case PIPE_BLEND_ADD: 813 VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */ 814 break; 815 case PIPE_BLEND_SUBTRACT: 816 VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */ 817 break; 818 case PIPE_BLEND_REVERSE_SUBTRACT: 819 VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */ 820 break; 821 case PIPE_BLEND_MIN: 822 VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */ 823 break; 824 case PIPE_BLEND_MAX: 825 VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */ 826 break; 827 default: 828 assert(0 && "invalid alpha blend func"); 829 } 830 } 831 832 static void 833 colormask_quad(unsigned colormask, 834 float (*quadColor)[4], 835 float (*dest)[4]) 836 { 837 /* R */ 838 if (!(colormask & PIPE_MASK_R)) 839 COPY_4V(quadColor[0], dest[0]); 840 841 /* G */ 842 if (!(colormask & PIPE_MASK_G)) 843 COPY_4V(quadColor[1], dest[1]); 844 845 /* B */ 846 if (!(colormask & PIPE_MASK_B)) 847 COPY_4V(quadColor[2], dest[2]); 848 849 /* A */ 850 if (!(colormask & PIPE_MASK_A)) 851 COPY_4V(quadColor[3], dest[3]); 852 } 853 854 855 /** 856 * Clamp all colors in a quad to [0, 1] 857 */ 858 static void 859 clamp_colors(float (*quadColor)[4]) 860 { 861 unsigned i, j; 862 863 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 864 for (i = 0; i < 4; i++) { 865 quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F); 866 } 867 } 868 } 869 870 871 /** 872 * If we're drawing to a luminance, luminance/alpha or intensity surface 873 * we have to adjust (rebase) the fragment/quad colors before writing them 874 * to the tile cache. The tile cache always stores RGBA colors but if 875 * we're caching a L/A surface (for example) we need to be sure that R=G=B 876 * so that subsequent reads from the surface cache appear to return L/A 877 * values. 878 * The piglit fbo-blending-formats test will exercise this. 879 */ 880 static void 881 rebase_colors(enum format base_format, float (*quadColor)[4]) 882 { 883 unsigned i; 884 885 switch (base_format) { 886 case RGB: 887 for (i = 0; i < 4; i++) { 888 /* A = 1 */ 889 quadColor[3][i] = 1.0F; 890 } 891 break; 892 case LUMINANCE: 893 for (i = 0; i < 4; i++) { 894 /* B = G = R */ 895 quadColor[2][i] = quadColor[1][i] = quadColor[0][i]; 896 /* A = 1 */ 897 quadColor[3][i] = 1.0F; 898 } 899 break; 900 case LUMINANCE_ALPHA: 901 for (i = 0; i < 4; i++) { 902 /* B = G = R */ 903 quadColor[2][i] = quadColor[1][i] = quadColor[0][i]; 904 } 905 break; 906 case INTENSITY: 907 for (i = 0; i < 4; i++) { 908 /* A = B = G = R */ 909 quadColor[3][i] = quadColor[2][i] = quadColor[1][i] = quadColor[0][i]; 910 } 911 break; 912 default: 913 ; /* nothing */ 914 } 915 } 916 917 static void 918 blend_fallback(struct quad_stage *qs, 919 struct quad_header *quads[], 920 unsigned nr) 921 { 922 const struct blend_quad_stage *bqs = blend_quad_stage(qs); 923 struct softpipe_context *softpipe = qs->softpipe; 924 const struct pipe_blend_state *blend = softpipe->blend; 925 unsigned cbuf; 926 boolean write_all; 927 928 write_all = softpipe->fs_variant->info.color0_writes_all_cbufs; 929 930 for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) 931 { 932 /* which blend/mask state index to use: */ 933 const uint blend_buf = blend->independent_blend_enable ? cbuf : 0; 934 float dest[4][TGSI_QUAD_SIZE]; 935 struct softpipe_cached_tile *tile 936 = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], 937 quads[0]->input.x0, 938 quads[0]->input.y0); 939 const boolean clamp = bqs->clamp[cbuf]; 940 const float *blend_color; 941 const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf); 942 uint q, i, j; 943 944 if (clamp) 945 blend_color = softpipe->blend_color_clamped.color; 946 else 947 blend_color = softpipe->blend_color.color; 948 949 for (q = 0; q < nr; q++) { 950 struct quad_header *quad = quads[q]; 951 float (*quadColor)[4]; 952 float (*quadColor2)[4]; 953 float temp_quad_color[TGSI_QUAD_SIZE][4]; 954 const int itx = (quad->input.x0 & (TILE_SIZE-1)); 955 const int ity = (quad->input.y0 & (TILE_SIZE-1)); 956 957 if (write_all) { 958 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 959 for (i = 0; i < 4; i++) { 960 temp_quad_color[i][j] = quad->output.color[0][i][j]; 961 } 962 } 963 quadColor = temp_quad_color; 964 } else { 965 quadColor = quad->output.color[cbuf]; 966 if (dual_source_blend) 967 quadColor2 = quad->output.color[cbuf + 1]; 968 } 969 970 /* If fixed-point dest color buffer, need to clamp the incoming 971 * fragment colors now. 972 */ 973 if (clamp || softpipe->rasterizer->clamp_fragment_color) { 974 clamp_colors(quadColor); 975 } 976 977 /* get/swizzle dest colors 978 */ 979 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 980 int x = itx + (j & 1); 981 int y = ity + (j >> 1); 982 for (i = 0; i < 4; i++) { 983 dest[i][j] = tile->data.color[y][x][i]; 984 } 985 } 986 987 988 if (blend->logicop_enable) { 989 if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) { 990 logicop_quad( qs, quadColor, dest ); 991 } 992 } 993 else if (blend->rt[blend_buf].blend_enable) { 994 blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf); 995 996 /* If fixed-point dest color buffer, need to clamp the outgoing 997 * fragment colors now. 998 */ 999 if (clamp) { 1000 clamp_colors(quadColor); 1001 } 1002 } 1003 1004 rebase_colors(bqs->base_format[cbuf], quadColor); 1005 1006 if (blend->rt[blend_buf].colormask != 0xf) 1007 colormask_quad( blend->rt[cbuf].colormask, quadColor, dest); 1008 1009 /* Output color values 1010 */ 1011 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 1012 if (quad->inout.mask & (1 << j)) { 1013 int x = itx + (j & 1); 1014 int y = ity + (j >> 1); 1015 for (i = 0; i < 4; i++) { /* loop over color chans */ 1016 tile->data.color[y][x][i] = quadColor[i][j]; 1017 } 1018 } 1019 } 1020 } 1021 } 1022 } 1023 1024 1025 static void 1026 blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, 1027 struct quad_header *quads[], 1028 unsigned nr) 1029 { 1030 const struct blend_quad_stage *bqs = blend_quad_stage(qs); 1031 static const float one[4] = { 1, 1, 1, 1 }; 1032 float one_minus_alpha[TGSI_QUAD_SIZE]; 1033 float dest[4][TGSI_QUAD_SIZE]; 1034 float source[4][TGSI_QUAD_SIZE]; 1035 uint i, j, q; 1036 1037 struct softpipe_cached_tile *tile 1038 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0], 1039 quads[0]->input.x0, 1040 quads[0]->input.y0); 1041 1042 for (q = 0; q < nr; q++) { 1043 struct quad_header *quad = quads[q]; 1044 float (*quadColor)[4] = quad->output.color[0]; 1045 const float *alpha = quadColor[3]; 1046 const int itx = (quad->input.x0 & (TILE_SIZE-1)); 1047 const int ity = (quad->input.y0 & (TILE_SIZE-1)); 1048 1049 /* get/swizzle dest colors */ 1050 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 1051 int x = itx + (j & 1); 1052 int y = ity + (j >> 1); 1053 for (i = 0; i < 4; i++) { 1054 dest[i][j] = tile->data.color[y][x][i]; 1055 } 1056 } 1057 1058 /* If fixed-point dest color buffer, need to clamp the incoming 1059 * fragment colors now. 1060 */ 1061 if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) { 1062 clamp_colors(quadColor); 1063 } 1064 1065 VEC4_MUL(source[0], quadColor[0], alpha); /* R */ 1066 VEC4_MUL(source[1], quadColor[1], alpha); /* G */ 1067 VEC4_MUL(source[2], quadColor[2], alpha); /* B */ 1068 VEC4_MUL(source[3], quadColor[3], alpha); /* A */ 1069 1070 VEC4_SUB(one_minus_alpha, one, alpha); 1071 VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ 1072 VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ 1073 VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ 1074 VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ 1075 1076 VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */ 1077 VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */ 1078 VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */ 1079 VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */ 1080 1081 /* If fixed-point dest color buffer, need to clamp the outgoing 1082 * fragment colors now. 1083 */ 1084 if (bqs->clamp[0]) { 1085 clamp_colors(quadColor); 1086 } 1087 1088 rebase_colors(bqs->base_format[0], quadColor); 1089 1090 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 1091 if (quad->inout.mask & (1 << j)) { 1092 int x = itx + (j & 1); 1093 int y = ity + (j >> 1); 1094 for (i = 0; i < 4; i++) { /* loop over color chans */ 1095 tile->data.color[y][x][i] = quadColor[i][j]; 1096 } 1097 } 1098 } 1099 } 1100 } 1101 1102 static void 1103 blend_single_add_one_one(struct quad_stage *qs, 1104 struct quad_header *quads[], 1105 unsigned nr) 1106 { 1107 const struct blend_quad_stage *bqs = blend_quad_stage(qs); 1108 float dest[4][TGSI_QUAD_SIZE]; 1109 uint i, j, q; 1110 1111 struct softpipe_cached_tile *tile 1112 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0], 1113 quads[0]->input.x0, 1114 quads[0]->input.y0); 1115 1116 for (q = 0; q < nr; q++) { 1117 struct quad_header *quad = quads[q]; 1118 float (*quadColor)[4] = quad->output.color[0]; 1119 const int itx = (quad->input.x0 & (TILE_SIZE-1)); 1120 const int ity = (quad->input.y0 & (TILE_SIZE-1)); 1121 1122 /* get/swizzle dest colors */ 1123 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 1124 int x = itx + (j & 1); 1125 int y = ity + (j >> 1); 1126 for (i = 0; i < 4; i++) { 1127 dest[i][j] = tile->data.color[y][x][i]; 1128 } 1129 } 1130 1131 /* If fixed-point dest color buffer, need to clamp the incoming 1132 * fragment colors now. 1133 */ 1134 if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) { 1135 clamp_colors(quadColor); 1136 } 1137 1138 VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */ 1139 VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */ 1140 VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */ 1141 VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */ 1142 1143 /* If fixed-point dest color buffer, need to clamp the outgoing 1144 * fragment colors now. 1145 */ 1146 if (bqs->clamp[0]) { 1147 clamp_colors(quadColor); 1148 } 1149 1150 rebase_colors(bqs->base_format[0], quadColor); 1151 1152 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 1153 if (quad->inout.mask & (1 << j)) { 1154 int x = itx + (j & 1); 1155 int y = ity + (j >> 1); 1156 for (i = 0; i < 4; i++) { /* loop over color chans */ 1157 tile->data.color[y][x][i] = quadColor[i][j]; 1158 } 1159 } 1160 } 1161 } 1162 } 1163 1164 1165 /** 1166 * Just copy the quad color to the framebuffer tile (respecting the writemask), 1167 * for one color buffer. 1168 * Clamping will be done, if needed (depending on the color buffer's 1169 * datatype) when we write/pack the colors later. 1170 */ 1171 static void 1172 single_output_color(struct quad_stage *qs, 1173 struct quad_header *quads[], 1174 unsigned nr) 1175 { 1176 const struct blend_quad_stage *bqs = blend_quad_stage(qs); 1177 uint i, j, q; 1178 1179 struct softpipe_cached_tile *tile 1180 = sp_get_cached_tile(qs->softpipe->cbuf_cache[0], 1181 quads[0]->input.x0, 1182 quads[0]->input.y0); 1183 1184 for (q = 0; q < nr; q++) { 1185 struct quad_header *quad = quads[q]; 1186 float (*quadColor)[4] = quad->output.color[0]; 1187 const int itx = (quad->input.x0 & (TILE_SIZE-1)); 1188 const int ity = (quad->input.y0 & (TILE_SIZE-1)); 1189 1190 if (qs->softpipe->rasterizer->clamp_fragment_color) 1191 clamp_colors(quadColor); 1192 1193 rebase_colors(bqs->base_format[0], quadColor); 1194 1195 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 1196 if (quad->inout.mask & (1 << j)) { 1197 int x = itx + (j & 1); 1198 int y = ity + (j >> 1); 1199 for (i = 0; i < 4; i++) { /* loop over color chans */ 1200 tile->data.color[y][x][i] = quadColor[i][j]; 1201 } 1202 } 1203 } 1204 } 1205 } 1206 1207 static void 1208 blend_noop(struct quad_stage *qs, 1209 struct quad_header *quads[], 1210 unsigned nr) 1211 { 1212 } 1213 1214 1215 static void 1216 choose_blend_quad(struct quad_stage *qs, 1217 struct quad_header *quads[], 1218 unsigned nr) 1219 { 1220 struct blend_quad_stage *bqs = blend_quad_stage(qs); 1221 struct softpipe_context *softpipe = qs->softpipe; 1222 const struct pipe_blend_state *blend = softpipe->blend; 1223 unsigned i; 1224 1225 qs->run = blend_fallback; 1226 1227 if (softpipe->framebuffer.nr_cbufs == 0) { 1228 qs->run = blend_noop; 1229 } 1230 else if (!softpipe->blend->logicop_enable && 1231 softpipe->blend->rt[0].colormask == 0xf && 1232 softpipe->framebuffer.nr_cbufs == 1) 1233 { 1234 if (!blend->rt[0].blend_enable) { 1235 qs->run = single_output_color; 1236 } 1237 else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor && 1238 blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor && 1239 blend->rt[0].rgb_func == blend->rt[0].alpha_func) 1240 { 1241 if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) { 1242 if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE && 1243 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) { 1244 qs->run = blend_single_add_one_one; 1245 } 1246 else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA && 1247 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 1248 qs->run = blend_single_add_src_alpha_inv_src_alpha; 1249 1250 } 1251 } 1252 } 1253 1254 /* For each color buffer, determine if the buffer has destination alpha and 1255 * whether color clamping is needed. 1256 */ 1257 for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { 1258 const enum pipe_format format = softpipe->framebuffer.cbufs[i]->format; 1259 const struct util_format_description *desc = 1260 util_format_description(format); 1261 /* assuming all or no color channels are normalized: */ 1262 bqs->clamp[i] = desc->channel[0].normalized; 1263 bqs->format_type[i] = desc->channel[0].type; 1264 1265 if (util_format_is_intensity(format)) 1266 bqs->base_format[i] = INTENSITY; 1267 else if (util_format_is_luminance(format)) 1268 bqs->base_format[i] = LUMINANCE; 1269 else if (util_format_is_luminance_alpha(format)) 1270 bqs->base_format[i] = LUMINANCE_ALPHA; 1271 else if (util_format_is_rgb_no_alpha(format)) 1272 bqs->base_format[i] = RGB; 1273 else 1274 bqs->base_format[i] = RGBA; 1275 } 1276 1277 qs->run(qs, quads, nr); 1278 } 1279 1280 1281 static void blend_begin(struct quad_stage *qs) 1282 { 1283 qs->run = choose_blend_quad; 1284 } 1285 1286 1287 static void blend_destroy(struct quad_stage *qs) 1288 { 1289 FREE( qs ); 1290 } 1291 1292 1293 struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ) 1294 { 1295 struct blend_quad_stage *stage = CALLOC_STRUCT(blend_quad_stage); 1296 1297 if (!stage) 1298 return NULL; 1299 1300 stage->base.softpipe = softpipe; 1301 stage->base.begin = blend_begin; 1302 stage->base.run = choose_blend_quad; 1303 stage->base.destroy = blend_destroy; 1304 1305 return &stage->base; 1306 } 1307