1 /************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * Copyright 2010 VMware, Inc. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29 /** 30 * \brief Quad depth / stencil testing 31 */ 32 33 #include "pipe/p_defines.h" 34 #include "util/u_format.h" 35 #include "util/u_math.h" 36 #include "util/u_memory.h" 37 #include "tgsi/tgsi_scan.h" 38 #include "sp_context.h" 39 #include "sp_quad.h" 40 #include "sp_quad_pipe.h" 41 #include "sp_tile_cache.h" 42 #include "sp_state.h" /* for sp_fragment_shader */ 43 44 45 struct depth_data { 46 struct pipe_surface *ps; 47 enum pipe_format format; 48 unsigned bzzzz[TGSI_QUAD_SIZE]; /**< Z values fetched from depth buffer */ 49 unsigned qzzzz[TGSI_QUAD_SIZE]; /**< Z values from the quad */ 50 ubyte stencilVals[TGSI_QUAD_SIZE]; 51 boolean use_shader_stencil_refs; 52 ubyte shader_stencil_refs[TGSI_QUAD_SIZE]; 53 struct softpipe_cached_tile *tile; 54 }; 55 56 57 58 static void 59 get_depth_stencil_values( struct depth_data *data, 60 const struct quad_header *quad ) 61 { 62 unsigned j; 63 const struct softpipe_cached_tile *tile = data->tile; 64 65 switch (data->format) { 66 case PIPE_FORMAT_Z16_UNORM: 67 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 68 int x = quad->input.x0 % TILE_SIZE + (j & 1); 69 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 70 data->bzzzz[j] = tile->data.depth16[y][x]; 71 } 72 break; 73 case PIPE_FORMAT_Z32_UNORM: 74 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 75 int x = quad->input.x0 % TILE_SIZE + (j & 1); 76 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 77 data->bzzzz[j] = tile->data.depth32[y][x]; 78 } 79 break; 80 case PIPE_FORMAT_Z24X8_UNORM: 81 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 82 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 83 int x = quad->input.x0 % TILE_SIZE + (j & 1); 84 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 85 data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; 86 data->stencilVals[j] = tile->data.depth32[y][x] >> 24; 87 } 88 break; 89 case PIPE_FORMAT_X8Z24_UNORM: 90 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 91 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 92 int x = quad->input.x0 % TILE_SIZE + (j & 1); 93 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 94 data->bzzzz[j] = tile->data.depth32[y][x] >> 8; 95 data->stencilVals[j] = tile->data.depth32[y][x] & 0xff; 96 } 97 break; 98 case PIPE_FORMAT_S8_UINT: 99 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 100 int x = quad->input.x0 % TILE_SIZE + (j & 1); 101 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 102 data->bzzzz[j] = 0; 103 data->stencilVals[j] = tile->data.stencil8[y][x]; 104 } 105 break; 106 case PIPE_FORMAT_Z32_FLOAT: 107 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 108 int x = quad->input.x0 % TILE_SIZE + (j & 1); 109 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 110 data->bzzzz[j] = tile->data.depth32[y][x]; 111 } 112 break; 113 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 114 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 115 int x = quad->input.x0 % TILE_SIZE + (j & 1); 116 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 117 data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff; 118 data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff; 119 } 120 break; 121 default: 122 assert(0); 123 } 124 } 125 126 127 /** 128 * If the shader has not been run, interpolate the depth values 129 * ourselves. 130 */ 131 static void 132 interpolate_quad_depth( struct quad_header *quad ) 133 { 134 const float fx = (float) quad->input.x0; 135 const float fy = (float) quad->input.y0; 136 const float dzdx = quad->posCoef->dadx[2]; 137 const float dzdy = quad->posCoef->dady[2]; 138 const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; 139 140 quad->output.depth[0] = z0; 141 quad->output.depth[1] = z0 + dzdx; 142 quad->output.depth[2] = z0 + dzdy; 143 quad->output.depth[3] = z0 + dzdx + dzdy; 144 } 145 146 147 /** 148 * Compute the depth_data::qzzzz[] values from the float fragment Z values. 149 */ 150 static void 151 convert_quad_depth( struct depth_data *data, 152 const struct quad_header *quad ) 153 { 154 unsigned j; 155 156 /* Convert quad's float depth values to int depth values (qzzzz). 157 * If the Z buffer stores integer values, we _have_ to do the depth 158 * compares with integers (not floats). Otherwise, the float->int->float 159 * conversion of Z values (which isn't an identity function) will cause 160 * Z-fighting errors. 161 */ 162 switch (data->format) { 163 case PIPE_FORMAT_Z16_UNORM: 164 { 165 float scale = 65535.0; 166 167 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 168 data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); 169 } 170 } 171 break; 172 case PIPE_FORMAT_Z32_UNORM: 173 { 174 double scale = (double) (uint) ~0UL; 175 176 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 177 data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); 178 } 179 } 180 break; 181 case PIPE_FORMAT_Z24X8_UNORM: 182 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 183 { 184 float scale = (float) ((1 << 24) - 1); 185 186 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 187 data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); 188 } 189 } 190 break; 191 case PIPE_FORMAT_X8Z24_UNORM: 192 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 193 { 194 float scale = (float) ((1 << 24) - 1); 195 196 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 197 data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); 198 } 199 } 200 break; 201 case PIPE_FORMAT_Z32_FLOAT: 202 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 203 { 204 union fi fui; 205 206 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 207 fui.f = quad->output.depth[j]; 208 data->qzzzz[j] = fui.ui; 209 } 210 } 211 break; 212 default: 213 assert(0); 214 } 215 } 216 217 218 /** 219 * Compute the depth_data::shader_stencil_refs[] values from the float 220 * fragment stencil values. 221 */ 222 static void 223 convert_quad_stencil( struct depth_data *data, 224 const struct quad_header *quad ) 225 { 226 unsigned j; 227 228 data->use_shader_stencil_refs = TRUE; 229 /* Copy quads stencil values 230 */ 231 switch (data->format) { 232 case PIPE_FORMAT_Z24X8_UNORM: 233 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 234 case PIPE_FORMAT_X8Z24_UNORM: 235 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 236 case PIPE_FORMAT_S8_UINT: 237 case PIPE_FORMAT_Z32_FLOAT: 238 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 239 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 240 data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); 241 } 242 break; 243 default: 244 assert(0); 245 } 246 } 247 248 249 /** 250 * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer. 251 */ 252 static void 253 write_depth_stencil_values( struct depth_data *data, 254 struct quad_header *quad ) 255 { 256 struct softpipe_cached_tile *tile = data->tile; 257 unsigned j; 258 259 /* put updated Z values back into cached tile */ 260 switch (data->format) { 261 case PIPE_FORMAT_Z16_UNORM: 262 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 263 int x = quad->input.x0 % TILE_SIZE + (j & 1); 264 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 265 tile->data.depth16[y][x] = (ushort) data->bzzzz[j]; 266 } 267 break; 268 case PIPE_FORMAT_Z24X8_UNORM: 269 case PIPE_FORMAT_Z32_UNORM: 270 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 271 int x = quad->input.x0 % TILE_SIZE + (j & 1); 272 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 273 tile->data.depth32[y][x] = data->bzzzz[j]; 274 } 275 break; 276 case PIPE_FORMAT_Z24_UNORM_S8_UINT: 277 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 278 int x = quad->input.x0 % TILE_SIZE + (j & 1); 279 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 280 tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j]; 281 } 282 break; 283 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 284 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 285 int x = quad->input.x0 % TILE_SIZE + (j & 1); 286 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 287 tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j]; 288 } 289 break; 290 case PIPE_FORMAT_X8Z24_UNORM: 291 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 292 int x = quad->input.x0 % TILE_SIZE + (j & 1); 293 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 294 tile->data.depth32[y][x] = data->bzzzz[j] << 8; 295 } 296 break; 297 case PIPE_FORMAT_S8_UINT: 298 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 299 int x = quad->input.x0 % TILE_SIZE + (j & 1); 300 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 301 tile->data.stencil8[y][x] = data->stencilVals[j]; 302 } 303 break; 304 case PIPE_FORMAT_Z32_FLOAT: 305 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 306 int x = quad->input.x0 % TILE_SIZE + (j & 1); 307 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 308 tile->data.depth32[y][x] = data->bzzzz[j]; 309 } 310 break; 311 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 312 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 313 int x = quad->input.x0 % TILE_SIZE + (j & 1); 314 int y = quad->input.y0 % TILE_SIZE + (j >> 1); 315 tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32); 316 } 317 break; 318 default: 319 assert(0); 320 } 321 } 322 323 324 325 /** Only 8-bit stencil supported */ 326 #define STENCIL_MAX 0xff 327 328 329 /** 330 * Do the basic stencil test (compare stencil buffer values against the 331 * reference value. 332 * 333 * \param data->stencilVals the stencil values from the stencil buffer 334 * \param func the stencil func (PIPE_FUNC_x) 335 * \param ref the stencil reference value 336 * \param valMask the stencil value mask indicating which bits of the stencil 337 * values and ref value are to be used. 338 * \return mask indicating which pixels passed the stencil test 339 */ 340 static unsigned 341 do_stencil_test(struct depth_data *data, 342 unsigned func, 343 unsigned ref, unsigned valMask) 344 { 345 unsigned passMask = 0x0; 346 unsigned j; 347 ubyte refs[TGSI_QUAD_SIZE]; 348 349 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 350 if (data->use_shader_stencil_refs) 351 refs[j] = data->shader_stencil_refs[j] & valMask; 352 else 353 refs[j] = ref & valMask; 354 } 355 356 switch (func) { 357 case PIPE_FUNC_NEVER: 358 /* passMask = 0x0 */ 359 break; 360 case PIPE_FUNC_LESS: 361 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 362 if (refs[j] < (data->stencilVals[j] & valMask)) { 363 passMask |= (1 << j); 364 } 365 } 366 break; 367 case PIPE_FUNC_EQUAL: 368 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 369 if (refs[j] == (data->stencilVals[j] & valMask)) { 370 passMask |= (1 << j); 371 } 372 } 373 break; 374 case PIPE_FUNC_LEQUAL: 375 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 376 if (refs[j] <= (data->stencilVals[j] & valMask)) { 377 passMask |= (1 << j); 378 } 379 } 380 break; 381 case PIPE_FUNC_GREATER: 382 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 383 if (refs[j] > (data->stencilVals[j] & valMask)) { 384 passMask |= (1 << j); 385 } 386 } 387 break; 388 case PIPE_FUNC_NOTEQUAL: 389 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 390 if (refs[j] != (data->stencilVals[j] & valMask)) { 391 passMask |= (1 << j); 392 } 393 } 394 break; 395 case PIPE_FUNC_GEQUAL: 396 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 397 if (refs[j] >= (data->stencilVals[j] & valMask)) { 398 passMask |= (1 << j); 399 } 400 } 401 break; 402 case PIPE_FUNC_ALWAYS: 403 passMask = MASK_ALL; 404 break; 405 default: 406 assert(0); 407 } 408 409 return passMask; 410 } 411 412 413 /** 414 * Apply the stencil operator to stencil values. 415 * 416 * \param data->stencilVals the stencil buffer values (read and written) 417 * \param mask indicates which pixels to update 418 * \param op the stencil operator (PIPE_STENCIL_OP_x) 419 * \param ref the stencil reference value 420 * \param wrtMask writemask controlling which bits are changed in the 421 * stencil values 422 */ 423 static void 424 apply_stencil_op(struct depth_data *data, 425 unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) 426 { 427 unsigned j; 428 ubyte newstencil[TGSI_QUAD_SIZE]; 429 ubyte refs[TGSI_QUAD_SIZE]; 430 431 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 432 newstencil[j] = data->stencilVals[j]; 433 if (data->use_shader_stencil_refs) 434 refs[j] = data->shader_stencil_refs[j]; 435 else 436 refs[j] = ref; 437 } 438 439 switch (op) { 440 case PIPE_STENCIL_OP_KEEP: 441 /* no-op */ 442 break; 443 case PIPE_STENCIL_OP_ZERO: 444 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 445 if (mask & (1 << j)) { 446 newstencil[j] = 0; 447 } 448 } 449 break; 450 case PIPE_STENCIL_OP_REPLACE: 451 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 452 if (mask & (1 << j)) { 453 newstencil[j] = refs[j]; 454 } 455 } 456 break; 457 case PIPE_STENCIL_OP_INCR: 458 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 459 if (mask & (1 << j)) { 460 if (data->stencilVals[j] < STENCIL_MAX) { 461 newstencil[j] = data->stencilVals[j] + 1; 462 } 463 } 464 } 465 break; 466 case PIPE_STENCIL_OP_DECR: 467 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 468 if (mask & (1 << j)) { 469 if (data->stencilVals[j] > 0) { 470 newstencil[j] = data->stencilVals[j] - 1; 471 } 472 } 473 } 474 break; 475 case PIPE_STENCIL_OP_INCR_WRAP: 476 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 477 if (mask & (1 << j)) { 478 newstencil[j] = data->stencilVals[j] + 1; 479 } 480 } 481 break; 482 case PIPE_STENCIL_OP_DECR_WRAP: 483 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 484 if (mask & (1 << j)) { 485 newstencil[j] = data->stencilVals[j] - 1; 486 } 487 } 488 break; 489 case PIPE_STENCIL_OP_INVERT: 490 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 491 if (mask & (1 << j)) { 492 newstencil[j] = ~data->stencilVals[j]; 493 } 494 } 495 break; 496 default: 497 assert(0); 498 } 499 500 /* 501 * update the stencil values 502 */ 503 if (wrtMask != STENCIL_MAX) { 504 /* apply bit-wise stencil buffer writemask */ 505 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 506 data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]); 507 } 508 } 509 else { 510 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 511 data->stencilVals[j] = newstencil[j]; 512 } 513 } 514 } 515 516 517 518 /** 519 * To increase efficiency, we should probably have multiple versions 520 * of this function that are specifically for Z16, Z32 and FP Z buffers. 521 * Try to effectively do that with codegen... 522 */ 523 static boolean 524 depth_test_quad(struct quad_stage *qs, 525 struct depth_data *data, 526 struct quad_header *quad) 527 { 528 struct softpipe_context *softpipe = qs->softpipe; 529 unsigned zmask = 0; 530 unsigned j; 531 532 switch (softpipe->depth_stencil->depth.func) { 533 case PIPE_FUNC_NEVER: 534 /* zmask = 0 */ 535 break; 536 case PIPE_FUNC_LESS: 537 /* Note this is pretty much a single sse or cell instruction. 538 * Like this: quad->mask &= (quad->outputs.depth < zzzz); 539 */ 540 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 541 if (data->qzzzz[j] < data->bzzzz[j]) 542 zmask |= 1 << j; 543 } 544 break; 545 case PIPE_FUNC_EQUAL: 546 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 547 if (data->qzzzz[j] == data->bzzzz[j]) 548 zmask |= 1 << j; 549 } 550 break; 551 case PIPE_FUNC_LEQUAL: 552 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 553 if (data->qzzzz[j] <= data->bzzzz[j]) 554 zmask |= (1 << j); 555 } 556 break; 557 case PIPE_FUNC_GREATER: 558 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 559 if (data->qzzzz[j] > data->bzzzz[j]) 560 zmask |= (1 << j); 561 } 562 break; 563 case PIPE_FUNC_NOTEQUAL: 564 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 565 if (data->qzzzz[j] != data->bzzzz[j]) 566 zmask |= (1 << j); 567 } 568 break; 569 case PIPE_FUNC_GEQUAL: 570 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 571 if (data->qzzzz[j] >= data->bzzzz[j]) 572 zmask |= (1 << j); 573 } 574 break; 575 case PIPE_FUNC_ALWAYS: 576 zmask = MASK_ALL; 577 break; 578 default: 579 assert(0); 580 } 581 582 quad->inout.mask &= zmask; 583 if (quad->inout.mask == 0) 584 return FALSE; 585 586 /* Update our internal copy only if writemask set. Even if 587 * depth.writemask is FALSE, may still need to write out buffer 588 * data due to stencil changes. 589 */ 590 if (softpipe->depth_stencil->depth.writemask) { 591 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 592 if (quad->inout.mask & (1 << j)) { 593 data->bzzzz[j] = data->qzzzz[j]; 594 } 595 } 596 } 597 598 return TRUE; 599 } 600 601 602 603 /** 604 * Do stencil (and depth) testing. Stenciling depends on the outcome of 605 * depth testing. 606 */ 607 static void 608 depth_stencil_test_quad(struct quad_stage *qs, 609 struct depth_data *data, 610 struct quad_header *quad) 611 { 612 struct softpipe_context *softpipe = qs->softpipe; 613 unsigned func, zFailOp, zPassOp, failOp; 614 ubyte ref, wrtMask, valMask; 615 uint face = quad->input.facing; 616 617 if (!softpipe->depth_stencil->stencil[1].enabled) { 618 /* single-sided stencil test, use front (face=0) state */ 619 face = 0; 620 } 621 622 /* 0 = front-face, 1 = back-face */ 623 assert(face == 0 || face == 1); 624 625 /* choose front or back face function, operator, etc */ 626 /* XXX we could do these initializations once per primitive */ 627 func = softpipe->depth_stencil->stencil[face].func; 628 failOp = softpipe->depth_stencil->stencil[face].fail_op; 629 zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; 630 zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; 631 ref = softpipe->stencil_ref.ref_value[face]; 632 wrtMask = softpipe->depth_stencil->stencil[face].writemask; 633 valMask = softpipe->depth_stencil->stencil[face].valuemask; 634 635 /* do the stencil test first */ 636 { 637 unsigned passMask, failMask; 638 passMask = do_stencil_test(data, func, ref, valMask); 639 failMask = quad->inout.mask & ~passMask; 640 quad->inout.mask &= passMask; 641 642 if (failOp != PIPE_STENCIL_OP_KEEP) { 643 apply_stencil_op(data, failMask, failOp, ref, wrtMask); 644 } 645 } 646 647 if (quad->inout.mask) { 648 /* now the pixels that passed the stencil test are depth tested */ 649 if (softpipe->depth_stencil->depth.enabled) { 650 const unsigned origMask = quad->inout.mask; 651 652 depth_test_quad(qs, data, quad); /* quad->mask is updated */ 653 654 /* update stencil buffer values according to z pass/fail result */ 655 if (zFailOp != PIPE_STENCIL_OP_KEEP) { 656 const unsigned zFailMask = origMask & ~quad->inout.mask; 657 apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask); 658 } 659 660 if (zPassOp != PIPE_STENCIL_OP_KEEP) { 661 const unsigned zPassMask = origMask & quad->inout.mask; 662 apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask); 663 } 664 } 665 else { 666 /* no depth test, apply Zpass operator to stencil buffer values */ 667 apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask); 668 } 669 } 670 } 671 672 673 #define ALPHATEST( FUNC, COMP ) \ 674 static unsigned \ 675 alpha_test_quads_##FUNC( struct quad_stage *qs, \ 676 struct quad_header *quads[], \ 677 unsigned nr ) \ 678 { \ 679 const float ref = qs->softpipe->depth_stencil->alpha.ref_value; \ 680 const uint cbuf = 0; /* only output[0].alpha is tested */ \ 681 unsigned pass_nr = 0; \ 682 unsigned i; \ 683 \ 684 for (i = 0; i < nr; i++) { \ 685 const float *aaaa = quads[i]->output.color[cbuf][3]; \ 686 unsigned passMask = 0; \ 687 \ 688 if (aaaa[0] COMP ref) passMask |= (1 << 0); \ 689 if (aaaa[1] COMP ref) passMask |= (1 << 1); \ 690 if (aaaa[2] COMP ref) passMask |= (1 << 2); \ 691 if (aaaa[3] COMP ref) passMask |= (1 << 3); \ 692 \ 693 quads[i]->inout.mask &= passMask; \ 694 \ 695 if (quads[i]->inout.mask) \ 696 quads[pass_nr++] = quads[i]; \ 697 } \ 698 \ 699 return pass_nr; \ 700 } 701 702 703 ALPHATEST( LESS, < ) 704 ALPHATEST( EQUAL, == ) 705 ALPHATEST( LEQUAL, <= ) 706 ALPHATEST( GREATER, > ) 707 ALPHATEST( NOTEQUAL, != ) 708 ALPHATEST( GEQUAL, >= ) 709 710 711 /* XXX: Incorporate into shader using KILP. 712 */ 713 static unsigned 714 alpha_test_quads(struct quad_stage *qs, 715 struct quad_header *quads[], 716 unsigned nr) 717 { 718 switch (qs->softpipe->depth_stencil->alpha.func) { 719 case PIPE_FUNC_LESS: 720 return alpha_test_quads_LESS( qs, quads, nr ); 721 case PIPE_FUNC_EQUAL: 722 return alpha_test_quads_EQUAL( qs, quads, nr ); 723 case PIPE_FUNC_LEQUAL: 724 return alpha_test_quads_LEQUAL( qs, quads, nr ); 725 case PIPE_FUNC_GREATER: 726 return alpha_test_quads_GREATER( qs, quads, nr ); 727 case PIPE_FUNC_NOTEQUAL: 728 return alpha_test_quads_NOTEQUAL( qs, quads, nr ); 729 case PIPE_FUNC_GEQUAL: 730 return alpha_test_quads_GEQUAL( qs, quads, nr ); 731 case PIPE_FUNC_ALWAYS: 732 return nr; 733 case PIPE_FUNC_NEVER: 734 default: 735 return 0; 736 } 737 } 738 739 740 static unsigned mask_count[16] = 741 { 742 0, /* 0x0 */ 743 1, /* 0x1 */ 744 1, /* 0x2 */ 745 2, /* 0x3 */ 746 1, /* 0x4 */ 747 2, /* 0x5 */ 748 2, /* 0x6 */ 749 3, /* 0x7 */ 750 1, /* 0x8 */ 751 2, /* 0x9 */ 752 2, /* 0xa */ 753 3, /* 0xb */ 754 2, /* 0xc */ 755 3, /* 0xd */ 756 3, /* 0xe */ 757 4, /* 0xf */ 758 }; 759 760 761 762 /** 763 * General depth/stencil test function. Used when there's no fast-path. 764 */ 765 static void 766 depth_test_quads_fallback(struct quad_stage *qs, 767 struct quad_header *quads[], 768 unsigned nr) 769 { 770 unsigned i, pass = 0; 771 const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; 772 boolean interp_depth = !fsInfo->writes_z; 773 boolean shader_stencil_ref = fsInfo->writes_stencil; 774 struct depth_data data; 775 776 data.use_shader_stencil_refs = FALSE; 777 778 if (qs->softpipe->depth_stencil->alpha.enabled) { 779 nr = alpha_test_quads(qs, quads, nr); 780 } 781 782 if (qs->softpipe->framebuffer.zsbuf && 783 (qs->softpipe->depth_stencil->depth.enabled || 784 qs->softpipe->depth_stencil->stencil[0].enabled)) { 785 786 data.ps = qs->softpipe->framebuffer.zsbuf; 787 data.format = data.ps->format; 788 data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, 789 quads[0]->input.x0, 790 quads[0]->input.y0); 791 792 for (i = 0; i < nr; i++) { 793 get_depth_stencil_values(&data, quads[i]); 794 795 if (qs->softpipe->depth_stencil->depth.enabled) { 796 if (interp_depth) 797 interpolate_quad_depth(quads[i]); 798 799 convert_quad_depth(&data, quads[i]); 800 } 801 802 if (qs->softpipe->depth_stencil->stencil[0].enabled) { 803 if (shader_stencil_ref) 804 convert_quad_stencil(&data, quads[i]); 805 806 depth_stencil_test_quad(qs, &data, quads[i]); 807 write_depth_stencil_values(&data, quads[i]); 808 } 809 else { 810 if (!depth_test_quad(qs, &data, quads[i])) 811 continue; 812 813 if (qs->softpipe->depth_stencil->depth.writemask) 814 write_depth_stencil_values(&data, quads[i]); 815 } 816 817 quads[pass++] = quads[i]; 818 } 819 820 nr = pass; 821 } 822 823 if (qs->softpipe->active_query_count) { 824 for (i = 0; i < nr; i++) 825 qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask]; 826 } 827 828 if (nr) 829 qs->next->run(qs->next, quads, nr); 830 } 831 832 833 /** 834 * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled. 835 */ 836 837 #define NAME depth_interp_z16_less_write 838 #define OPERATOR < 839 #include "sp_quad_depth_test_tmp.h" 840 841 #define NAME depth_interp_z16_equal_write 842 #define OPERATOR == 843 #include "sp_quad_depth_test_tmp.h" 844 845 #define NAME depth_interp_z16_lequal_write 846 #define OPERATOR <= 847 #include "sp_quad_depth_test_tmp.h" 848 849 #define NAME depth_interp_z16_greater_write 850 #define OPERATOR > 851 #include "sp_quad_depth_test_tmp.h" 852 853 #define NAME depth_interp_z16_notequal_write 854 #define OPERATOR != 855 #include "sp_quad_depth_test_tmp.h" 856 857 #define NAME depth_interp_z16_gequal_write 858 #define OPERATOR >= 859 #include "sp_quad_depth_test_tmp.h" 860 861 #define NAME depth_interp_z16_always_write 862 #define ALWAYS 1 863 #include "sp_quad_depth_test_tmp.h" 864 865 866 867 static void 868 depth_noop(struct quad_stage *qs, 869 struct quad_header *quads[], 870 unsigned nr) 871 { 872 qs->next->run(qs->next, quads, nr); 873 } 874 875 876 877 static void 878 choose_depth_test(struct quad_stage *qs, 879 struct quad_header *quads[], 880 unsigned nr) 881 { 882 const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info; 883 884 boolean interp_depth = !fsInfo->writes_z; 885 886 boolean alpha = qs->softpipe->depth_stencil->alpha.enabled; 887 888 boolean depth = qs->softpipe->depth_stencil->depth.enabled; 889 890 unsigned depthfunc = qs->softpipe->depth_stencil->depth.func; 891 892 boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled; 893 894 boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask; 895 896 boolean occlusion = qs->softpipe->active_query_count; 897 898 if(!qs->softpipe->framebuffer.zsbuf) 899 depth = depthwrite = stencil = FALSE; 900 901 /* default */ 902 qs->run = depth_test_quads_fallback; 903 904 /* look for special cases */ 905 if (!alpha && 906 !depth && 907 !occlusion && 908 !stencil) { 909 qs->run = depth_noop; 910 } 911 else if (!alpha && 912 interp_depth && 913 depth && 914 depthwrite && 915 !occlusion && 916 !stencil) 917 { 918 if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) { 919 switch (depthfunc) { 920 case PIPE_FUNC_NEVER: 921 qs->run = depth_test_quads_fallback; 922 break; 923 case PIPE_FUNC_LESS: 924 qs->run = depth_interp_z16_less_write; 925 break; 926 case PIPE_FUNC_EQUAL: 927 qs->run = depth_interp_z16_equal_write; 928 break; 929 case PIPE_FUNC_LEQUAL: 930 qs->run = depth_interp_z16_lequal_write; 931 break; 932 case PIPE_FUNC_GREATER: 933 qs->run = depth_interp_z16_greater_write; 934 break; 935 case PIPE_FUNC_NOTEQUAL: 936 qs->run = depth_interp_z16_notequal_write; 937 break; 938 case PIPE_FUNC_GEQUAL: 939 qs->run = depth_interp_z16_gequal_write; 940 break; 941 case PIPE_FUNC_ALWAYS: 942 qs->run = depth_interp_z16_always_write; 943 break; 944 default: 945 qs->run = depth_test_quads_fallback; 946 break; 947 } 948 } 949 } 950 951 /* next quad/fragment stage */ 952 qs->run( qs, quads, nr ); 953 } 954 955 956 957 static void 958 depth_test_begin(struct quad_stage *qs) 959 { 960 qs->run = choose_depth_test; 961 qs->next->begin(qs->next); 962 } 963 964 965 static void 966 depth_test_destroy(struct quad_stage *qs) 967 { 968 FREE( qs ); 969 } 970 971 972 struct quad_stage * 973 sp_quad_depth_test_stage(struct softpipe_context *softpipe) 974 { 975 struct quad_stage *stage = CALLOC_STRUCT(quad_stage); 976 977 stage->softpipe = softpipe; 978 stage->begin = depth_test_begin; 979 stage->run = choose_depth_test; 980 stage->destroy = depth_test_destroy; 981 982 return stage; 983 } 984