Home | History | Annotate | Download | only in softpipe
      1 /**************************************************************************
      2  *
      3  * Copyright 2007 VMware, Inc.
      4  * Copyright 2010 VMware, Inc.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial portions
     17  * of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     22  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
     23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  **************************************************************************/
     28 
     29 /**
     30  * \brief  Quad depth / stencil testing
     31  */
     32 
     33 #include "pipe/p_defines.h"
     34 #include "util/u_format.h"
     35 #include "util/u_math.h"
     36 #include "util/u_memory.h"
     37 #include "tgsi/tgsi_scan.h"
     38 #include "sp_context.h"
     39 #include "sp_quad.h"
     40 #include "sp_quad_pipe.h"
     41 #include "sp_tile_cache.h"
     42 #include "sp_state.h"           /* for sp_fragment_shader */
     43 
     44 
     45 struct depth_data {
     46    struct pipe_surface *ps;
     47    enum pipe_format format;
     48    unsigned bzzzz[TGSI_QUAD_SIZE];  /**< Z values fetched from depth buffer */
     49    unsigned qzzzz[TGSI_QUAD_SIZE];  /**< Z values from the quad */
     50    ubyte stencilVals[TGSI_QUAD_SIZE];
     51    boolean use_shader_stencil_refs;
     52    ubyte shader_stencil_refs[TGSI_QUAD_SIZE];
     53    struct softpipe_cached_tile *tile;
     54    float minval, maxval;
     55    bool clamp;
     56 };
     57 
     58 
     59 
     60 static void
     61 get_depth_stencil_values( struct depth_data *data,
     62                           const struct quad_header *quad )
     63 {
     64    unsigned j;
     65    const struct softpipe_cached_tile *tile = data->tile;
     66 
     67    switch (data->format) {
     68    case PIPE_FORMAT_Z16_UNORM:
     69       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     70          int x = quad->input.x0 % TILE_SIZE + (j & 1);
     71          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
     72          data->bzzzz[j] = tile->data.depth16[y][x];
     73       }
     74       break;
     75    case PIPE_FORMAT_Z32_UNORM:
     76       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     77          int x = quad->input.x0 % TILE_SIZE + (j & 1);
     78          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
     79          data->bzzzz[j] = tile->data.depth32[y][x];
     80       }
     81       break;
     82    case PIPE_FORMAT_Z24X8_UNORM:
     83    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
     84       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     85          int x = quad->input.x0 % TILE_SIZE + (j & 1);
     86          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
     87          data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
     88          data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
     89       }
     90       break;
     91    case PIPE_FORMAT_X8Z24_UNORM:
     92    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
     93       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     94          int x = quad->input.x0 % TILE_SIZE + (j & 1);
     95          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
     96          data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
     97          data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
     98       }
     99       break;
    100    case PIPE_FORMAT_S8_UINT:
    101       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    102          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    103          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    104          data->bzzzz[j] = 0;
    105          data->stencilVals[j] = tile->data.stencil8[y][x];
    106       }
    107       break;
    108    case PIPE_FORMAT_Z32_FLOAT:
    109       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    110          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    111          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    112          data->bzzzz[j] = tile->data.depth32[y][x];
    113       }
    114       break;
    115    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
    116       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    117          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    118          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    119          data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;
    120          data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;
    121       }
    122       break;
    123    default:
    124       assert(0);
    125    }
    126 }
    127 
    128 
    129 /**
    130  * If the shader has not been run, interpolate the depth values
    131  * ourselves.
    132  */
    133 static void
    134 interpolate_quad_depth( struct quad_header *quad )
    135 {
    136    const float fx = (float) quad->input.x0;
    137    const float fy = (float) quad->input.y0;
    138    const float dzdx = quad->posCoef->dadx[2];
    139    const float dzdy = quad->posCoef->dady[2];
    140    const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
    141 
    142    quad->output.depth[0] = z0;
    143    quad->output.depth[1] = z0 + dzdx;
    144    quad->output.depth[2] = z0 + dzdy;
    145    quad->output.depth[3] = z0 + dzdx + dzdy;
    146 }
    147 
    148 
    149 /**
    150  * Compute the depth_data::qzzzz[] values from the float fragment Z values.
    151  */
    152 static void
    153 convert_quad_depth( struct depth_data *data,
    154                     const struct quad_header *quad )
    155 {
    156    unsigned j;
    157    float dvals[TGSI_QUAD_SIZE];
    158 
    159    /* Convert quad's float depth values to int depth values (qzzzz).
    160     * If the Z buffer stores integer values, we _have_ to do the depth
    161     * compares with integers (not floats).  Otherwise, the float->int->float
    162     * conversion of Z values (which isn't an identity function) will cause
    163     * Z-fighting errors.
    164     */
    165    if (data->clamp) {
    166       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    167          dvals[j] = CLAMP(quad->output.depth[j], data->minval, data->maxval);
    168       }
    169    } else {
    170       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    171          dvals[j] = quad->output.depth[j];
    172       }
    173    }
    174 
    175    switch (data->format) {
    176    case PIPE_FORMAT_Z16_UNORM:
    177       {
    178          float scale = 65535.0;
    179 
    180          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    181             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
    182          }
    183       }
    184       break;
    185    case PIPE_FORMAT_Z32_UNORM:
    186       {
    187          double scale = (double) (uint) ~0UL;
    188 
    189          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    190             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
    191          }
    192       }
    193       break;
    194    case PIPE_FORMAT_Z24X8_UNORM:
    195    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
    196       {
    197          float scale = (float) ((1 << 24) - 1);
    198 
    199          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    200             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
    201          }
    202       }
    203       break;
    204    case PIPE_FORMAT_X8Z24_UNORM:
    205    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
    206       {
    207          float scale = (float) ((1 << 24) - 1);
    208 
    209          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    210             data->qzzzz[j] = (unsigned) (dvals[j] * scale);
    211          }
    212       }
    213       break;
    214    case PIPE_FORMAT_Z32_FLOAT:
    215    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
    216       {
    217          union fi fui;
    218 
    219          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    220             fui.f = dvals[j];
    221             data->qzzzz[j] = fui.ui;
    222          }
    223       }
    224       break;
    225    default:
    226       assert(0);
    227    }
    228 }
    229 
    230 
    231 /**
    232  * Compute the depth_data::shader_stencil_refs[] values from the float
    233  * fragment stencil values.
    234  */
    235 static void
    236 convert_quad_stencil( struct depth_data *data,
    237                       const struct quad_header *quad )
    238 {
    239    unsigned j;
    240 
    241    data->use_shader_stencil_refs = TRUE;
    242    /* Copy quads stencil values
    243     */
    244    switch (data->format) {
    245    case PIPE_FORMAT_Z24X8_UNORM:
    246    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
    247    case PIPE_FORMAT_X8Z24_UNORM:
    248    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
    249    case PIPE_FORMAT_S8_UINT:
    250    case PIPE_FORMAT_Z32_FLOAT:
    251    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
    252       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    253          data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
    254       }
    255       break;
    256    default:
    257       assert(0);
    258    }
    259 }
    260 
    261 
    262 /**
    263  * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
    264  */
    265 static void
    266 write_depth_stencil_values( struct depth_data *data,
    267                             struct quad_header *quad )
    268 {
    269    struct softpipe_cached_tile *tile = data->tile;
    270    unsigned j;
    271 
    272    /* put updated Z values back into cached tile */
    273    switch (data->format) {
    274    case PIPE_FORMAT_Z16_UNORM:
    275       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    276          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    277          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    278          tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
    279       }
    280       break;
    281    case PIPE_FORMAT_Z24X8_UNORM:
    282    case PIPE_FORMAT_Z32_UNORM:
    283       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    284          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    285          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    286          tile->data.depth32[y][x] = data->bzzzz[j];
    287       }
    288       break;
    289    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
    290       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    291          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    292          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    293          tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
    294       }
    295       break;
    296    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
    297       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    298          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    299          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    300          tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
    301       }
    302       break;
    303    case PIPE_FORMAT_X8Z24_UNORM:
    304       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    305          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    306          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    307          tile->data.depth32[y][x] = data->bzzzz[j] << 8;
    308       }
    309       break;
    310    case PIPE_FORMAT_S8_UINT:
    311       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    312          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    313          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    314          tile->data.stencil8[y][x] = data->stencilVals[j];
    315       }
    316       break;
    317    case PIPE_FORMAT_Z32_FLOAT:
    318       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    319          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    320          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    321          tile->data.depth32[y][x] = data->bzzzz[j];
    322       }
    323       break;
    324    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
    325       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    326          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    327          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    328          tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);
    329       }
    330       break;
    331    default:
    332       assert(0);
    333    }
    334 }
    335 
    336 
    337 
/**
 * Largest stencil value; only 8-bit stencil is supported.
 * Also the write mask value meaning "all stencil bits are written".
 */
#define STENCIL_MAX 0xff
    340 
    341 
    342 /**
    343  * Do the basic stencil test (compare stencil buffer values against the
    344  * reference value.
    345  *
    346  * \param data->stencilVals  the stencil values from the stencil buffer
    347  * \param func  the stencil func (PIPE_FUNC_x)
    348  * \param ref  the stencil reference value
    349  * \param valMask  the stencil value mask indicating which bits of the stencil
    350  *                 values and ref value are to be used.
    351  * \return mask indicating which pixels passed the stencil test
    352  */
    353 static unsigned
    354 do_stencil_test(struct depth_data *data,
    355                 unsigned func,
    356                 unsigned ref, unsigned valMask)
    357 {
    358    unsigned passMask = 0x0;
    359    unsigned j;
    360    ubyte refs[TGSI_QUAD_SIZE];
    361 
    362    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    363       if (data->use_shader_stencil_refs)
    364          refs[j] = data->shader_stencil_refs[j] & valMask;
    365       else
    366          refs[j] = ref & valMask;
    367    }
    368 
    369    switch (func) {
    370    case PIPE_FUNC_NEVER:
    371       /* passMask = 0x0 */
    372       break;
    373    case PIPE_FUNC_LESS:
    374       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    375          if (refs[j] < (data->stencilVals[j] & valMask)) {
    376             passMask |= (1 << j);
    377          }
    378       }
    379       break;
    380    case PIPE_FUNC_EQUAL:
    381       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    382          if (refs[j] == (data->stencilVals[j] & valMask)) {
    383             passMask |= (1 << j);
    384          }
    385       }
    386       break;
    387    case PIPE_FUNC_LEQUAL:
    388       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    389          if (refs[j] <= (data->stencilVals[j] & valMask)) {
    390             passMask |= (1 << j);
    391          }
    392       }
    393       break;
    394    case PIPE_FUNC_GREATER:
    395       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    396          if (refs[j] > (data->stencilVals[j] & valMask)) {
    397             passMask |= (1 << j);
    398          }
    399       }
    400       break;
    401    case PIPE_FUNC_NOTEQUAL:
    402       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    403          if (refs[j] != (data->stencilVals[j] & valMask)) {
    404             passMask |= (1 << j);
    405          }
    406       }
    407       break;
    408    case PIPE_FUNC_GEQUAL:
    409       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    410          if (refs[j] >= (data->stencilVals[j] & valMask)) {
    411             passMask |= (1 << j);
    412          }
    413       }
    414       break;
    415    case PIPE_FUNC_ALWAYS:
    416       passMask = MASK_ALL;
    417       break;
    418    default:
    419       assert(0);
    420    }
    421 
    422    return passMask;
    423 }
    424 
    425 
    426 /**
    427  * Apply the stencil operator to stencil values.
    428  *
    429  * \param data->stencilVals  the stencil buffer values (read and written)
    430  * \param mask  indicates which pixels to update
    431  * \param op  the stencil operator (PIPE_STENCIL_OP_x)
    432  * \param ref  the stencil reference value
    433  * \param wrtMask  writemask controlling which bits are changed in the
    434  *                 stencil values
    435  */
    436 static void
    437 apply_stencil_op(struct depth_data *data,
    438                  unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
    439 {
    440    unsigned j;
    441    ubyte newstencil[TGSI_QUAD_SIZE];
    442    ubyte refs[TGSI_QUAD_SIZE];
    443 
    444    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    445       newstencil[j] = data->stencilVals[j];
    446       if (data->use_shader_stencil_refs)
    447          refs[j] = data->shader_stencil_refs[j];
    448       else
    449          refs[j] = ref;
    450    }
    451 
    452    switch (op) {
    453    case PIPE_STENCIL_OP_KEEP:
    454       /* no-op */
    455       break;
    456    case PIPE_STENCIL_OP_ZERO:
    457       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    458          if (mask & (1 << j)) {
    459             newstencil[j] = 0;
    460          }
    461       }
    462       break;
    463    case PIPE_STENCIL_OP_REPLACE:
    464       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    465          if (mask & (1 << j)) {
    466             newstencil[j] = refs[j];
    467          }
    468       }
    469       break;
    470    case PIPE_STENCIL_OP_INCR:
    471       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    472          if (mask & (1 << j)) {
    473             if (data->stencilVals[j] < STENCIL_MAX) {
    474                newstencil[j] = data->stencilVals[j] + 1;
    475             }
    476          }
    477       }
    478       break;
    479    case PIPE_STENCIL_OP_DECR:
    480       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    481          if (mask & (1 << j)) {
    482             if (data->stencilVals[j] > 0) {
    483                newstencil[j] = data->stencilVals[j] - 1;
    484             }
    485          }
    486       }
    487       break;
    488    case PIPE_STENCIL_OP_INCR_WRAP:
    489       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    490          if (mask & (1 << j)) {
    491             newstencil[j] = data->stencilVals[j] + 1;
    492          }
    493       }
    494       break;
    495    case PIPE_STENCIL_OP_DECR_WRAP:
    496       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    497          if (mask & (1 << j)) {
    498             newstencil[j] = data->stencilVals[j] - 1;
    499          }
    500       }
    501       break;
    502    case PIPE_STENCIL_OP_INVERT:
    503       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    504          if (mask & (1 << j)) {
    505             newstencil[j] = ~data->stencilVals[j];
    506          }
    507       }
    508       break;
    509    default:
    510       assert(0);
    511    }
    512 
    513    /*
    514     * update the stencil values
    515     */
    516    if (wrtMask != STENCIL_MAX) {
    517       /* apply bit-wise stencil buffer writemask */
    518       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    519          data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
    520       }
    521    }
    522    else {
    523       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    524          data->stencilVals[j] = newstencil[j];
    525       }
    526    }
    527 }
    528 
    529 
    530 
    531 /**
    532  * To increase efficiency, we should probably have multiple versions
    533  * of this function that are specifically for Z16, Z32 and FP Z buffers.
    534  * Try to effectively do that with codegen...
    535  */
    536 static boolean
    537 depth_test_quad(struct quad_stage *qs,
    538                 struct depth_data *data,
    539                 struct quad_header *quad)
    540 {
    541    struct softpipe_context *softpipe = qs->softpipe;
    542    unsigned zmask = 0;
    543    unsigned j;
    544 
    545    switch (softpipe->depth_stencil->depth.func) {
    546    case PIPE_FUNC_NEVER:
    547       /* zmask = 0 */
    548       break;
    549    case PIPE_FUNC_LESS:
    550       /* Note this is pretty much a single sse or cell instruction.
    551        * Like this:  quad->mask &= (quad->outputs.depth < zzzz);
    552        */
    553       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    554 	 if (data->qzzzz[j] < data->bzzzz[j])
    555 	    zmask |= 1 << j;
    556       }
    557       break;
    558    case PIPE_FUNC_EQUAL:
    559       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    560 	 if (data->qzzzz[j] == data->bzzzz[j])
    561 	    zmask |= 1 << j;
    562       }
    563       break;
    564    case PIPE_FUNC_LEQUAL:
    565       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    566 	 if (data->qzzzz[j] <= data->bzzzz[j])
    567 	    zmask |= (1 << j);
    568       }
    569       break;
    570    case PIPE_FUNC_GREATER:
    571       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    572 	 if (data->qzzzz[j] > data->bzzzz[j])
    573 	    zmask |= (1 << j);
    574       }
    575       break;
    576    case PIPE_FUNC_NOTEQUAL:
    577       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    578 	 if (data->qzzzz[j] != data->bzzzz[j])
    579 	    zmask |= (1 << j);
    580       }
    581       break;
    582    case PIPE_FUNC_GEQUAL:
    583       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    584 	 if (data->qzzzz[j] >= data->bzzzz[j])
    585 	    zmask |= (1 << j);
    586       }
    587       break;
    588    case PIPE_FUNC_ALWAYS:
    589       zmask = MASK_ALL;
    590       break;
    591    default:
    592       assert(0);
    593    }
    594 
    595    quad->inout.mask &= zmask;
    596    if (quad->inout.mask == 0)
    597       return FALSE;
    598 
    599    /* Update our internal copy only if writemask set.  Even if
    600     * depth.writemask is FALSE, may still need to write out buffer
    601     * data due to stencil changes.
    602     */
    603    if (softpipe->depth_stencil->depth.writemask) {
    604       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    605          if (quad->inout.mask & (1 << j)) {
    606             data->bzzzz[j] = data->qzzzz[j];
    607          }
    608       }
    609    }
    610 
    611    return TRUE;
    612 }
    613 
    614 
    615 
    616 /**
    617  * Do stencil (and depth) testing.  Stenciling depends on the outcome of
    618  * depth testing.
    619  */
    620 static void
    621 depth_stencil_test_quad(struct quad_stage *qs,
    622                         struct depth_data *data,
    623                         struct quad_header *quad)
    624 {
    625    struct softpipe_context *softpipe = qs->softpipe;
    626    unsigned func, zFailOp, zPassOp, failOp;
    627    ubyte ref, wrtMask, valMask;
    628    uint face = quad->input.facing;
    629 
    630    if (!softpipe->depth_stencil->stencil[1].enabled) {
    631       /* single-sided stencil test, use front (face=0) state */
    632       face = 0;
    633    }
    634 
    635    /* 0 = front-face, 1 = back-face */
    636    assert(face == 0 || face == 1);
    637 
    638    /* choose front or back face function, operator, etc */
    639    /* XXX we could do these initializations once per primitive */
    640    func    = softpipe->depth_stencil->stencil[face].func;
    641    failOp  = softpipe->depth_stencil->stencil[face].fail_op;
    642    zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
    643    zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
    644    ref     = softpipe->stencil_ref.ref_value[face];
    645    wrtMask = softpipe->depth_stencil->stencil[face].writemask;
    646    valMask = softpipe->depth_stencil->stencil[face].valuemask;
    647 
    648    /* do the stencil test first */
    649    {
    650       unsigned passMask, failMask;
    651       passMask = do_stencil_test(data, func, ref, valMask);
    652       failMask = quad->inout.mask & ~passMask;
    653       quad->inout.mask &= passMask;
    654 
    655       if (failOp != PIPE_STENCIL_OP_KEEP) {
    656          apply_stencil_op(data, failMask, failOp, ref, wrtMask);
    657       }
    658    }
    659 
    660    if (quad->inout.mask) {
    661       /* now the pixels that passed the stencil test are depth tested */
    662       if (softpipe->depth_stencil->depth.enabled) {
    663          const unsigned origMask = quad->inout.mask;
    664 
    665          depth_test_quad(qs, data, quad);  /* quad->mask is updated */
    666 
    667          /* update stencil buffer values according to z pass/fail result */
    668          if (zFailOp != PIPE_STENCIL_OP_KEEP) {
    669             const unsigned zFailMask = origMask & ~quad->inout.mask;
    670             apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
    671          }
    672 
    673          if (zPassOp != PIPE_STENCIL_OP_KEEP) {
    674             const unsigned zPassMask = origMask & quad->inout.mask;
    675             apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
    676          }
    677       }
    678       else {
    679          /* no depth test, apply Zpass operator to stencil buffer values */
    680          apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
    681       }
    682    }
    683 }
    684 
    685 
    686 #define ALPHATEST( FUNC, COMP )                                         \
    687    static unsigned                                                      \
    688    alpha_test_quads_##FUNC( struct quad_stage *qs,                      \
    689                            struct quad_header *quads[],                 \
    690                            unsigned nr )                                \
    691    {                                                                    \
    692       const float ref = qs->softpipe->depth_stencil->alpha.ref_value;   \
    693       const uint cbuf = 0; /* only output[0].alpha is tested */         \
    694       unsigned pass_nr = 0;                                             \
    695       unsigned i;                                                       \
    696                                                                         \
    697       for (i = 0; i < nr; i++) {                                        \
    698          const float *aaaa = quads[i]->output.color[cbuf][3];           \
    699          unsigned passMask = 0;                                         \
    700                                                                         \
    701          if (aaaa[0] COMP ref) passMask |= (1 << 0);                    \
    702          if (aaaa[1] COMP ref) passMask |= (1 << 1);                    \
    703          if (aaaa[2] COMP ref) passMask |= (1 << 2);                    \
    704          if (aaaa[3] COMP ref) passMask |= (1 << 3);                    \
    705                                                                         \
    706          quads[i]->inout.mask &= passMask;                              \
    707                                                                         \
    708          if (quads[i]->inout.mask)                                      \
    709             quads[pass_nr++] = quads[i];                                \
    710       }                                                                 \
    711                                                                         \
    712       return pass_nr;                                                   \
    713    }
    714 
    715 
    716 ALPHATEST( LESS,     < )
    717 ALPHATEST( EQUAL,    == )
    718 ALPHATEST( LEQUAL,   <= )
    719 ALPHATEST( GREATER,  > )
    720 ALPHATEST( NOTEQUAL, != )
    721 ALPHATEST( GEQUAL,   >= )
    722 
    723 
    724 /* XXX: Incorporate into shader using KILL_IF.
    725  */
    726 static unsigned
    727 alpha_test_quads(struct quad_stage *qs,
    728                  struct quad_header *quads[],
    729                  unsigned nr)
    730 {
    731    switch (qs->softpipe->depth_stencil->alpha.func) {
    732    case PIPE_FUNC_LESS:
    733       return alpha_test_quads_LESS( qs, quads, nr );
    734    case PIPE_FUNC_EQUAL:
    735       return alpha_test_quads_EQUAL( qs, quads, nr );
    736    case PIPE_FUNC_LEQUAL:
    737       return alpha_test_quads_LEQUAL( qs, quads, nr );
    738    case PIPE_FUNC_GREATER:
    739       return alpha_test_quads_GREATER( qs, quads, nr );
    740    case PIPE_FUNC_NOTEQUAL:
    741       return alpha_test_quads_NOTEQUAL( qs, quads, nr );
    742    case PIPE_FUNC_GEQUAL:
    743       return alpha_test_quads_GEQUAL( qs, quads, nr );
    744    case PIPE_FUNC_ALWAYS:
    745       return nr;
    746    case PIPE_FUNC_NEVER:
    747    default:
    748       return 0;
    749    }
    750 }
    751 
    752 
    753 static unsigned mask_count[16] =
    754 {
    755    0,                           /* 0x0 */
    756    1,                           /* 0x1 */
    757    1,                           /* 0x2 */
    758    2,                           /* 0x3 */
    759    1,                           /* 0x4 */
    760    2,                           /* 0x5 */
    761    2,                           /* 0x6 */
    762    3,                           /* 0x7 */
    763    1,                           /* 0x8 */
    764    2,                           /* 0x9 */
    765    2,                           /* 0xa */
    766    3,                           /* 0xb */
    767    2,                           /* 0xc */
    768    3,                           /* 0xd */
    769    3,                           /* 0xe */
    770    4,                           /* 0xf */
    771 };
    772 
    773 
    774 
    775 /**
    776  * General depth/stencil test function.  Used when there's no fast-path.
    777  */
    778 static void
    779 depth_test_quads_fallback(struct quad_stage *qs,
    780                           struct quad_header *quads[],
    781                           unsigned nr)
    782 {
    783    unsigned i, pass = 0;
    784    const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
    785    boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
    786    boolean shader_stencil_ref = fsInfo->writes_stencil;
    787    struct depth_data data;
    788    unsigned vp_idx = quads[0]->input.viewport_index;
    789 
    790    data.use_shader_stencil_refs = FALSE;
    791 
    792    if (qs->softpipe->depth_stencil->alpha.enabled) {
    793       nr = alpha_test_quads(qs, quads, nr);
    794    }
    795 
    796    if (qs->softpipe->framebuffer.zsbuf &&
    797          (qs->softpipe->depth_stencil->depth.enabled ||
    798           qs->softpipe->depth_stencil->stencil[0].enabled)) {
    799       float near_val, far_val;
    800 
    801       data.ps = qs->softpipe->framebuffer.zsbuf;
    802       data.format = data.ps->format;
    803       data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
    804                                      quads[0]->input.x0,
    805                                      quads[0]->input.y0, quads[0]->input.layer);
    806       data.clamp = !qs->softpipe->rasterizer->depth_clip;
    807 
    808       near_val = qs->softpipe->viewports[vp_idx].translate[2] - qs->softpipe->viewports[vp_idx].scale[2];
    809       far_val = near_val + (qs->softpipe->viewports[vp_idx].scale[2] * 2.0);
    810       data.minval = MIN2(near_val, far_val);
    811       data.maxval = MAX2(near_val, far_val);
    812 
    813       for (i = 0; i < nr; i++) {
    814          get_depth_stencil_values(&data, quads[i]);
    815 
    816          if (qs->softpipe->depth_stencil->depth.enabled) {
    817             if (interp_depth)
    818                interpolate_quad_depth(quads[i]);
    819 
    820             convert_quad_depth(&data, quads[i]);
    821          }
    822 
    823          if (qs->softpipe->depth_stencil->stencil[0].enabled) {
    824             if (shader_stencil_ref)
    825                convert_quad_stencil(&data, quads[i]);
    826 
    827             depth_stencil_test_quad(qs, &data, quads[i]);
    828             write_depth_stencil_values(&data, quads[i]);
    829          }
    830          else {
    831             if (!depth_test_quad(qs, &data, quads[i]))
    832                continue;
    833 
    834             if (qs->softpipe->depth_stencil->depth.writemask)
    835                write_depth_stencil_values(&data, quads[i]);
    836          }
    837 
    838          quads[pass++] = quads[i];
    839       }
    840 
    841       nr = pass;
    842    }
    843 
    844    if (qs->softpipe->active_query_count) {
    845       for (i = 0; i < nr; i++)
    846          qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
    847    }
    848 
    849    if (nr)
    850       qs->next->run(qs->next, quads, nr);
    851 }
    852 
    853 
/**
 * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled.
 *
 * Each NAME/OPERATOR (or NAME/ALWAYS) pair below parameterises one
 * inclusion of sp_quad_depth_test_tmp.h.  The depth_interp_z16_*_write
 * names defined this way are the fast paths that choose_depth_test()
 * installs in qs->run, one per depth comparison function.
 *
 * NOTE(review): the template header is not visible from this file.  It is
 * presumably what turns NAME into a function definition and #undef's
 * NAME, OPERATOR and ALWAYS after each inclusion, since the macros are
 * redefined here without an intervening #undef -- confirm in the header.
 */

/* Selected for PIPE_FUNC_LESS */
#define NAME depth_interp_z16_less_write
#define OPERATOR <
#include "sp_quad_depth_test_tmp.h"

/* Selected for PIPE_FUNC_EQUAL */
#define NAME depth_interp_z16_equal_write
#define OPERATOR ==
#include "sp_quad_depth_test_tmp.h"

/* Selected for PIPE_FUNC_LEQUAL */
#define NAME depth_interp_z16_lequal_write
#define OPERATOR <=
#include "sp_quad_depth_test_tmp.h"

/* Selected for PIPE_FUNC_GREATER */
#define NAME depth_interp_z16_greater_write
#define OPERATOR >
#include "sp_quad_depth_test_tmp.h"

/* Selected for PIPE_FUNC_NOTEQUAL */
#define NAME depth_interp_z16_notequal_write
#define OPERATOR !=
#include "sp_quad_depth_test_tmp.h"

/* Selected for PIPE_FUNC_GEQUAL */
#define NAME depth_interp_z16_gequal_write
#define OPERATOR >=
#include "sp_quad_depth_test_tmp.h"

/* Selected for PIPE_FUNC_ALWAYS: ALWAYS is defined instead of OPERATOR */
#define NAME depth_interp_z16_always_write
#define ALWAYS 1
#include "sp_quad_depth_test_tmp.h"
    885 
    886 
    887 
    888 static void
    889 depth_noop(struct quad_stage *qs,
    890            struct quad_header *quads[],
    891            unsigned nr)
    892 {
    893    qs->next->run(qs->next, quads, nr);
    894 }
    895 
    896 
    897 
    898 static void
    899 choose_depth_test(struct quad_stage *qs,
    900                   struct quad_header *quads[],
    901                   unsigned nr)
    902 {
    903    const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
    904 
    905    boolean interp_depth = !fsInfo->writes_z || qs->softpipe->early_depth;
    906 
    907    boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
    908 
    909    boolean depth = qs->softpipe->depth_stencil->depth.enabled;
    910 
    911    unsigned depthfunc = qs->softpipe->depth_stencil->depth.func;
    912 
    913    boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
    914 
    915    boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask;
    916 
    917    boolean occlusion = qs->softpipe->active_query_count;
    918 
    919    boolean clipped = !qs->softpipe->rasterizer->depth_clip;
    920 
    921    if(!qs->softpipe->framebuffer.zsbuf)
    922       depth = depthwrite = stencil = FALSE;
    923 
    924    /* default */
    925    qs->run = depth_test_quads_fallback;
    926 
    927    /* look for special cases */
    928    if (!alpha &&
    929        !depth &&
    930        !occlusion &&
    931        !clipped &&
    932        !stencil) {
    933       qs->run = depth_noop;
    934    }
    935    else if (!alpha &&
    936             interp_depth &&
    937             depth &&
    938             depthwrite &&
    939             !occlusion &&
    940             !clipped &&
    941             !stencil)
    942    {
    943       if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
    944          switch (depthfunc) {
    945          case PIPE_FUNC_NEVER:
    946             qs->run = depth_test_quads_fallback;
    947             break;
    948          case PIPE_FUNC_LESS:
    949             qs->run = depth_interp_z16_less_write;
    950             break;
    951          case PIPE_FUNC_EQUAL:
    952             qs->run = depth_interp_z16_equal_write;
    953             break;
    954          case PIPE_FUNC_LEQUAL:
    955             qs->run = depth_interp_z16_lequal_write;
    956             break;
    957          case PIPE_FUNC_GREATER:
    958             qs->run = depth_interp_z16_greater_write;
    959             break;
    960          case PIPE_FUNC_NOTEQUAL:
    961             qs->run = depth_interp_z16_notequal_write;
    962             break;
    963          case PIPE_FUNC_GEQUAL:
    964             qs->run = depth_interp_z16_gequal_write;
    965             break;
    966          case PIPE_FUNC_ALWAYS:
    967             qs->run = depth_interp_z16_always_write;
    968             break;
    969          default:
    970             qs->run = depth_test_quads_fallback;
    971             break;
    972          }
    973       }
    974    }
    975 
    976    /* next quad/fragment stage */
    977    qs->run( qs, quads, nr );
    978 }
    979 
    980 
    981 
    982 static void
    983 depth_test_begin(struct quad_stage *qs)
    984 {
    985    qs->run = choose_depth_test;
    986    qs->next->begin(qs->next);
    987 }
    988 
    989 
    990 static void
    991 depth_test_destroy(struct quad_stage *qs)
    992 {
    993    FREE( qs );
    994 }
    995 
    996 
    997 struct quad_stage *
    998 sp_quad_depth_test_stage(struct softpipe_context *softpipe)
    999 {
   1000    struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
   1001 
   1002    stage->softpipe = softpipe;
   1003    stage->begin = depth_test_begin;
   1004    stage->run = choose_depth_test;
   1005    stage->destroy = depth_test_destroy;
   1006 
   1007    return stage;
   1008 }
   1009