Home | History | Annotate | Download | only in softpipe
      1 /**************************************************************************
      2  *
      3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
      4  * Copyright 2010 VMware, Inc.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial portions
     17  * of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     22  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
     23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  **************************************************************************/
     28 
     29 /**
     30  * \brief  Quad depth / stencil testing
     31  */
     32 
     33 #include "pipe/p_defines.h"
     34 #include "util/u_format.h"
     35 #include "util/u_math.h"
     36 #include "util/u_memory.h"
     37 #include "tgsi/tgsi_scan.h"
     38 #include "sp_context.h"
     39 #include "sp_quad.h"
     40 #include "sp_quad_pipe.h"
     41 #include "sp_tile_cache.h"
     42 #include "sp_state.h"           /* for sp_fragment_shader */
     43 
     44 
     45 struct depth_data {
     46    struct pipe_surface *ps;
     47    enum pipe_format format;
     48    unsigned bzzzz[TGSI_QUAD_SIZE];  /**< Z values fetched from depth buffer */
     49    unsigned qzzzz[TGSI_QUAD_SIZE];  /**< Z values from the quad */
     50    ubyte stencilVals[TGSI_QUAD_SIZE];
     51    boolean use_shader_stencil_refs;
     52    ubyte shader_stencil_refs[TGSI_QUAD_SIZE];
     53    struct softpipe_cached_tile *tile;
     54 };
     55 
     56 
     57 
     58 static void
     59 get_depth_stencil_values( struct depth_data *data,
     60                           const struct quad_header *quad )
     61 {
     62    unsigned j;
     63    const struct softpipe_cached_tile *tile = data->tile;
     64 
     65    switch (data->format) {
     66    case PIPE_FORMAT_Z16_UNORM:
     67       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     68          int x = quad->input.x0 % TILE_SIZE + (j & 1);
     69          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
     70          data->bzzzz[j] = tile->data.depth16[y][x];
     71       }
     72       break;
     73    case PIPE_FORMAT_Z32_UNORM:
     74       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     75          int x = quad->input.x0 % TILE_SIZE + (j & 1);
     76          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
     77          data->bzzzz[j] = tile->data.depth32[y][x];
     78       }
     79       break;
     80    case PIPE_FORMAT_Z24X8_UNORM:
     81    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
     82       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     83          int x = quad->input.x0 % TILE_SIZE + (j & 1);
     84          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
     85          data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
     86          data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
     87       }
     88       break;
     89    case PIPE_FORMAT_X8Z24_UNORM:
     90    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
     91       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
     92          int x = quad->input.x0 % TILE_SIZE + (j & 1);
     93          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
     94          data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
     95          data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
     96       }
     97       break;
     98    case PIPE_FORMAT_S8_UINT:
     99       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    100          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    101          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    102          data->bzzzz[j] = 0;
    103          data->stencilVals[j] = tile->data.stencil8[y][x];
    104       }
    105       break;
    106    case PIPE_FORMAT_Z32_FLOAT:
    107       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    108          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    109          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    110          data->bzzzz[j] = tile->data.depth32[y][x];
    111       }
    112       break;
    113    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
    114       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    115          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    116          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    117          data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;
    118          data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;
    119       }
    120       break;
    121    default:
    122       assert(0);
    123    }
    124 }
    125 
    126 
    127 /**
    128  * If the shader has not been run, interpolate the depth values
    129  * ourselves.
    130  */
    131 static void
    132 interpolate_quad_depth( struct quad_header *quad )
    133 {
    134    const float fx = (float) quad->input.x0;
    135    const float fy = (float) quad->input.y0;
    136    const float dzdx = quad->posCoef->dadx[2];
    137    const float dzdy = quad->posCoef->dady[2];
    138    const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
    139 
    140    quad->output.depth[0] = z0;
    141    quad->output.depth[1] = z0 + dzdx;
    142    quad->output.depth[2] = z0 + dzdy;
    143    quad->output.depth[3] = z0 + dzdx + dzdy;
    144 }
    145 
    146 
    147 /**
    148  * Compute the depth_data::qzzzz[] values from the float fragment Z values.
    149  */
    150 static void
    151 convert_quad_depth( struct depth_data *data,
    152                     const struct quad_header *quad )
    153 {
    154    unsigned j;
    155 
    156    /* Convert quad's float depth values to int depth values (qzzzz).
    157     * If the Z buffer stores integer values, we _have_ to do the depth
    158     * compares with integers (not floats).  Otherwise, the float->int->float
    159     * conversion of Z values (which isn't an identity function) will cause
    160     * Z-fighting errors.
    161     */
    162    switch (data->format) {
    163    case PIPE_FORMAT_Z16_UNORM:
    164       {
    165          float scale = 65535.0;
    166 
    167          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    168             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
    169          }
    170       }
    171       break;
    172    case PIPE_FORMAT_Z32_UNORM:
    173       {
    174          double scale = (double) (uint) ~0UL;
    175 
    176          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    177             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
    178          }
    179       }
    180       break;
    181    case PIPE_FORMAT_Z24X8_UNORM:
    182    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
    183       {
    184          float scale = (float) ((1 << 24) - 1);
    185 
    186          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    187             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
    188          }
    189       }
    190       break;
    191    case PIPE_FORMAT_X8Z24_UNORM:
    192    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
    193       {
    194          float scale = (float) ((1 << 24) - 1);
    195 
    196          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    197             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
    198          }
    199       }
    200       break;
    201    case PIPE_FORMAT_Z32_FLOAT:
    202    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
    203       {
    204          union fi fui;
    205 
    206          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    207             fui.f = quad->output.depth[j];
    208             data->qzzzz[j] = fui.ui;
    209          }
    210       }
    211       break;
    212    default:
    213       assert(0);
    214    }
    215 }
    216 
    217 
    218 /**
    219  * Compute the depth_data::shader_stencil_refs[] values from the float
    220  * fragment stencil values.
    221  */
    222 static void
    223 convert_quad_stencil( struct depth_data *data,
    224                       const struct quad_header *quad )
    225 {
    226    unsigned j;
    227 
    228    data->use_shader_stencil_refs = TRUE;
    229    /* Copy quads stencil values
    230     */
    231    switch (data->format) {
    232    case PIPE_FORMAT_Z24X8_UNORM:
    233    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
    234    case PIPE_FORMAT_X8Z24_UNORM:
    235    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
    236    case PIPE_FORMAT_S8_UINT:
    237    case PIPE_FORMAT_Z32_FLOAT:
    238    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
    239       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    240          data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
    241       }
    242       break;
    243    default:
    244       assert(0);
    245    }
    246 }
    247 
    248 
    249 /**
    250  * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
    251  */
    252 static void
    253 write_depth_stencil_values( struct depth_data *data,
    254                             struct quad_header *quad )
    255 {
    256    struct softpipe_cached_tile *tile = data->tile;
    257    unsigned j;
    258 
    259    /* put updated Z values back into cached tile */
    260    switch (data->format) {
    261    case PIPE_FORMAT_Z16_UNORM:
    262       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    263          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    264          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    265          tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
    266       }
    267       break;
    268    case PIPE_FORMAT_Z24X8_UNORM:
    269    case PIPE_FORMAT_Z32_UNORM:
    270       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    271          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    272          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    273          tile->data.depth32[y][x] = data->bzzzz[j];
    274       }
    275       break;
    276    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
    277       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    278          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    279          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    280          tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
    281       }
    282       break;
    283    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
    284       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    285          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    286          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    287          tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
    288       }
    289       break;
    290    case PIPE_FORMAT_X8Z24_UNORM:
    291       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    292          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    293          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    294          tile->data.depth32[y][x] = data->bzzzz[j] << 8;
    295       }
    296       break;
    297    case PIPE_FORMAT_S8_UINT:
    298       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    299          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    300          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    301          tile->data.stencil8[y][x] = data->stencilVals[j];
    302       }
    303       break;
    304    case PIPE_FORMAT_Z32_FLOAT:
    305       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    306          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    307          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    308          tile->data.depth32[y][x] = data->bzzzz[j];
    309       }
    310       break;
    311    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
    312       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    313          int x = quad->input.x0 % TILE_SIZE + (j & 1);
    314          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
    315          tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);
    316       }
    317       break;
    318    default:
    319       assert(0);
    320    }
    321 }
    322 
    323 
    324 
    325 /** Only 8-bit stencil supported */
    326 #define STENCIL_MAX 0xff
    327 
    328 
    329 /**
    330  * Do the basic stencil test (compare stencil buffer values against the
    331  * reference value.
    332  *
    333  * \param data->stencilVals  the stencil values from the stencil buffer
    334  * \param func  the stencil func (PIPE_FUNC_x)
    335  * \param ref  the stencil reference value
    336  * \param valMask  the stencil value mask indicating which bits of the stencil
    337  *                 values and ref value are to be used.
    338  * \return mask indicating which pixels passed the stencil test
    339  */
    340 static unsigned
    341 do_stencil_test(struct depth_data *data,
    342                 unsigned func,
    343                 unsigned ref, unsigned valMask)
    344 {
    345    unsigned passMask = 0x0;
    346    unsigned j;
    347    ubyte refs[TGSI_QUAD_SIZE];
    348 
    349    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    350       if (data->use_shader_stencil_refs)
    351          refs[j] = data->shader_stencil_refs[j] & valMask;
    352       else
    353          refs[j] = ref & valMask;
    354    }
    355 
    356    switch (func) {
    357    case PIPE_FUNC_NEVER:
    358       /* passMask = 0x0 */
    359       break;
    360    case PIPE_FUNC_LESS:
    361       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    362          if (refs[j] < (data->stencilVals[j] & valMask)) {
    363             passMask |= (1 << j);
    364          }
    365       }
    366       break;
    367    case PIPE_FUNC_EQUAL:
    368       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    369          if (refs[j] == (data->stencilVals[j] & valMask)) {
    370             passMask |= (1 << j);
    371          }
    372       }
    373       break;
    374    case PIPE_FUNC_LEQUAL:
    375       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    376          if (refs[j] <= (data->stencilVals[j] & valMask)) {
    377             passMask |= (1 << j);
    378          }
    379       }
    380       break;
    381    case PIPE_FUNC_GREATER:
    382       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    383          if (refs[j] > (data->stencilVals[j] & valMask)) {
    384             passMask |= (1 << j);
    385          }
    386       }
    387       break;
    388    case PIPE_FUNC_NOTEQUAL:
    389       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    390          if (refs[j] != (data->stencilVals[j] & valMask)) {
    391             passMask |= (1 << j);
    392          }
    393       }
    394       break;
    395    case PIPE_FUNC_GEQUAL:
    396       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    397          if (refs[j] >= (data->stencilVals[j] & valMask)) {
    398             passMask |= (1 << j);
    399          }
    400       }
    401       break;
    402    case PIPE_FUNC_ALWAYS:
    403       passMask = MASK_ALL;
    404       break;
    405    default:
    406       assert(0);
    407    }
    408 
    409    return passMask;
    410 }
    411 
    412 
    413 /**
    414  * Apply the stencil operator to stencil values.
    415  *
    416  * \param data->stencilVals  the stencil buffer values (read and written)
    417  * \param mask  indicates which pixels to update
    418  * \param op  the stencil operator (PIPE_STENCIL_OP_x)
    419  * \param ref  the stencil reference value
    420  * \param wrtMask  writemask controlling which bits are changed in the
    421  *                 stencil values
    422  */
    423 static void
    424 apply_stencil_op(struct depth_data *data,
    425                  unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
    426 {
    427    unsigned j;
    428    ubyte newstencil[TGSI_QUAD_SIZE];
    429    ubyte refs[TGSI_QUAD_SIZE];
    430 
    431    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    432       newstencil[j] = data->stencilVals[j];
    433       if (data->use_shader_stencil_refs)
    434          refs[j] = data->shader_stencil_refs[j];
    435       else
    436          refs[j] = ref;
    437    }
    438 
    439    switch (op) {
    440    case PIPE_STENCIL_OP_KEEP:
    441       /* no-op */
    442       break;
    443    case PIPE_STENCIL_OP_ZERO:
    444       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    445          if (mask & (1 << j)) {
    446             newstencil[j] = 0;
    447          }
    448       }
    449       break;
    450    case PIPE_STENCIL_OP_REPLACE:
    451       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    452          if (mask & (1 << j)) {
    453             newstencil[j] = refs[j];
    454          }
    455       }
    456       break;
    457    case PIPE_STENCIL_OP_INCR:
    458       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    459          if (mask & (1 << j)) {
    460             if (data->stencilVals[j] < STENCIL_MAX) {
    461                newstencil[j] = data->stencilVals[j] + 1;
    462             }
    463          }
    464       }
    465       break;
    466    case PIPE_STENCIL_OP_DECR:
    467       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    468          if (mask & (1 << j)) {
    469             if (data->stencilVals[j] > 0) {
    470                newstencil[j] = data->stencilVals[j] - 1;
    471             }
    472          }
    473       }
    474       break;
    475    case PIPE_STENCIL_OP_INCR_WRAP:
    476       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    477          if (mask & (1 << j)) {
    478             newstencil[j] = data->stencilVals[j] + 1;
    479          }
    480       }
    481       break;
    482    case PIPE_STENCIL_OP_DECR_WRAP:
    483       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    484          if (mask & (1 << j)) {
    485             newstencil[j] = data->stencilVals[j] - 1;
    486          }
    487       }
    488       break;
    489    case PIPE_STENCIL_OP_INVERT:
    490       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    491          if (mask & (1 << j)) {
    492             newstencil[j] = ~data->stencilVals[j];
    493          }
    494       }
    495       break;
    496    default:
    497       assert(0);
    498    }
    499 
    500    /*
    501     * update the stencil values
    502     */
    503    if (wrtMask != STENCIL_MAX) {
    504       /* apply bit-wise stencil buffer writemask */
    505       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    506          data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
    507       }
    508    }
    509    else {
    510       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    511          data->stencilVals[j] = newstencil[j];
    512       }
    513    }
    514 }
    515 
    516 
    517 
    518 /**
    519  * To increase efficiency, we should probably have multiple versions
    520  * of this function that are specifically for Z16, Z32 and FP Z buffers.
    521  * Try to effectively do that with codegen...
    522  */
    523 static boolean
    524 depth_test_quad(struct quad_stage *qs,
    525                 struct depth_data *data,
    526                 struct quad_header *quad)
    527 {
    528    struct softpipe_context *softpipe = qs->softpipe;
    529    unsigned zmask = 0;
    530    unsigned j;
    531 
    532    switch (softpipe->depth_stencil->depth.func) {
    533    case PIPE_FUNC_NEVER:
    534       /* zmask = 0 */
    535       break;
    536    case PIPE_FUNC_LESS:
    537       /* Note this is pretty much a single sse or cell instruction.
    538        * Like this:  quad->mask &= (quad->outputs.depth < zzzz);
    539        */
    540       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    541 	 if (data->qzzzz[j] < data->bzzzz[j])
    542 	    zmask |= 1 << j;
    543       }
    544       break;
    545    case PIPE_FUNC_EQUAL:
    546       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    547 	 if (data->qzzzz[j] == data->bzzzz[j])
    548 	    zmask |= 1 << j;
    549       }
    550       break;
    551    case PIPE_FUNC_LEQUAL:
    552       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    553 	 if (data->qzzzz[j] <= data->bzzzz[j])
    554 	    zmask |= (1 << j);
    555       }
    556       break;
    557    case PIPE_FUNC_GREATER:
    558       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    559 	 if (data->qzzzz[j] > data->bzzzz[j])
    560 	    zmask |= (1 << j);
    561       }
    562       break;
    563    case PIPE_FUNC_NOTEQUAL:
    564       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    565 	 if (data->qzzzz[j] != data->bzzzz[j])
    566 	    zmask |= (1 << j);
    567       }
    568       break;
    569    case PIPE_FUNC_GEQUAL:
    570       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    571 	 if (data->qzzzz[j] >= data->bzzzz[j])
    572 	    zmask |= (1 << j);
    573       }
    574       break;
    575    case PIPE_FUNC_ALWAYS:
    576       zmask = MASK_ALL;
    577       break;
    578    default:
    579       assert(0);
    580    }
    581 
    582    quad->inout.mask &= zmask;
    583    if (quad->inout.mask == 0)
    584       return FALSE;
    585 
    586    /* Update our internal copy only if writemask set.  Even if
    587     * depth.writemask is FALSE, may still need to write out buffer
    588     * data due to stencil changes.
    589     */
    590    if (softpipe->depth_stencil->depth.writemask) {
    591       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
    592          if (quad->inout.mask & (1 << j)) {
    593             data->bzzzz[j] = data->qzzzz[j];
    594          }
    595       }
    596    }
    597 
    598    return TRUE;
    599 }
    600 
    601 
    602 
    603 /**
    604  * Do stencil (and depth) testing.  Stenciling depends on the outcome of
    605  * depth testing.
    606  */
    607 static void
    608 depth_stencil_test_quad(struct quad_stage *qs,
    609                         struct depth_data *data,
    610                         struct quad_header *quad)
    611 {
    612    struct softpipe_context *softpipe = qs->softpipe;
    613    unsigned func, zFailOp, zPassOp, failOp;
    614    ubyte ref, wrtMask, valMask;
    615    uint face = quad->input.facing;
    616 
    617    if (!softpipe->depth_stencil->stencil[1].enabled) {
    618       /* single-sided stencil test, use front (face=0) state */
    619       face = 0;
    620    }
    621 
    622    /* 0 = front-face, 1 = back-face */
    623    assert(face == 0 || face == 1);
    624 
    625    /* choose front or back face function, operator, etc */
    626    /* XXX we could do these initializations once per primitive */
    627    func    = softpipe->depth_stencil->stencil[face].func;
    628    failOp  = softpipe->depth_stencil->stencil[face].fail_op;
    629    zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
    630    zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
    631    ref     = softpipe->stencil_ref.ref_value[face];
    632    wrtMask = softpipe->depth_stencil->stencil[face].writemask;
    633    valMask = softpipe->depth_stencil->stencil[face].valuemask;
    634 
    635    /* do the stencil test first */
    636    {
    637       unsigned passMask, failMask;
    638       passMask = do_stencil_test(data, func, ref, valMask);
    639       failMask = quad->inout.mask & ~passMask;
    640       quad->inout.mask &= passMask;
    641 
    642       if (failOp != PIPE_STENCIL_OP_KEEP) {
    643          apply_stencil_op(data, failMask, failOp, ref, wrtMask);
    644       }
    645    }
    646 
    647    if (quad->inout.mask) {
    648       /* now the pixels that passed the stencil test are depth tested */
    649       if (softpipe->depth_stencil->depth.enabled) {
    650          const unsigned origMask = quad->inout.mask;
    651 
    652          depth_test_quad(qs, data, quad);  /* quad->mask is updated */
    653 
    654          /* update stencil buffer values according to z pass/fail result */
    655          if (zFailOp != PIPE_STENCIL_OP_KEEP) {
    656             const unsigned zFailMask = origMask & ~quad->inout.mask;
    657             apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
    658          }
    659 
    660          if (zPassOp != PIPE_STENCIL_OP_KEEP) {
    661             const unsigned zPassMask = origMask & quad->inout.mask;
    662             apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
    663          }
    664       }
    665       else {
    666          /* no depth test, apply Zpass operator to stencil buffer values */
    667          apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
    668       }
    669    }
    670 }
    671 
    672 
    673 #define ALPHATEST( FUNC, COMP )                                         \
    674    static unsigned                                                      \
    675    alpha_test_quads_##FUNC( struct quad_stage *qs,                      \
    676                            struct quad_header *quads[],                 \
    677                            unsigned nr )                                \
    678    {                                                                    \
    679       const float ref = qs->softpipe->depth_stencil->alpha.ref_value;   \
    680       const uint cbuf = 0; /* only output[0].alpha is tested */         \
    681       unsigned pass_nr = 0;                                             \
    682       unsigned i;                                                       \
    683                                                                         \
    684       for (i = 0; i < nr; i++) {                                        \
    685          const float *aaaa = quads[i]->output.color[cbuf][3];           \
    686          unsigned passMask = 0;                                         \
    687                                                                         \
    688          if (aaaa[0] COMP ref) passMask |= (1 << 0);                    \
    689          if (aaaa[1] COMP ref) passMask |= (1 << 1);                    \
    690          if (aaaa[2] COMP ref) passMask |= (1 << 2);                    \
    691          if (aaaa[3] COMP ref) passMask |= (1 << 3);                    \
    692                                                                         \
    693          quads[i]->inout.mask &= passMask;                              \
    694                                                                         \
    695          if (quads[i]->inout.mask)                                      \
    696             quads[pass_nr++] = quads[i];                                \
    697       }                                                                 \
    698                                                                         \
    699       return pass_nr;                                                   \
    700    }
    701 
    702 
    703 ALPHATEST( LESS,     < )
    704 ALPHATEST( EQUAL,    == )
    705 ALPHATEST( LEQUAL,   <= )
    706 ALPHATEST( GREATER,  > )
    707 ALPHATEST( NOTEQUAL, != )
    708 ALPHATEST( GEQUAL,   >= )
    709 
    710 
    711 /* XXX: Incorporate into shader using KILP.
    712  */
    713 static unsigned
    714 alpha_test_quads(struct quad_stage *qs,
    715                  struct quad_header *quads[],
    716                  unsigned nr)
    717 {
    718    switch (qs->softpipe->depth_stencil->alpha.func) {
    719    case PIPE_FUNC_LESS:
    720       return alpha_test_quads_LESS( qs, quads, nr );
    721    case PIPE_FUNC_EQUAL:
    722       return alpha_test_quads_EQUAL( qs, quads, nr );
    723    case PIPE_FUNC_LEQUAL:
    724       return alpha_test_quads_LEQUAL( qs, quads, nr );
    725    case PIPE_FUNC_GREATER:
    726       return alpha_test_quads_GREATER( qs, quads, nr );
    727    case PIPE_FUNC_NOTEQUAL:
    728       return alpha_test_quads_NOTEQUAL( qs, quads, nr );
    729    case PIPE_FUNC_GEQUAL:
    730       return alpha_test_quads_GEQUAL( qs, quads, nr );
    731    case PIPE_FUNC_ALWAYS:
    732       return nr;
    733    case PIPE_FUNC_NEVER:
    734    default:
    735       return 0;
    736    }
    737 }
    738 
    739 
    740 static unsigned mask_count[16] =
    741 {
    742    0,                           /* 0x0 */
    743    1,                           /* 0x1 */
    744    1,                           /* 0x2 */
    745    2,                           /* 0x3 */
    746    1,                           /* 0x4 */
    747    2,                           /* 0x5 */
    748    2,                           /* 0x6 */
    749    3,                           /* 0x7 */
    750    1,                           /* 0x8 */
    751    2,                           /* 0x9 */
    752    2,                           /* 0xa */
    753    3,                           /* 0xb */
    754    2,                           /* 0xc */
    755    3,                           /* 0xd */
    756    3,                           /* 0xe */
    757    4,                           /* 0xf */
    758 };
    759 
    760 
    761 
    762 /**
    763  * General depth/stencil test function.  Used when there's no fast-path.
    764  */
    765 static void
    766 depth_test_quads_fallback(struct quad_stage *qs,
    767                           struct quad_header *quads[],
    768                           unsigned nr)
    769 {
    770    unsigned i, pass = 0;
    771    const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
    772    boolean interp_depth = !fsInfo->writes_z;
    773    boolean shader_stencil_ref = fsInfo->writes_stencil;
    774    struct depth_data data;
    775 
    776    data.use_shader_stencil_refs = FALSE;
    777 
    778    if (qs->softpipe->depth_stencil->alpha.enabled) {
    779       nr = alpha_test_quads(qs, quads, nr);
    780    }
    781 
    782    if (qs->softpipe->framebuffer.zsbuf &&
    783          (qs->softpipe->depth_stencil->depth.enabled ||
    784           qs->softpipe->depth_stencil->stencil[0].enabled)) {
    785 
    786       data.ps = qs->softpipe->framebuffer.zsbuf;
    787       data.format = data.ps->format;
    788       data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
    789                                      quads[0]->input.x0,
    790                                      quads[0]->input.y0);
    791 
    792       for (i = 0; i < nr; i++) {
    793          get_depth_stencil_values(&data, quads[i]);
    794 
    795          if (qs->softpipe->depth_stencil->depth.enabled) {
    796             if (interp_depth)
    797                interpolate_quad_depth(quads[i]);
    798 
    799             convert_quad_depth(&data, quads[i]);
    800          }
    801 
    802          if (qs->softpipe->depth_stencil->stencil[0].enabled) {
    803             if (shader_stencil_ref)
    804                convert_quad_stencil(&data, quads[i]);
    805 
    806             depth_stencil_test_quad(qs, &data, quads[i]);
    807             write_depth_stencil_values(&data, quads[i]);
    808          }
    809          else {
    810             if (!depth_test_quad(qs, &data, quads[i]))
    811                continue;
    812 
    813             if (qs->softpipe->depth_stencil->depth.writemask)
    814                write_depth_stencil_values(&data, quads[i]);
    815          }
    816 
    817          quads[pass++] = quads[i];
    818       }
    819 
    820       nr = pass;
    821    }
    822 
    823    if (qs->softpipe->active_query_count) {
    824       for (i = 0; i < nr; i++)
    825          qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
    826    }
    827 
    828    if (nr)
    829       qs->next->run(qs->next, quads, nr);
    830 }
    831 
    832 
/**
 * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled.
 *
 * Each group below sets NAME (the function name to generate) and either
 * OPERATOR (the comparison to use) or ALWAYS, then includes
 * sp_quad_depth_test_tmp.h.  The resulting depth_interp_z16_*_write
 * functions are the ones choose_depth_test() installs as qs->run.
 *
 * NOTE(review): NAME/OPERATOR are redefined here without an intervening
 * #undef, so the template header presumably #undef's them itself -- confirm
 * before reordering or adding variants.
 */

/* PIPE_FUNC_LESS variant */
#define NAME depth_interp_z16_less_write
#define OPERATOR <
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_EQUAL variant */
#define NAME depth_interp_z16_equal_write
#define OPERATOR ==
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_LEQUAL variant */
#define NAME depth_interp_z16_lequal_write
#define OPERATOR <=
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_GREATER variant */
#define NAME depth_interp_z16_greater_write
#define OPERATOR >
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_NOTEQUAL variant */
#define NAME depth_interp_z16_notequal_write
#define OPERATOR !=
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_GEQUAL variant */
#define NAME depth_interp_z16_gequal_write
#define OPERATOR >=
#include "sp_quad_depth_test_tmp.h"

/* PIPE_FUNC_ALWAYS variant: no OPERATOR, the template is keyed on ALWAYS */
#define NAME depth_interp_z16_always_write
#define ALWAYS 1
#include "sp_quad_depth_test_tmp.h"
    864 
    865 
    866 
    867 static void
    868 depth_noop(struct quad_stage *qs,
    869            struct quad_header *quads[],
    870            unsigned nr)
    871 {
    872    qs->next->run(qs->next, quads, nr);
    873 }
    874 
    875 
    876 
    877 static void
    878 choose_depth_test(struct quad_stage *qs,
    879                   struct quad_header *quads[],
    880                   unsigned nr)
    881 {
    882    const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
    883 
    884    boolean interp_depth = !fsInfo->writes_z;
    885 
    886    boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
    887 
    888    boolean depth = qs->softpipe->depth_stencil->depth.enabled;
    889 
    890    unsigned depthfunc = qs->softpipe->depth_stencil->depth.func;
    891 
    892    boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
    893 
    894    boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask;
    895 
    896    boolean occlusion = qs->softpipe->active_query_count;
    897 
    898    if(!qs->softpipe->framebuffer.zsbuf)
    899       depth = depthwrite = stencil = FALSE;
    900 
    901    /* default */
    902    qs->run = depth_test_quads_fallback;
    903 
    904    /* look for special cases */
    905    if (!alpha &&
    906        !depth &&
    907        !occlusion &&
    908        !stencil) {
    909       qs->run = depth_noop;
    910    }
    911    else if (!alpha &&
    912             interp_depth &&
    913             depth &&
    914             depthwrite &&
    915             !occlusion &&
    916             !stencil)
    917    {
    918       if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
    919          switch (depthfunc) {
    920          case PIPE_FUNC_NEVER:
    921             qs->run = depth_test_quads_fallback;
    922             break;
    923          case PIPE_FUNC_LESS:
    924             qs->run = depth_interp_z16_less_write;
    925             break;
    926          case PIPE_FUNC_EQUAL:
    927             qs->run = depth_interp_z16_equal_write;
    928             break;
    929          case PIPE_FUNC_LEQUAL:
    930             qs->run = depth_interp_z16_lequal_write;
    931             break;
    932          case PIPE_FUNC_GREATER:
    933             qs->run = depth_interp_z16_greater_write;
    934             break;
    935          case PIPE_FUNC_NOTEQUAL:
    936             qs->run = depth_interp_z16_notequal_write;
    937             break;
    938          case PIPE_FUNC_GEQUAL:
    939             qs->run = depth_interp_z16_gequal_write;
    940             break;
    941          case PIPE_FUNC_ALWAYS:
    942             qs->run = depth_interp_z16_always_write;
    943             break;
    944          default:
    945             qs->run = depth_test_quads_fallback;
    946             break;
    947          }
    948       }
    949    }
    950 
    951    /* next quad/fragment stage */
    952    qs->run( qs, quads, nr );
    953 }
    954 
    955 
    956 
    957 static void
    958 depth_test_begin(struct quad_stage *qs)
    959 {
    960    qs->run = choose_depth_test;
    961    qs->next->begin(qs->next);
    962 }
    963 
    964 
    965 static void
    966 depth_test_destroy(struct quad_stage *qs)
    967 {
    968    FREE( qs );
    969 }
    970 
    971 
    972 struct quad_stage *
    973 sp_quad_depth_test_stage(struct softpipe_context *softpipe)
    974 {
    975    struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
    976 
    977    stage->softpipe = softpipe;
    978    stage->begin = depth_test_begin;
    979    stage->run = choose_depth_test;
    980    stage->destroy = depth_test_destroy;
    981 
    982    return stage;
    983 }
    984