Home | History | Annotate | Download | only in llvmpipe
      1 /**************************************************************************
      2  *
      3  * Copyright 2009-2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * @file
     30  * Depth/stencil testing to LLVM IR translation.
     31  *
     32  * To be done accurately/efficiently the depth/stencil test must be done with
     33  * the same type/format of the depth/stencil buffer, which implies massaging
     34  * the incoming depths to fit into place. Using a more straightforward
     35  * type/format for depth/stencil values internally and only convert when
     36  * flushing would avoid this, but it would most likely result in depth fighting
     37  * artifacts.
     38  *
     39  * We are free to use a different pixel layout though. Since our basic
     40  * processing unit is a quad (2x2 pixel block) we store the depth/stencil
     41  * values tiled, a quad at a time. That is, a depth buffer containing
     42  *
     43  *  Z11 Z12 Z13 Z14 ...
     44  *  Z21 Z22 Z23 Z24 ...
     45  *  Z31 Z32 Z33 Z34 ...
     46  *  Z41 Z42 Z43 Z44 ...
     47  *  ... ... ... ... ...
     48  *
     49  * will actually be stored in memory as
     50  *
     51  *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
     52  *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
     53  *  ... ... ... ... ... ... ... ... ...
     54  *
     55  *
     56  * @author Jose Fonseca <jfonseca (at) vmware.com>
     57  * @author Brian Paul <jfonseca (at) vmware.com>
     58  */
     59 
     60 #include "pipe/p_state.h"
     61 #include "util/u_format.h"
     62 #include "util/u_cpu_detect.h"
     63 
     64 #include "gallivm/lp_bld_type.h"
     65 #include "gallivm/lp_bld_arit.h"
     66 #include "gallivm/lp_bld_bitarit.h"
     67 #include "gallivm/lp_bld_const.h"
     68 #include "gallivm/lp_bld_conv.h"
     69 #include "gallivm/lp_bld_logic.h"
     70 #include "gallivm/lp_bld_flow.h"
     71 #include "gallivm/lp_bld_intr.h"
     72 #include "gallivm/lp_bld_debug.h"
     73 #include "gallivm/lp_bld_swizzle.h"
     74 
     75 #include "lp_bld_depth.h"
     76 
     77 
/**
 * Used to select fields from pipe_stencil_state: each value names which
 * of the three stencil operators lp_build_stencil_op_single() reads.
 */
enum stencil_op {
   /** selects pipe_stencil_state::fail_op (stencil-fail operator) */
   S_FAIL_OP,
   /** selects pipe_stencil_state::zfail_op (Z-fail operator) */
   Z_FAIL_OP,
   /** selects pipe_stencil_state::zpass_op (Z-pass operator) */
   Z_PASS_OP
};
     84 
     85 
     86 
     87 /**
     88  * Do the stencil test comparison (compare FB stencil values against ref value).
     89  * This will be used twice when generating two-sided stencil code.
     90  * \param stencil  the front/back stencil state
     91  * \param stencilRef  the stencil reference value, replicated as a vector
     92  * \param stencilVals  vector of stencil values from framebuffer
     93  * \return vector mask of pass/fail values (~0 or 0)
     94  */
     95 static LLVMValueRef
     96 lp_build_stencil_test_single(struct lp_build_context *bld,
     97                              const struct pipe_stencil_state *stencil,
     98                              LLVMValueRef stencilRef,
     99                              LLVMValueRef stencilVals)
    100 {
    101    LLVMBuilderRef builder = bld->gallivm->builder;
    102    const unsigned stencilMax = 255; /* XXX fix */
    103    struct lp_type type = bld->type;
    104    LLVMValueRef res;
    105 
    106    /*
    107     * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
    108     * are between 0..255 so ensure we generate the fastest comparisons for
    109     * wider elements.
    110     */
    111    if (type.width <= 8) {
    112       assert(!type.sign);
    113    } else {
    114       assert(type.sign);
    115    }
    116 
    117    assert(stencil->enabled);
    118 
    119    if (stencil->valuemask != stencilMax) {
    120       /* compute stencilRef = stencilRef & valuemask */
    121       LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask);
    122       stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, "");
    123       /* compute stencilVals = stencilVals & valuemask */
    124       stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, "");
    125    }
    126 
    127    res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
    128 
    129    return res;
    130 }
    131 
    132 
    133 /**
    134  * Do the one or two-sided stencil test comparison.
    135  * \sa lp_build_stencil_test_single
    136  * \param front_facing  an integer vector mask, indicating front (~0) or back
    137  *                      (0) facing polygon. If NULL, assume front-facing.
    138  */
    139 static LLVMValueRef
    140 lp_build_stencil_test(struct lp_build_context *bld,
    141                       const struct pipe_stencil_state stencil[2],
    142                       LLVMValueRef stencilRefs[2],
    143                       LLVMValueRef stencilVals,
    144                       LLVMValueRef front_facing)
    145 {
    146    LLVMValueRef res;
    147 
    148    assert(stencil[0].enabled);
    149 
    150    /* do front face test */
    151    res = lp_build_stencil_test_single(bld, &stencil[0],
    152                                       stencilRefs[0], stencilVals);
    153 
    154    if (stencil[1].enabled && front_facing != NULL) {
    155       /* do back face test */
    156       LLVMValueRef back_res;
    157 
    158       back_res = lp_build_stencil_test_single(bld, &stencil[1],
    159                                               stencilRefs[1], stencilVals);
    160 
    161       res = lp_build_select(bld, front_facing, res, back_res);
    162    }
    163 
    164    return res;
    165 }
    166 
    167 
    168 /**
    169  * Apply the stencil operator (add/sub/keep/etc) to the given vector
    170  * of stencil values.
    171  * \return  new stencil values vector
    172  */
    173 static LLVMValueRef
    174 lp_build_stencil_op_single(struct lp_build_context *bld,
    175                            const struct pipe_stencil_state *stencil,
    176                            enum stencil_op op,
    177                            LLVMValueRef stencilRef,
    178                            LLVMValueRef stencilVals)
    179 
    180 {
    181    LLVMBuilderRef builder = bld->gallivm->builder;
    182    struct lp_type type = bld->type;
    183    LLVMValueRef res;
    184    LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff);
    185    unsigned stencil_op;
    186 
    187    assert(type.sign);
    188 
    189    switch (op) {
    190    case S_FAIL_OP:
    191       stencil_op = stencil->fail_op;
    192       break;
    193    case Z_FAIL_OP:
    194       stencil_op = stencil->zfail_op;
    195       break;
    196    case Z_PASS_OP:
    197       stencil_op = stencil->zpass_op;
    198       break;
    199    default:
    200       assert(0 && "Invalid stencil_op mode");
    201       stencil_op = PIPE_STENCIL_OP_KEEP;
    202    }
    203 
    204    switch (stencil_op) {
    205    case PIPE_STENCIL_OP_KEEP:
    206       res = stencilVals;
    207       /* we can return early for this case */
    208       return res;
    209    case PIPE_STENCIL_OP_ZERO:
    210       res = bld->zero;
    211       break;
    212    case PIPE_STENCIL_OP_REPLACE:
    213       res = stencilRef;
    214       break;
    215    case PIPE_STENCIL_OP_INCR:
    216       res = lp_build_add(bld, stencilVals, bld->one);
    217       res = lp_build_min(bld, res, max);
    218       break;
    219    case PIPE_STENCIL_OP_DECR:
    220       res = lp_build_sub(bld, stencilVals, bld->one);
    221       res = lp_build_max(bld, res, bld->zero);
    222       break;
    223    case PIPE_STENCIL_OP_INCR_WRAP:
    224       res = lp_build_add(bld, stencilVals, bld->one);
    225       res = LLVMBuildAnd(builder, res, max, "");
    226       break;
    227    case PIPE_STENCIL_OP_DECR_WRAP:
    228       res = lp_build_sub(bld, stencilVals, bld->one);
    229       res = LLVMBuildAnd(builder, res, max, "");
    230       break;
    231    case PIPE_STENCIL_OP_INVERT:
    232       res = LLVMBuildNot(builder, stencilVals, "");
    233       res = LLVMBuildAnd(builder, res, max, "");
    234       break;
    235    default:
    236       assert(0 && "bad stencil op mode");
    237       res = bld->undef;
    238    }
    239 
    240    return res;
    241 }
    242 
    243 
    244 /**
    245  * Do the one or two-sided stencil test op/update.
    246  */
    247 static LLVMValueRef
    248 lp_build_stencil_op(struct lp_build_context *bld,
    249                     const struct pipe_stencil_state stencil[2],
    250                     enum stencil_op op,
    251                     LLVMValueRef stencilRefs[2],
    252                     LLVMValueRef stencilVals,
    253                     LLVMValueRef mask,
    254                     LLVMValueRef front_facing)
    255 
    256 {
    257    LLVMBuilderRef builder = bld->gallivm->builder;
    258    LLVMValueRef res;
    259 
    260    assert(stencil[0].enabled);
    261 
    262    /* do front face op */
    263    res = lp_build_stencil_op_single(bld, &stencil[0], op,
    264                                      stencilRefs[0], stencilVals);
    265 
    266    if (stencil[1].enabled && front_facing != NULL) {
    267       /* do back face op */
    268       LLVMValueRef back_res;
    269 
    270       back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
    271                                             stencilRefs[1], stencilVals);
    272 
    273       res = lp_build_select(bld, front_facing, res, back_res);
    274    }
    275 
    276    if (stencil[0].writemask != 0xff ||
    277        (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) {
    278       /* mask &= stencil[0].writemask */
    279       LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
    280                                                       stencil[0].writemask);
    281       if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) {
    282          LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
    283                                                          stencil[1].writemask);
    284          writemask = lp_build_select(bld, front_facing, writemask, back_writemask);
    285       }
    286 
    287       mask = LLVMBuildAnd(builder, mask, writemask, "");
    288       /* res = (res & mask) | (stencilVals & ~mask) */
    289       res = lp_build_select_bitwise(bld, mask, res, stencilVals);
    290    }
    291    else {
    292       /* res = mask ? res : stencilVals */
    293       res = lp_build_select(bld, mask, res, stencilVals);
    294    }
    295 
    296    return res;
    297 }
    298 
    299 
    300 
    301 /**
    302  * Return a type appropriate for depth/stencil testing.
    303  */
    304 struct lp_type
    305 lp_depth_type(const struct util_format_description *format_desc,
    306               unsigned length)
    307 {
    308    struct lp_type type;
    309    unsigned swizzle;
    310 
    311    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
    312    assert(format_desc->block.width == 1);
    313    assert(format_desc->block.height == 1);
    314 
    315    swizzle = format_desc->swizzle[0];
    316    assert(swizzle < 4);
    317 
    318    memset(&type, 0, sizeof type);
    319    type.width = format_desc->block.bits;
    320 
    321    if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
    322       type.floating = TRUE;
    323       assert(swizzle == 0);
    324       assert(format_desc->channel[swizzle].size == format_desc->block.bits);
    325    }
    326    else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
    327       assert(format_desc->block.bits <= 32);
    328       assert(format_desc->channel[swizzle].normalized);
    329       if (format_desc->channel[swizzle].size < format_desc->block.bits) {
    330          /* Prefer signed integers when possible, as SSE has less support
    331           * for unsigned comparison;
    332           */
    333          type.sign = TRUE;
    334       }
    335    }
    336    else
    337       assert(0);
    338 
    339    assert(type.width <= length);
    340    type.length = length / type.width;
    341 
    342    return type;
    343 }
    344 
    345 
    346 /**
    347  * Compute bitmask and bit shift to apply to the incoming fragment Z values
    348  * and the Z buffer values needed before doing the Z comparison.
    349  *
    350  * Note that we leave the Z bits in the position that we find them
    351  * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us
    352  * get by with fewer bit twiddling steps.
    353  */
    354 static boolean
    355 get_z_shift_and_mask(const struct util_format_description *format_desc,
    356                      unsigned *shift, unsigned *width, unsigned *mask)
    357 {
    358    const unsigned total_bits = format_desc->block.bits;
    359    unsigned z_swizzle;
    360    unsigned chan;
    361    unsigned padding_left, padding_right;
    362 
    363    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
    364    assert(format_desc->block.width == 1);
    365    assert(format_desc->block.height == 1);
    366 
    367    z_swizzle = format_desc->swizzle[0];
    368 
    369    if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
    370       return FALSE;
    371 
    372    *width = format_desc->channel[z_swizzle].size;
    373 
    374    padding_right = 0;
    375    for (chan = 0; chan < z_swizzle; ++chan)
    376       padding_right += format_desc->channel[chan].size;
    377 
    378    padding_left =
    379       total_bits - (padding_right + *width);
    380 
    381    if (padding_left || padding_right) {
    382       unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
    383       unsigned long long mask_right = (1ULL << (padding_right)) - 1;
    384       *mask = mask_left ^ mask_right;
    385    }
    386    else {
    387       *mask = 0xffffffff;
    388    }
    389 
    390    *shift = padding_right;
    391 
    392    return TRUE;
    393 }
    394 
    395 
    396 /**
    397  * Compute bitmask and bit shift to apply to the framebuffer pixel values
    398  * to put the stencil bits in the least significant position.
    399  * (i.e. 0x000000ff)
    400  */
    401 static boolean
    402 get_s_shift_and_mask(const struct util_format_description *format_desc,
    403                      unsigned *shift, unsigned *mask)
    404 {
    405    unsigned s_swizzle;
    406    unsigned chan, sz;
    407 
    408    s_swizzle = format_desc->swizzle[1];
    409 
    410    if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
    411       return FALSE;
    412 
    413    *shift = 0;
    414    for (chan = 0; chan < s_swizzle; chan++)
    415       *shift += format_desc->channel[chan].size;
    416 
    417    sz = format_desc->channel[s_swizzle].size;
    418    *mask = (1U << sz) - 1U;
    419 
    420    return TRUE;
    421 }
    422 
    423 
    424 /**
    425  * Perform the occlusion test and increase the counter.
    426  * Test the depth mask. Add the number of channel which has none zero mask
    427  * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
    428  * The counter will add 4.
    429  *
    430  * \param type holds element type of the mask vector.
    431  * \param maskvalue is the depth test mask.
    432  * \param counter is a pointer of the uint32 counter.
    433  */
    434 void
    435 lp_build_occlusion_count(struct gallivm_state *gallivm,
    436                          struct lp_type type,
    437                          LLVMValueRef maskvalue,
    438                          LLVMValueRef counter)
    439 {
    440    LLVMBuilderRef builder = gallivm->builder;
    441    LLVMContextRef context = gallivm->context;
    442    LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
    443    LLVMValueRef count, newcount;
    444 
    445    assert(type.length <= 16);
    446    assert(type.floating);
    447 
    448    if(util_cpu_caps.has_sse && type.length == 4) {
    449       const char *movmskintr = "llvm.x86.sse.movmsk.ps";
    450       const char *popcntintr = "llvm.ctpop.i32";
    451       LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
    452                                            lp_build_vec_type(gallivm, type), "");
    453       bits = lp_build_intrinsic_unary(builder, movmskintr,
    454                                       LLVMInt32TypeInContext(context), bits);
    455       count = lp_build_intrinsic_unary(builder, popcntintr,
    456                                        LLVMInt32TypeInContext(context), bits);
    457    }
    458    else if(util_cpu_caps.has_avx && type.length == 8) {
    459       const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
    460       const char *popcntintr = "llvm.ctpop.i32";
    461       LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
    462                                            lp_build_vec_type(gallivm, type), "");
    463       bits = lp_build_intrinsic_unary(builder, movmskintr,
    464                                       LLVMInt32TypeInContext(context), bits);
    465       count = lp_build_intrinsic_unary(builder, popcntintr,
    466                                        LLVMInt32TypeInContext(context), bits);
    467    }
    468    else {
    469       unsigned i;
    470       LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
    471       LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
    472       LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
    473       LLVMValueRef shufflev, countd;
    474       LLVMValueRef shuffles[16];
    475       const char *popcntintr = NULL;
    476 
    477       countv = LLVMBuildBitCast(builder, countv, i8vntype, "");
    478 
    479        for (i = 0; i < type.length; i++) {
    480           shuffles[i] = lp_build_const_int32(gallivm, 4*i);
    481        }
    482 
    483        shufflev = LLVMConstVector(shuffles, type.length);
    484        countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
    485        countd = LLVMBuildBitCast(builder, countd, counttype, "countd");
    486 
    487        /*
    488         * XXX FIXME
    489         * this is bad on cpus without popcount (on x86 supported by intel
    490         * nehalem, amd barcelona, and up - not tied to sse42).
    491         * Would be much faster to just sum the 4 elements of the vector with
    492         * some horizontal add (shuffle/add/shuffle/add after the initial and).
    493         */
    494        switch (type.length) {
    495        case 4:
    496           popcntintr = "llvm.ctpop.i32";
    497           break;
    498        case 8:
    499           popcntintr = "llvm.ctpop.i64";
    500           break;
    501        case 16:
    502           popcntintr = "llvm.ctpop.i128";
    503           break;
    504        default:
    505           assert(0);
    506        }
    507        count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);
    508 
    509        if (type.length > 4) {
    510           count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
    511        }
    512    }
    513    newcount = LLVMBuildLoad(builder, counter, "origcount");
    514    newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
    515    LLVMBuildStore(builder, newcount, counter);
    516 }
    517 
    518 
    519 
    520 /**
    521  * Generate code for performing depth and/or stencil tests.
    522  * We operate on a vector of values (typically n 2x2 quads).
    523  *
    524  * \param depth  the depth test state
    525  * \param stencil  the front/back stencil state
    526  * \param z_src_type  the data type of the fragment depth/stencil values
    527  * \param format_desc  description of the depth/stencil surface
    528  * \param mask  the alive/dead pixel mask for the quad (vector)
    529  * \param stencil_refs  the front/back stencil ref values (scalar)
    530  * \param z_src  the incoming depth/stencil values (n 2x2 quad values, float32)
    531  * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
    532  * \param face  contains boolean value indicating front/back facing polygon
    533  */
    534 void
    535 lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    536                             const struct pipe_depth_state *depth,
    537                             const struct pipe_stencil_state stencil[2],
    538                             struct lp_type z_src_type,
    539                             const struct util_format_description *format_desc,
    540                             struct lp_build_mask_context *mask,
    541                             LLVMValueRef stencil_refs[2],
    542                             LLVMValueRef z_src,
    543                             LLVMValueRef zs_dst_ptr,
    544                             LLVMValueRef face,
    545                             LLVMValueRef *zs_value,
    546                             boolean do_branch)
    547 {
    548    LLVMBuilderRef builder = gallivm->builder;
    549    struct lp_type z_type;
    550    struct lp_build_context z_bld;
    551    struct lp_build_context s_bld;
    552    struct lp_type s_type;
    553    unsigned z_shift = 0, z_width = 0, z_mask = 0;
    554    LLVMValueRef zs_dst, z_dst = NULL;
    555    LLVMValueRef stencil_vals = NULL;
    556    LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
    557    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
    558    LLVMValueRef orig_mask = lp_build_mask_value(mask);
    559    LLVMValueRef front_facing = NULL;
    560 
    561 
    562    /*
    563     * Depths are expected to be between 0 and 1, even if they are stored in
    564     * floats. Setting these bits here will ensure that the lp_build_conv() call
    565     * below won't try to unnecessarily clamp the incoming values.
    566     */
    567    if(z_src_type.floating) {
    568       z_src_type.sign = FALSE;
    569       z_src_type.norm = TRUE;
    570    }
    571    else {
    572       assert(!z_src_type.sign);
    573       assert(z_src_type.norm);
    574    }
    575 
    576    /* Pick the depth type. */
    577    z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
    578 
    579    /* FIXME: Cope with a depth test type with a different bit width. */
    580    assert(z_type.width == z_src_type.width);
    581    assert(z_type.length == z_src_type.length);
    582 
    583    /* FIXME: for non-float depth/stencil might generate better code
    584     * if we'd always split it up to use 128bit operations.
    585     * For stencil we'd almost certainly want to pack to 8xi16 values,
    586     * for z just run twice.
    587     */
    588 
    589    /* Sanity checking */
    590    {
    591       const unsigned z_swizzle = format_desc->swizzle[0];
    592       const unsigned s_swizzle = format_desc->swizzle[1];
    593 
    594       assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
    595              s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
    596 
    597       assert(depth->enabled || stencil[0].enabled);
    598 
    599       assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
    600       assert(format_desc->block.width == 1);
    601       assert(format_desc->block.height == 1);
    602 
    603       if (stencil[0].enabled) {
    604          assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
    605                 format_desc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM);
    606       }
    607 
    608       assert(z_swizzle < 4);
    609       assert(format_desc->block.bits == z_type.width);
    610       if (z_type.floating) {
    611          assert(z_swizzle == 0);
    612          assert(format_desc->channel[z_swizzle].type ==
    613                 UTIL_FORMAT_TYPE_FLOAT);
    614          assert(format_desc->channel[z_swizzle].size ==
    615                 format_desc->block.bits);
    616       }
    617       else {
    618          assert(format_desc->channel[z_swizzle].type ==
    619                 UTIL_FORMAT_TYPE_UNSIGNED);
    620          assert(format_desc->channel[z_swizzle].normalized);
    621          assert(!z_type.fixed);
    622       }
    623    }
    624 
    625 
    626    /* Setup build context for Z vals */
    627    lp_build_context_init(&z_bld, gallivm, z_type);
    628 
    629    /* Setup build context for stencil vals */
    630    s_type = lp_int_type(z_type);
    631    lp_build_context_init(&s_bld, gallivm, s_type);
    632 
    633    /* Load current z/stencil value from z/stencil buffer */
    634    zs_dst_ptr = LLVMBuildBitCast(builder,
    635                                  zs_dst_ptr,
    636                                  LLVMPointerType(z_bld.vec_type, 0), "");
    637    zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
    638 
    639    lp_build_name(zs_dst, "zs_dst");
    640 
    641 
    642    /* Compute and apply the Z/stencil bitmasks and shifts.
    643     */
    644    {
    645       unsigned s_shift, s_mask;
    646 
    647       if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
    648          if (z_mask != 0xffffffff) {
    649             z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
    650          }
    651 
    652          /*
    653           * Align the framebuffer Z 's LSB to the right.
    654           */
    655          if (z_shift) {
    656             LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
    657             z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
    658          } else if (z_bitmask) {
    659 	    /* TODO: Instead of loading a mask from memory and ANDing, it's
    660 	     * probably faster to just shake the bits with two shifts. */
    661             z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
    662          } else {
    663             z_dst = zs_dst;
    664             lp_build_name(z_dst, "z_dst");
    665          }
    666       }
    667 
    668       if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
    669          if (s_shift) {
    670             LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
    671             stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
    672             stencil_shift = shift;  /* used below */
    673          }
    674          else {
    675             stencil_vals = zs_dst;
    676          }
    677 
    678          if (s_mask != 0xffffffff) {
    679             LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
    680             stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
    681          }
    682 
    683          lp_build_name(stencil_vals, "s_dst");
    684       }
    685    }
    686 
    687    if (stencil[0].enabled) {
    688 
    689       if (face) {
    690          LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
    691 
    692          /* front_facing = face != 0 ? ~0 : 0 */
    693          front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
    694          front_facing = LLVMBuildSExt(builder, front_facing,
    695                                       LLVMIntTypeInContext(gallivm->context,
    696                                              s_bld.type.length*s_bld.type.width),
    697                                       "");
    698          front_facing = LLVMBuildBitCast(builder, front_facing,
    699                                          s_bld.int_vec_type, "");
    700       }
    701 
    702       /* convert scalar stencil refs into vectors */
    703       stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
    704       stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
    705 
    706       s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
    707                                           stencil_refs, stencil_vals,
    708                                           front_facing);
    709 
    710       /* apply stencil-fail operator */
    711       {
    712          LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
    713          stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
    714                                             stencil_refs, stencil_vals,
    715                                             s_fail_mask, front_facing);
    716       }
    717    }
    718 
    719    if (depth->enabled) {
    720       /*
    721        * Convert fragment Z to the desired type, aligning the LSB to the right.
    722        */
    723 
    724       assert(z_type.width == z_src_type.width);
    725       assert(z_type.length == z_src_type.length);
    726       assert(lp_check_value(z_src_type, z_src));
    727       if (z_src_type.floating) {
    728          /*
    729           * Convert from floating point values
    730           */
    731 
    732          if (!z_type.floating) {
    733             z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
    734                                                             z_src_type,
    735                                                             z_width,
    736                                                             z_src);
    737          }
    738       } else {
    739          /*
    740           * Convert from unsigned normalized values.
    741           */
    742 
    743          assert(!z_src_type.sign);
    744          assert(!z_src_type.fixed);
    745          assert(z_src_type.norm);
    746          assert(!z_type.floating);
    747          if (z_src_type.width > z_width) {
    748             LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
    749                                                         z_src_type.width - z_width);
    750             z_src = LLVMBuildLShr(builder, z_src, shift, "");
    751          }
    752       }
    753       assert(lp_check_value(z_type, z_src));
    754 
    755       lp_build_name(z_src, "z_src");
    756 
    757       /* compare src Z to dst Z, returning 'pass' mask */
    758       z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
    759 
    760       if (!stencil[0].enabled) {
    761          /* We can potentially skip all remaining operations here, but only
    762           * if stencil is disabled because we still need to update the stencil
    763           * buffer values.  Don't need to update Z buffer values.
    764           */
    765          lp_build_mask_update(mask, z_pass);
    766 
    767          if (do_branch) {
    768             lp_build_mask_check(mask);
    769             do_branch = FALSE;
    770          }
    771       }
    772 
    773       if (depth->writemask) {
    774          LLVMValueRef zselectmask;
    775 
    776          /* mask off bits that failed Z test */
    777          zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
    778 
    779          /* mask off bits that failed stencil test */
    780          if (s_pass_mask) {
    781             zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
    782          }
    783 
    784          /* Mix the old and new Z buffer values.
    785           * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
    786           */
    787          z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
    788       }
    789 
    790       if (stencil[0].enabled) {
    791          /* update stencil buffer values according to z pass/fail result */
    792          LLVMValueRef z_fail_mask, z_pass_mask;
    793 
    794          /* apply Z-fail operator */
    795          z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
    796          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
    797                                             stencil_refs, stencil_vals,
    798                                             z_fail_mask, front_facing);
    799 
    800          /* apply Z-pass operator */
    801          z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
    802          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
    803                                             stencil_refs, stencil_vals,
    804                                             z_pass_mask, front_facing);
    805       }
    806    }
    807    else {
    808       /* No depth test: apply Z-pass operator to stencil buffer values which
    809        * passed the stencil test.
    810        */
    811       s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
    812       stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
    813                                          stencil_refs, stencil_vals,
    814                                          s_pass_mask, front_facing);
    815    }
    816 
    817    /* Put Z and stencil bits in the right place */
    818    if (z_dst && z_shift) {
    819       LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
    820       z_dst = LLVMBuildShl(builder, z_dst, shift, "");
    821    }
    822    if (stencil_vals && stencil_shift)
    823       stencil_vals = LLVMBuildShl(builder, stencil_vals,
    824                                   stencil_shift, "");
    825 
    826    /* Finally, merge/store the z/stencil values */
    827    if ((depth->enabled && depth->writemask) ||
    828        (stencil[0].enabled && stencil[0].writemask)) {
    829 
    830       if (z_dst && stencil_vals)
    831          zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
    832       else if (z_dst)
    833          zs_dst = z_dst;
    834       else
    835          zs_dst = stencil_vals;
    836 
    837       *zs_value = zs_dst;
    838    }
    839 
    840    if (s_pass_mask)
    841       lp_build_mask_update(mask, s_pass_mask);
    842 
    843    if (depth->enabled && stencil[0].enabled)
    844       lp_build_mask_update(mask, z_pass);
    845 
    846    if (do_branch)
    847       lp_build_mask_check(mask);
    848 
    849 }
    850 
    851 
    852 void
    853 lp_build_depth_write(LLVMBuilderRef builder,
    854                      const struct util_format_description *format_desc,
    855                      LLVMValueRef zs_dst_ptr,
    856                      LLVMValueRef zs_value)
    857 {
    858    zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
    859                                  LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
    860 
    861    LLVMBuildStore(builder, zs_value, zs_dst_ptr);
    862 }
    863 
    864 
    865 void
    866 lp_build_deferred_depth_write(struct gallivm_state *gallivm,
    867                               struct lp_type z_src_type,
    868                               const struct util_format_description *format_desc,
    869                               struct lp_build_mask_context *mask,
    870                               LLVMValueRef zs_dst_ptr,
    871                               LLVMValueRef zs_value)
    872 {
    873    struct lp_type z_type;
    874    struct lp_build_context z_bld;
    875    LLVMValueRef z_dst;
    876    LLVMBuilderRef builder = gallivm->builder;
    877 
    878    /* XXX: pointlessly redo type logic:
    879     */
    880    z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
    881    lp_build_context_init(&z_bld, gallivm, z_type);
    882 
    883    zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
    884                                  LLVMPointerType(z_bld.vec_type, 0), "");
    885 
    886    z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
    887    z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst);
    888 
    889    LLVMBuildStore(builder, z_dst, zs_dst_ptr);
    890 }
    891