Home | History | Annotate | Download | only in llvmpipe
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
      5  * All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the
      9  * "Software"), to deal in the Software without restriction, including
     10  * without limitation the rights to use, copy, modify, merge, publish,
     11  * distribute, sub license, and/or sell copies of the Software, and to
     12  * permit persons to whom the Software is furnished to do so, subject to
     13  * the following conditions:
     14  *
     15  * The above copyright notice and this permission notice (including the
     16  * next paragraph) shall be included in all copies or substantial portions
     17  * of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
     23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  **************************************************************************/
     28 
     29 /**
     30  * @file
     31  * Code generation for the whole fragment pipeline.
     32  *
     33  * The fragment pipeline consists of the following stages:
     34  * - early depth test
     35  * - fragment shader
     36  * - alpha test
     37  * - depth/stencil test
     38  * - blending
     39  *
     40  * This file has only the glue to assemble the fragment pipeline.  The actual
     41  * plumbing of converting Gallium state into LLVM IR is done elsewhere, in the
     42  * lp_bld_*.[ch] files, and in a completely generic and reusable way. Here we
     43  * muster the LLVM JIT execution engine to create a function that follows an
     44  * established binary interface and that can be called from C directly.
     45  *
     46  * A big source of complexity here is that we often want to run different
     47  * stages with different precisions and data types. For example,
     48  * the fragment shader typically needs to be done in floats, but the
     49  * depth/stencil test and blending are better done in the types that most
     50  * closely match the depth/stencil and color buffers respectively.
     51  *
     52  * Since the width of a SIMD vector register stays the same regardless of the
     53  * element type, different types imply different number of elements, so we must
     54  * code generate more instances of the stages with larger types to be able to
     55  * feed/consume the stages with smaller types.
     56  *
     57  * @author Jose Fonseca <jfonseca (at) vmware.com>
     58  */
     59 
     60 #include <limits.h>
     61 #include "pipe/p_defines.h"
     62 #include "util/u_inlines.h"
     63 #include "util/u_memory.h"
     64 #include "util/u_pointer.h"
     65 #include "util/u_format.h"
     66 #include "util/u_dump.h"
     67 #include "util/u_string.h"
     68 #include "util/u_simple_list.h"
     69 #include "os/os_time.h"
     70 #include "pipe/p_shader_tokens.h"
     71 #include "draw/draw_context.h"
     72 #include "tgsi/tgsi_dump.h"
     73 #include "tgsi/tgsi_scan.h"
     74 #include "tgsi/tgsi_parse.h"
     75 #include "gallivm/lp_bld_type.h"
     76 #include "gallivm/lp_bld_const.h"
     77 #include "gallivm/lp_bld_conv.h"
     78 #include "gallivm/lp_bld_init.h"
     79 #include "gallivm/lp_bld_intr.h"
     80 #include "gallivm/lp_bld_logic.h"
     81 #include "gallivm/lp_bld_tgsi.h"
     82 #include "gallivm/lp_bld_swizzle.h"
     83 #include "gallivm/lp_bld_flow.h"
     84 #include "gallivm/lp_bld_debug.h"
     85 
     86 #include "lp_bld_alpha.h"
     87 #include "lp_bld_blend.h"
     88 #include "lp_bld_depth.h"
     89 #include "lp_bld_interp.h"
     90 #include "lp_context.h"
     91 #include "lp_debug.h"
     92 #include "lp_perf.h"
     93 #include "lp_setup.h"
     94 #include "lp_state.h"
     95 #include "lp_tex_sample.h"
     96 #include "lp_flush.h"
     97 #include "lp_state_fs.h"
     98 
     99 
    100 /** Fragment shader number (for debugging) */
    101 static unsigned fs_no = 0;
    102 
    103 
    104 /**
    105  * Expand the relevant bits of mask_input to a n*4-dword mask for the
    106  * n*four pixels in n 2x2 quads.  This will set the n*four elements of the
    107  * quad mask vector to 0 or ~0.
    108  * Grouping is 01, 23 for 2 quad mode hence only 0 and 2 are valid
    109  * quad arguments with fs length 8.
    110  *
    111  * \param first_quad  which quad(s) of the quad group to test, in [0,3]
    112  * \param mask_input  bitwise mask for the whole 4x4 stamp
    113  */
    114 static LLVMValueRef
    115 generate_quad_mask(struct gallivm_state *gallivm,
    116                    struct lp_type fs_type,
    117                    unsigned first_quad,
    118                    LLVMValueRef mask_input) /* int32 */
    119 {
    120    LLVMBuilderRef builder = gallivm->builder;
    121    struct lp_type mask_type;
    122    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
    123    LLVMValueRef bits[16];
    124    LLVMValueRef mask;
    125    int shift, i;
    126 
    127    /*
    128     * XXX: We'll need a different path for 16 x u8
    129     */
    130    assert(fs_type.width == 32);
    131    assert(fs_type.length <= Elements(bits));
    132    mask_type = lp_int_type(fs_type);
    133 
    134    /*
    135     * mask_input >>= (quad * 4)
    136     */
    137    switch (first_quad) {
    138    case 0:
    139       shift = 0;
    140       break;
    141    case 1:
    142       assert(fs_type.length == 4);
    143       shift = 2;
    144       break;
    145    case 2:
    146       shift = 8;
    147       break;
    148    case 3:
    149       assert(fs_type.length == 4);
    150       shift = 10;
    151       break;
    152    default:
    153       assert(0);
    154       shift = 0;
    155    }
    156 
    157    mask_input = LLVMBuildLShr(builder,
    158                               mask_input,
    159                               LLVMConstInt(i32t, shift, 0),
    160                               "");
    161 
    162    /*
    163     * mask = { mask_input & (1 << i), for i in [0,3] }
    164     */
    165    mask = lp_build_broadcast(gallivm,
    166                              lp_build_vec_type(gallivm, mask_type),
    167                              mask_input);
    168 
    169    for (i = 0; i < fs_type.length / 4; i++) {
    170       unsigned j = 2 * (i % 2) + (i / 2) * 8;
    171       bits[4*i + 0] = LLVMConstInt(i32t, 1 << (j + 0), 0);
    172       bits[4*i + 1] = LLVMConstInt(i32t, 1 << (j + 1), 0);
    173       bits[4*i + 2] = LLVMConstInt(i32t, 1 << (j + 4), 0);
    174       bits[4*i + 3] = LLVMConstInt(i32t, 1 << (j + 5), 0);
    175    }
    176    mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, fs_type.length), "");
    177 
    178    /*
    179     * mask = mask != 0 ? ~0 : 0
    180     */
    181    mask = lp_build_compare(gallivm,
    182                            mask_type, PIPE_FUNC_NOTEQUAL,
    183                            mask,
    184                            lp_build_const_int_vec(gallivm, mask_type, 0));
    185 
    186    return mask;
    187 }
    188 
    189 
    190 #define EARLY_DEPTH_TEST  0x1
    191 #define LATE_DEPTH_TEST   0x2
    192 #define EARLY_DEPTH_WRITE 0x4
    193 #define LATE_DEPTH_WRITE  0x8
    194 
    195 static int
    196 find_output_by_semantic( const struct tgsi_shader_info *info,
    197 			 unsigned semantic,
    198 			 unsigned index )
    199 {
    200    int i;
    201 
    202    for (i = 0; i < info->num_outputs; i++)
    203       if (info->output_semantic_name[i] == semantic &&
    204 	  info->output_semantic_index[i] == index)
    205 	 return i;
    206 
    207    return -1;
    208 }
    209 
    210 
    211 /**
    212  * Generate the fragment shader, depth/stencil test, and alpha tests.
    213  * \param i  which quad in the tile, in range [0,3]
    214  * \param partial_mask  if 1, do mask_input testing
    215  */
    216 static void
    217 generate_fs(struct gallivm_state *gallivm,
    218             struct lp_fragment_shader *shader,
    219             const struct lp_fragment_shader_variant_key *key,
    220             LLVMBuilderRef builder,
    221             struct lp_type type,
    222             LLVMValueRef context_ptr,
    223             unsigned i,
    224             struct lp_build_interp_soa_context *interp,
    225             struct lp_build_sampler_soa *sampler,
    226             LLVMValueRef *pmask,
    227             LLVMValueRef (*color)[4],
    228             LLVMValueRef depth_ptr,
    229             LLVMValueRef facing,
    230             unsigned partial_mask,
    231             LLVMValueRef mask_input,
    232             LLVMValueRef counter)
    233 {
    234    const struct util_format_description *zs_format_desc = NULL;
    235    const struct tgsi_token *tokens = shader->base.tokens;
    236    LLVMTypeRef vec_type;
    237    LLVMValueRef consts_ptr;
    238    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
    239    LLVMValueRef z;
    240    LLVMValueRef zs_value = NULL;
    241    LLVMValueRef stencil_refs[2];
    242    struct lp_build_mask_context mask;
    243    boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
    244                             shader->info.base.num_inputs < 3 &&
    245                             shader->info.base.num_instructions < 8);
    246    unsigned attrib;
    247    unsigned chan;
    248    unsigned cbuf;
    249    unsigned depth_mode;
    250    struct lp_bld_tgsi_system_values system_values;
    251 
    252    memset(&system_values, 0, sizeof(system_values));
    253 
    254    if (key->depth.enabled ||
    255        key->stencil[0].enabled ||
    256        key->stencil[1].enabled) {
    257 
    258       zs_format_desc = util_format_description(key->zsbuf_format);
    259       assert(zs_format_desc);
    260 
    261       if (!shader->info.base.writes_z) {
    262          if (key->alpha.enabled || shader->info.base.uses_kill)
    263             /* With alpha test and kill, can do the depth test early
    264              * and hopefully eliminate some quads.  But need to do a
    265              * special deferred depth write once the final mask value
    266              * is known.
    267              */
    268             depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
    269          else
    270             depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
    271       }
    272       else {
    273          depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
    274       }
    275 
    276       if (!(key->depth.enabled && key->depth.writemask) &&
    277           !(key->stencil[0].enabled && key->stencil[0].writemask))
    278          depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
    279    }
    280    else {
    281       depth_mode = 0;
    282    }
    283 
    284    assert(i < 4);
    285 
    286    stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr);
    287    stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr);
    288 
    289    vec_type = lp_build_vec_type(gallivm, type);
    290 
    291    consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
    292 
    293    memset(outputs, 0, sizeof outputs);
    294 
    295    /* Declare the color and z variables */
    296    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
    297       for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
    298          color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color");
    299       }
    300    }
    301 
    302    /* do triangle edge testing */
    303    if (partial_mask) {
    304       *pmask = generate_quad_mask(gallivm, type,
    305                                   i*type.length/4, mask_input);
    306    }
    307    else {
    308       *pmask = lp_build_const_int_vec(gallivm, type, ~0);
    309    }
    310 
    311    /* 'mask' will control execution based on quad's pixel alive/killed state */
    312    lp_build_mask_begin(&mask, gallivm, type, *pmask);
    313 
    314    if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
    315       lp_build_mask_check(&mask);
    316 
    317    lp_build_interp_soa_update_pos(interp, gallivm, i*type.length/4);
    318    z = interp->pos[2];
    319 
    320    if (depth_mode & EARLY_DEPTH_TEST) {
    321       lp_build_depth_stencil_test(gallivm,
    322                                   &key->depth,
    323                                   key->stencil,
    324                                   type,
    325                                   zs_format_desc,
    326                                   &mask,
    327                                   stencil_refs,
    328                                   z,
    329                                   depth_ptr, facing,
    330                                   &zs_value,
    331                                   !simple_shader);
    332 
    333       if (depth_mode & EARLY_DEPTH_WRITE) {
    334          lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
    335       }
    336    }
    337 
    338    lp_build_interp_soa_update_inputs(interp, gallivm, i*type.length/4);
    339 
    340    /* Build the actual shader */
    341    lp_build_tgsi_soa(gallivm, tokens, type, &mask,
    342                      consts_ptr, &system_values,
    343                      interp->pos, interp->inputs,
    344                      outputs, sampler, &shader->info.base);
    345 
    346    /* Alpha test */
    347    if (key->alpha.enabled) {
    348       int color0 = find_output_by_semantic(&shader->info.base,
    349                                            TGSI_SEMANTIC_COLOR,
    350                                            0);
    351 
    352       if (color0 != -1 && outputs[color0][3]) {
    353          const struct util_format_description *cbuf_format_desc;
    354          LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
    355          LLVMValueRef alpha_ref_value;
    356 
    357          alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr);
    358          alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value);
    359 
    360          cbuf_format_desc = util_format_description(key->cbuf_format[0]);
    361 
    362          lp_build_alpha_test(gallivm, key->alpha.func, type, cbuf_format_desc,
    363                              &mask, alpha, alpha_ref_value,
    364                              (depth_mode & LATE_DEPTH_TEST) != 0);
    365       }
    366    }
    367 
    368    /* Late Z test */
    369    if (depth_mode & LATE_DEPTH_TEST) {
    370       int pos0 = find_output_by_semantic(&shader->info.base,
    371                                          TGSI_SEMANTIC_POSITION,
    372                                          0);
    373 
    374       if (pos0 != -1 && outputs[pos0][2]) {
    375          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
    376       }
    377 
    378       lp_build_depth_stencil_test(gallivm,
    379                                   &key->depth,
    380                                   key->stencil,
    381                                   type,
    382                                   zs_format_desc,
    383                                   &mask,
    384                                   stencil_refs,
    385                                   z,
    386                                   depth_ptr, facing,
    387                                   &zs_value,
    388                                   !simple_shader);
    389       /* Late Z write */
    390       if (depth_mode & LATE_DEPTH_WRITE) {
    391          lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value);
    392       }
    393    }
    394    else if ((depth_mode & EARLY_DEPTH_TEST) &&
    395             (depth_mode & LATE_DEPTH_WRITE))
    396    {
    397       /* Need to apply a reduced mask to the depth write.  Reload the
    398        * depth value, update from zs_value with the new mask value and
    399        * write that out.
    400        */
    401       lp_build_deferred_depth_write(gallivm,
    402                                     type,
    403                                     zs_format_desc,
    404                                     &mask,
    405                                     depth_ptr,
    406                                     zs_value);
    407    }
    408 
    409 
    410    /* Color write  */
    411    for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
    412    {
    413       if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
    414           shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
    415       {
    416          unsigned cbuf = shader->info.base.output_semantic_index[attrib];
    417          for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
    418             if(outputs[attrib][chan]) {
    419                /* XXX: just initialize outputs to point at colors[] and
    420                 * skip this.
    421                 */
    422                LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
    423                lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
    424                LLVMBuildStore(builder, out, color[cbuf][chan]);
    425             }
    426          }
    427       }
    428    }
    429 
    430    if (counter)
    431       lp_build_occlusion_count(gallivm, type,
    432                                lp_build_mask_value(&mask), counter);
    433 
    434    *pmask = lp_build_mask_end(&mask);
    435 }
    436 
    437 
    438 /**
    439  * Generate the fragment shader, depth/stencil test, and alpha tests.
    440  */
    441 static void
    442 generate_fs_loop(struct gallivm_state *gallivm,
    443                  struct lp_fragment_shader *shader,
    444                  const struct lp_fragment_shader_variant_key *key,
    445                  LLVMBuilderRef builder,
    446                  struct lp_type type,
    447                  LLVMValueRef context_ptr,
    448                  LLVMValueRef num_loop,
    449                  struct lp_build_interp_soa_context *interp,
    450                  struct lp_build_sampler_soa *sampler,
    451                  LLVMValueRef mask_store,
    452                  LLVMValueRef (*out_color)[4],
    453                  LLVMValueRef depth_ptr,
    454                  unsigned depth_bits,
    455                  LLVMValueRef facing,
    456                  LLVMValueRef counter)
    457 {
    458    const struct util_format_description *zs_format_desc = NULL;
    459    const struct tgsi_token *tokens = shader->base.tokens;
    460    LLVMTypeRef vec_type;
    461    LLVMValueRef mask_ptr, mask_val;
    462    LLVMValueRef consts_ptr;
    463    LLVMValueRef z;
    464    LLVMValueRef zs_value = NULL;
    465    LLVMValueRef stencil_refs[2];
    466    LLVMValueRef depth_ptr_i;
    467    LLVMValueRef depth_offset;
    468    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
    469    struct lp_build_for_loop_state loop_state;
    470    struct lp_build_mask_context mask;
    471    boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
    472                             shader->info.base.num_inputs < 3 &&
    473                             shader->info.base.num_instructions < 8);
    474    unsigned attrib;
    475    unsigned chan;
    476    unsigned cbuf;
    477    unsigned depth_mode;
    478 
    479    struct lp_bld_tgsi_system_values system_values;
    480 
    481    memset(&system_values, 0, sizeof(system_values));
    482 
    483    if (key->depth.enabled ||
    484        key->stencil[0].enabled ||
    485        key->stencil[1].enabled) {
    486 
    487       zs_format_desc = util_format_description(key->zsbuf_format);
    488       assert(zs_format_desc);
    489 
    490       if (!shader->info.base.writes_z) {
    491          if (key->alpha.enabled || shader->info.base.uses_kill)
    492             /* With alpha test and kill, can do the depth test early
    493              * and hopefully eliminate some quads.  But need to do a
    494              * special deferred depth write once the final mask value
    495              * is known.
    496              */
    497             depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
    498          else
    499             depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
    500       }
    501       else {
    502          depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
    503       }
    504 
    505       if (!(key->depth.enabled && key->depth.writemask) &&
    506           !(key->stencil[0].enabled && key->stencil[0].writemask))
    507          depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
    508    }
    509    else {
    510       depth_mode = 0;
    511    }
    512 
    513 
    514    stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr);
    515    stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr);
    516 
    517    vec_type = lp_build_vec_type(gallivm, type);
    518 
    519    consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
    520 
    521    lp_build_for_loop_begin(&loop_state, gallivm,
    522                            lp_build_const_int32(gallivm, 0),
    523                            LLVMIntULT,
    524                            num_loop,
    525                            lp_build_const_int32(gallivm, 1));
    526 
    527    mask_ptr = LLVMBuildGEP(builder, mask_store,
    528                            &loop_state.counter, 1, "mask_ptr");
    529    mask_val = LLVMBuildLoad(builder, mask_ptr, "");
    530 
    531    depth_offset = LLVMBuildMul(builder, loop_state.counter,
    532                                lp_build_const_int32(gallivm, depth_bits * type.length),
    533                                "");
    534 
    535    depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
    536 
    537    memset(outputs, 0, sizeof outputs);
    538 
    539    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
    540       for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
    541          out_color[cbuf][chan] = lp_build_array_alloca(gallivm,
    542                                                        lp_build_vec_type(gallivm,
    543                                                                          type),
    544                                                        num_loop, "color");
    545       }
    546    }
    547 
    548 
    549 
    550    /* 'mask' will control execution based on quad's pixel alive/killed state */
    551    lp_build_mask_begin(&mask, gallivm, type, mask_val);
    552 
    553    if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
    554       lp_build_mask_check(&mask);
    555 
    556    lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter);
    557    z = interp->pos[2];
    558 
    559    if (depth_mode & EARLY_DEPTH_TEST) {
    560       lp_build_depth_stencil_test(gallivm,
    561                                   &key->depth,
    562                                   key->stencil,
    563                                   type,
    564                                   zs_format_desc,
    565                                   &mask,
    566                                   stencil_refs,
    567                                   z,
    568                                   depth_ptr_i, facing,
    569                                   &zs_value,
    570                                   !simple_shader);
    571 
    572       if (depth_mode & EARLY_DEPTH_WRITE) {
    573          lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value);
    574       }
    575    }
    576 
    577    lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter);
    578 
    579    /* Build the actual shader */
    580    lp_build_tgsi_soa(gallivm, tokens, type, &mask,
    581                      consts_ptr, &system_values,
    582                      interp->pos, interp->inputs,
    583                      outputs, sampler, &shader->info.base);
    584 
    585    /* Alpha test */
    586    if (key->alpha.enabled) {
    587       int color0 = find_output_by_semantic(&shader->info.base,
    588                                            TGSI_SEMANTIC_COLOR,
    589                                            0);
    590 
    591       if (color0 != -1 && outputs[color0][3]) {
    592          const struct util_format_description *cbuf_format_desc;
    593          LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
    594          LLVMValueRef alpha_ref_value;
    595 
    596          alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr);
    597          alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value);
    598 
    599          cbuf_format_desc = util_format_description(key->cbuf_format[0]);
    600 
    601          lp_build_alpha_test(gallivm, key->alpha.func, type, cbuf_format_desc,
    602                              &mask, alpha, alpha_ref_value,
    603                              (depth_mode & LATE_DEPTH_TEST) != 0);
    604       }
    605    }
    606 
    607    /* Late Z test */
    608    if (depth_mode & LATE_DEPTH_TEST) {
    609       int pos0 = find_output_by_semantic(&shader->info.base,
    610                                          TGSI_SEMANTIC_POSITION,
    611                                          0);
    612 
    613       if (pos0 != -1 && outputs[pos0][2]) {
    614          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
    615       }
    616 
    617       lp_build_depth_stencil_test(gallivm,
    618                                   &key->depth,
    619                                   key->stencil,
    620                                   type,
    621                                   zs_format_desc,
    622                                   &mask,
    623                                   stencil_refs,
    624                                   z,
    625                                   depth_ptr_i, facing,
    626                                   &zs_value,
    627                                   !simple_shader);
    628       /* Late Z write */
    629       if (depth_mode & LATE_DEPTH_WRITE) {
    630          lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value);
    631       }
    632    }
    633    else if ((depth_mode & EARLY_DEPTH_TEST) &&
    634             (depth_mode & LATE_DEPTH_WRITE))
    635    {
    636       /* Need to apply a reduced mask to the depth write.  Reload the
    637        * depth value, update from zs_value with the new mask value and
    638        * write that out.
    639        */
    640       lp_build_deferred_depth_write(gallivm,
    641                                     type,
    642                                     zs_format_desc,
    643                                     &mask,
    644                                     depth_ptr_i,
    645                                     zs_value);
    646    }
    647 
    648 
    649    /* Color write  */
    650    for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
    651    {
    652       if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
    653           shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
    654       {
    655          unsigned cbuf = shader->info.base.output_semantic_index[attrib];
    656          for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
    657             if(outputs[attrib][chan]) {
    658                /* XXX: just initialize outputs to point at colors[] and
    659                 * skip this.
    660                 */
    661                LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
    662                LLVMValueRef color_ptr;
    663                color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
    664                                         &loop_state.counter, 1, "");
    665                lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
    666                LLVMBuildStore(builder, out, color_ptr);
    667             }
    668          }
    669       }
    670    }
    671 
    672    if (key->occlusion_count) {
    673       lp_build_name(counter, "counter");
    674       lp_build_occlusion_count(gallivm, type,
    675                                lp_build_mask_value(&mask), counter);
    676    }
    677 
    678    mask_val = lp_build_mask_end(&mask);
    679    LLVMBuildStore(builder, mask_val, mask_ptr);
    680    lp_build_for_loop_end(&loop_state);
    681 }
    682 
    683 
    684 /**
    685  * Generate color blending and color output.
    686  * \param rt  the render target index (to index blend, colormask state)
    687  * \param type  the pixel color type
    688  * \param context_ptr  pointer to the runtime JIT context
    689  * \param mask  execution mask (active fragment/pixel mask)
    690  * \param src  colors from the fragment shader
    691  * \param dst_ptr  the destination color buffer pointer
    692  */
    693 static void
    694 generate_blend(struct gallivm_state *gallivm,
    695                const struct pipe_blend_state *blend,
    696                unsigned rt,
    697                LLVMBuilderRef builder,
    698                struct lp_type type,
    699                LLVMValueRef context_ptr,
    700                LLVMValueRef mask,
    701                LLVMValueRef *src,
    702                LLVMValueRef dst_ptr,
    703                boolean do_branch)
    704 {
    705    struct lp_build_context bld;
    706    struct lp_build_mask_context mask_ctx;
    707    LLVMTypeRef vec_type;
    708    LLVMValueRef const_ptr;
    709    LLVMValueRef con[4];
    710    LLVMValueRef dst[4];
    711    LLVMValueRef res[4];
    712    unsigned chan;
    713 
    714    lp_build_context_init(&bld, gallivm, type);
    715 
    716    lp_build_mask_begin(&mask_ctx, gallivm, type, mask);
    717    if (do_branch)
    718       lp_build_mask_check(&mask_ctx);
    719 
    720    vec_type = lp_build_vec_type(gallivm, type);
    721 
    722    const_ptr = lp_jit_context_blend_color(gallivm, context_ptr);
    723    const_ptr = LLVMBuildBitCast(builder, const_ptr,
    724                                 LLVMPointerType(vec_type, 0), "");
    725 
    726    /* load constant blend color and colors from the dest color buffer */
    727    for(chan = 0; chan < 4; ++chan) {
    728       LLVMValueRef index = lp_build_const_int32(gallivm, chan);
    729       con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
    730 
    731       dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
    732 
    733       lp_build_name(con[chan], "con.%c", "rgba"[chan]);
    734       lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
    735    }
    736 
    737    /* do blend */
    738    lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);
    739 
    740    /* store results to color buffer */
    741    for(chan = 0; chan < 4; ++chan) {
    742       if(blend->rt[rt].colormask & (1 << chan)) {
    743          LLVMValueRef index = lp_build_const_int32(gallivm, chan);
    744          lp_build_name(res[chan], "res.%c", "rgba"[chan]);
    745          res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
    746          LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
    747       }
    748    }
    749 
    750    lp_build_mask_end(&mask_ctx);
    751 }
    752 
    753 
/**
 * Generate the runtime callable function for the whole fragment pipeline.
 * Note that the function which we generate operates on a block of 16
 * pixels at a time.  The block contains 2x2 quads.  Each quad contains
 * 2x2 pixels.
 *
 * The generated LLVM function is named "fs<N>_variant<M>_partial" or
 * "fs<N>_variant<M>_whole" (depending on \p partial_mask), is added to the
 * variant's gallivm module and is recorded in
 * variant->function[partial_mask].  The number of instructions generated
 * is added to variant->nr_instrs.
 *
 * \param lp  the llvmpipe context (not referenced in this function)
 * \param shader  the fragment shader; supplies the inputs, TGSI info and
 *                the shader number used in the function name
 * \param variant  the variant being built; supplies the gallivm state and
 *                 the variant key, and receives the generated function
 * \param partial_mask  non-zero to derive the per-quad execution masks from
 *                      the function's mask_input argument, zero to start
 *                      with all-ones masks; also used as the index into
 *                      variant->function[]
 */
static void
generate_fragment(struct llvmpipe_context *lp,
                  struct lp_fragment_shader *shader,
                  struct lp_fragment_shader_variant *variant,
                  unsigned partial_mask)
{
   struct gallivm_state *gallivm = variant->gallivm;
   const struct lp_fragment_shader_variant_key *key = &variant->key;
   struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
   char func_name[256];
   struct lp_type fs_type;
   struct lp_type blend_type;
   LLVMTypeRef fs_elem_type;
   LLVMTypeRef blend_vec_type;
   LLVMTypeRef arg_types[11];
   LLVMTypeRef func_type;
   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
   LLVMValueRef context_ptr;
   LLVMValueRef x;
   LLVMValueRef y;
   LLVMValueRef a0_ptr;
   LLVMValueRef dadx_ptr;
   LLVMValueRef dady_ptr;
   LLVMValueRef color_ptr_ptr;
   LLVMValueRef depth_ptr;
   LLVMValueRef mask_input;
   LLVMValueRef counter = NULL;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   struct lp_build_sampler_soa *sampler;
   struct lp_build_interp_soa_context interp;
   LLVMValueRef fs_mask[16 / 4];
   LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
   LLVMValueRef blend_mask;
   LLVMValueRef function;
   LLVMValueRef facing;
   const struct util_format_description *zs_format_desc;
   unsigned num_fs;
   unsigned i;
   unsigned chan;
   unsigned cbuf;
   boolean cbuf0_write_all;
   /* Hard-coded to TRUE, so the non-loop code path below is never taken. */
   boolean try_loop = TRUE;

   /* fs_mask[] and fs_out_color[][][] have 16 / 4 slots; with at least 4
    * elements per vector, num_fs (computed below) cannot exceed that.
    */
   assert(lp_native_vector_width / 32 >= 4);

   /* Adjust color input interpolation according to flatshade state:
    */
   memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]);
   for (i = 0; i < shader->info.base.num_inputs; i++) {
      if (inputs[i].interp == LP_INTERP_COLOR) {
	 if (key->flatshade)
	    inputs[i].interp = LP_INTERP_CONSTANT;
	 else
	    inputs[i].interp = LP_INTERP_PERSPECTIVE;
      }
   }

   /* check if writes to cbuf[0] are to be copied to all cbufs */
   cbuf0_write_all = FALSE;
   for (i = 0;i < shader->info.base.num_properties; i++) {
      if (shader->info.base.properties[i].name ==
          TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
         cbuf0_write_all = TRUE;
         break;
      }
   }

   /* TODO: actually pick these based on the fs and color buffer
    * characteristics. */

   memset(&fs_type, 0, sizeof fs_type);
   fs_type.floating = TRUE;      /* floating point values */
   fs_type.sign = TRUE;          /* values are signed */
   fs_type.norm = FALSE;         /* values are not limited to [0,1] or [-1,1] */
   fs_type.width = 32;           /* 32-bit float */
   fs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
   num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */

   memset(&blend_type, 0, sizeof blend_type);
   blend_type.floating = FALSE; /* values are integers */
   blend_type.sign = FALSE;     /* values are unsigned */
   blend_type.norm = TRUE;      /* values are in [0,1] or [-1,1] */
   blend_type.width = 8;        /* 8-bit ubyte values */
   blend_type.length = 16;      /* 16 elements per vector */

   /*
    * Generate the function prototype. Any change here must be reflected in
    * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa.
    */

   fs_elem_type = lp_build_elem_type(gallivm, fs_type);

   blend_vec_type = lp_build_vec_type(gallivm, blend_type);

   util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
		 shader->no, variant->no, partial_mask ? "partial" : "whole");

   arg_types[0] = variant->jit_context_ptr_type;       /* context */
   arg_types[1] = int32_type;                          /* x */
   arg_types[2] = int32_type;                          /* y */
   arg_types[3] = int32_type;                          /* facing */
   arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
   arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
   arg_types[6] = LLVMPointerType(fs_elem_type, 0);    /* dady */
   arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0);  /* color */
   arg_types[8] = LLVMPointerType(int8_type, 0);       /* depth */
   arg_types[9] = int32_type;                          /* mask_input */
   arg_types[10] = LLVMPointerType(int32_type, 0);     /* counter */

   func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
                                arg_types, Elements(arg_types), 0);

   function = LLVMAddFunction(gallivm->module, func_name, func_type);
   LLVMSetFunctionCallConv(function, LLVMCCallConv);

   variant->function[partial_mask] = function;

   /* XXX: need to propagate noalias down into color param now we are
    * passing a pointer-to-pointer?
    */
   /* Every pointer-typed argument is marked noalias. */
   for(i = 0; i < Elements(arg_types); ++i)
      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
         LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);

   context_ptr  = LLVMGetParam(function, 0);
   x            = LLVMGetParam(function, 1);
   y            = LLVMGetParam(function, 2);
   facing       = LLVMGetParam(function, 3);
   a0_ptr       = LLVMGetParam(function, 4);
   dadx_ptr     = LLVMGetParam(function, 5);
   dady_ptr     = LLVMGetParam(function, 6);
   color_ptr_ptr = LLVMGetParam(function, 7);
   depth_ptr    = LLVMGetParam(function, 8);
   mask_input   = LLVMGetParam(function, 9);

   lp_build_name(context_ptr, "context");
   lp_build_name(x, "x");
   lp_build_name(y, "y");
   lp_build_name(a0_ptr, "a0");
   lp_build_name(dadx_ptr, "dadx");
   lp_build_name(dady_ptr, "dady");
   lp_build_name(color_ptr_ptr, "color_ptr_ptr");
   lp_build_name(depth_ptr, "depth");
   lp_build_name(mask_input, "mask_input");

   /* The counter argument is always part of the prototype, but it is only
    * fetched (and counter left non-NULL) when occlusion counting is on.
    */
   if (key->occlusion_count) {
      counter = LLVMGetParam(function, 10);
      lp_build_name(counter, "counter");
   }

   /*
    * Function body
    */

   block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
   builder = gallivm->builder;
   assert(builder);
   LLVMPositionBuilderAtEnd(builder, block);

   /* code generated texture sampling */
   sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);

   zs_format_desc = util_format_description(key->zsbuf_format);

   if (!try_loop) {
      /*
       * The shader input interpolation info is not explicitly baked in the
       * shader key, but everything it derives from (TGSI, and flatshade) is
       * already included in the shader key.
       */
      lp_build_interp_soa_init(&interp,
                               gallivm,
                               shader->info.base.num_inputs,
                               inputs,
                               builder, fs_type,
                               FALSE,
                               a0_ptr, dadx_ptr, dady_ptr,
                               x, y);

      /* loop over quads in the block */
      for(i = 0; i < num_fs; ++i) {
         LLVMValueRef depth_offset = LLVMConstInt(int32_type,
                                                  i*fs_type.length*zs_format_desc->block.bits/8,
                                                  0);
         LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
         LLVMValueRef depth_ptr_i;

         depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");

         generate_fs(gallivm,
                     shader, key,
                     builder,
                     fs_type,
                     context_ptr,
                     i,
                     &interp,
                     sampler,
                     &fs_mask[i], /* output */
                     out_color,
                     depth_ptr_i,
                     facing,
                     partial_mask,
                     mask_input,
                     counter);

         /* When cbuf0_write_all is set the source index collapses to 0,
          * i.e. color output 0 is used for every color buffer.
          */
         for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
               fs_out_color[cbuf][chan][i] =
                  out_color[cbuf * !cbuf0_write_all][chan];
      }
   }
   else {
      /* NOTE: despite the name this is block.bits / 8 -- presumably the
       * size of one depth/stencil element in bytes.
       */
      unsigned depth_bits = zs_format_desc->block.bits/8;
      LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
      LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
      LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
                                                      num_loop, "mask_store");
      LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];

      /*
       * The shader input interpolation info is not explicitly baked in the
       * shader key, but everything it derives from (TGSI, and flatshade) is
       * already included in the shader key.
       */
      lp_build_interp_soa_init(&interp,
                               gallivm,
                               shader->info.base.num_inputs,
                               inputs,
                               builder, fs_type,
                               TRUE,
                               a0_ptr, dadx_ptr, dady_ptr,
                               x, y);

      /* Seed mask_store[] with the initial execution mask of each
       * iteration: derived from mask_input for partial blocks, all ones
       * otherwise.
       */
      for (i = 0; i < num_fs; i++) {
         LLVMValueRef mask;
         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
         LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
                                              &indexi, 1, "mask_ptr");

         if (partial_mask) {
            mask = generate_quad_mask(gallivm, fs_type,
                                      i*fs_type.length/4, mask_input);
         }
         else {
            mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
         }
         LLVMBuildStore(builder, mask, mask_ptr);
      }

      generate_fs_loop(gallivm,
                       shader, key,
                       builder,
                       fs_type,
                       context_ptr,
                       num_loop,
                       &interp,
                       sampler,
                       mask_store, /* output */
                       color_store,
                       depth_ptr,
                       depth_bits,
                       facing,
                       counter);

      /* Read the masks back from mask_store[] and record, per iteration,
       * a pointer to each color channel's slot in color_store.
       */
      for (i = 0; i < num_fs; i++) {
         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
         LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store,
                                         &indexi, 1, "");
         fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask");
         /* XXX: this is awkward; things need to be reorganized.
          * fs_out_color[][][] holds pointers here; they are loaded in the
          * blending loop below.
          */
         for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
               ptr = LLVMBuildGEP(builder,
                                  color_store[cbuf * !cbuf0_write_all][chan],
                                  &indexi, 1, "");
               fs_out_color[cbuf][chan][i] = ptr;
            }
         }
      }
   }

   sampler->destroy(sampler);

   /* Loop over color outputs / color buffers to do blending.
    */
   for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
      LLVMValueRef color_ptr;
      LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
      LLVMValueRef blend_in_color[TGSI_NUM_CHANNELS];
      unsigned rt;

      /*
       * Convert the fs's output color and mask to fit to the blending type.
       */
      for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH];

         for (i = 0; i < num_fs; i++) {
            fs_color_vals[i] =
               LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals");
         }

         /* Pack the num_fs fs_type vectors into one blend_type vector. */
         lp_build_conv(gallivm, fs_type, blend_type,
                       fs_color_vals,
                       num_fs,
                       &blend_in_color[chan], 1);

         lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
      }

      /* Opaque whole-block variants use a constant all-ones blend mask;
       * everything else converts the shader's execution masks.
       */
      if (partial_mask || !variant->opaque) {
         lp_build_conv_mask(variant->gallivm, fs_type, blend_type,
                            fs_mask, num_fs,
                            &blend_mask, 1);
      } else {
         blend_mask = lp_build_const_int_vec(variant->gallivm, blend_type, ~0);
      }

      /* color_ptr = color_ptr_ptr[cbuf] */
      color_ptr = LLVMBuildLoad(builder,
                                LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
                                "");
      lp_build_name(color_ptr, "color_ptr%d", cbuf);

      /* which blend/colormask state to use */
      rt = key->blend.independent_blend_enable ? cbuf : 0;

      /*
       * Blending.
       */
      {
         /* Could the 4x4 have been killed?
          */
         boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) &&
                              !key->alpha.enabled &&
                              !shader->info.base.uses_kill);

         generate_blend(variant->gallivm,
                        &key->blend,
                        rt,
                        builder,
                        blend_type,
                        context_ptr,
                        blend_mask,
                        blend_in_color,
                        color_ptr,
                        do_branch);
      }
   }

   LLVMBuildRetVoid(builder);

   gallivm_verify_function(gallivm, function);

   variant->nr_instrs += lp_build_count_instructions(function);
}
   1117 
   1118 
   1119 static void
   1120 dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
   1121 {
   1122    unsigned i;
   1123 
   1124    debug_printf("fs variant %p:\n", (void *) key);
   1125 
   1126    if (key->flatshade) {
   1127       debug_printf("flatshade = 1\n");
   1128    }
   1129    for (i = 0; i < key->nr_cbufs; ++i) {
   1130       debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
   1131    }
   1132    if (key->depth.enabled) {
   1133       debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
   1134       debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE));
   1135       debug_printf("depth.writemask = %u\n", key->depth.writemask);
   1136    }
   1137 
   1138    for (i = 0; i < 2; ++i) {
   1139       if (key->stencil[i].enabled) {
   1140          debug_printf("stencil[%u].func = %s\n", i, util_dump_func(key->stencil[i].func, TRUE));
   1141          debug_printf("stencil[%u].fail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].fail_op, TRUE));
   1142          debug_printf("stencil[%u].zpass_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zpass_op, TRUE));
   1143          debug_printf("stencil[%u].zfail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zfail_op, TRUE));
   1144          debug_printf("stencil[%u].valuemask = 0x%x\n", i, key->stencil[i].valuemask);
   1145          debug_printf("stencil[%u].writemask = 0x%x\n", i, key->stencil[i].writemask);
   1146       }
   1147    }
   1148 
   1149    if (key->alpha.enabled) {
   1150       debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE));
   1151    }
   1152 
   1153    if (key->occlusion_count) {
   1154       debug_printf("occlusion_count = 1\n");
   1155    }
   1156 
   1157    if (key->blend.logicop_enable) {
   1158       debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE));
   1159    }
   1160    else if (key->blend.rt[0].blend_enable) {
   1161       debug_printf("blend.rgb_func = %s\n",   util_dump_blend_func  (key->blend.rt[0].rgb_func, TRUE));
   1162       debug_printf("blend.rgb_src_factor = %s\n",   util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE));
   1163       debug_printf("blend.rgb_dst_factor = %s\n",   util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE));
   1164       debug_printf("blend.alpha_func = %s\n",       util_dump_blend_func  (key->blend.rt[0].alpha_func, TRUE));
   1165       debug_printf("blend.alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE));
   1166       debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE));
   1167    }
   1168    debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask);
   1169    for (i = 0; i < key->nr_samplers; ++i) {
   1170       debug_printf("sampler[%u] = \n", i);
   1171       debug_printf("  .format = %s\n",
   1172                    util_format_name(key->sampler[i].format));
   1173       debug_printf("  .target = %s\n",
   1174                    util_dump_tex_target(key->sampler[i].target, TRUE));
   1175       debug_printf("  .pot = %u %u %u\n",
   1176                    key->sampler[i].pot_width,
   1177                    key->sampler[i].pot_height,
   1178                    key->sampler[i].pot_depth);
   1179       debug_printf("  .wrap = %s %s %s\n",
   1180                    util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
   1181                    util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
   1182                    util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
   1183       debug_printf("  .min_img_filter = %s\n",
   1184                    util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
   1185       debug_printf("  .min_mip_filter = %s\n",
   1186                    util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
   1187       debug_printf("  .mag_img_filter = %s\n",
   1188                    util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
   1189       if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
   1190          debug_printf("  .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE));
   1191       debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
   1192       debug_printf("  .min_max_lod_equal = %u\n", key->sampler[i].min_max_lod_equal);
   1193       debug_printf("  .lod_bias_non_zero = %u\n", key->sampler[i].lod_bias_non_zero);
   1194       debug_printf("  .apply_min_lod = %u\n", key->sampler[i].apply_min_lod);
   1195       debug_printf("  .apply_max_lod = %u\n", key->sampler[i].apply_max_lod);
   1196    }
   1197 }
   1198 
   1199 
   1200 void
   1201 lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
   1202 {
   1203    debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n",
   1204                 variant->shader->no, variant->no);
   1205    tgsi_dump(variant->shader->base.tokens, 0);
   1206    dump_fs_variant_key(&variant->key);
   1207    debug_printf("variant->opaque = %u\n", variant->opaque);
   1208    debug_printf("\n");
   1209 }
   1210 
   1211 
   1212 /**
   1213  * Generate a new fragment shader variant from the shader code and
   1214  * other state indicated by the key.
   1215  */
   1216 static struct lp_fragment_shader_variant *
   1217 generate_variant(struct llvmpipe_context *lp,
   1218                  struct lp_fragment_shader *shader,
   1219                  const struct lp_fragment_shader_variant_key *key)
   1220 {
   1221    struct lp_fragment_shader_variant *variant;
   1222    const struct util_format_description *cbuf0_format_desc;
   1223    boolean fullcolormask;
   1224 
   1225    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
   1226    if(!variant)
   1227       return NULL;
   1228 
   1229    variant->gallivm = gallivm_create();
   1230    if (!variant->gallivm) {
   1231       FREE(variant);
   1232       return NULL;
   1233    }
   1234 
   1235    variant->shader = shader;
   1236    variant->list_item_global.base = variant;
   1237    variant->list_item_local.base = variant;
   1238    variant->no = shader->variants_created++;
   1239 
   1240    memcpy(&variant->key, key, shader->variant_key_size);
   1241 
   1242    /*
   1243     * Determine whether we are touching all channels in the color buffer.
   1244     */
   1245    fullcolormask = FALSE;
   1246    if (key->nr_cbufs == 1) {
   1247       cbuf0_format_desc = util_format_description(key->cbuf_format[0]);
   1248       fullcolormask = util_format_colormask_full(cbuf0_format_desc, key->blend.rt[0].colormask);
   1249    }
   1250 
   1251    variant->opaque =
   1252          !key->blend.logicop_enable &&
   1253          !key->blend.rt[0].blend_enable &&
   1254          fullcolormask &&
   1255          !key->stencil[0].enabled &&
   1256          !key->alpha.enabled &&
   1257          !key->depth.enabled &&
   1258          !shader->info.base.uses_kill
   1259          ? TRUE : FALSE;
   1260 
   1261 
   1262    if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
   1263       lp_debug_fs_variant(variant);
   1264    }
   1265 
   1266    lp_jit_init_types(variant);
   1267 
   1268    if (variant->jit_function[RAST_EDGE_TEST] == NULL)
   1269       generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
   1270 
   1271    if (variant->jit_function[RAST_WHOLE] == NULL) {
   1272       if (variant->opaque) {
   1273          /* Specialized shader, which doesn't need to read the color buffer. */
   1274          generate_fragment(lp, shader, variant, RAST_WHOLE);
   1275       }
   1276    }
   1277 
   1278    /*
   1279     * Compile everything
   1280     */
   1281 
   1282    gallivm_compile_module(variant->gallivm);
   1283 
   1284    if (variant->function[RAST_EDGE_TEST]) {
   1285       variant->jit_function[RAST_EDGE_TEST] = (lp_jit_frag_func)
   1286             gallivm_jit_function(variant->gallivm,
   1287                                  variant->function[RAST_EDGE_TEST]);
   1288    }
   1289 
   1290    if (variant->function[RAST_WHOLE]) {
   1291          variant->jit_function[RAST_WHOLE] = (lp_jit_frag_func)
   1292                gallivm_jit_function(variant->gallivm,
   1293                                     variant->function[RAST_WHOLE]);
   1294    } else if (!variant->jit_function[RAST_WHOLE]) {
   1295       variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
   1296    }
   1297 
   1298    return variant;
   1299 }
   1300 
   1301 
   1302 static void *
   1303 llvmpipe_create_fs_state(struct pipe_context *pipe,
   1304                          const struct pipe_shader_state *templ)
   1305 {
   1306    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   1307    struct lp_fragment_shader *shader;
   1308    int nr_samplers;
   1309    int i;
   1310 
   1311    shader = CALLOC_STRUCT(lp_fragment_shader);
   1312    if (!shader)
   1313       return NULL;
   1314 
   1315    shader->no = fs_no++;
   1316    make_empty_list(&shader->variants);
   1317 
   1318    /* get/save the summary info for this shader */
   1319    lp_build_tgsi_info(templ->tokens, &shader->info);
   1320 
   1321    /* we need to keep a local copy of the tokens */
   1322    shader->base.tokens = tgsi_dup_tokens(templ->tokens);
   1323 
   1324    shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ);
   1325    if (shader->draw_data == NULL) {
   1326       FREE((void *) shader->base.tokens);
   1327       FREE(shader);
   1328       return NULL;
   1329    }
   1330 
   1331    nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
   1332 
   1333    shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
   1334 				     sampler[nr_samplers]);
   1335 
   1336    for (i = 0; i < shader->info.base.num_inputs; i++) {
   1337       shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
   1338       shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i];
   1339 
   1340       switch (shader->info.base.input_interpolate[i]) {
   1341       case TGSI_INTERPOLATE_CONSTANT:
   1342 	 shader->inputs[i].interp = LP_INTERP_CONSTANT;
   1343 	 break;
   1344       case TGSI_INTERPOLATE_LINEAR:
   1345 	 shader->inputs[i].interp = LP_INTERP_LINEAR;
   1346 	 break;
   1347       case TGSI_INTERPOLATE_PERSPECTIVE:
   1348 	 shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
   1349 	 break;
   1350       case TGSI_INTERPOLATE_COLOR:
   1351 	 shader->inputs[i].interp = LP_INTERP_COLOR;
   1352 	 break;
   1353       default:
   1354 	 assert(0);
   1355 	 break;
   1356       }
   1357 
   1358       switch (shader->info.base.input_semantic_name[i]) {
   1359       case TGSI_SEMANTIC_FACE:
   1360 	 shader->inputs[i].interp = LP_INTERP_FACING;
   1361 	 break;
   1362       case TGSI_SEMANTIC_POSITION:
   1363 	 /* Position was already emitted above
   1364 	  */
   1365 	 shader->inputs[i].interp = LP_INTERP_POSITION;
   1366 	 shader->inputs[i].src_index = 0;
   1367 	 continue;
   1368       }
   1369 
   1370       shader->inputs[i].src_index = i+1;
   1371    }
   1372 
   1373    if (LP_DEBUG & DEBUG_TGSI) {
   1374       unsigned attrib;
   1375       debug_printf("llvmpipe: Create fragment shader #%u %p:\n",
   1376                    shader->no, (void *) shader);
   1377       tgsi_dump(templ->tokens, 0);
   1378       debug_printf("usage masks:\n");
   1379       for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) {
   1380          unsigned usage_mask = shader->info.base.input_usage_mask[attrib];
   1381          debug_printf("  IN[%u].%s%s%s%s\n",
   1382                       attrib,
   1383                       usage_mask & TGSI_WRITEMASK_X ? "x" : "",
   1384                       usage_mask & TGSI_WRITEMASK_Y ? "y" : "",
   1385                       usage_mask & TGSI_WRITEMASK_Z ? "z" : "",
   1386                       usage_mask & TGSI_WRITEMASK_W ? "w" : "");
   1387       }
   1388       debug_printf("\n");
   1389    }
   1390 
   1391    return shader;
   1392 }
   1393 
   1394 
   1395 static void
   1396 llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
   1397 {
   1398    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   1399 
   1400    if (llvmpipe->fs == fs)
   1401       return;
   1402 
   1403    draw_flush(llvmpipe->draw);
   1404 
   1405    llvmpipe->fs = (struct lp_fragment_shader *) fs;
   1406 
   1407    draw_bind_fragment_shader(llvmpipe->draw,
   1408                              (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL));
   1409 
   1410    llvmpipe->dirty |= LP_NEW_FS;
   1411 }
   1412 
   1413 
   1414 /**
   1415  * Remove shader variant from two lists: the shader's variant list
   1416  * and the context's variant list.
   1417  */
   1418 void
   1419 llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
   1420                                struct lp_fragment_shader_variant *variant)
   1421 {
   1422    unsigned i;
   1423 
   1424    if (gallivm_debug & GALLIVM_DEBUG_IR) {
   1425       debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached"
   1426                    " #%u v total cached #%u\n",
   1427                    variant->shader->no,
   1428                    variant->no,
   1429                    variant->shader->variants_created,
   1430                    variant->shader->variants_cached,
   1431                    lp->nr_fs_variants);
   1432    }
   1433 
   1434    /* free all the variant's JIT'd functions */
   1435    for (i = 0; i < Elements(variant->function); i++) {
   1436       if (variant->function[i]) {
   1437          gallivm_free_function(variant->gallivm,
   1438                                variant->function[i],
   1439                                variant->jit_function[i]);
   1440       }
   1441    }
   1442 
   1443    gallivm_destroy(variant->gallivm);
   1444 
   1445    /* remove from shader's list */
   1446    remove_from_list(&variant->list_item_local);
   1447    variant->shader->variants_cached--;
   1448 
   1449    /* remove from context's list */
   1450    remove_from_list(&variant->list_item_global);
   1451    lp->nr_fs_variants--;
   1452    lp->nr_fs_instrs -= variant->nr_instrs;
   1453 
   1454    FREE(variant);
   1455 }
   1456 
   1457 
   1458 static void
   1459 llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
   1460 {
   1461    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   1462    struct lp_fragment_shader *shader = fs;
   1463    struct lp_fs_variant_list_item *li;
   1464 
   1465    assert(fs != llvmpipe->fs);
   1466 
   1467    /*
   1468     * XXX: we need to flush the context until we have some sort of reference
   1469     * counting in fragment shaders as they may still be binned
   1470     * Flushing alone might not sufficient we need to wait on it too.
   1471     */
   1472    llvmpipe_finish(pipe, __FUNCTION__);
   1473 
   1474    /* Delete all the variants */
   1475    li = first_elem(&shader->variants);
   1476    while(!at_end(&shader->variants, li)) {
   1477       struct lp_fs_variant_list_item *next = next_elem(li);
   1478       llvmpipe_remove_shader_variant(llvmpipe, li->base);
   1479       li = next;
   1480    }
   1481 
   1482    /* Delete draw module's data */
   1483    draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);
   1484 
   1485    assert(shader->variants_cached == 0);
   1486    FREE((void *) shader->base.tokens);
   1487    FREE(shader);
   1488 }
   1489 
   1490 
   1491 
   1492 static void
   1493 llvmpipe_set_constant_buffer(struct pipe_context *pipe,
   1494                              uint shader, uint index,
   1495                              struct pipe_constant_buffer *cb)
   1496 {
   1497    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   1498    struct pipe_resource *constants = cb ? cb->buffer : NULL;
   1499    unsigned size;
   1500    const void *data;
   1501 
   1502    if (cb && cb->user_buffer) {
   1503       constants = llvmpipe_user_buffer_create(pipe->screen,
   1504                                               (void *) cb->user_buffer,
   1505                                               cb->buffer_size,
   1506                                               PIPE_BIND_CONSTANT_BUFFER);
   1507    }
   1508 
   1509    size = constants ? constants->width0 : 0;
   1510    data = constants ? llvmpipe_resource_data(constants) : NULL;
   1511 
   1512    assert(shader < PIPE_SHADER_TYPES);
   1513    assert(index < PIPE_MAX_CONSTANT_BUFFERS);
   1514 
   1515    if(llvmpipe->constants[shader][index] == constants)
   1516       return;
   1517 
   1518    draw_flush(llvmpipe->draw);
   1519 
   1520    /* note: reference counting */
   1521    pipe_resource_reference(&llvmpipe->constants[shader][index], constants);
   1522 
   1523    if(shader == PIPE_SHADER_VERTEX ||
   1524       shader == PIPE_SHADER_GEOMETRY) {
   1525       draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
   1526                                       index, data, size);
   1527    }
   1528 
   1529    llvmpipe->dirty |= LP_NEW_CONSTANTS;
   1530 
   1531    if (cb && cb->user_buffer) {
   1532       pipe_resource_reference(&constants, NULL);
   1533    }
   1534 }
   1535 
   1536 
   1537 /**
   1538  * Return the blend factor equivalent to a destination alpha of one.
   1539  */
   1540 static INLINE unsigned
   1541 force_dst_alpha_one(unsigned factor)
   1542 {
   1543    switch(factor) {
   1544    case PIPE_BLENDFACTOR_DST_ALPHA:
   1545       return PIPE_BLENDFACTOR_ONE;
   1546    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
   1547       return PIPE_BLENDFACTOR_ZERO;
   1548    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
   1549       return PIPE_BLENDFACTOR_ZERO;
   1550    }
   1551 
   1552    return factor;
   1553 }
   1554 
   1555 
   1556 /**
   1557  * We need to generate several variants of the fragment pipeline to match
   1558  * all the combinations of the contributing state atoms.
   1559  *
   1560  * TODO: there is actually no reason to tie this to context state -- the
   1561  * generated code could be cached globally in the screen.
   1562  */
   1563 static void
   1564 make_variant_key(struct llvmpipe_context *lp,
   1565                  struct lp_fragment_shader *shader,
   1566                  struct lp_fragment_shader_variant_key *key)
   1567 {
   1568    unsigned i;
   1569 
   1570    memset(key, 0, shader->variant_key_size);
   1571 
   1572    if (lp->framebuffer.zsbuf) {
   1573       if (lp->depth_stencil->depth.enabled) {
   1574          key->zsbuf_format = lp->framebuffer.zsbuf->format;
   1575          memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
   1576       }
   1577       if (lp->depth_stencil->stencil[0].enabled) {
   1578          key->zsbuf_format = lp->framebuffer.zsbuf->format;
   1579          memcpy(&key->stencil, &lp->depth_stencil->stencil, sizeof key->stencil);
   1580       }
   1581    }
   1582 
   1583    key->alpha.enabled = lp->depth_stencil->alpha.enabled;
   1584    if(key->alpha.enabled)
   1585       key->alpha.func = lp->depth_stencil->alpha.func;
   1586    /* alpha.ref_value is passed in jit_context */
   1587 
   1588    key->flatshade = lp->rasterizer->flatshade;
   1589    if (lp->active_query_count) {
   1590       key->occlusion_count = TRUE;
   1591    }
   1592 
   1593    if (lp->framebuffer.nr_cbufs) {
   1594       memcpy(&key->blend, lp->blend, sizeof key->blend);
   1595    }
   1596 
   1597    key->nr_cbufs = lp->framebuffer.nr_cbufs;
   1598    for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
   1599       enum pipe_format format = lp->framebuffer.cbufs[i]->format;
   1600       struct pipe_rt_blend_state *blend_rt = &key->blend.rt[i];
   1601       const struct util_format_description *format_desc;
   1602 
   1603       key->cbuf_format[i] = format;
   1604 
   1605       format_desc = util_format_description(format);
   1606       assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
   1607              format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
   1608 
   1609       blend_rt->colormask = lp->blend->rt[i].colormask;
   1610 
   1611       /*
   1612        * Mask out color channels not present in the color buffer.
   1613        */
   1614       blend_rt->colormask &= util_format_colormask(format_desc);
   1615 
   1616       /*
   1617        * Our swizzled render tiles always have an alpha channel, but the linear
   1618        * render target format often does not, so force here the dst alpha to be
   1619        * one.
   1620        *
   1621        * This is not a mere optimization. Wrong results will be produced if the
   1622        * dst alpha is used, the dst format does not have alpha, and the previous
   1623        * rendering was not flushed from the swizzled to linear buffer. For
   1624        * example, NonPowTwo DCT.
   1625        *
   1626        * TODO: This should be generalized to all channels for better
   1627        * performance, but only alpha causes correctness issues.
   1628        *
   1629        * Also, force rgb/alpha func/factors match, to make AoS blending easier.
   1630        */
   1631       if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W ||
   1632 	  format_desc->swizzle[3] == format_desc->swizzle[0]) {
   1633          blend_rt->rgb_src_factor   = force_dst_alpha_one(blend_rt->rgb_src_factor);
   1634          blend_rt->rgb_dst_factor   = force_dst_alpha_one(blend_rt->rgb_dst_factor);
   1635          blend_rt->alpha_func       = blend_rt->rgb_func;
   1636          blend_rt->alpha_src_factor = blend_rt->rgb_src_factor;
   1637          blend_rt->alpha_dst_factor = blend_rt->rgb_dst_factor;
   1638       }
   1639    }
   1640 
   1641    /* This value will be the same for all the variants of a given shader:
   1642     */
   1643    key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
   1644 
   1645    for(i = 0; i < key->nr_samplers; ++i) {
   1646       if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
   1647          lp_sampler_static_state(&key->sampler[i],
   1648 				 lp->sampler_views[PIPE_SHADER_FRAGMENT][i],
   1649 				 lp->samplers[PIPE_SHADER_FRAGMENT][i]);
   1650       }
   1651    }
   1652 }
   1653 
   1654 
   1655 
   1656 /**
   1657  * Update fragment shader state.  This is called just prior to drawing
   1658  * something when some fragment-related state has changed.
   1659  */
   1660 void
   1661 llvmpipe_update_fs(struct llvmpipe_context *lp)
   1662 {
   1663    struct lp_fragment_shader *shader = lp->fs;
   1664    struct lp_fragment_shader_variant_key key;
   1665    struct lp_fragment_shader_variant *variant = NULL;
   1666    struct lp_fs_variant_list_item *li;
   1667 
   1668    make_variant_key(lp, shader, &key);
   1669 
   1670    /* Search the variants for one which matches the key */
   1671    li = first_elem(&shader->variants);
   1672    while(!at_end(&shader->variants, li)) {
   1673       if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) {
   1674          variant = li->base;
   1675          break;
   1676       }
   1677       li = next_elem(li);
   1678    }
   1679 
   1680    if (variant) {
   1681       /* Move this variant to the head of the list to implement LRU
   1682        * deletion of shader's when we have too many.
   1683        */
   1684       move_to_head(&lp->fs_variants_list, &variant->list_item_global);
   1685    }
   1686    else {
   1687       /* variant not found, create it now */
   1688       int64_t t0, t1, dt;
   1689       unsigned i;
   1690       unsigned variants_to_cull;
   1691 
   1692       if (0) {
   1693          debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
   1694                       lp->nr_fs_variants,
   1695                       lp->nr_fs_instrs,
   1696                       lp->nr_fs_variants ? lp->nr_fs_instrs / lp->nr_fs_variants : 0);
   1697       }
   1698 
   1699       /* First, check if we've exceeded the max number of shader variants.
   1700        * If so, free 25% of them (the least recently used ones).
   1701        */
   1702       variants_to_cull = lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 4 : 0;
   1703 
   1704       if (variants_to_cull ||
   1705           lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
   1706          struct pipe_context *pipe = &lp->pipe;
   1707 
   1708          /*
   1709           * XXX: we need to flush the context until we have some sort of
   1710           * reference counting in fragment shaders as they may still be binned
   1711           * Flushing alone might not be sufficient we need to wait on it too.
   1712           */
   1713          llvmpipe_finish(pipe, __FUNCTION__);
   1714 
   1715          /*
   1716           * We need to re-check lp->nr_fs_variants because an arbitrarliy large
   1717           * number of shader variants (potentially all of them) could be
   1718           * pending for destruction on flush.
   1719           */
   1720 
   1721          for (i = 0; i < variants_to_cull || lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) {
   1722             struct lp_fs_variant_list_item *item;
   1723             if (is_empty_list(&lp->fs_variants_list)) {
   1724                break;
   1725             }
   1726             item = last_elem(&lp->fs_variants_list);
   1727             assert(item);
   1728             assert(item->base);
   1729             llvmpipe_remove_shader_variant(lp, item->base);
   1730          }
   1731       }
   1732 
   1733       /*
   1734        * Generate the new variant.
   1735        */
   1736       t0 = os_time_get();
   1737       variant = generate_variant(lp, shader, &key);
   1738       t1 = os_time_get();
   1739       dt = t1 - t0;
   1740       LP_COUNT_ADD(llvm_compile_time, dt);
   1741       LP_COUNT_ADD(nr_llvm_compiles, 2);  /* emit vs. omit in/out test */
   1742 
   1743       llvmpipe_variant_count++;
   1744 
   1745       /* Put the new variant into the list */
   1746       if (variant) {
   1747          insert_at_head(&shader->variants, &variant->list_item_local);
   1748          insert_at_head(&lp->fs_variants_list, &variant->list_item_global);
   1749          lp->nr_fs_variants++;
   1750          lp->nr_fs_instrs += variant->nr_instrs;
   1751          shader->variants_cached++;
   1752       }
   1753    }
   1754 
   1755    /* Bind this variant */
   1756    lp_setup_set_fs_variant(lp->setup, variant);
   1757 }
   1758 
   1759 
   1760 
   1761 
   1762 
   1763 
   1764 
   1765 void
   1766 llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
   1767 {
   1768    llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state;
   1769    llvmpipe->pipe.bind_fs_state   = llvmpipe_bind_fs_state;
   1770    llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state;
   1771 
   1772    llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer;
   1773 }
   1774