Home | History | Annotate | Download | only in llvmpipe
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 #include "util/u_math.h"
     30 #include "util/u_memory.h"
     31 #include "util/simple_list.h"
     32 #include "util/os_time.h"
     33 #include "gallivm/lp_bld_arit.h"
     34 #include "gallivm/lp_bld_bitarit.h"
     35 #include "gallivm/lp_bld_const.h"
     36 #include "gallivm/lp_bld_debug.h"
     37 #include "gallivm/lp_bld_init.h"
     38 #include "gallivm/lp_bld_logic.h"
     39 #include "gallivm/lp_bld_intr.h"
     40 #include "gallivm/lp_bld_flow.h"
     41 #include "gallivm/lp_bld_type.h"
     42 
     43 #include "lp_perf.h"
     44 #include "lp_debug.h"
     45 #include "lp_flush.h"
     46 #include "lp_screen.h"
     47 #include "lp_context.h"
     48 #include "lp_state.h"
     49 #include "lp_state_fs.h"
     50 #include "lp_state_setup.h"
     51 
     52 
     53 /** Setup shader number (for debugging) */
     54 static unsigned setup_no = 0;
     55 
     56 
     57 /* currently organized to interpolate full float[4] attributes even
     58  * when some elements are unused.  Later, can pack vertex data more
     59  * closely.
     60  */
     61 
     62 
     63 struct lp_setup_args
     64 {
     65    /* Function arguments:
     66     */
     67    LLVMValueRef v0;
     68    LLVMValueRef v1;
     69    LLVMValueRef v2;
     70    LLVMValueRef facing;		/* boolean */
     71    LLVMValueRef a0;
     72    LLVMValueRef dadx;
     73    LLVMValueRef dady;
     74 
     75    /* Derived:
     76     */
     77    LLVMValueRef x0_center;
     78    LLVMValueRef y0_center;
     79    LLVMValueRef dy20_ooa;
     80    LLVMValueRef dy01_ooa;
     81    LLVMValueRef dx20_ooa;
     82    LLVMValueRef dx01_ooa;
     83    struct lp_build_context bld;
     84 };
     85 
     86 
     87 static void
     88 store_coef(struct gallivm_state *gallivm,
     89            struct lp_setup_args *args,
     90            unsigned slot,
     91            LLVMValueRef a0,
     92            LLVMValueRef dadx,
     93            LLVMValueRef dady)
     94 {
     95    LLVMBuilderRef builder = gallivm->builder;
     96    LLVMValueRef idx = lp_build_const_int32(gallivm, slot);
     97 
     98    LLVMBuildStore(builder,
     99                   a0,
    100                   LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
    101 
    102    LLVMBuildStore(builder,
    103                   dadx,
    104                   LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
    105 
    106    LLVMBuildStore(builder,
    107                   dady,
    108                   LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
    109 }
    110 
    111 
    112 
    113 static void
    114 emit_constant_coef4(struct gallivm_state *gallivm,
    115                     struct lp_setup_args *args,
    116                     unsigned slot,
    117                     LLVMValueRef vert)
    118 {
    119    store_coef(gallivm, args, slot, vert, args->bld.zero, args->bld.zero);
    120 }
    121 
    122 
    123 
    124 /**
    125  * Setup the fragment input attribute with the front-facing value.
    126  * \param frontface  is the triangle front facing?
    127  */
    128 static void
    129 emit_facing_coef(struct gallivm_state *gallivm,
    130                  struct lp_setup_args *args,
    131                  unsigned slot )
    132 {
    133    LLVMBuilderRef builder = gallivm->builder;
    134    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
    135    LLVMValueRef a0_0 = args->facing;
    136    LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
    137    LLVMValueRef a0, face_val;
    138    const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0,
    139                                        PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 };
    140    /* Our face val is either 1 or 0 so we do
    141     * face = (val * 2) - 1
    142     * to make it 1 or -1
    143     */
    144    face_val =
    145       LLVMBuildFAdd(builder,
    146                     LLVMBuildFMul(builder, a0_0f,
    147                                   lp_build_const_float(gallivm, 2.0),
    148                                   ""),
    149                     lp_build_const_float(gallivm, -1.0),
    150                     "facing");
    151    face_val = lp_build_broadcast_scalar(&args->bld, face_val);
    152    a0 = lp_build_swizzle_aos(&args->bld, face_val, swizzles);
    153 
    154    store_coef(gallivm, args, slot, a0, args->bld.zero, args->bld.zero);
    155 }
    156 
    157 
    158 static LLVMValueRef
    159 vert_attrib(struct gallivm_state *gallivm,
    160             LLVMValueRef vert,
    161             int attr,
    162             int elem,
    163             const char *name)
    164 {
    165    LLVMBuilderRef b = gallivm->builder;
    166    LLVMValueRef idx[2];
    167    idx[0] = lp_build_const_int32(gallivm, attr);
    168    idx[1] = lp_build_const_int32(gallivm, elem);
    169    return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
    170 }
    171 
    172 
    173 static void
    174 lp_twoside(struct gallivm_state *gallivm,
    175            struct lp_setup_args *args,
    176            const struct lp_setup_variant_key *key,
    177            int bcolor_slot,
    178            LLVMValueRef attribv[3])
    179 {
    180    LLVMBuilderRef b = gallivm->builder;
    181    LLVMValueRef a0_back, a1_back, a2_back;
    182    LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot);
    183 
    184    LLVMValueRef facing = args->facing;
    185    LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing,
    186                                              lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */
    187 
    188    a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back");
    189    a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back");
    190    a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back");
    191 
    192    /* Possibly swap the front and back attrib values,
    193     *
    194     * Prefer select to if so we don't have to worry about phis or
    195     * allocas.
    196     */
    197    attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], "");
    198    attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], "");
    199    attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], "");
    200 
    201 }
    202 
    203 static void
    204 lp_do_offset_tri(struct gallivm_state *gallivm,
    205                  struct lp_setup_args *args,
    206                  const struct lp_setup_variant_key *key,
    207                  LLVMValueRef inv_det,
    208                  LLVMValueRef dxyz01,
    209                  LLVMValueRef dxyz20,
    210                  LLVMValueRef attribv[3])
    211 {
    212    LLVMBuilderRef b = gallivm->builder;
    213    struct lp_build_context flt_scalar_bld;
    214    struct lp_build_context int_scalar_bld;
    215    struct lp_build_context *bld = &args->bld;
    216    LLVMValueRef zoffset, mult;
    217    LLVMValueRef z0_new, z1_new, z2_new;
    218    LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20;
    219    LLVMValueRef z0z1, z0z1z2;
    220    LLVMValueRef max, max_value, res12;
    221    LLVMValueRef shuffles[4];
    222    LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
    223    LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
    224    LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
    225    LLVMValueRef twoi = lp_build_const_int32(gallivm, 2);
    226    LLVMValueRef threei  = lp_build_const_int32(gallivm, 3);
    227 
    228    /* (res12) = cross(e,f).xy */
    229    shuffles[0] = twoi;
    230    shuffles[1] = zeroi;
    231    shuffles[2] = onei;
    232    shuffles[3] = twoi;
    233    dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), "");
    234 
    235    shuffles[0] = onei;
    236    shuffles[1] = twoi;
    237    shuffles[2] = twoi;
    238    shuffles[3] = zeroi;
    239    dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), "");
    240 
    241    dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20");
    242 
    243    shuffles[0] = twoi;
    244    shuffles[1] = threei;
    245    shuffles[2] = LLVMGetUndef(shuf_type);
    246    shuffles[3] = LLVMGetUndef(shuf_type);
    247    dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20,
    248                                         LLVMConstVector(shuffles, 4), "");
    249 
    250    res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12");
    251 
    252    /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
    253    dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy");
    254    dzdxdzdy = lp_build_abs(bld, dzdxdzdy);
    255 
    256    dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, "");
    257    dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, "");
    258 
    259    /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */
    260    max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, "");
    261    max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max");
    262 
    263    mult = LLVMBuildFMul(b, max_value,
    264                         lp_build_const_float(gallivm, key->pgon_offset_scale), "");
    265 
    266    lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32));
    267 
    268    if (key->floating_point_depth) {
    269       /*
    270        * bias = pgon_offset_units * 2^(exponent(max(z0, z1, z2)) - mantissa_bits) +
    271        *           MAX2(dzdx, dzdy) * pgon_offset_scale
    272        *
    273        * NOTE: Assumes IEEE float32.
    274        */
    275       LLVMValueRef c23_shifted, exp_mask, bias, exp;
    276       LLVMValueRef maxz_value, maxz0z1_value;
    277 
    278       lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32));
    279 
    280       c23_shifted = lp_build_const_int32(gallivm, 23 << 23);
    281       exp_mask = lp_build_const_int32(gallivm, 0xff << 23);
    282 
    283       maxz0z1_value = lp_build_max(&flt_scalar_bld,
    284                          LLVMBuildExtractElement(b, attribv[0], twoi, ""),
    285                          LLVMBuildExtractElement(b, attribv[1], twoi, ""));
    286 
    287       maxz_value = lp_build_max(&flt_scalar_bld,
    288                       LLVMBuildExtractElement(b, attribv[2], twoi, ""),
    289                       maxz0z1_value);
    290 
    291       exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, "");
    292       exp = lp_build_and(&int_scalar_bld, exp, exp_mask);
    293       exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted);
    294       /* Clamping to zero means mrd will be zero for very small numbers,
    295        * but specs do not indicate this should be prevented by clamping
    296        * mrd to smallest normal number instead. */
    297       exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero);
    298       exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, "");
    299 
    300       bias = LLVMBuildFMul(b, exp,
    301                            lp_build_const_float(gallivm, key->pgon_offset_units),
    302                            "bias");
    303 
    304       zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset");
    305    } else {
    306       /*
    307        * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale
    308        */
    309       zoffset = LLVMBuildFAdd(b,
    310                               lp_build_const_float(gallivm, key->pgon_offset_units),
    311                               mult, "zoffset");
    312    }
    313 
    314    if (key->pgon_offset_clamp > 0) {
    315       zoffset = lp_build_min(&flt_scalar_bld,
    316                              lp_build_const_float(gallivm, key->pgon_offset_clamp),
    317                              zoffset);
    318    }
    319    else if (key->pgon_offset_clamp < 0) {
    320       zoffset = lp_build_max(&flt_scalar_bld,
    321                              lp_build_const_float(gallivm, key->pgon_offset_clamp),
    322                              zoffset);
    323    }
    324 
    325    /* yuck */
    326    shuffles[0] = twoi;
    327    shuffles[1] = lp_build_const_int32(gallivm, 6);
    328    shuffles[2] = LLVMGetUndef(shuf_type);
    329    shuffles[3] = LLVMGetUndef(shuf_type);
    330    z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), "");
    331    shuffles[0] = zeroi;
    332    shuffles[1] = onei;
    333    shuffles[2] = lp_build_const_int32(gallivm, 6);
    334    shuffles[3] = LLVMGetUndef(shuf_type);
    335    z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), "");
    336    zoffset = lp_build_broadcast_scalar(bld, zoffset);
    337 
    338    /* clamp and do offset */
    339    /*
    340     * FIXME I suspect the clamp (is that even right to always clamp to fixed
    341     * 0.0/1.0?) should really be per fragment?
    342     */
    343    z0z1z2 = lp_build_clamp(bld, LLVMBuildFAdd(b, z0z1z2, zoffset, ""), bld->zero, bld->one);
    344 
    345    /* insert into args->a0.z, a1.z, a2.z:
    346     */
    347    z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, "");
    348    z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, "");
    349    z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, "");
    350    attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, "");
    351    attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, "");
    352    attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, "");
    353 }
    354 
    355 static void
    356 load_attribute(struct gallivm_state *gallivm,
    357                struct lp_setup_args *args,
    358                const struct lp_setup_variant_key *key,
    359                unsigned vert_attr,
    360                LLVMValueRef attribv[3])
    361 {
    362    LLVMBuilderRef b = gallivm->builder;
    363    LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr);
    364 
    365    /* Load the vertex data
    366     */
    367    attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
    368    attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
    369    attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
    370 
    371 
    372    /* Potentially modify it according to twoside, etc:
    373     */
    374    if (key->twoside) {
    375       if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
    376          lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
    377       else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
    378          lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
    379    }
    380 }
    381 
    382 /*
    383  * FIXME: interpolation is always done wrt fb origin (0/0).
    384  * However, if some (small) tri is far away from the origin and gradients
    385  * are large, this can lead to HUGE errors, since the a0 value calculated
    386  * here can get very large (with the actual values inside the triangle way
    387  * smaller), leading to complete loss of accuracy. This could be prevented
    388  * by using some point inside (or at corner) of the tri as interpolation
    389  * origin, or just use barycentric interpolation (which GL suggests and is
    390  * what real hw does - you can get the barycentric coordinates from the
    391  * edge functions in rasterization in principle (though we skip these
    392  * sometimes completely in case of tris covering a block fully,
    393  * which obviously wouldn't work)).
    394  */
    395 static void
    396 emit_coef4( struct gallivm_state *gallivm,
    397             struct lp_setup_args *args,
    398             unsigned slot,
    399             LLVMValueRef a0,
    400             LLVMValueRef a1,
    401             LLVMValueRef a2)
    402 {
    403    LLVMBuilderRef b = gallivm->builder;
    404    LLVMValueRef attr_0;
    405    LLVMValueRef dy20_ooa = args->dy20_ooa;
    406    LLVMValueRef dy01_ooa = args->dy01_ooa;
    407    LLVMValueRef dx20_ooa = args->dx20_ooa;
    408    LLVMValueRef dx01_ooa = args->dx01_ooa;
    409    LLVMValueRef x0_center = args->x0_center;
    410    LLVMValueRef y0_center = args->y0_center;
    411    LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
    412    LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
    413 
    414    /* Calculate dadx (vec4f)
    415     */
    416    LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa");
    417    LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa");
    418    LLVMValueRef dadx          = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx");
    419 
    420    /* Calculate dady (vec4f)
    421     */
    422    LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa");
    423    LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa");
    424    LLVMValueRef dady          = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady");
    425 
    426    /* Calculate a0 - the attribute value at the origin
    427     */
    428    LLVMValueRef dadx_x0    = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
    429    LLVMValueRef dady_y0    = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
    430    LLVMValueRef attr_v0    = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
    431    attr_0                  = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
    432 
    433    store_coef(gallivm, args, slot, attr_0, dadx, dady);
    434 }
    435 
    436 
    437 static void
    438 emit_linear_coef( struct gallivm_state *gallivm,
    439                   struct lp_setup_args *args,
    440                   unsigned slot,
    441                   LLVMValueRef attribv[3])
    442 {
    443    /* nothing to do anymore */
    444    emit_coef4(gallivm,
    445               args, slot,
    446               attribv[0],
    447               attribv[1],
    448               attribv[2]);
    449 }
    450 
    451 
    452 /**
    453  * Compute a0, dadx and dady for a perspective-corrected interpolant,
    454  * for a triangle.
    455  * We basically multiply the vertex value by 1/w before computing
    456  * the plane coefficients (a0, dadx, dady).
    457  * Later, when we compute the value at a particular fragment position we'll
    458  * divide the interpolated value by the interpolated W at that fragment.
    459  */
    460 static void
    461 apply_perspective_corr( struct gallivm_state *gallivm,
    462                         struct lp_setup_args *args,
    463                         unsigned slot,
    464                         LLVMValueRef attribv[3])
    465 {
    466    LLVMBuilderRef b = gallivm->builder;
    467 
    468    /* premultiply by 1/w  (v[0][3] is always 1/w):
    469     */
    470    LLVMValueRef v0_oow = lp_build_broadcast_scalar(&args->bld,
    471                             vert_attrib(gallivm, args->v0, 0, 3, "v0_oow"));
    472    LLVMValueRef v1_oow = lp_build_broadcast_scalar(&args->bld,
    473                             vert_attrib(gallivm, args->v1, 0, 3, "v1_oow"));
    474    LLVMValueRef v2_oow = lp_build_broadcast_scalar(&args->bld,
    475                             vert_attrib(gallivm, args->v2, 0, 3, "v2_oow"));
    476 
    477    attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a");
    478    attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a");
    479    attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a");
    480 }
    481 
    482 
    483 /**
    484  * Applys cylindrical wrapping to vertex attributes if enabled.
    485  * Input coordinates must be in [0, 1] range, otherwise results are undefined.
    486  *
    487  * @param cyl_wrap  TGSI_CYLINDRICAL_WRAP_x flags
    488  */
    489 static void
    490 emit_apply_cyl_wrap(struct gallivm_state *gallivm,
    491                     struct lp_setup_args *args,
    492                     uint cyl_wrap,
    493                     LLVMValueRef attribv[3])
    494 
    495 {
    496    LLVMBuilderRef builder = gallivm->builder;
    497    struct lp_type type = args->bld.type;
    498    LLVMTypeRef float_vec_type = args->bld.vec_type;
    499    LLVMValueRef pos_half;
    500    LLVMValueRef neg_half;
    501    LLVMValueRef cyl_mask;
    502    LLVMValueRef offset;
    503    LLVMValueRef delta;
    504    LLVMValueRef one;
    505 
    506    if (!cyl_wrap)
    507       return;
    508 
    509    /* Constants */
    510    pos_half = lp_build_const_vec(gallivm, type, +0.5f);
    511    neg_half = lp_build_const_vec(gallivm, type, -0.5f);
    512    cyl_mask = lp_build_const_mask_aos(gallivm, type, cyl_wrap, 4);
    513 
    514    one = lp_build_const_vec(gallivm, type, 1.0f);
    515    one = LLVMBuildBitCast(builder, one, lp_build_int_vec_type(gallivm, type), "");
    516    one = LLVMBuildAnd(builder, one, cyl_mask, "");
    517 
    518    /* Edge v0 -> v1 */
    519    delta = LLVMBuildFSub(builder, attribv[1], attribv[0], "");
    520 
    521    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
    522    offset     = LLVMBuildAnd(builder, offset, one, "");
    523    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
    524    attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, "");
    525 
    526    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
    527    offset     = LLVMBuildAnd(builder, offset, one, "");
    528    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
    529    attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, "");
    530 
    531    /* Edge v1 -> v2 */
    532    delta = LLVMBuildFSub(builder, attribv[2], attribv[1], "");
    533 
    534    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
    535    offset     = LLVMBuildAnd(builder, offset, one, "");
    536    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
    537    attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, "");
    538 
    539    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
    540    offset     = LLVMBuildAnd(builder, offset, one, "");
    541    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
    542    attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, "");
    543 
    544    /* Edge v2 -> v0 */
    545    delta = LLVMBuildFSub(builder, attribv[0], attribv[2], "");
    546 
    547    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
    548    offset     = LLVMBuildAnd(builder, offset, one, "");
    549    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
    550    attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, "");
    551 
    552    offset     = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
    553    offset     = LLVMBuildAnd(builder, offset, one, "");
    554    offset     = LLVMBuildBitCast(builder, offset, float_vec_type, "");
    555    attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, "");
    556 }
    557 
    558 
    559 /**
    560  * Compute the inputs-> dadx, dady, a0 values.
    561  */
    562 static void
    563 emit_tri_coef( struct gallivm_state *gallivm,
    564                const struct lp_setup_variant_key *key,
    565                struct lp_setup_args *args)
    566 {
    567    unsigned slot;
    568 
    569    LLVMValueRef attribs[3];
    570 
    571   /* setup interpolation for all the remaining attributes:
    572     */
    573    for (slot = 0; slot < key->num_inputs; slot++) {
    574       switch (key->inputs[slot].interp) {
    575       case LP_INTERP_CONSTANT:
    576          load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
    577          if (key->flatshade_first) {
    578             emit_constant_coef4(gallivm, args, slot+1, attribs[0]);
    579          }
    580          else {
    581             emit_constant_coef4(gallivm, args, slot+1, attribs[2]);
    582          }
    583          break;
    584 
    585       case LP_INTERP_LINEAR:
    586          load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
    587          emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs);
    588          emit_linear_coef(gallivm, args, slot+1, attribs);
    589          break;
    590 
    591       case LP_INTERP_PERSPECTIVE:
    592          load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
    593          emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs);
    594          apply_perspective_corr(gallivm, args, slot+1, attribs);
    595          emit_linear_coef(gallivm, args, slot+1, attribs);
    596          break;
    597 
    598       case LP_INTERP_POSITION:
    599          /*
    600           * The generated pixel interpolators will pick up the coeffs from
    601           * slot 0.
    602           */
    603          break;
    604 
    605       case LP_INTERP_FACING:
    606          emit_facing_coef(gallivm, args, slot+1);
    607          break;
    608 
    609       default:
    610          assert(0);
    611       }
    612    }
    613 }
    614 
    615 
    616 /* XXX: generic code:
    617  */
    618 static void
    619 set_noalias(LLVMBuilderRef builder,
    620             LLVMValueRef function,
    621             const LLVMTypeRef *arg_types,
    622             int nr_args)
    623 {
    624    int i;
    625    for(i = 0; i < nr_args; ++i)
    626       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
    627          lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
    628 }
    629 
    630 static void
    631 init_args(struct gallivm_state *gallivm,
    632           const struct lp_setup_variant_key *key,
    633           struct lp_setup_args *args)
    634 {
    635    LLVMBuilderRef b = gallivm->builder;
    636    LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
    637    LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
    638    LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
    639    LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
    640    LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20;
    641    LLVMValueRef e, f, ef, ooa;
    642    LLVMValueRef shuffles[4], shuf10;
    643    LLVMValueRef attr_pos[3];
    644    struct lp_type typef4 = lp_type_float_vec(32, 128);
    645    struct lp_build_context bld;
    646 
    647    lp_build_context_init(&bld, gallivm, typef4);
    648    args->bld = bld;
    649 
    650    /* The internal position input is in slot zero:
    651     */
    652    load_attribute(gallivm, args, key, 0, attr_pos);
    653 
    654    pixel_center = lp_build_const_vec(gallivm, typef4,
    655                                      key->pixel_center_half ? 0.5 : 0.0);
    656 
    657    /*
    658     * xy are first two elems in v0a/v1a/v2a but just use vec4 arit
    659     * also offset_tri uses actually xyz in them
    660     */
    661    xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" );
    662 
    663    dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01");
    664    dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20");
    665 
    666    shuffles[0] = onei;
    667    shuffles[1] = zeroi;
    668    shuffles[2] = LLVMGetUndef(shuf_type);
    669    shuffles[3] = LLVMGetUndef(shuf_type);
    670    shuf10 = LLVMConstVector(shuffles, 4);
    671 
    672    dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, "");
    673 
    674    ef = LLVMBuildFMul(b, dxy01, dyx20, "ef");
    675    e = LLVMBuildExtractElement(b, ef, zeroi, "");
    676    f = LLVMBuildExtractElement(b, ef, onei, "");
    677 
    678    ooa  = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa");
    679 
    680    ooa = lp_build_broadcast_scalar(&bld, ooa);
    681 
    682    /* tri offset calc shares a lot of arithmetic, do it here */
    683    if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) {
    684       lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
    685    }
    686 
    687    dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
    688    dxy01 = LLVMBuildFMul(b, dxy01, ooa, "");
    689 
    690    args->dy20_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei);
    691    args->dy01_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei);
    692 
    693    args->dx20_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi);
    694    args->dx01_ooa  = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi);
    695 
    696    args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi);
    697    args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei);
    698 
    699    emit_linear_coef(gallivm, args, 0, attr_pos);
    700 }
    701 
    702 /**
    703  * Generate the runtime callable function for the coefficient calculation.
    704  *
    705  */
    706 static struct lp_setup_variant *
    707 generate_setup_variant(struct lp_setup_variant_key *key,
    708                        struct llvmpipe_context *lp)
    709 {
    710    struct lp_setup_variant *variant = NULL;
    711    struct gallivm_state *gallivm;
    712    struct lp_setup_args args;
    713    char func_name[64];
    714    LLVMTypeRef vec4f_type;
    715    LLVMTypeRef func_type;
    716    LLVMTypeRef arg_types[7];
    717    LLVMBasicBlockRef block;
    718    LLVMBuilderRef builder;
    719    int64_t t0 = 0, t1;
    720 
    721    if (0)
    722       goto fail;
    723 
    724    variant = CALLOC_STRUCT(lp_setup_variant);
    725    if (!variant)
    726       goto fail;
    727 
    728    variant->no = setup_no++;
    729 
    730    util_snprintf(func_name, sizeof(func_name), "setup_variant_%u",
    731                  variant->no);
    732 
    733    variant->gallivm = gallivm = gallivm_create(func_name, lp->context);
    734    if (!variant->gallivm) {
    735       goto fail;
    736    }
    737 
    738    builder = gallivm->builder;
    739 
    740    if (LP_DEBUG & DEBUG_COUNTERS) {
    741       t0 = os_time_get();
    742    }
    743 
    744    memcpy(&variant->key, key, key->size);
    745    variant->list_item_global.base = variant;
    746 
    747    /* Currently always deal with full 4-wide vertex attributes from
    748     * the vertices.
    749     */
    750 
    751    vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4);
    752 
    753    arg_types[0] = LLVMPointerType(vec4f_type, 0);        /* v0 */
    754    arg_types[1] = LLVMPointerType(vec4f_type, 0);        /* v1 */
    755    arg_types[2] = LLVMPointerType(vec4f_type, 0);        /* v2 */
    756    arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */
    757    arg_types[4] = LLVMPointerType(vec4f_type, 0);	/* a0, aligned */
    758    arg_types[5] = LLVMPointerType(vec4f_type, 0);	/* dadx, aligned */
    759    arg_types[6] = LLVMPointerType(vec4f_type, 0);	/* dady, aligned */
    760 
    761    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
    762                                 arg_types, ARRAY_SIZE(arg_types), 0);
    763 
    764    variant->function = LLVMAddFunction(gallivm->module, func_name, func_type);
    765    if (!variant->function)
    766       goto fail;
    767 
    768    LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
    769 
    770    args.v0       = LLVMGetParam(variant->function, 0);
    771    args.v1       = LLVMGetParam(variant->function, 1);
    772    args.v2       = LLVMGetParam(variant->function, 2);
    773    args.facing   = LLVMGetParam(variant->function, 3);
    774    args.a0       = LLVMGetParam(variant->function, 4);
    775    args.dadx     = LLVMGetParam(variant->function, 5);
    776    args.dady     = LLVMGetParam(variant->function, 6);
    777 
    778    lp_build_name(args.v0, "in_v0");
    779    lp_build_name(args.v1, "in_v1");
    780    lp_build_name(args.v2, "in_v2");
    781    lp_build_name(args.facing, "in_facing");
    782    lp_build_name(args.a0, "out_a0");
    783    lp_build_name(args.dadx, "out_dadx");
    784    lp_build_name(args.dady, "out_dady");
    785 
    786    /*
    787     * Function body
    788     */
    789    block = LLVMAppendBasicBlockInContext(gallivm->context,
    790                                          variant->function, "entry");
    791    LLVMPositionBuilderAtEnd(builder, block);
    792 
    793    set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
    794    init_args(gallivm, &variant->key, &args);
    795    emit_tri_coef(gallivm, &variant->key, &args);
    796 
    797    LLVMBuildRetVoid(builder);
    798 
    799    gallivm_verify_function(gallivm, variant->function);
    800 
    801    gallivm_compile_module(gallivm);
    802 
    803    variant->jit_function = (lp_jit_setup_triangle)
    804       gallivm_jit_function(gallivm, variant->function);
    805    if (!variant->jit_function)
    806       goto fail;
    807 
    808    gallivm_free_ir(variant->gallivm);
    809 
    810    /*
    811     * Update timing information:
    812     */
    813    if (LP_DEBUG & DEBUG_COUNTERS) {
    814       t1 = os_time_get();
    815       LP_COUNT_ADD(llvm_compile_time, t1 - t0);
    816       LP_COUNT_ADD(nr_llvm_compiles, 1);
    817    }
    818 
    819    return variant;
    820 
    821 fail:
    822    if (variant) {
    823       if (variant->gallivm) {
    824          gallivm_destroy(variant->gallivm);
    825       }
    826       FREE(variant);
    827    }
    828 
    829    return NULL;
    830 }
    831 
    832 
    833 
    834 static void
    835 lp_make_setup_variant_key(struct llvmpipe_context *lp,
    836                           struct lp_setup_variant_key *key)
    837 {
    838    struct lp_fragment_shader *fs = lp->fs;
    839    unsigned i;
    840 
    841    assert(sizeof key->inputs[0] == sizeof(uint));
    842 
    843    key->num_inputs = fs->info.base.num_inputs;
    844    key->flatshade_first = lp->rasterizer->flatshade_first;
    845    key->pixel_center_half = lp->rasterizer->half_pixel_center;
    846    key->twoside = lp->rasterizer->light_twoside;
    847    key->size = Offset(struct lp_setup_variant_key,
    848                       inputs[key->num_inputs]);
    849 
    850    key->color_slot = lp->color_slot[0];
    851    key->bcolor_slot = lp->bcolor_slot[0];
    852    key->spec_slot = lp->color_slot[1];
    853    key->bspec_slot = lp->bcolor_slot[1];
    854 
    855    /*
    856     * If depth is floating point, depth bias is calculated with respect
    857     * to the primitive's maximum Z value. Retain the original depth bias
    858     * value until that stage.
    859     */
    860    key->floating_point_depth = lp->floating_point_depth;
    861 
    862    if (key->floating_point_depth) {
    863       key->pgon_offset_units = (float) lp->rasterizer->offset_units;
    864    } else {
    865       key->pgon_offset_units =
    866          (float) (lp->rasterizer->offset_units * lp->mrd);
    867    }
    868 
    869    key->pgon_offset_scale = lp->rasterizer->offset_scale;
    870    key->pgon_offset_clamp = lp->rasterizer->offset_clamp;
    871    key->pad = 0;
    872    memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
    873    for (i = 0; i < key->num_inputs; i++) {
    874       if (key->inputs[i].interp == LP_INTERP_COLOR) {
    875          if (lp->rasterizer->flatshade)
    876             key->inputs[i].interp = LP_INTERP_CONSTANT;
    877          else
    878             key->inputs[i].interp = LP_INTERP_PERSPECTIVE;
    879       }
    880    }
    881 
    882 }
    883 
    884 
    885 static void
    886 remove_setup_variant(struct llvmpipe_context *lp,
    887                      struct lp_setup_variant *variant)
    888 {
    889    if (gallivm_debug & GALLIVM_DEBUG_IR) {
    890       debug_printf("llvmpipe: del setup_variant #%u total %u\n",
    891                    variant->no, lp->nr_setup_variants);
    892    }
    893 
    894    if (variant->gallivm) {
    895       gallivm_destroy(variant->gallivm);
    896    }
    897 
    898    remove_from_list(&variant->list_item_global);
    899    lp->nr_setup_variants--;
    900    FREE(variant);
    901 }
    902 
    903 
    904 
    905 /* When the number of setup variants exceeds a threshold, cull a
    906  * fraction (currently a quarter) of them.
    907  */
    908 static void
    909 cull_setup_variants(struct llvmpipe_context *lp)
    910 {
    911    struct pipe_context *pipe = &lp->pipe;
    912    int i;
    913 
    914    /*
    915     * XXX: we need to flush the context until we have some sort of reference
    916     * counting in fragment shaders as they may still be binned
    917     * Flushing alone might not be sufficient we need to wait on it too.
    918     */
    919    llvmpipe_finish(pipe, __FUNCTION__);
    920 
    921    for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) {
    922       struct lp_setup_variant_list_item *item;
    923       if (is_empty_list(&lp->setup_variants_list)) {
    924          break;
    925       }
    926       item = last_elem(&lp->setup_variants_list);
    927       assert(item);
    928       assert(item->base);
    929       remove_setup_variant(lp, item->base);
    930    }
    931 }
    932 
    933 
    934 /**
    935  * Update fragment/vertex shader linkage state.  This is called just
    936  * prior to drawing something when some fragment-related state has
    937  * changed.
    938  */
    939 void
    940 llvmpipe_update_setup(struct llvmpipe_context *lp)
    941 {
    942    struct lp_setup_variant_key *key = &lp->setup_variant.key;
    943    struct lp_setup_variant *variant = NULL;
    944    struct lp_setup_variant_list_item *li;
    945 
    946    lp_make_setup_variant_key(lp, key);
    947 
    948    foreach(li, &lp->setup_variants_list) {
    949       if(li->base->key.size == key->size &&
    950          memcmp(&li->base->key, key, key->size) == 0) {
    951          variant = li->base;
    952          break;
    953       }
    954    }
    955 
    956    if (variant) {
    957       move_to_head(&lp->setup_variants_list, &variant->list_item_global);
    958    }
    959    else {
    960       if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
    961          cull_setup_variants(lp);
    962       }
    963 
    964       variant = generate_setup_variant(key, lp);
    965       if (variant) {
    966          insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
    967          lp->nr_setup_variants++;
    968       }
    969    }
    970 
    971    lp_setup_set_setup_variant(lp->setup, variant);
    972 }
    973 
    974 void
    975 lp_delete_setup_variants(struct llvmpipe_context *lp)
    976 {
    977    struct lp_setup_variant_list_item *li;
    978    li = first_elem(&lp->setup_variants_list);
    979    while(!at_end(&lp->setup_variants_list, li)) {
    980       struct lp_setup_variant_list_item *next = next_elem(li);
    981       remove_setup_variant(lp, li->base);
    982       li = next;
    983    }
    984 }
    985 
    986 void
    987 lp_dump_setup_coef(const struct lp_setup_variant_key *key,
    988                    const float (*sa0)[4],
    989                    const float (*sdadx)[4],
    990                    const float (*sdady)[4])
    991 {
    992    int i, slot;
    993 
    994    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
    995       float a0   = sa0  [0][i];
    996       float dadx = sdadx[0][i];
    997       float dady = sdady[0][i];
    998 
    999       debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
   1000                    "xyzw"[i], a0, dadx, dady);
   1001    }
   1002 
   1003    for (slot = 0; slot < key->num_inputs; slot++) {
   1004       unsigned usage_mask = key->inputs[slot].usage_mask;
   1005       for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
   1006          if (usage_mask & (1 << i)) {
   1007             float a0   = sa0  [1 + slot][i];
   1008             float dadx = sdadx[1 + slot][i];
   1009             float dady = sdady[1 + slot][i];
   1010 
   1011             debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
   1012                          slot, "xyzw"[i], a0, dadx, dady);
   1013          }
   1014       }
   1015    }
   1016 }
   1017