Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * @file
     30  * Texture sampling -- SoA.
     31  *
     32  * @author Jose Fonseca <jfonseca (at) vmware.com>
     33  * @author Brian Paul <brianp (at) vmware.com>
     34  */
     35 
     36 #include "pipe/p_defines.h"
     37 #include "pipe/p_state.h"
     38 #include "pipe/p_shader_tokens.h"
     39 #include "util/u_debug.h"
     40 #include "util/u_dump.h"
     41 #include "util/u_memory.h"
     42 #include "util/u_math.h"
     43 #include "util/u_format.h"
     44 #include "util/u_cpu_detect.h"
     45 #include "util/format_rgb9e5.h"
     46 #include "lp_bld_debug.h"
     47 #include "lp_bld_type.h"
     48 #include "lp_bld_const.h"
     49 #include "lp_bld_conv.h"
     50 #include "lp_bld_arit.h"
     51 #include "lp_bld_bitarit.h"
     52 #include "lp_bld_logic.h"
     53 #include "lp_bld_printf.h"
     54 #include "lp_bld_swizzle.h"
     55 #include "lp_bld_flow.h"
     56 #include "lp_bld_gather.h"
     57 #include "lp_bld_format.h"
     58 #include "lp_bld_sample.h"
     59 #include "lp_bld_sample_aos.h"
     60 #include "lp_bld_struct.h"
     61 #include "lp_bld_quad.h"
     62 #include "lp_bld_pack.h"
     63 #include "lp_bld_intr.h"
     64 
     65 
     66 /**
     67  * Generate code to fetch a texel from a texture at int coords (x, y, z).
     68  * The computation depends on whether the texture is 1D, 2D or 3D.
     69  * The result, texel, will be float vectors:
     70  *   texel[0] = red values
     71  *   texel[1] = green values
     72  *   texel[2] = blue values
     73  *   texel[3] = alpha values
     74  */
     75 static void
     76 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
     77                           LLVMValueRef width,
     78                           LLVMValueRef height,
     79                           LLVMValueRef depth,
     80                           LLVMValueRef x,
     81                           LLVMValueRef y,
     82                           LLVMValueRef z,
     83                           LLVMValueRef y_stride,
     84                           LLVMValueRef z_stride,
     85                           LLVMValueRef data_ptr,
     86                           LLVMValueRef mipoffsets,
     87                           LLVMValueRef texel_out[4])
     88 {
     89    const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
     90    const unsigned dims = bld->dims;
     91    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
     92    LLVMBuilderRef builder = bld->gallivm->builder;
     93    LLVMValueRef offset;
     94    LLVMValueRef i, j;
     95    LLVMValueRef use_border = NULL;
     96 
     97    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
     98    if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
     99                                               static_state->min_img_filter,
    100                                               static_state->mag_img_filter)) {
    101       LLVMValueRef b1, b2;
    102       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
    103       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
    104       use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
    105    }
    106 
    107    if (dims >= 2 &&
    108        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
    109                                               static_state->min_img_filter,
    110                                               static_state->mag_img_filter)) {
    111       LLVMValueRef b1, b2;
    112       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
    113       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
    114       if (use_border) {
    115          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
    116          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
    117       }
    118       else {
    119          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
    120       }
    121    }
    122 
    123    if (dims == 3 &&
    124        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
    125                                               static_state->min_img_filter,
    126                                               static_state->mag_img_filter)) {
    127       LLVMValueRef b1, b2;
    128       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
    129       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
    130       if (use_border) {
    131          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
    132          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
    133       }
    134       else {
    135          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
    136       }
    137    }
    138 
    139    /* convert x,y,z coords to linear offset from start of texture, in bytes */
    140    lp_build_sample_offset(&bld->int_coord_bld,
    141                           bld->format_desc,
    142                           x, y, z, y_stride, z_stride,
    143                           &offset, &i, &j);
    144    if (mipoffsets) {
    145       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
    146    }
    147 
    148    if (use_border) {
    149       /* If we can sample the border color, it means that texcoords may
    150        * lie outside the bounds of the texture image.  We need to do
    151        * something to prevent reading out of bounds and causing a segfault.
    152        *
    153        * Simply AND the texture coords with !use_border.  This will cause
    154        * coords which are out of bounds to become zero.  Zero's guaranteed
    155        * to be inside the texture image.
    156        */
    157       offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
    158    }
    159 
    160    lp_build_fetch_rgba_soa(bld->gallivm,
    161                            bld->format_desc,
    162                            bld->texel_type, TRUE,
    163                            data_ptr, offset,
    164                            i, j,
    165                            bld->cache,
    166                            texel_out);
    167 
    168    /*
    169     * Note: if we find an app which frequently samples the texture border
    170     * we might want to implement a true conditional here to avoid sampling
    171     * the texture whenever possible (since that's quite a bit of code).
    172     * Ex:
    173     *   if (use_border) {
    174     *      texel = border_color;
    175     *   }
    176     *   else {
    177     *      texel = sample_texture(coord);
    178     *   }
    179     * As it is now, we always sample the texture, then selectively replace
    180     * the texel color results with the border color.
    181     */
    182 
    183    if (use_border) {
    184       /* select texel color or border color depending on use_border. */
    185       const struct util_format_description *format_desc = bld->format_desc;
    186       int chan;
    187       struct lp_type border_type = bld->texel_type;
    188       border_type.length = 4;
    189       /*
    190        * Only replace channels which are actually present. The others should
    191        * get optimized away eventually by sampler_view swizzle anyway but it's
    192        * easier too.
    193        */
    194       for (chan = 0; chan < 4; chan++) {
    195          unsigned chan_s;
    196          /* reverse-map channel... */
    197          for (chan_s = 0; chan_s < 4; chan_s++) {
    198             if (chan_s == format_desc->swizzle[chan]) {
    199                break;
    200             }
    201          }
    202          if (chan_s <= 3) {
    203             /* use the already clamped color */
    204             LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
    205             LLVMValueRef border_chan;
    206 
    207             border_chan = lp_build_extract_broadcast(bld->gallivm,
    208                                                      border_type,
    209                                                      bld->texel_type,
    210                                                      bld->border_color_clamped,
    211                                                      idx);
    212             texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
    213                                               border_chan, texel_out[chan]);
    214          }
    215       }
    216    }
    217 }
    218 
    219 
    220 /**
    221  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
    222  */
    223 static LLVMValueRef
    224 lp_build_coord_mirror(struct lp_build_sample_context *bld,
    225                       LLVMValueRef coord)
    226 {
    227    struct lp_build_context *coord_bld = &bld->coord_bld;
    228    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    229    LLVMValueRef fract, flr, isOdd;
    230 
    231    lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
    232    /* kill off NaNs */
    233    /* XXX: not safe without arch rounding, fract can be anything. */
    234    fract = lp_build_max_ext(coord_bld, fract, coord_bld->zero,
    235                             GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
    236 
    237    /* isOdd = flr & 1 */
    238    isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
    239 
    240    /* make coord positive or negative depending on isOdd */
    241    /* XXX slight overkill masking out sign bit is unnecessary */
    242    coord = lp_build_set_sign(coord_bld, fract, isOdd);
    243 
    244    /* convert isOdd to float */
    245    isOdd = lp_build_int_to_float(coord_bld, isOdd);
    246 
    247    /* add isOdd to coord */
    248    coord = lp_build_add(coord_bld, coord, isOdd);
    249 
    250    return coord;
    251 }
    252 
    253 
    254 /**
    255  * Helper to compute the first coord and the weight for
    256  * linear wrap repeat npot textures
    257  */
    258 void
    259 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
    260                                   LLVMValueRef coord_f,
    261                                   LLVMValueRef length_i,
    262                                   LLVMValueRef length_f,
    263                                   LLVMValueRef *coord0_i,
    264                                   LLVMValueRef *weight_f)
    265 {
    266    struct lp_build_context *coord_bld = &bld->coord_bld;
    267    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    268    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
    269    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
    270                                                 int_coord_bld->one);
    271    LLVMValueRef mask;
    272    /* wrap with normalized floats is just fract */
    273    coord_f = lp_build_fract(coord_bld, coord_f);
    274    /* mul by size and subtract 0.5 */
    275    coord_f = lp_build_mul(coord_bld, coord_f, length_f);
    276    coord_f = lp_build_sub(coord_bld, coord_f, half);
    277    /*
    278     * we avoided the 0.5/length division before the repeat wrap,
    279     * now need to fix up edge cases with selects
    280     */
    281    /*
    282     * Note we do a float (unordered) compare so we can eliminate NaNs.
    283     * (Otherwise would need fract_safe above).
    284     */
    285    mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
    286                            PIPE_FUNC_LESS, coord_f, coord_bld->zero);
    287 
    288    /* convert to int, compute lerp weight */
    289    lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
    290    *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
    291 }
    292 
    293 
    294 /**
    295  * Build LLVM code for texture wrap mode for linear filtering.
    296  * \param x0_out  returns first integer texcoord
    297  * \param x1_out  returns second integer texcoord
    298  * \param weight_out  returns linear interpolation weight
    299  */
    300 static void
    301 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
    302                             LLVMValueRef coord,
    303                             LLVMValueRef length,
    304                             LLVMValueRef length_f,
    305                             LLVMValueRef offset,
    306                             boolean is_pot,
    307                             unsigned wrap_mode,
    308                             LLVMValueRef *x0_out,
    309                             LLVMValueRef *x1_out,
    310                             LLVMValueRef *weight_out)
    311 {
    312    struct lp_build_context *coord_bld = &bld->coord_bld;
    313    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    314    LLVMBuilderRef builder = bld->gallivm->builder;
    315    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
    316    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
    317    LLVMValueRef coord0, coord1, weight;
    318 
    319    switch(wrap_mode) {
    320    case PIPE_TEX_WRAP_REPEAT:
    321       if (is_pot) {
    322          /* mul by size and subtract 0.5 */
    323          coord = lp_build_mul(coord_bld, coord, length_f);
    324          coord = lp_build_sub(coord_bld, coord, half);
    325          if (offset) {
    326             offset = lp_build_int_to_float(coord_bld, offset);
    327             coord = lp_build_add(coord_bld, coord, offset);
    328          }
    329          /* convert to int, compute lerp weight */
    330          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
    331          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    332          /* repeat wrap */
    333          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
    334          coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
    335       }
    336       else {
    337          LLVMValueRef mask;
    338          if (offset) {
    339             offset = lp_build_int_to_float(coord_bld, offset);
    340             offset = lp_build_div(coord_bld, offset, length_f);
    341             coord = lp_build_add(coord_bld, coord, offset);
    342          }
    343          lp_build_coord_repeat_npot_linear(bld, coord,
    344                                            length, length_f,
    345                                            &coord0, &weight);
    346          mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
    347                                  PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
    348          coord1 = LLVMBuildAnd(builder,
    349                                lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
    350                                mask, "");
    351       }
    352       break;
    353 
    354    case PIPE_TEX_WRAP_CLAMP:
    355       if (bld->static_sampler_state->normalized_coords) {
    356          /* scale coord to length */
    357          coord = lp_build_mul(coord_bld, coord, length_f);
    358       }
    359       if (offset) {
    360          offset = lp_build_int_to_float(coord_bld, offset);
    361          coord = lp_build_add(coord_bld, coord, offset);
    362       }
    363 
    364       /* clamp to [0, length] */
    365       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
    366 
    367       coord = lp_build_sub(coord_bld, coord, half);
    368 
    369       /* convert to int, compute lerp weight */
    370       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
    371       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    372       break;
    373 
    374    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    375       {
    376          struct lp_build_context abs_coord_bld = bld->coord_bld;
    377          abs_coord_bld.type.sign = FALSE;
    378 
    379          if (bld->static_sampler_state->normalized_coords) {
    380             /* mul by tex size */
    381             coord = lp_build_mul(coord_bld, coord, length_f);
    382          }
    383          if (offset) {
    384             offset = lp_build_int_to_float(coord_bld, offset);
    385             coord = lp_build_add(coord_bld, coord, offset);
    386          }
    387 
    388          /* clamp to length max */
    389          coord = lp_build_min_ext(coord_bld, coord, length_f,
    390                                   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
    391          /* subtract 0.5 */
    392          coord = lp_build_sub(coord_bld, coord, half);
    393          /* clamp to [0, length - 0.5] */
    394          coord = lp_build_max(coord_bld, coord, coord_bld->zero);
    395          /* convert to int, compute lerp weight */
    396          lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
    397          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    398          /* coord1 = min(coord1, length-1) */
    399          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
    400          break;
    401       }
    402 
    403    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    404       if (bld->static_sampler_state->normalized_coords) {
    405          /* scale coord to length */
    406          coord = lp_build_mul(coord_bld, coord, length_f);
    407       }
    408       if (offset) {
    409          offset = lp_build_int_to_float(coord_bld, offset);
    410          coord = lp_build_add(coord_bld, coord, offset);
    411       }
    412       /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
    413       /* can skip clamp (though might not work for very large coord values) */
    414       coord = lp_build_sub(coord_bld, coord, half);
    415       /* convert to int, compute lerp weight */
    416       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
    417       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    418       break;
    419 
    420    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    421       if (offset) {
    422          offset = lp_build_int_to_float(coord_bld, offset);
    423          offset = lp_build_div(coord_bld, offset, length_f);
    424          coord = lp_build_add(coord_bld, coord, offset);
    425       }
    426       /* compute mirror function */
    427       coord = lp_build_coord_mirror(bld, coord);
    428 
    429       /* scale coord to length */
    430       coord = lp_build_mul(coord_bld, coord, length_f);
    431       coord = lp_build_sub(coord_bld, coord, half);
    432 
    433       /* convert to int, compute lerp weight */
    434       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
    435       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    436 
    437       /* coord0 = max(coord0, 0) */
    438       coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
    439       /* coord1 = min(coord1, length-1) */
    440       coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
    441       break;
    442 
    443    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    444       if (bld->static_sampler_state->normalized_coords) {
    445          /* scale coord to length */
    446          coord = lp_build_mul(coord_bld, coord, length_f);
    447       }
    448       if (offset) {
    449          offset = lp_build_int_to_float(coord_bld, offset);
    450          coord = lp_build_add(coord_bld, coord, offset);
    451       }
    452       coord = lp_build_abs(coord_bld, coord);
    453 
    454       /* clamp to [0, length] */
    455       coord = lp_build_min(coord_bld, coord, length_f);
    456 
    457       coord = lp_build_sub(coord_bld, coord, half);
    458 
    459       /* convert to int, compute lerp weight */
    460       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
    461       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    462       break;
    463 
    464    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    465       {
    466          struct lp_build_context abs_coord_bld = bld->coord_bld;
    467          abs_coord_bld.type.sign = FALSE;
    468 
    469          if (bld->static_sampler_state->normalized_coords) {
    470             /* scale coord to length */
    471             coord = lp_build_mul(coord_bld, coord, length_f);
    472          }
    473          if (offset) {
    474             offset = lp_build_int_to_float(coord_bld, offset);
    475             coord = lp_build_add(coord_bld, coord, offset);
    476          }
    477          coord = lp_build_abs(coord_bld, coord);
    478 
    479          /* clamp to length max */
    480          coord = lp_build_min_ext(coord_bld, coord, length_f,
    481                                   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
    482          /* subtract 0.5 */
    483          coord = lp_build_sub(coord_bld, coord, half);
    484          /* clamp to [0, length - 0.5] */
    485          coord = lp_build_max(coord_bld, coord, coord_bld->zero);
    486 
    487          /* convert to int, compute lerp weight */
    488          lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
    489          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    490          /* coord1 = min(coord1, length-1) */
    491          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
    492       }
    493       break;
    494 
    495    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    496       {
    497          if (bld->static_sampler_state->normalized_coords) {
    498             /* scale coord to length */
    499             coord = lp_build_mul(coord_bld, coord, length_f);
    500          }
    501          if (offset) {
    502             offset = lp_build_int_to_float(coord_bld, offset);
    503             coord = lp_build_add(coord_bld, coord, offset);
    504          }
    505          coord = lp_build_abs(coord_bld, coord);
    506 
    507          /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
    508          /* skip clamp - always positive, and other side
    509             only potentially matters for very large coords */
    510          coord = lp_build_sub(coord_bld, coord, half);
    511 
    512          /* convert to int, compute lerp weight */
    513          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
    514          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    515       }
    516       break;
    517 
    518    default:
    519       assert(0);
    520       coord0 = NULL;
    521       coord1 = NULL;
    522       weight = NULL;
    523    }
    524 
    525    *x0_out = coord0;
    526    *x1_out = coord1;
    527    *weight_out = weight;
    528 }
    529 
    530 
    531 /**
    532  * Build LLVM code for texture wrap mode for nearest filtering.
    533  * \param coord  the incoming texcoord (nominally in [0,1])
    534  * \param length  the texture size along one dimension, as int vector
    535  * \param length_f  the texture size along one dimension, as float vector
    536  * \param offset  texel offset along one dimension (as int vector)
    537  * \param is_pot  if TRUE, length is a power of two
    538  * \param wrap_mode  one of PIPE_TEX_WRAP_x
    539  */
    540 static LLVMValueRef
    541 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
    542                              LLVMValueRef coord,
    543                              LLVMValueRef length,
    544                              LLVMValueRef length_f,
    545                              LLVMValueRef offset,
    546                              boolean is_pot,
    547                              unsigned wrap_mode)
    548 {
    549    struct lp_build_context *coord_bld = &bld->coord_bld;
    550    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    551    LLVMBuilderRef builder = bld->gallivm->builder;
    552    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
    553    LLVMValueRef icoord;
    554 
    555    switch(wrap_mode) {
    556    case PIPE_TEX_WRAP_REPEAT:
    557       if (is_pot) {
    558          coord = lp_build_mul(coord_bld, coord, length_f);
    559          icoord = lp_build_ifloor(coord_bld, coord);
    560          if (offset) {
    561             icoord = lp_build_add(int_coord_bld, icoord, offset);
    562          }
    563          icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
    564       }
    565       else {
    566           if (offset) {
    567              offset = lp_build_int_to_float(coord_bld, offset);
    568              offset = lp_build_div(coord_bld, offset, length_f);
    569              coord = lp_build_add(coord_bld, coord, offset);
    570           }
    571           /* take fraction, unnormalize */
    572           coord = lp_build_fract_safe(coord_bld, coord);
    573           coord = lp_build_mul(coord_bld, coord, length_f);
    574           icoord = lp_build_itrunc(coord_bld, coord);
    575       }
    576       break;
    577 
    578    case PIPE_TEX_WRAP_CLAMP:
    579    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    580       if (bld->static_sampler_state->normalized_coords) {
    581          /* scale coord to length */
    582          coord = lp_build_mul(coord_bld, coord, length_f);
    583       }
    584 
    585       if (offset) {
    586          offset = lp_build_int_to_float(coord_bld, offset);
    587          coord = lp_build_add(coord_bld, coord, offset);
    588       }
    589       /* floor */
    590       /* use itrunc instead since we clamp to 0 anyway */
    591       icoord = lp_build_itrunc(coord_bld, coord);
    592 
    593       /* clamp to [0, length - 1]. */
    594       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
    595                               length_minus_one);
    596       break;
    597 
    598    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    599       if (bld->static_sampler_state->normalized_coords) {
    600          /* scale coord to length */
    601          coord = lp_build_mul(coord_bld, coord, length_f);
    602       }
    603       /* no clamp necessary, border masking will handle this */
    604       icoord = lp_build_ifloor(coord_bld, coord);
    605       if (offset) {
    606          icoord = lp_build_add(int_coord_bld, icoord, offset);
    607       }
    608       break;
    609 
    610    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    611       if (offset) {
    612          offset = lp_build_int_to_float(coord_bld, offset);
    613          offset = lp_build_div(coord_bld, offset, length_f);
    614          coord = lp_build_add(coord_bld, coord, offset);
    615       }
    616       /* compute mirror function */
    617       coord = lp_build_coord_mirror(bld, coord);
    618 
    619       /* scale coord to length */
    620       assert(bld->static_sampler_state->normalized_coords);
    621       coord = lp_build_mul(coord_bld, coord, length_f);
    622 
    623       /* itrunc == ifloor here */
    624       icoord = lp_build_itrunc(coord_bld, coord);
    625 
    626       /* clamp to [0, length - 1] */
    627       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
    628       break;
    629 
    630    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    631    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    632       if (bld->static_sampler_state->normalized_coords) {
    633          /* scale coord to length */
    634          coord = lp_build_mul(coord_bld, coord, length_f);
    635       }
    636       if (offset) {
    637          offset = lp_build_int_to_float(coord_bld, offset);
    638          coord = lp_build_add(coord_bld, coord, offset);
    639       }
    640       coord = lp_build_abs(coord_bld, coord);
    641 
    642       /* itrunc == ifloor here */
    643       icoord = lp_build_itrunc(coord_bld, coord);
    644       /*
    645        * Use unsigned min due to possible undef values (NaNs, overflow)
    646        */
    647       {
    648          struct lp_build_context abs_coord_bld = *int_coord_bld;
    649          abs_coord_bld.type.sign = FALSE;
    650          /* clamp to [0, length - 1] */
    651          icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
    652       }
    653       break;
    654 
    655    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    656       if (bld->static_sampler_state->normalized_coords) {
    657          /* scale coord to length */
    658          coord = lp_build_mul(coord_bld, coord, length_f);
    659       }
    660       if (offset) {
    661          offset = lp_build_int_to_float(coord_bld, offset);
    662          coord = lp_build_add(coord_bld, coord, offset);
    663       }
    664       coord = lp_build_abs(coord_bld, coord);
    665 
    666       /* itrunc == ifloor here */
    667       icoord = lp_build_itrunc(coord_bld, coord);
    668       break;
    669 
    670    default:
    671       assert(0);
    672       icoord = NULL;
    673    }
    674 
    675    return icoord;
    676 }
    677 
    678 
    679 /**
    680  * Do shadow test/comparison.
    681  * \param p shadow ref value
    682  * \param texel  the texel to compare against
    683  */
    684 static LLVMValueRef
    685 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
    686                             LLVMValueRef p,
    687                             LLVMValueRef texel)
    688 {
    689    struct lp_build_context *texel_bld = &bld->texel_bld;
    690    LLVMValueRef res;
    691 
    692    if (0) {
    693       //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
    694       lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
    695    }
    696 
    697    /* result = (p FUNC texel) ? 1 : 0 */
    698    /*
    699     * honor d3d10 floating point rules here, which state that comparisons
    700     * are ordered except NOT_EQUAL which is unordered.
    701     */
    702    if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
    703       res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
    704                                  p, texel);
    705    }
    706    else {
    707       res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
    708                          p, texel);
    709    }
    710    return res;
    711 }
    712 
    713 
    714 /**
    715  * Generate code to sample a mipmap level with nearest filtering.
    716  * If sampling a cube texture, r = cube face in [0,5].
    717  */
    718 static void
    719 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
    720                               LLVMValueRef size,
    721                               LLVMValueRef row_stride_vec,
    722                               LLVMValueRef img_stride_vec,
    723                               LLVMValueRef data_ptr,
    724                               LLVMValueRef mipoffsets,
    725                               LLVMValueRef *coords,
    726                               const LLVMValueRef *offsets,
    727                               LLVMValueRef colors_out[4])
    728 {
    729    const unsigned dims = bld->dims;
    730    LLVMValueRef width_vec;
    731    LLVMValueRef height_vec;
    732    LLVMValueRef depth_vec;
    733    LLVMValueRef flt_size;
    734    LLVMValueRef flt_width_vec;
    735    LLVMValueRef flt_height_vec;
    736    LLVMValueRef flt_depth_vec;
    737    LLVMValueRef x, y = NULL, z = NULL;
    738 
    739    lp_build_extract_image_sizes(bld,
    740                                 &bld->int_size_bld,
    741                                 bld->int_coord_type,
    742                                 size,
    743                                 &width_vec, &height_vec, &depth_vec);
    744 
    745    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
    746 
    747    lp_build_extract_image_sizes(bld,
    748                                 &bld->float_size_bld,
    749                                 bld->coord_type,
    750                                 flt_size,
    751                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
    752 
    753    /*
    754     * Compute integer texcoords.
    755     */
    756    x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
    757                                     flt_width_vec, offsets[0],
    758                                     bld->static_texture_state->pot_width,
    759                                     bld->static_sampler_state->wrap_s);
    760    lp_build_name(x, "tex.x.wrapped");
    761 
    762    if (dims >= 2) {
    763       y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
    764                                        flt_height_vec, offsets[1],
    765                                        bld->static_texture_state->pot_height,
    766                                        bld->static_sampler_state->wrap_t);
    767       lp_build_name(y, "tex.y.wrapped");
    768 
    769       if (dims == 3) {
    770          z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
    771                                           flt_depth_vec, offsets[2],
    772                                           bld->static_texture_state->pot_depth,
    773                                           bld->static_sampler_state->wrap_r);
    774          lp_build_name(z, "tex.z.wrapped");
    775       }
    776    }
    777    if (has_layer_coord(bld->static_texture_state->target)) {
    778       if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
    779          /* add cube layer to face */
    780          z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
    781       }
    782       else {
    783          z = coords[2];
    784       }
    785       lp_build_name(z, "tex.z.layer");
    786    }
    787 
    788    /*
    789     * Get texture colors.
    790     */
    791    lp_build_sample_texel_soa(bld,
    792                              width_vec, height_vec, depth_vec,
    793                              x, y, z,
    794                              row_stride_vec, img_stride_vec,
    795                              data_ptr, mipoffsets, colors_out);
    796 
    797    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
    798       LLVMValueRef cmpval;
    799       cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
    800       /* this is really just a AND 1.0, cmpval but llvm is clever enough */
    801       colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
    802                                       bld->texel_bld.one, bld->texel_bld.zero);
    803       colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
    804    }
    805 
    806 }
    807 
    808 
    809 /**
    810  * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
    811  */
    812 static LLVMValueRef
    813 lp_build_masklerp(struct lp_build_context *bld,
    814                  LLVMValueRef weight,
    815                  LLVMValueRef mask0,
    816                  LLVMValueRef mask1)
    817 {
    818    struct gallivm_state *gallivm = bld->gallivm;
    819    LLVMBuilderRef builder = gallivm->builder;
    820    LLVMValueRef weight2;
    821 
    822    weight2 = lp_build_sub(bld, bld->one, weight);
    823    weight = LLVMBuildBitCast(builder, weight,
    824                               lp_build_int_vec_type(gallivm, bld->type), "");
    825    weight2 = LLVMBuildBitCast(builder, weight2,
    826                               lp_build_int_vec_type(gallivm, bld->type), "");
    827    weight = LLVMBuildAnd(builder, weight, mask1, "");
    828    weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
    829    weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
    830    weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
    831    return lp_build_add(bld, weight, weight2);
    832 }
    833 
    834 /**
    835  * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
    836  */
    837 static LLVMValueRef
    838 lp_build_masklerp2d(struct lp_build_context *bld,
    839                     LLVMValueRef weight0,
    840                     LLVMValueRef weight1,
    841                     LLVMValueRef mask00,
    842                     LLVMValueRef mask01,
    843                     LLVMValueRef mask10,
    844                     LLVMValueRef mask11)
    845 {
    846    LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
    847    LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
    848    return lp_build_lerp(bld, weight1, val0, val1, 0);
    849 }
    850 
    851 /*
    852  * Nonzero enables corner weight fixup for seamless cube linear filtering (not
    853  * gather). A lot of code for what OpenGL recommends but does not require.
    854  */
    855 #define ACCURATE_CUBE_CORNERS 1
    856 
    857 /**
    858  * Generate code to sample a mipmap level with linear filtering.
    859  * If sampling a cube texture, r = cube face in [0,5].
    860  * If linear_mask is present, only pixels having their mask set
    861  * will receive linear filtering, the rest will use nearest.
    862  */
    863 static void
    864 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
    865                              boolean is_gather,
    866                              LLVMValueRef size,
    867                              LLVMValueRef linear_mask,
    868                              LLVMValueRef row_stride_vec,
    869                              LLVMValueRef img_stride_vec,
    870                              LLVMValueRef data_ptr,
    871                              LLVMValueRef mipoffsets,
    872                              LLVMValueRef *coords,
    873                              const LLVMValueRef *offsets,
    874                              LLVMValueRef colors_out[4])
    875 {
    876    LLVMBuilderRef builder = bld->gallivm->builder;
    877    struct lp_build_context *ivec_bld = &bld->int_coord_bld;
    878    struct lp_build_context *coord_bld = &bld->coord_bld;
    879    struct lp_build_context *texel_bld = &bld->texel_bld;
    880    const unsigned dims = bld->dims;
    881    LLVMValueRef width_vec;
    882    LLVMValueRef height_vec;
    883    LLVMValueRef depth_vec;
    884    LLVMValueRef flt_size;
    885    LLVMValueRef flt_width_vec;
    886    LLVMValueRef flt_height_vec;
    887    LLVMValueRef flt_depth_vec;
    888    LLVMValueRef fall_off[4], have_corners;
    889    LLVMValueRef z1 = NULL;
    890    LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
    891    LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
    892    LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
    893    LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
    894    LLVMValueRef xs[4], ys[4], zs[4];
    895    LLVMValueRef neighbors[2][2][4];
    896    int chan, texel_index;
    897    boolean seamless_cube_filter, accurate_cube_corners;
    898 
    899    seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
    900                            bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
    901                           bld->static_sampler_state->seamless_cube_map;
    902    /*
    903     * XXX I don't know how this is really supposed to work with gather. From GL
    904     * spec wording (not gather specific) it sounds like the 4th missing texel
    905     * should be an average of the other 3, hence for gather could return this.
    906     * This is however NOT how the code here works, which just fixes up the
    907     * weights used for filtering instead. And of course for gather there is
    908     * no filter to tweak...
    909     */
    910    accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
    911                            !is_gather;
    912 
    913    lp_build_extract_image_sizes(bld,
    914                                 &bld->int_size_bld,
    915                                 bld->int_coord_type,
    916                                 size,
    917                                 &width_vec, &height_vec, &depth_vec);
    918 
    919    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
    920 
    921    lp_build_extract_image_sizes(bld,
    922                                 &bld->float_size_bld,
    923                                 bld->coord_type,
    924                                 flt_size,
    925                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
    926 
    927    /*
    928     * Compute integer texcoords.
    929     */
    930 
    931    if (!seamless_cube_filter) {
    932       lp_build_sample_wrap_linear(bld, coords[0], width_vec,
    933                                   flt_width_vec, offsets[0],
    934                                   bld->static_texture_state->pot_width,
    935                                   bld->static_sampler_state->wrap_s,
    936                                   &x00, &x01, &s_fpart);
    937       lp_build_name(x00, "tex.x0.wrapped");
    938       lp_build_name(x01, "tex.x1.wrapped");
    939       x10 = x00;
    940       x11 = x01;
    941 
    942       if (dims >= 2) {
    943          lp_build_sample_wrap_linear(bld, coords[1], height_vec,
    944                                      flt_height_vec, offsets[1],
    945                                      bld->static_texture_state->pot_height,
    946                                      bld->static_sampler_state->wrap_t,
    947                                      &y00, &y10, &t_fpart);
    948          lp_build_name(y00, "tex.y0.wrapped");
    949          lp_build_name(y10, "tex.y1.wrapped");
    950          y01 = y00;
    951          y11 = y10;
    952 
    953          if (dims == 3) {
    954             lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
    955                                         flt_depth_vec, offsets[2],
    956                                         bld->static_texture_state->pot_depth,
    957                                         bld->static_sampler_state->wrap_r,
    958                                         &z00, &z1, &r_fpart);
    959             z01 = z10 = z11 = z00;
    960             lp_build_name(z00, "tex.z0.wrapped");
    961             lp_build_name(z1, "tex.z1.wrapped");
    962          }
    963       }
    964       if (has_layer_coord(bld->static_texture_state->target)) {
    965          if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
    966             /* add cube layer to face */
    967             z00 = z01 = z10 = z11 = z1 =
    968                lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
    969          }
    970          else {
    971             z00 = z01 = z10 = z11 = z1 = coords[2];  /* cube face or layer */
    972          }
    973          lp_build_name(z00, "tex.z0.layer");
    974          lp_build_name(z1, "tex.z1.layer");
    975       }
    976    }
    977    else {
    978       struct lp_build_if_state edge_if;
    979       LLVMTypeRef int1t;
    980       LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
    981       LLVMValueRef coord, have_edge, have_corner;
    982       LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
    983       LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
    984       LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
    985       LLVMValueRef face = coords[2];
    986       LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
    987       LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
    988       /* XXX drop height calcs. Could (should) do this without seamless filtering too */
    989       height_vec = width_vec;
    990       flt_height_vec = flt_width_vec;
    991 
    992       /* XXX the overflow logic is actually sort of duplicated with trilinear,
    993        * since an overflow in one mip should also have a corresponding overflow
    994        * in another.
    995        */
    996       /* should always have normalized coords, and offsets are undefined */
    997       assert(bld->static_sampler_state->normalized_coords);
    998       coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
    999       /* instead of clamp, build mask if overflowed */
   1000       coord = lp_build_sub(coord_bld, coord, half);
   1001       /* convert to int, compute lerp weight */
   1002       /* not ideal with AVX (and no AVX2) */
   1003       lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
   1004       x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
   1005       coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
   1006       coord = lp_build_sub(coord_bld, coord, half);
   1007       lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
   1008       y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
   1009 
   1010       fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
   1011       fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
   1012       fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
   1013       fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
   1014 
   1015       fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
   1016       fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
   1017       have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
   1018       have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
   1019 
   1020       /* needed for accurate corner filtering branch later, rely on 0 init */
   1021       int1t = LLVMInt1TypeInContext(bld->gallivm->context);
   1022       have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
   1023 
   1024       for (texel_index = 0; texel_index < 4; texel_index++) {
   1025          xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
   1026          ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
   1027          zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
   1028       }
   1029 
   1030       lp_build_if(&edge_if, bld->gallivm, have_edge);
   1031 
   1032       have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
   1033       have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
   1034       LLVMBuildStore(builder, have_corner, have_corners);
   1035 
   1036       /*
   1037        * Need to feed clamped values here for cheap corner handling,
   1038        * but only for y coord (as when falling off both edges we only
   1039        * fall off the x one) - this should be sufficient.
   1040        */
   1041       y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
   1042       y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
   1043 
   1044       /*
   1045        * Get all possible new coords.
   1046        */
   1047       lp_build_cube_new_coords(ivec_bld, face,
   1048                                x0, x1, y0_clamped, y1_clamped,
   1049                                length_minus_one,
   1050                                new_faces, new_xcoords, new_ycoords);
   1051 
   1052       /* handle fall off x-, x+ direction */
   1053       /* determine new coords, face (not both fall_off vars can be true at same time) */
   1054       x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
   1055       y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
   1056       x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
   1057       y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
   1058       x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
   1059       y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
   1060       x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
   1061       y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
   1062 
   1063       z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
   1064       z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
   1065 
   1066       /* handle fall off y-, y+ direction */
   1067       /*
   1068        * Cheap corner logic: just hack up things so a texel doesn't fall
   1069        * off both sides (which means filter weights will be wrong but we'll only
   1070        * use valid texels in the filter).
   1071        * This means however (y) coords must additionally be clamped (see above).
   1072        * This corner handling should be fully OpenGL (but not d3d10) compliant.
   1073        */
   1074       fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
   1075       fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
   1076       fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
   1077       fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
   1078 
   1079       x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
   1080       y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
   1081       x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
   1082       y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
   1083       x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
   1084       y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
   1085       x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
   1086       y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
   1087 
   1088       z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
   1089       z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
   1090       z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
   1091       z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
   1092 
   1093       if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
   1094          /* now can add cube layer to face (per sample) */
   1095          z00 = lp_build_add(ivec_bld, z00, coords[3]);
   1096          z01 = lp_build_add(ivec_bld, z01, coords[3]);
   1097          z10 = lp_build_add(ivec_bld, z10, coords[3]);
   1098          z11 = lp_build_add(ivec_bld, z11, coords[3]);
   1099       }
   1100 
   1101       LLVMBuildStore(builder, x00, xs[0]);
   1102       LLVMBuildStore(builder, x01, xs[1]);
   1103       LLVMBuildStore(builder, x10, xs[2]);
   1104       LLVMBuildStore(builder, x11, xs[3]);
   1105       LLVMBuildStore(builder, y00, ys[0]);
   1106       LLVMBuildStore(builder, y01, ys[1]);
   1107       LLVMBuildStore(builder, y10, ys[2]);
   1108       LLVMBuildStore(builder, y11, ys[3]);
   1109       LLVMBuildStore(builder, z00, zs[0]);
   1110       LLVMBuildStore(builder, z01, zs[1]);
   1111       LLVMBuildStore(builder, z10, zs[2]);
   1112       LLVMBuildStore(builder, z11, zs[3]);
   1113 
   1114       lp_build_else(&edge_if);
   1115 
   1116       LLVMBuildStore(builder, x0, xs[0]);
   1117       LLVMBuildStore(builder, x1, xs[1]);
   1118       LLVMBuildStore(builder, x0, xs[2]);
   1119       LLVMBuildStore(builder, x1, xs[3]);
   1120       LLVMBuildStore(builder, y0, ys[0]);
   1121       LLVMBuildStore(builder, y0, ys[1]);
   1122       LLVMBuildStore(builder, y1, ys[2]);
   1123       LLVMBuildStore(builder, y1, ys[3]);
   1124       if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
   1125          LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
   1126          LLVMBuildStore(builder, cube_layer, zs[0]);
   1127          LLVMBuildStore(builder, cube_layer, zs[1]);
   1128          LLVMBuildStore(builder, cube_layer, zs[2]);
   1129          LLVMBuildStore(builder, cube_layer, zs[3]);
   1130       }
   1131       else {
   1132          LLVMBuildStore(builder, face, zs[0]);
   1133          LLVMBuildStore(builder, face, zs[1]);
   1134          LLVMBuildStore(builder, face, zs[2]);
   1135          LLVMBuildStore(builder, face, zs[3]);
   1136       }
   1137 
   1138       lp_build_endif(&edge_if);
   1139 
   1140       x00 = LLVMBuildLoad(builder, xs[0], "");
   1141       x01 = LLVMBuildLoad(builder, xs[1], "");
   1142       x10 = LLVMBuildLoad(builder, xs[2], "");
   1143       x11 = LLVMBuildLoad(builder, xs[3], "");
   1144       y00 = LLVMBuildLoad(builder, ys[0], "");
   1145       y01 = LLVMBuildLoad(builder, ys[1], "");
   1146       y10 = LLVMBuildLoad(builder, ys[2], "");
   1147       y11 = LLVMBuildLoad(builder, ys[3], "");
   1148       z00 = LLVMBuildLoad(builder, zs[0], "");
   1149       z01 = LLVMBuildLoad(builder, zs[1], "");
   1150       z10 = LLVMBuildLoad(builder, zs[2], "");
   1151       z11 = LLVMBuildLoad(builder, zs[3], "");
   1152    }
   1153 
   1154    if (linear_mask) {
   1155       /*
   1156        * Whack filter weights into place. Whatever texel had more weight is
   1157        * the one which should have been selected by nearest filtering hence
   1158        * just use 100% weight for it.
   1159        */
   1160       struct lp_build_context *c_bld = &bld->coord_bld;
   1161       LLVMValueRef w1_mask, w1_weight;
   1162       LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
   1163 
   1164       w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
   1165       /* this select is really just a "and" */
   1166       w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
   1167       s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
   1168       if (dims >= 2) {
   1169          w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
   1170          w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
   1171          t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
   1172          if (dims == 3) {
   1173             w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
   1174             w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
   1175             r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
   1176          }
   1177       }
   1178    }
   1179 
   1180    /*
   1181     * Get texture colors.
   1182     */
   1183    /* get x0/x1 texels */
   1184    lp_build_sample_texel_soa(bld,
   1185                              width_vec, height_vec, depth_vec,
   1186                              x00, y00, z00,
   1187                              row_stride_vec, img_stride_vec,
   1188                              data_ptr, mipoffsets, neighbors[0][0]);
   1189    lp_build_sample_texel_soa(bld,
   1190                              width_vec, height_vec, depth_vec,
   1191                              x01, y01, z01,
   1192                              row_stride_vec, img_stride_vec,
   1193                              data_ptr, mipoffsets, neighbors[0][1]);
   1194 
   1195    if (dims == 1) {
   1196       assert(!is_gather);
   1197       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
   1198          /* Interpolate two samples from 1D image to produce one color */
   1199          for (chan = 0; chan < 4; chan++) {
   1200             colors_out[chan] = lp_build_lerp(texel_bld, s_fpart,
   1201                                              neighbors[0][0][chan],
   1202                                              neighbors[0][1][chan],
   1203                                              0);
   1204          }
   1205       }
   1206       else {
   1207          LLVMValueRef cmpval0, cmpval1;
   1208          cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
   1209          cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
   1210          /* simplified lerp, AND mask with weight and add */
   1211          colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
   1212                                            cmpval0, cmpval1);
   1213          colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   1214       }
   1215    }
   1216    else {
   1217       /* 2D/3D texture */
   1218       struct lp_build_if_state corner_if;
   1219       LLVMValueRef colors0[4], colorss[4];
   1220 
   1221       /* get x0/x1 texels at y1 */
   1222       lp_build_sample_texel_soa(bld,
   1223                                 width_vec, height_vec, depth_vec,
   1224                                 x10, y10, z10,
   1225                                 row_stride_vec, img_stride_vec,
   1226                                 data_ptr, mipoffsets, neighbors[1][0]);
   1227       lp_build_sample_texel_soa(bld,
   1228                                 width_vec, height_vec, depth_vec,
   1229                                 x11, y11, z11,
   1230                                 row_stride_vec, img_stride_vec,
   1231                                 data_ptr, mipoffsets, neighbors[1][1]);
   1232 
   1233       /*
   1234        * To avoid having to duplicate linear_mask / fetch code use
   1235        * another branch (with corner condition though edge would work
   1236        * as well) here.
   1237        */
   1238       if (accurate_cube_corners) {
   1239          LLVMValueRef w00, w01, w10, w11, wx0, wy0;
   1240          LLVMValueRef c_weight, c00, c01, c10, c11;
   1241          LLVMValueRef have_corner, one_third, tmp;
   1242 
   1243          colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
   1244          colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
   1245          colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
   1246          colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
   1247 
   1248          have_corner = LLVMBuildLoad(builder, have_corners, "");
   1249 
   1250          lp_build_if(&corner_if, bld->gallivm, have_corner);
   1251 
   1252          /*
   1253           * we can't use standard 2d lerp as we need per-element weight
   1254           * in case of corners, so just calculate bilinear result as
   1255           * w00*s00 + w01*s01 + w10*s10 + w11*s11.
   1256           * (This is actually less work than using 2d lerp, 7 vs. 9 instructions,
   1257           * however calculating the weights needs another 6, so actually probably
   1258           * not slower than 2d lerp only for 4 channels as weights only need
   1259           * to be calculated once - of course fixing the weights has additional cost.)
   1260           */
   1261          wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
   1262          wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
   1263          w00 = lp_build_mul(coord_bld, wx0, wy0);
   1264          w01 = lp_build_mul(coord_bld, s_fpart, wy0);
   1265          w10 = lp_build_mul(coord_bld, wx0, t_fpart);
   1266          w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
   1267 
   1268          /* find corner weight */
   1269          c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
   1270          c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
   1271          c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
   1272          c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
   1273          c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
   1274          c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
   1275          c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
   1276          c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
   1277 
   1278          /*
   1279           * add 1/3 of the corner weight to each of the 3 other samples
   1280           * and null out corner weight
   1281           */
   1282          one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
   1283          c_weight = lp_build_mul(coord_bld, c_weight, one_third);
   1284          w00 = lp_build_add(coord_bld, w00, c_weight);
   1285          c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
   1286          w00 = lp_build_andnot(coord_bld, w00, c00);
   1287          w01 = lp_build_add(coord_bld, w01, c_weight);
   1288          c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
   1289          w01 = lp_build_andnot(coord_bld, w01, c01);
   1290          w10 = lp_build_add(coord_bld, w10, c_weight);
   1291          c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
   1292          w10 = lp_build_andnot(coord_bld, w10, c10);
   1293          w11 = lp_build_add(coord_bld, w11, c_weight);
   1294          c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
   1295          w11 = lp_build_andnot(coord_bld, w11, c11);
   1296 
   1297          if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
   1298             for (chan = 0; chan < 4; chan++) {
   1299                colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
   1300                tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
   1301                colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
   1302                tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
   1303                colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
   1304                tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
   1305                colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
   1306             }
   1307          }
   1308          else {
   1309             LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
   1310             cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
   1311             cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
   1312             cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
   1313             cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
   1314             /* inputs to interpolation are just masks so just add masked weights together */
   1315             cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
   1316             cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
   1317             cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
   1318             cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
   1319             colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
   1320             tmp = lp_build_and(coord_bld, w01, cmpval01);
   1321             colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
   1322             tmp = lp_build_and(coord_bld, w10, cmpval10);
   1323             colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
   1324             tmp = lp_build_and(coord_bld, w11, cmpval11);
   1325             colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
   1326             colors0[1] = colors0[2] = colors0[3] = colors0[0];
   1327          }
   1328 
   1329          LLVMBuildStore(builder, colors0[0], colorss[0]);
   1330          LLVMBuildStore(builder, colors0[1], colorss[1]);
   1331          LLVMBuildStore(builder, colors0[2], colorss[2]);
   1332          LLVMBuildStore(builder, colors0[3], colorss[3]);
   1333 
   1334          lp_build_else(&corner_if);
   1335       }
   1336 
   1337       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
   1338          if (is_gather) {
   1339             /*
   1340              * Just assign the red channel (no component selection yet).
   1341              * This is a bit hackish, we usually do the swizzle at the
   1342              * end of sampling (much less values to swizzle), but this
   1343              * obviously cannot work when using gather.
   1344              */
   1345             unsigned chan_swiz = bld->static_texture_state->swizzle_r;
   1346             colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
   1347                                                       neighbors[1][0],
   1348                                                       chan_swiz);
   1349             colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
   1350                                                       neighbors[1][1],
   1351                                                       chan_swiz);
   1352             colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
   1353                                                       neighbors[0][1],
   1354                                                       chan_swiz);
   1355             colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
   1356                                                       neighbors[0][0],
   1357                                                       chan_swiz);
   1358          }
   1359          else {
   1360             /* Bilinear interpolate the four samples from the 2D image / 3D slice */
   1361             for (chan = 0; chan < 4; chan++) {
   1362                colors0[chan] = lp_build_lerp_2d(texel_bld,
   1363                                                 s_fpart, t_fpart,
   1364                                                 neighbors[0][0][chan],
   1365                                                 neighbors[0][1][chan],
   1366                                                 neighbors[1][0][chan],
   1367                                                 neighbors[1][1][chan],
   1368                                                 0);
   1369             }
   1370          }
   1371       }
   1372       else {
   1373          LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
   1374          cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
   1375          cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
   1376          cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
   1377          cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
   1378 
   1379          if (is_gather) {
   1380             /* more hacks for swizzling, should be X, ONE or ZERO... */
   1381             unsigned chan_swiz = bld->static_texture_state->swizzle_r;
   1382             if (chan_swiz <= PIPE_SWIZZLE_W) {
   1383                colors0[0] = lp_build_select(texel_bld, cmpval10,
   1384                                             texel_bld->one, texel_bld->zero);
   1385                colors0[1] = lp_build_select(texel_bld, cmpval11,
   1386                                             texel_bld->one, texel_bld->zero);
   1387                colors0[2] = lp_build_select(texel_bld, cmpval01,
   1388                                             texel_bld->one, texel_bld->zero);
   1389                colors0[3] = lp_build_select(texel_bld, cmpval00,
   1390                                             texel_bld->one, texel_bld->zero);
   1391             }
   1392             else if (chan_swiz == PIPE_SWIZZLE_0) {
   1393                colors0[0] = colors0[1] = colors0[2] = colors0[3] =
   1394                             texel_bld->zero;
   1395             }
   1396             else {
   1397                colors0[0] = colors0[1] = colors0[2] = colors0[3] =
   1398                             texel_bld->one;
   1399             }
   1400          }
   1401          else {
   1402             colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
   1403                                              cmpval00, cmpval01, cmpval10, cmpval11);
   1404             colors0[1] = colors0[2] = colors0[3] = colors0[0];
   1405          }
   1406       }
   1407 
   1408       if (accurate_cube_corners) {
   1409          LLVMBuildStore(builder, colors0[0], colorss[0]);
   1410          LLVMBuildStore(builder, colors0[1], colorss[1]);
   1411          LLVMBuildStore(builder, colors0[2], colorss[2]);
   1412          LLVMBuildStore(builder, colors0[3], colorss[3]);
   1413 
   1414          lp_build_endif(&corner_if);
   1415 
   1416          colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
   1417          colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
   1418          colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
   1419          colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
   1420       }
   1421 
   1422       if (dims == 3) {
   1423          LLVMValueRef neighbors1[2][2][4];
   1424          LLVMValueRef colors1[4];
   1425 
   1426          assert(!is_gather);
   1427 
   1428          /* get x0/x1/y0/y1 texels at z1 */
   1429          lp_build_sample_texel_soa(bld,
   1430                                    width_vec, height_vec, depth_vec,
   1431                                    x00, y00, z1,
   1432                                    row_stride_vec, img_stride_vec,
   1433                                    data_ptr, mipoffsets, neighbors1[0][0]);
   1434          lp_build_sample_texel_soa(bld,
   1435                                    width_vec, height_vec, depth_vec,
   1436                                    x01, y01, z1,
   1437                                    row_stride_vec, img_stride_vec,
   1438                                    data_ptr, mipoffsets, neighbors1[0][1]);
   1439          lp_build_sample_texel_soa(bld,
   1440                                    width_vec, height_vec, depth_vec,
   1441                                    x10, y10, z1,
   1442                                    row_stride_vec, img_stride_vec,
   1443                                    data_ptr, mipoffsets, neighbors1[1][0]);
   1444          lp_build_sample_texel_soa(bld,
   1445                                    width_vec, height_vec, depth_vec,
   1446                                    x11, y11, z1,
   1447                                    row_stride_vec, img_stride_vec,
   1448                                    data_ptr, mipoffsets, neighbors1[1][1]);
   1449 
   1450          if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
   1451             /* Bilinear interpolate the four samples from the second Z slice */
   1452             for (chan = 0; chan < 4; chan++) {
   1453                colors1[chan] = lp_build_lerp_2d(texel_bld,
   1454                                                 s_fpart, t_fpart,
   1455                                                 neighbors1[0][0][chan],
   1456                                                 neighbors1[0][1][chan],
   1457                                                 neighbors1[1][0][chan],
   1458                                                 neighbors1[1][1][chan],
   1459                                                 0);
   1460             }
   1461             /* Linearly interpolate the two samples from the two 3D slices */
   1462             for (chan = 0; chan < 4; chan++) {
   1463                colors_out[chan] = lp_build_lerp(texel_bld,
   1464                                                 r_fpart,
   1465                                                 colors0[chan], colors1[chan],
   1466                                                 0);
   1467             }
   1468          }
   1469          else {
   1470             LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
   1471             cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
   1472             cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
   1473             cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
   1474             cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
   1475             colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
   1476                                              cmpval00, cmpval01, cmpval10, cmpval11);
   1477             /* Linearly interpolate the two samples from the two 3D slices */
   1478             colors_out[0] = lp_build_lerp(texel_bld,
   1479                                           r_fpart,
   1480                                           colors0[0], colors1[0],
   1481                                           0);
   1482             colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   1483          }
   1484       }
   1485       else {
   1486          /* 2D tex */
   1487          for (chan = 0; chan < 4; chan++) {
   1488             colors_out[chan] = colors0[chan];
   1489          }
   1490       }
   1491    }
   1492 }
   1493 
   1494 
   1495 /**
   1496  * Sample the texture/mipmap using given image filter and mip filter.
   1497  * ilevel0 and ilevel1 indicate the two mipmap levels to sample
   1498  * from (vectors or scalars).
   1499  * If we're using nearest miplevel sampling the '1' values will be null/unused.
         *
         * Code for the first level (ilevel0) is always emitted and its four
         * channel values are stored through colors_out.  Only when mip_filter is
         * PIPE_TEX_MIPFILTER_LINEAR is additional code emitted which, inside a
         * conditional on "need_lerp" (lod_fpart > 0), samples ilevel1, lerps
         * both results by lod_fpart and stores over the first-level values.
         *
         * @param bld         sampling build context
         * @param img_filter  PIPE_TEX_FILTER_NEAREST selects the nearest image
         *                    sampler; any other value is asserted to be
         *                    PIPE_TEX_FILTER_LINEAR
         * @param mip_filter  only PIPE_TEX_MIPFILTER_LINEAR triggers sampling of
         *                    the second level
         * @param is_gather   forwarded to the linear image sampler for the first
         *                    level only (the second level always passes FALSE)
         * @param coords      texture coordinates, forwarded to the image samplers
         * @param offsets     texel offsets, forwarded to the image samplers
         * @param ilevel0     first mipmap level
         * @param ilevel1     second mipmap level (read only for linear mip filter)
         * @param lod_fpart   lerp weight between the two levels (read only for
         *                    linear mip filter)
         * @param colors_out  four store destinations receiving the result channels
   1500  */
   1501 static void
   1502 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
   1503                        unsigned img_filter,
   1504                        unsigned mip_filter,
   1505                        boolean is_gather,
   1506                        LLVMValueRef *coords,
   1507                        const LLVMValueRef *offsets,
   1508                        LLVMValueRef ilevel0,
   1509                        LLVMValueRef ilevel1,
   1510                        LLVMValueRef lod_fpart,
   1511                        LLVMValueRef *colors_out)
   1512 {
   1513    LLVMBuilderRef builder = bld->gallivm->builder;
   1514    LLVMValueRef size0 = NULL;
   1515    LLVMValueRef size1 = NULL;
   1516    LLVMValueRef row_stride0_vec = NULL;
   1517    LLVMValueRef row_stride1_vec = NULL;
   1518    LLVMValueRef img_stride0_vec = NULL;
   1519    LLVMValueRef img_stride1_vec = NULL;
   1520    LLVMValueRef data_ptr0 = NULL;
   1521    LLVMValueRef data_ptr1 = NULL;
   1522    LLVMValueRef mipoff0 = NULL;
   1523    LLVMValueRef mipoff1 = NULL;
   1524    LLVMValueRef colors0[4], colors1[4];
   1525    unsigned chan;
   1526 
   1527    /* sample the first mipmap level */
   1528    lp_build_mipmap_level_sizes(bld, ilevel0,
   1529                                &size0,
   1530                                &row_stride0_vec, &img_stride0_vec);
           /*
            * With a single mip the level's data pointer is fetched directly and
            * mipoff0 stays NULL; otherwise the base pointer is used together with
            * the offsets returned for ilevel0.
            */
   1531    if (bld->num_mips == 1) {
   1532       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   1533    }
   1534    else {
   1535       /* This path should work for num_lods 1 too but slightly less efficient */
              /* NOTE(review): the condition tests num_mips, not num_lods -- confirm wording */
   1536       data_ptr0 = bld->base_ptr;
   1537       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   1538    }
   1539    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
   1540       lp_build_sample_image_nearest(bld, size0,
   1541                                     row_stride0_vec, img_stride0_vec,
   1542                                     data_ptr0, mipoff0, coords, offsets,
   1543                                     colors0);
   1544    }
   1545    else {
   1546       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
              /* the NULL argument is the linear_mask slot (no mask in this path) */
   1547       lp_build_sample_image_linear(bld, is_gather, size0, NULL,
   1548                                    row_stride0_vec, img_stride0_vec,
   1549                                    data_ptr0, mipoff0, coords, offsets,
   1550                                    colors0);
   1551    }
   1552 
   1553    /* Store the first level's colors in the output variables */
           /* (these stay the final result unless the need_lerp branch below runs) */
   1554    for (chan = 0; chan < 4; chan++) {
   1555        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   1556    }
   1557 
   1558    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
   1559       struct lp_build_if_state if_ctx;
   1560       LLVMValueRef need_lerp;
   1561 
   1562       /* need_lerp = lod_fpart > 0 */
   1563       if (bld->num_lods == 1) {
                 /*
                  * LLVMRealUGT is the "unordered or greater than" predicate, so
                  * need_lerp is also true when lod_fpart is NaN.
                  */
   1564          need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
   1565                                    lod_fpart, bld->lodf_bld.zero,
   1566                                    "need_lerp");
   1567       }
   1568       else {
   1569          /*
   1570           * We'll do mip filtering if any of the quads (or individual
   1571           * pixel in case of per-pixel lod) need it.
   1572           * It might be better to split the vectors here and only fetch/filter
   1573           * quads which need it (if there's one lod per quad).
   1574           */
   1575          need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
   1576                                       PIPE_FUNC_GREATER,
   1577                                       lod_fpart, bld->lodf_bld.zero);
                 /* presumably collapses the num_lods-wide mask to one condition -- confirm */
   1578          need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
   1579       }
   1580 
   1581       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
   1582       {
   1583          /*
   1584           * We unfortunately need to clamp lod_fpart here since we can get
   1585           * negative values which would screw up filtering if not all
   1586           * lod_fpart values have same sign.
   1587           */
   1588          lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
   1589                                   bld->lodf_bld.zero);
   1590          /* sample the second mipmap level */
   1591          lp_build_mipmap_level_sizes(bld, ilevel1,
   1592                                      &size1,
   1593                                      &row_stride1_vec, &img_stride1_vec);
                 /* same data pointer / mip offset selection as for the first level */
   1594          if (bld->num_mips == 1) {
   1595             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
   1596          }
   1597          else {
   1598             data_ptr1 = bld->base_ptr;
   1599             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
   1600          }
   1601          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
   1602             lp_build_sample_image_nearest(bld, size1,
   1603                                           row_stride1_vec, img_stride1_vec,
   1604                                           data_ptr1, mipoff1, coords, offsets,
   1605                                           colors1);
   1606          }
   1607          else {
                    /* is_gather is not forwarded here: FALSE is always passed */
   1608             lp_build_sample_image_linear(bld, FALSE, size1, NULL,
   1609                                          row_stride1_vec, img_stride1_vec,
   1610                                          data_ptr1, mipoff1, coords, offsets,
   1611                                          colors1);
   1612          }
   1613 
   1614          /* interpolate samples from the two mipmap levels */
   1615 
                 /*
                  * When there are fewer lods than coord vector elements, convert
                  * lod_fpart from the lodf type to the texel type first so it can
                  * be used as the lerp weight below.
                  */
   1616          if (bld->num_lods != bld->coord_type.length)
   1617             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
   1618                                                               bld->lodf_bld.type,
   1619                                                               bld->texel_bld.type,
   1620                                                               lod_fpart);
   1621 
                 /* lerp per channel and store over the first-level result */
   1622          for (chan = 0; chan < 4; chan++) {
   1623             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
   1624                                           colors0[chan], colors1[chan],
   1625                                           0);
   1626             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   1627          }
   1628       }
   1629       lp_build_endif(&if_ctx);
   1630    }
   1631 }
   1632 
   1633 
   1634 /**
   1635  * Sample the texture/mipmap using given mip filter, and using
   1636  * both nearest and linear filtering at the same time depending
   1637  * on linear_mask.
   1638  * lod can be per quad but linear_mask is always per pixel.
   1639  * ilevel0 and ilevel1 indicate the two mipmap levels to sample
   1640  * from (vectors or scalars).
   1641  * If we're using nearest miplevel sampling the '1' values will be null/unused.
         *
         * Both levels are sampled through lp_build_sample_image_linear() with
         * linear_mask and with is_gather FALSE.  The first level's colors are
         * always stored through colors_out; for PIPE_TEX_MIPFILTER_LINEAR a
         * conditional on lod_positive additionally samples ilevel1, lerps by
         * lod_fpart and stores over the first-level values.
         *
         * @param bld           sampling build context
         * @param linear_mask   mask handed to the linear image sampler
         * @param mip_filter    only PIPE_TEX_MIPFILTER_LINEAR triggers sampling of
         *                      the second level
         * @param coords        texture coordinates, forwarded to the image sampler
         * @param offsets       texel offsets, forwarded to the image sampler
         * @param ilevel0       first mipmap level
         * @param ilevel1       second mipmap level (read only for linear mip filter)
         * @param lod_fpart     lerp weight between the two levels (read only for
         *                      linear mip filter)
         * @param lod_positive  mask from which the runtime "need_lerp" condition
         *                      is derived
         * @param colors_out    four store destinations receiving the result channels
   1642  */
   1643 static void
   1644 lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
   1645                             LLVMValueRef linear_mask,
   1646                             unsigned mip_filter,
   1647                             LLVMValueRef *coords,
   1648                             const LLVMValueRef *offsets,
   1649                             LLVMValueRef ilevel0,
   1650                             LLVMValueRef ilevel1,
   1651                             LLVMValueRef lod_fpart,
   1652                             LLVMValueRef lod_positive,
   1653                             LLVMValueRef *colors_out)
   1654 {
   1655    LLVMBuilderRef builder = bld->gallivm->builder;
   1656    LLVMValueRef size0 = NULL;
   1657    LLVMValueRef size1 = NULL;
   1658    LLVMValueRef row_stride0_vec = NULL;
   1659    LLVMValueRef row_stride1_vec = NULL;
   1660    LLVMValueRef img_stride0_vec = NULL;
   1661    LLVMValueRef img_stride1_vec = NULL;
   1662    LLVMValueRef data_ptr0 = NULL;
   1663    LLVMValueRef data_ptr1 = NULL;
   1664    LLVMValueRef mipoff0 = NULL;
   1665    LLVMValueRef mipoff1 = NULL;
   1666    LLVMValueRef colors0[4], colors1[4];
   1667    unsigned chan;
   1668 
   1669    /* sample the first mipmap level */
   1670    lp_build_mipmap_level_sizes(bld, ilevel0,
   1671                                &size0,
   1672                                &row_stride0_vec, &img_stride0_vec);
           /*
            * With a single mip the level's data pointer is fetched directly and
            * mipoff0 stays NULL; otherwise the base pointer is used together with
            * the offsets returned for ilevel0.
            */
   1673    if (bld->num_mips == 1) {
   1674       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   1675    }
   1676    else {
   1677       /* This path should work for num_lods 1 too but slightly less efficient */
              /* NOTE(review): the condition tests num_mips, not num_lods -- confirm wording */
   1678       data_ptr0 = bld->base_ptr;
   1679       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   1680    }
   1681 
           /* always the linear sampler here; linear_mask is passed along to it */
   1682    lp_build_sample_image_linear(bld, FALSE, size0, linear_mask,
   1683                                 row_stride0_vec, img_stride0_vec,
   1684                                 data_ptr0, mipoff0, coords, offsets,
   1685                                 colors0);
   1686 
   1687    /* Store the first level's colors in the output variables */
           /* (these stay the final result unless the need_lerp branch below runs) */
   1688    for (chan = 0; chan < 4; chan++) {
   1689        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   1690    }
   1691 
   1692    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
   1693       struct lp_build_if_state if_ctx;
   1694       LLVMValueRef need_lerp;
   1695 
   1696       /*
   1697        * We'll do mip filtering if any of the quads (or individual
   1698        * pixel in case of per-pixel lod) need it.
   1699        * Note using lod_positive here not lod_fpart since it may be the same
   1700        * condition as that used in the outer "if" in the caller hence llvm
   1701        * should be able to merge the branches in this case.
   1702        */
   1703       need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
   1704 
   1705       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
   1706       {
   1707          /*
   1708           * We unfortunately need to clamp lod_fpart here since we can get
   1709           * negative values which would screw up filtering if not all
   1710           * lod_fpart values have same sign.
   1711           */
   1712          lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
   1713                                   bld->lodf_bld.zero);
   1714          /* sample the second mipmap level */
   1715          lp_build_mipmap_level_sizes(bld, ilevel1,
   1716                                      &size1,
   1717                                      &row_stride1_vec, &img_stride1_vec);
                 /* same data pointer / mip offset selection as for the first level */
   1718          if (bld->num_mips == 1) {
   1719             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
   1720          }
   1721          else {
   1722             data_ptr1 = bld->base_ptr;
   1723             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
   1724          }
   1725 
   1726          lp_build_sample_image_linear(bld, FALSE, size1, linear_mask,
   1727                                       row_stride1_vec, img_stride1_vec,
   1728                                       data_ptr1, mipoff1, coords, offsets,
   1729                                       colors1);
   1730 
   1731          /* interpolate samples from the two mipmap levels */
   1732 
                 /*
                  * When there are fewer lods than coord vector elements, convert
                  * lod_fpart from the lodf type to the texel type first so it can
                  * be used as the lerp weight below.
                  */
   1733          if (bld->num_lods != bld->coord_type.length)
   1734             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
   1735                                                               bld->lodf_bld.type,
   1736                                                               bld->texel_bld.type,
   1737                                                               lod_fpart);
   1738 
                 /* lerp per channel and store over the first-level result */
   1739          for (chan = 0; chan < 4; chan++) {
   1740             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
   1741                                           colors0[chan], colors1[chan],
   1742                                           0);
   1743             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   1744          }
   1745       }
   1746       lp_build_endif(&if_ctx);
   1747    }
   1748 }
   1749 
   1750 
   1751 /**
   1752  * Build (per-coord) layer value.
   1753  * Either clamp layer to valid values or fill in optional out_of_bounds
   1754  * value and just return value unclamped.
         *
         * The layer count is read through dynamic_state->depth() for the given
         * texture unit.
         *
         * @param bld            sampling build context
         * @param texture_unit   unit whose depth (layer count) is queried
         * @param is_cube_array  if TRUE the upper clamp bound is num_layers - 6
         *                       instead of num_layers - 1; must be FALSE when
         *                       out_of_bounds is given (asserted)
         * @param layer          integer layer coordinate
         * @param out_of_bounds  optional; if non-NULL receives the mask
         *                       (layer < 0) | (layer >= num_layers) and no
         *                       clamping is done
         * @return layer unchanged when out_of_bounds is non-NULL, otherwise
         *         layer clamped to [0, num_layers - s] with s being 6 or 1
   1755  */
   1756 static LLVMValueRef
   1757 lp_build_layer_coord(struct lp_build_sample_context *bld,
   1758                      unsigned texture_unit,
   1759                      boolean is_cube_array,
   1760                      LLVMValueRef layer,
   1761                      LLVMValueRef *out_of_bounds)
   1762 {
   1763    LLVMValueRef num_layers;
   1764    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   1765 
   1766    num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm,
   1767                                           bld->context_ptr, texture_unit);
   1768 
   1769    if (out_of_bounds) {
              /* report mode: compute the out-of-range mask, leave layer untouched */
   1770       LLVMValueRef out1, out;
   1771       assert(!is_cube_array);
   1772       num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
   1773       out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
   1774       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
   1775       *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
   1776       return layer;
   1777    }
   1778    else {
              /*
               * clamp mode: the subtraction is done on the unbroadcast value
               * using int_bld, and only the resulting maximum is broadcast.
               */
   1779       LLVMValueRef maxlayer;
   1780       LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
   1781                                        bld->int_bld.one;
   1782       maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
   1783       maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
   1784       return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
   1785    }
   1786 }
   1787 
   1788 
   1789 /**
   1790  * Calculate cube face, lod, mip levels.
         *
         * Steps, in emission order:
         *  - cube / cube array targets: lp_build_cube_lookup() is run on coords,
         *    and derivs is redirected to the local cube_derivs; for cube arrays
         *    coords[3] is replaced by the rounded layer times 6, clamped.
         *  - 1D / 2D array targets: coords[2] is rounded to integer and clamped
         *    to the valid layer range.
         *  - compare mode enabled: coords[4] is clamped to [0,1] unless the
         *    format's first swizzled channel is of float type.
         *  - the lod is computed if the min/mag filters differ or mip filtering
         *    is enabled; otherwise the integer lod and *lod_pos_or_zero are set
         *    to zero and *lod_fpart is not written here.
         *  - the integer mip level(s) are derived according to the mip filter.
         *
         * @param bld              sampling build context
         * @param texture_index    texture unit used for dynamic state queries
         * @param sampler_index    sampler unit handed to the lod selector
         * @param coords           coordinate array, updated in place as above
         * @param derivs           optional explicit derivatives
         * @param lod_bias         optional lod bias
         * @param explicit_lod     optional explicit lod
         * @param lod_pos_or_zero  out: filled by the lod selector, or zero
         * @param lod_fpart        out: filled by the lod selector; also handed
         *                         to lp_build_linear_mip_levels() for linear
         *                         mip filtering
         * @param ilevel0          out: first mip level
         * @param ilevel1          out: handed to lp_build_linear_mip_levels()
         *                         for linear mip filtering only; not touched
         *                         here otherwise
   1791  */
   1792 static void
   1793 lp_build_sample_common(struct lp_build_sample_context *bld,
   1794                        unsigned texture_index,
   1795                        unsigned sampler_index,
   1796                        LLVMValueRef *coords,
   1797                        const struct lp_derivatives *derivs, /* optional */
   1798                        LLVMValueRef lod_bias, /* optional */
   1799                        LLVMValueRef explicit_lod, /* optional */
   1800                        LLVMValueRef *lod_pos_or_zero,
   1801                        LLVMValueRef *lod_fpart,
   1802                        LLVMValueRef *ilevel0,
   1803                        LLVMValueRef *ilevel1)
   1804 {
   1805    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
   1806    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
   1807    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
   1808    const unsigned target = bld->static_texture_state->target;
   1809    LLVMValueRef first_level, cube_rho = NULL;
   1810    LLVMValueRef lod_ipart = NULL;
   1811    struct lp_derivatives cube_derivs;
   1812 
   1813    /*
   1814    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
   1815           mip_filter, min_filter, mag_filter);
   1816    */
   1817 
   1818    /*
   1819     * Choose cube face, recompute texcoords for the chosen face and
   1820     * compute rho here too (as it requires transform of derivatives).
   1821     */
   1822    if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
   1823       boolean need_derivs;
              /*
               * Derivatives are requested only when a lod is computed at all
               * (same filter condition as further below), min/max lod are not
               * flagged equal, and no explicit lod was supplied.
               */
   1824       need_derivs = ((min_filter != mag_filter ||
   1825                       mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
   1826                       !bld->static_sampler_state->min_max_lod_equal &&
   1827                       !explicit_lod);
   1828       lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
              /* from here on the lod selector sees cube_derivs, whatever need_derivs was */
   1829       derivs = &cube_derivs;
   1830       if (target == PIPE_TEXTURE_CUBE_ARRAY) {
   1831          /* calculate cube layer coord now */
   1832          LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
   1833          LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
   1834          layer = lp_build_mul(&bld->int_coord_bld, layer, six);
   1835          coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL);
   1836          /* because of seamless filtering can't add it to face (coords[2]) here. */
   1837       }
   1838    }
   1839    else if (target == PIPE_TEXTURE_1D_ARRAY ||
   1840             target == PIPE_TEXTURE_2D_ARRAY) {
              /* round the layer coordinate to integer, then clamp it (no out_of_bounds) */
   1841       coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
   1842       coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL);
   1843    }
   1844 
   1845    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
   1846       /*
   1847        * Clamp p coords to [0,1] for fixed function depth texture format here.
   1848        * Technically this is not entirely correct for unorm depth as the ref value
   1849        * should be converted to the depth format (quantization!) and comparison
   1850        * then done in texture format. This would actually help performance (since
   1851        * only need to do it once and could save the per-sample conversion of texels
   1852        * to floats instead), but it would need more messy code (would need to push
   1853        * at least some bits down to actual fetch so conversion could be skipped,
   1854        * and would have ugly interaction with border color, would need to convert
   1855        * border color to that format too or do some other tricks to make it work).
   1856        */
   1857       const struct util_format_description *format_desc = bld->format_desc;
   1858       unsigned chan_type;
   1859       /* not entirely sure we couldn't end up with non-valid swizzle here */
              /* a swizzle beyond W is treated as float, i.e. no clamping is done */
   1860       chan_type = format_desc->swizzle[0] <= PIPE_SWIZZLE_W ?
   1861                      format_desc->channel[format_desc->swizzle[0]].type :
   1862                      UTIL_FORMAT_TYPE_FLOAT;
   1863       if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
   1864          coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
   1865                                     bld->coord_bld.zero, bld->coord_bld.one);
   1866       }
   1867    }
   1868 
   1869    /*
   1870     * Compute the level of detail (float).
   1871     */
   1872    if (min_filter != mag_filter ||
   1873        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
   1874       /* Need to compute lod either to choose mipmap levels or to
   1875        * distinguish between minification/magnification with one mipmap level.
   1876        */
   1877       lp_build_lod_selector(bld, texture_index, sampler_index,
   1878                             coords[0], coords[1], coords[2], cube_rho,
   1879                             derivs, lod_bias, explicit_lod,
   1880                             mip_filter,
   1881                             &lod_ipart, lod_fpart, lod_pos_or_zero);
   1882    } else {
              /* no lod needed: *lod_fpart is left unwritten on this path */
   1883       lod_ipart = bld->lodi_bld.zero;
   1884       *lod_pos_or_zero = bld->lodi_bld.zero;
   1885    }
   1886 
   1887    if (bld->num_lods != bld->num_mips) {
   1888       /* only makes sense if there's just a single mip level */
   1889       assert(bld->num_mips == 1);
              /* keep only the range starting at element 0 with size 1 */
   1890       lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
   1891    }
   1892 
   1893    /*
   1894     * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
   1895     */
   1896    switch (mip_filter) {
   1897    default:
              /*
               * NOTE(review): the assert text names lp_build_sample_soa() rather
               * than this function -- presumably a leftover; confirm.
               */
   1898       assert(0 && "bad mip_filter value in lp_build_sample_soa()");
   1899       /* fall-through */
   1900    case PIPE_TEX_MIPFILTER_NONE:
   1901       /* always use mip level 0 */
              /* (the level used is the texture's first_level, read from dynamic state) */
   1902       first_level = bld->dynamic_state->first_level(bld->dynamic_state,
   1903                                                     bld->gallivm, bld->context_ptr,
   1904                                                     texture_index);
   1905       first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
   1906       *ilevel0 = first_level;
   1907       break;
   1908    case PIPE_TEX_MIPFILTER_NEAREST:
   1909       assert(lod_ipart);
              /* NULL is passed in the last argument slot, so only ilevel0 is provided */
   1910       lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
   1911       break;
   1912    case PIPE_TEX_MIPFILTER_LINEAR:
   1913       assert(lod_ipart);
   1914       assert(*lod_fpart);
              /* lod_fpart is handed over by pointer together with both level outputs */
   1915       lp_build_linear_mip_levels(bld, texture_index,
   1916                                  lod_ipart, lod_fpart,
   1917                                  ilevel0, ilevel1);
   1918       break;
   1919    }
   1920 }
   1921 
   1922 static void
   1923 lp_build_clamp_border_color(struct lp_build_sample_context *bld,
   1924                             unsigned sampler_unit)
   1925 {
   1926    struct gallivm_state *gallivm = bld->gallivm;
   1927    LLVMBuilderRef builder = gallivm->builder;
   1928    LLVMValueRef border_color_ptr =
   1929       bld->dynamic_state->border_color(bld->dynamic_state, gallivm,
   1930                                        bld->context_ptr, sampler_unit);
   1931    LLVMValueRef border_color;
   1932    const struct util_format_description *format_desc = bld->format_desc;
   1933    struct lp_type vec4_type = bld->texel_type;
   1934    struct lp_build_context vec4_bld;
   1935    LLVMValueRef min_clamp = NULL;
   1936    LLVMValueRef max_clamp = NULL;
   1937 
   1938    /*
   1939     * For normalized format need to clamp border color (technically
   1940     * probably should also quantize the data). Really sucks doing this
   1941     * here but can't avoid at least for now since this is part of
   1942     * sampler state and texture format is part of sampler_view state.
   1943     * GL expects also expects clamping for uint/sint formats too so
   1944     * do that as well (d3d10 can't end up here with uint/sint since it
   1945     * only supports them with ld).
   1946     */
   1947    vec4_type.length = 4;
   1948    lp_build_context_init(&vec4_bld, gallivm, vec4_type);
   1949 
   1950    /*
   1951     * Vectorized clamping of border color. Loading is a bit of a hack since
   1952     * we just cast the pointer to float array to pointer to vec4
   1953     * (int or float).
   1954     */
   1955    border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
   1956                                              lp_build_const_int32(gallivm, 0));
   1957    border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
   1958                                        LLVMPointerType(vec4_bld.vec_type, 0), "");
   1959    border_color = LLVMBuildLoad(builder, border_color_ptr, "");
   1960    /* we don't have aligned type in the dynamic state unfortunately */
   1961    LLVMSetAlignment(border_color, 4);
   1962 
   1963    /*
   1964     * Instead of having some incredibly complex logic which will try to figure out
   1965     * clamping necessary for each channel, simply use the first channel, and treat
   1966     * mixed signed/unsigned normalized formats specially.
   1967     * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
   1968     * good reason.)
   1969     */
   1970    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
   1971       int chan;
   1972       /* d/s needs special handling because both present means just sampling depth */
   1973       if (util_format_is_depth_and_stencil(format_desc->format)) {
   1974          chan = format_desc->swizzle[0];
   1975       }
   1976       else {
   1977          chan = util_format_get_first_non_void_channel(format_desc->format);
   1978       }
   1979       if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
   1980          unsigned chan_type = format_desc->channel[chan].type;
   1981          unsigned chan_norm = format_desc->channel[chan].normalized;
   1982          unsigned chan_pure = format_desc->channel[chan].pure_integer;
   1983          if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
   1984             if (chan_norm) {
   1985                min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
   1986                max_clamp = vec4_bld.one;
   1987             }
   1988             else if (chan_pure) {
   1989                /*
   1990                 * Border color was stored as int, hence need min/max clamp
   1991                 * only if chan has less than 32 bits..
   1992                 */
   1993                unsigned chan_size = format_desc->channel[chan].size;
   1994                if (chan_size < 32) {
   1995                   min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
   1996                                                      0 - (1 << (chan_size - 1)));
   1997                   max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
   1998                                                      (1 << (chan_size - 1)) - 1);
   1999                }
   2000             }
   2001             /* TODO: no idea about non-pure, non-normalized! */
   2002          }
   2003          else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
   2004             if (chan_norm) {
   2005                min_clamp = vec4_bld.zero;
   2006                max_clamp = vec4_bld.one;
   2007             }
   2008             /*
   2009              * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
   2010              * we use Z32_FLOAT_S8X24 to imply sampling depth component
   2011              * and ignoring stencil, which will blow up here if we try to
   2012              * do a uint clamp in a float texel build...
   2013              * And even if we had that format, mesa st also thinks using z24s8
   2014              * means depth sampling ignoring stencil.
   2015              */
   2016             else if (chan_pure) {
   2017                /*
   2018                 * Border color was stored as uint, hence never need min
   2019                 * clamp, and only need max clamp if chan has less than 32 bits.
   2020                 */
   2021                unsigned chan_size = format_desc->channel[chan].size;
   2022                if (chan_size < 32) {
   2023                   max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
   2024                                                      (1 << chan_size) - 1);
   2025                }
   2026                /* TODO: no idea about non-pure, non-normalized! */
   2027             }
   2028          }
   2029          else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
   2030             /* TODO: I have no idea what clamp this would need if any! */
   2031          }
   2032       }
   2033       /* mixed plain formats (or different pure size) */
   2034       switch (format_desc->format) {
   2035       case PIPE_FORMAT_B10G10R10A2_UINT:
   2036       case PIPE_FORMAT_R10G10B10A2_UINT:
   2037       {
   2038          unsigned max10 = (1 << 10) - 1;
   2039          max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
   2040                                         max10, (1 << 2) - 1, NULL);
   2041       }
   2042          break;
   2043       case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
   2044          min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
   2045                                         -1.0F, 0.0F, NULL);
   2046          max_clamp = vec4_bld.one;
   2047          break;
   2048       case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
   2049       case PIPE_FORMAT_R5SG5SB6U_NORM:
   2050          min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
   2051                                         0.0F, 0.0F, NULL);
   2052          max_clamp = vec4_bld.one;
   2053          break;
   2054       default:
   2055          break;
   2056       }
   2057    }
   2058    else {
   2059       /* cannot figure this out from format description */
   2060       if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
   2061          /* s3tc formats are always unorm */
   2062          min_clamp = vec4_bld.zero;
   2063          max_clamp = vec4_bld.one;
   2064       }
   2065       else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
   2066                format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
   2067          switch (format_desc->format) {
   2068          case PIPE_FORMAT_RGTC1_UNORM:
   2069          case PIPE_FORMAT_RGTC2_UNORM:
   2070          case PIPE_FORMAT_LATC1_UNORM:
   2071          case PIPE_FORMAT_LATC2_UNORM:
   2072          case PIPE_FORMAT_ETC1_RGB8:
   2073             min_clamp = vec4_bld.zero;
   2074             max_clamp = vec4_bld.one;
   2075             break;
   2076          case PIPE_FORMAT_RGTC1_SNORM:
   2077          case PIPE_FORMAT_RGTC2_SNORM:
   2078          case PIPE_FORMAT_LATC1_SNORM:
   2079          case PIPE_FORMAT_LATC2_SNORM:
   2080             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
   2081             max_clamp = vec4_bld.one;
   2082             break;
   2083          default:
   2084             assert(0);
   2085             break;
   2086          }
   2087       }
   2088       /*
   2089        * all others from subsampled/other group, though we don't care
   2090        * about yuv (and should not have any from zs here)
   2091        */
   2092       else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
   2093          switch (format_desc->format) {
   2094          case PIPE_FORMAT_R8G8_B8G8_UNORM:
   2095          case PIPE_FORMAT_G8R8_G8B8_UNORM:
   2096          case PIPE_FORMAT_G8R8_B8R8_UNORM:
   2097          case PIPE_FORMAT_R8G8_R8B8_UNORM:
   2098          case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
   2099             min_clamp = vec4_bld.zero;
   2100             max_clamp = vec4_bld.one;
   2101             break;
   2102          case PIPE_FORMAT_R8G8Bx_SNORM:
   2103             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
   2104             max_clamp = vec4_bld.one;
   2105             break;
   2106             /*
   2107              * Note smallfloat formats usually don't need clamping
   2108              * (they still have infinite range) however this is not
   2109              * true for r11g11b10 and r9g9b9e5, which can't represent
   2110              * negative numbers (and additionally r9g9b9e5 can't represent
   2111              * very large numbers). d3d10 seems happy without clamping in
   2112              * this case, but gl spec is pretty clear: "for floating
   2113              * point and integer formats, border values are clamped to
   2114              * the representable range of the format" so do that here.
   2115              */
   2116          case PIPE_FORMAT_R11G11B10_FLOAT:
   2117             min_clamp = vec4_bld.zero;
   2118             break;
   2119          case PIPE_FORMAT_R9G9B9E5_FLOAT:
   2120             min_clamp = vec4_bld.zero;
   2121             max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
   2122             break;
   2123          default:
   2124             assert(0);
   2125             break;
   2126          }
   2127       }
   2128    }
   2129 
   2130    if (min_clamp) {
   2131       border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
   2132    }
   2133    if (max_clamp) {
   2134       border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
   2135    }
   2136 
   2137    bld->border_color_clamped = border_color;
   2138 }
   2139 
   2140 
   2141 /**
   2142  * General texture sampling codegen.
   2143  * This function handles texture sampling for all texture targets (1D,
   2144  * 2D, 3D, cube) and all filtering modes.
   2145  */
   2146 static void
   2147 lp_build_sample_general(struct lp_build_sample_context *bld,
   2148                         unsigned sampler_unit,
   2149                         boolean is_gather,
   2150                         LLVMValueRef *coords,
   2151                         const LLVMValueRef *offsets,
   2152                         LLVMValueRef lod_positive,
   2153                         LLVMValueRef lod_fpart,
   2154                         LLVMValueRef ilevel0,
   2155                         LLVMValueRef ilevel1,
   2156                         LLVMValueRef *colors_out)
   2157 {
   2158    LLVMBuilderRef builder = bld->gallivm->builder;
   2159    const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
   2160    const unsigned mip_filter = sampler_state->min_mip_filter;
   2161    const unsigned min_filter = sampler_state->min_img_filter;
   2162    const unsigned mag_filter = sampler_state->mag_img_filter;
   2163    LLVMValueRef texels[4];
   2164    unsigned chan;
   2165 
   2166    /* if we need border color, (potentially) clamp it now */
   2167    if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
   2168                                               min_filter,
   2169                                               mag_filter) ||
   2170        (bld->dims > 1 &&
   2171            lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
   2172                                                   min_filter,
   2173                                                   mag_filter)) ||
   2174        (bld->dims > 2 &&
   2175            lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
   2176                                                   min_filter,
   2177                                                   mag_filter))) {
   2178       lp_build_clamp_border_color(bld, sampler_unit);
   2179    }
   2180 
   2181 
   2182    /*
   2183     * Get/interpolate texture colors.
   2184     */
   2185 
   2186    for (chan = 0; chan < 4; ++chan) {
   2187      texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
   2188      lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
   2189    }
   2190 
   2191    if (min_filter == mag_filter) {
   2192       /* no need to distinguish between minification and magnification */
   2193       lp_build_sample_mipmap(bld, min_filter, mip_filter,
   2194                              is_gather,
   2195                              coords, offsets,
   2196                              ilevel0, ilevel1, lod_fpart,
   2197                              texels);
   2198    }
   2199    else {
   2200       /*
   2201        * Could also get rid of the if-logic and always use mipmap_both, both
   2202        * for the single lod and multi-lod case if nothing really uses this.
   2203        */
   2204       if (bld->num_lods == 1) {
   2205          /* Emit conditional to choose min image filter or mag image filter
   2206           * depending on the lod being > 0 or <= 0, respectively.
   2207           */
   2208          struct lp_build_if_state if_ctx;
   2209 
   2210          lod_positive = LLVMBuildTrunc(builder, lod_positive,
   2211                                        LLVMInt1TypeInContext(bld->gallivm->context), "");
   2212 
   2213          lp_build_if(&if_ctx, bld->gallivm, lod_positive);
   2214          {
   2215             /* Use the minification filter */
   2216             lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE,
   2217                                    coords, offsets,
   2218                                    ilevel0, ilevel1, lod_fpart,
   2219                                    texels);
   2220          }
   2221          lp_build_else(&if_ctx);
   2222          {
   2223             /* Use the magnification filter */
   2224             lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
   2225                                    FALSE,
   2226                                    coords, offsets,
   2227                                    ilevel0, NULL, NULL,
   2228                                    texels);
   2229          }
   2230          lp_build_endif(&if_ctx);
   2231       }
   2232       else {
   2233          LLVMValueRef need_linear, linear_mask;
   2234          unsigned mip_filter_for_nearest;
   2235          struct lp_build_if_state if_ctx;
   2236 
   2237          if (min_filter == PIPE_TEX_FILTER_LINEAR) {
   2238             linear_mask = lod_positive;
   2239             mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
   2240          }
   2241          else {
   2242             linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
   2243             mip_filter_for_nearest = mip_filter;
   2244          }
   2245          need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
   2246                                                linear_mask);
   2247 
   2248          if (bld->num_lods != bld->coord_type.length) {
   2249             linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
   2250                                                                 bld->lodi_type,
   2251                                                                 bld->int_coord_type,
   2252                                                                 linear_mask);
   2253          }
   2254 
   2255          lp_build_if(&if_ctx, bld->gallivm, need_linear);
   2256          {
   2257             /*
   2258              * Do sampling with both filters simultaneously. This means using
   2259              * a linear filter and doing some tricks (with weights) for the pixels
   2260              * which need nearest filter.
   2261              * Note that it's probably rare some pixels need nearest and some
   2262              * linear filter but the fixups required for the nearest pixels
   2263              * aren't all that complicated so just always run a combined path
   2264              * if at least some pixels require linear.
   2265              */
   2266             lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
   2267                                         coords, offsets,
   2268                                         ilevel0, ilevel1,
   2269                                         lod_fpart, lod_positive,
   2270                                         texels);
   2271          }
   2272          lp_build_else(&if_ctx);
   2273          {
   2274             /*
   2275              * All pixels require just nearest filtering, which is way
   2276              * cheaper than linear, hence do a separate path for that.
   2277              */
   2278             lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
   2279                                    mip_filter_for_nearest, FALSE,
   2280                                    coords, offsets,
   2281                                    ilevel0, ilevel1, lod_fpart,
   2282                                    texels);
   2283          }
   2284          lp_build_endif(&if_ctx);
   2285       }
   2286    }
   2287 
   2288    for (chan = 0; chan < 4; ++chan) {
   2289      colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
   2290      lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
   2291    }
   2292 }
   2293 
   2294 
   2295 /**
   2296  * Texel fetch function.
   2297  * In contrast to general sampling there is no filtering, no coord minification,
   2298  * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
   2299  * directly to be applied to the selected mip level (after adding texel offsets).
   2300  * This function handles texel fetch for all targets where texel fetch is supported
   2301  * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
   2302  */
   2303 static void
   2304 lp_build_fetch_texel(struct lp_build_sample_context *bld,
   2305                      unsigned texture_unit,
   2306                      const LLVMValueRef *coords,
   2307                      LLVMValueRef explicit_lod,
   2308                      const LLVMValueRef *offsets,
   2309                      LLVMValueRef *colors_out)
   2310 {
   2311    struct lp_build_context *perquadi_bld = &bld->lodi_bld;
   2312    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   2313    unsigned dims = bld->dims, chan;
   2314    unsigned target = bld->static_texture_state->target;
   2315    boolean out_of_bound_ret_zero = TRUE;
   2316    LLVMValueRef size, ilevel;
   2317    LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
   2318    LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
   2319    LLVMValueRef width, height, depth, i, j;
   2320    LLVMValueRef offset, out_of_bounds, out1;
   2321 
   2322    out_of_bounds = int_coord_bld->zero;
   2323 
   2324    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
   2325       if (bld->num_mips != int_coord_bld->type.length) {
   2326          ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
   2327                                             perquadi_bld->type, explicit_lod, 0);
   2328       }
   2329       else {
   2330          ilevel = explicit_lod;
   2331       }
   2332       lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
   2333                                  out_of_bound_ret_zero ? &out_of_bounds : NULL);
   2334    }
   2335    else {
   2336       assert(bld->num_mips == 1);
   2337       if (bld->static_texture_state->target != PIPE_BUFFER) {
   2338          ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
   2339                                                   bld->context_ptr, texture_unit);
   2340       }
   2341       else {
   2342          ilevel = lp_build_const_int32(bld->gallivm, 0);
   2343       }
   2344    }
   2345    lp_build_mipmap_level_sizes(bld, ilevel,
   2346                                &size,
   2347                                &row_stride_vec, &img_stride_vec);
   2348    lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
   2349                                 size, &width, &height, &depth);
   2350 
   2351    if (target == PIPE_TEXTURE_1D_ARRAY ||
   2352        target == PIPE_TEXTURE_2D_ARRAY) {
   2353       if (out_of_bound_ret_zero) {
   2354          z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1);
   2355          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   2356       }
   2357       else {
   2358          z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL);
   2359       }
   2360    }
   2361 
   2362    /* This is a lot like border sampling */
   2363    if (offsets[0]) {
   2364       /*
   2365        * coords are really unsigned, offsets are signed, but I don't think
   2366        * exceeding 31 bits is possible
   2367        */
   2368       x = lp_build_add(int_coord_bld, x, offsets[0]);
   2369    }
   2370    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
   2371    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   2372    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
   2373    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   2374 
   2375    if (dims >= 2) {
   2376       if (offsets[1]) {
   2377          y = lp_build_add(int_coord_bld, y, offsets[1]);
   2378       }
   2379       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
   2380       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   2381       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
   2382       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   2383 
   2384       if (dims >= 3) {
   2385          if (offsets[2]) {
   2386             z = lp_build_add(int_coord_bld, z, offsets[2]);
   2387          }
   2388          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
   2389          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   2390          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
   2391          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   2392       }
   2393    }
   2394 
   2395    lp_build_sample_offset(int_coord_bld,
   2396                           bld->format_desc,
   2397                           x, y, z, row_stride_vec, img_stride_vec,
   2398                           &offset, &i, &j);
   2399 
   2400    if (bld->static_texture_state->target != PIPE_BUFFER) {
   2401       offset = lp_build_add(int_coord_bld, offset,
   2402                             lp_build_get_mip_offsets(bld, ilevel));
   2403    }
   2404 
   2405    offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
   2406 
   2407    lp_build_fetch_rgba_soa(bld->gallivm,
   2408                            bld->format_desc,
   2409                            bld->texel_type, TRUE,
   2410                            bld->base_ptr, offset,
   2411                            i, j,
   2412                            bld->cache,
   2413                            colors_out);
   2414 
   2415    if (out_of_bound_ret_zero) {
   2416       /*
   2417        * Only needed for ARB_robust_buffer_access_behavior and d3d10.
   2418        * Could use min/max above instead of out-of-bounds comparisons
   2419        * if we don't care about the result returned for out-of-bounds.
   2420        */
   2421       for (chan = 0; chan < 4; chan++) {
   2422          colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
   2423                                             bld->texel_bld.zero, colors_out[chan]);
   2424       }
   2425    }
   2426 }
   2427 
   2428 
   2429 /**
   2430  * Just set texels to white instead of actually sampling the texture.
   2431  * For debugging.
   2432  */
   2433 void
   2434 lp_build_sample_nop(struct gallivm_state *gallivm,
   2435                     struct lp_type type,
   2436                     const LLVMValueRef *coords,
   2437                     LLVMValueRef texel_out[4])
   2438 {
   2439    LLVMValueRef one = lp_build_one(gallivm, type);
   2440    unsigned chan;
   2441 
   2442    for (chan = 0; chan < 4; chan++) {
   2443       texel_out[chan] = one;
   2444    }
   2445 }
   2446 
   2447 
   2448 /**
   2449  * Build the actual texture sampling code.
   2450  * 'texel' will return a vector of four LLVMValueRefs corresponding to
   2451  * R, G, B, A.
   2452  * \param type  vector float type to use for coords, etc.
   2453  * \param sample_key
   2454  * \param derivs  partial derivatives of (s,t,r,q) with respect to x and y
   2455  */
   2456 static void
   2457 lp_build_sample_soa_code(struct gallivm_state *gallivm,
   2458                          const struct lp_static_texture_state *static_texture_state,
   2459                          const struct lp_static_sampler_state *static_sampler_state,
   2460                          struct lp_sampler_dynamic_state *dynamic_state,
   2461                          struct lp_type type,
   2462                          unsigned sample_key,
   2463                          unsigned texture_index,
   2464                          unsigned sampler_index,
   2465                          LLVMValueRef context_ptr,
   2466                          LLVMValueRef thread_data_ptr,
   2467                          const LLVMValueRef *coords,
   2468                          const LLVMValueRef *offsets,
   2469                          const struct lp_derivatives *derivs, /* optional */
   2470                          LLVMValueRef lod, /* optional */
   2471                          LLVMValueRef texel_out[4])
   2472 {
   2473    unsigned target = static_texture_state->target;
   2474    unsigned dims = texture_dims(target);
   2475    unsigned num_quads = type.length / 4;
   2476    unsigned mip_filter, min_img_filter, mag_img_filter, i;
   2477    struct lp_build_sample_context bld;
   2478    struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
   2479    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   2480    LLVMBuilderRef builder = gallivm->builder;
   2481    LLVMValueRef tex_width, newcoords[5];
   2482    enum lp_sampler_lod_property lod_property;
   2483    enum lp_sampler_lod_control lod_control;
   2484    enum lp_sampler_op_type op_type;
   2485    LLVMValueRef lod_bias = NULL;
   2486    LLVMValueRef explicit_lod = NULL;
   2487    boolean op_is_tex;
   2488 
   2489    if (0) {
   2490       enum pipe_format fmt = static_texture_state->format;
   2491       debug_printf("Sample from %s\n", util_format_name(fmt));
   2492    }
   2493 
   2494    lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
   2495                      LP_SAMPLER_LOD_PROPERTY_SHIFT;
   2496    lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
   2497                     LP_SAMPLER_LOD_CONTROL_SHIFT;
   2498    op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
   2499                  LP_SAMPLER_OP_TYPE_SHIFT;
   2500 
   2501    op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
   2502 
   2503    if (lod_control == LP_SAMPLER_LOD_BIAS) {
   2504       lod_bias = lod;
   2505       assert(lod);
   2506       assert(derivs == NULL);
   2507    }
   2508    else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
   2509       explicit_lod = lod;
   2510       assert(lod);
   2511       assert(derivs == NULL);
   2512    }
   2513    else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
   2514       assert(derivs);
   2515       assert(lod == NULL);
   2516    }
   2517    else {
   2518       assert(derivs == NULL);
   2519       assert(lod == NULL);
   2520    }
   2521 
   2522    if (static_texture_state->format == PIPE_FORMAT_NONE) {
   2523       /*
   2524        * If there's nothing bound, format is NONE, and we must return
   2525        * all zero as mandated by d3d10 in this case.
   2526        */
   2527       unsigned chan;
   2528       LLVMValueRef zero = lp_build_zero(gallivm, type);
   2529       for (chan = 0; chan < 4; chan++) {
   2530          texel_out[chan] = zero;
   2531       }
   2532       return;
   2533    }
   2534 
   2535    assert(type.floating);
   2536 
   2537    /* Setup our build context */
   2538    memset(&bld, 0, sizeof bld);
   2539    bld.gallivm = gallivm;
   2540    bld.context_ptr = context_ptr;
   2541    bld.static_sampler_state = &derived_sampler_state;
   2542    bld.static_texture_state = static_texture_state;
   2543    bld.dynamic_state = dynamic_state;
   2544    bld.format_desc = util_format_description(static_texture_state->format);
   2545    bld.dims = dims;
   2546 
   2547    bld.vector_width = lp_type_width(type);
   2548 
   2549    bld.float_type = lp_type_float(32);
   2550    bld.int_type = lp_type_int(32);
   2551    bld.coord_type = type;
   2552    bld.int_coord_type = lp_int_type(type);
   2553    bld.float_size_in_type = lp_type_float(32);
   2554    bld.float_size_in_type.length = dims > 1 ? 4 : 1;
   2555    bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
   2556    bld.texel_type = type;
   2557 
   2558    /* always using the first channel hopefully should be safe,
   2559     * if not things WILL break in other places anyway.
   2560     */
   2561    if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
   2562        bld.format_desc->channel[0].pure_integer) {
   2563       if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
   2564          bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
   2565       }
   2566       else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
   2567          bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
   2568       }
   2569    }
   2570    else if (util_format_has_stencil(bld.format_desc) &&
   2571        !util_format_has_depth(bld.format_desc)) {
   2572       /* for stencil only formats, sample stencil (uint) */
   2573       bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
   2574    }
   2575 
   2576    if (!static_texture_state->level_zero_only) {
   2577       derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
   2578    } else {
   2579       derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
   2580    }
   2581    if (op_type == LP_SAMPLER_OP_GATHER) {
   2582       /*
   2583        * gather4 is exactly like GL_LINEAR filtering but in the end skipping
   2584        * the actual filtering. Using mostly the same paths, so cube face
   2585        * selection, coord wrapping etc. all naturally uses the same code.
   2586        */
   2587       derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
   2588       derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
   2589       derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
   2590    }
   2591    mip_filter = derived_sampler_state.min_mip_filter;
   2592 
   2593    if (0) {
   2594       debug_printf("  .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
   2595    }
   2596 
   2597    if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
   2598        static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
   2599    {
   2600       /*
   2601        * Seamless filtering ignores wrap modes.
   2602        * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
   2603        * bilinear it's not correct but way better than using for instance repeat.
   2604        * Note we even set this for non-seamless. Technically GL allows any wrap
   2605        * mode, which made sense when supporting true borders (can get seamless
   2606        * effect with border and CLAMP_TO_BORDER), but gallium doesn't support
   2607        * borders and d3d9 requires wrap modes to be ignored and it's a pain to fix
   2608        * up the sampler state (as it makes it texture dependent).
   2609        */
   2610       derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   2611       derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   2612    }
   2613    /*
   2614     * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
   2615     * so AoS path could be used. Not sure it's worth the trouble...
   2616     */
   2617 
   2618    min_img_filter = derived_sampler_state.min_img_filter;
   2619    mag_img_filter = derived_sampler_state.mag_img_filter;
   2620 
   2621 
   2622    /*
   2623     * This is all a bit complicated; different paths are chosen for performance
   2624     * reasons.
   2625     * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
   2626     * everything (the last two options are equivalent for 4-wide case).
   2627     * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
   2628     * lod is calculated then the lod value extracted afterwards so making this
   2629     * case basically the same as far as lod handling is concerned for the
   2630     * further sample/filter code as the 1 lod for everything case.
   2631     * Different lod handling mostly shows up when building mipmap sizes
   2632     * (lp_build_mipmap_level_sizes() and friends) and also in filtering
   2633     * (getting the fractional part of the lod to the right texels).
   2634     */
   2635 
   2636    /*
   2637     * There are other situations where at least the multiple int lods could be
   2638     * avoided like min and max lod being equal.
   2639     */
   2640    bld.num_mips = bld.num_lods = 1;
   2641 
   2642    if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
   2643        (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
   2644        (static_texture_state->target == PIPE_TEXTURE_CUBE ||
   2645         static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
   2646        (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
   2647       /*
   2648        * special case for using per-pixel lod even for implicit lod,
   2649        * which is generally never required (ok by APIs) except to please
   2650        * some (somewhat broken imho) tests (because per-pixel face selection
   2651        * can cause derivatives to be different for pixels outside the primitive
   2652        * due to the major axis division even if pre-project derivatives are
   2653        * looking normal).
   2654        */
   2655       bld.num_mips = type.length;
   2656       bld.num_lods = type.length;
   2657    }
   2658    else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
   2659        (explicit_lod || lod_bias || derivs)) {
   2660       if ((!op_is_tex && target != PIPE_BUFFER) ||
   2661           (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
   2662          bld.num_mips = type.length;
   2663          bld.num_lods = type.length;
   2664       }
   2665       else if (op_is_tex && min_img_filter != mag_img_filter) {
   2666          bld.num_mips = 1;
   2667          bld.num_lods = type.length;
   2668       }
   2669    }
   2670    /* TODO: for true scalar_lod should only use 1 lod value */
   2671    else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
   2672             (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
   2673       bld.num_mips = num_quads;
   2674       bld.num_lods = num_quads;
   2675    }
   2676    else if (op_is_tex && min_img_filter != mag_img_filter) {
   2677       bld.num_mips = 1;
   2678       bld.num_lods = num_quads;
   2679    }
   2680 
   2681 
   2682    bld.lodf_type = type;
   2683    /* we want native vector size to be able to use our intrinsics */
   2684    if (bld.num_lods != type.length) {
   2685       /* TODO: this currently always has to be per-quad or per-element */
   2686       bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
   2687    }
   2688    bld.lodi_type = lp_int_type(bld.lodf_type);
   2689    bld.levelf_type = bld.lodf_type;
   2690    if (bld.num_mips == 1) {
   2691       bld.levelf_type.length = 1;
   2692    }
   2693    bld.leveli_type = lp_int_type(bld.levelf_type);
   2694    bld.float_size_type = bld.float_size_in_type;
   2695    /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
   2696     * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
   2697    if (bld.num_mips > 1) {
   2698       bld.float_size_type.length = bld.num_mips == type.length ?
   2699                                       bld.num_mips * bld.float_size_in_type.length :
   2700                                       type.length;
   2701    }
   2702    bld.int_size_type = lp_int_type(bld.float_size_type);
   2703 
   2704    lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
   2705    lp_build_context_init(&bld.float_vec_bld, gallivm, type);
   2706    lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
   2707    lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
   2708    lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
   2709    lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
   2710    lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
   2711    lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
   2712    lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
   2713    lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
   2714    lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
   2715    lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
   2716    lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
   2717    lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
   2718 
   2719    /* Get the dynamic state */
   2720    tex_width = dynamic_state->width(dynamic_state, gallivm,
   2721                                     context_ptr, texture_index);
   2722    bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm,
   2723                                                     context_ptr, texture_index);
   2724    bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm,
   2725                                                     context_ptr, texture_index);
   2726    bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
   2727                                           context_ptr, texture_index);
   2728    bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
   2729                                                 context_ptr, texture_index);
   2730    /* Note that mip_offsets is an array[level] of offsets to texture images */
   2731 
   2732    if (dynamic_state->cache_ptr && thread_data_ptr) {
   2733       bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm,
   2734                                            thread_data_ptr, texture_index);
   2735    }
   2736 
   2737    /* width, height, depth as single int vector */
   2738    if (dims <= 1) {
   2739       bld.int_size = tex_width;
   2740    }
   2741    else {
   2742       bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
   2743                                             tex_width,
   2744                                             LLVMConstInt(i32t, 0, 0), "");
   2745       if (dims >= 2) {
   2746          LLVMValueRef tex_height =
   2747             dynamic_state->height(dynamic_state, gallivm,
   2748                                   context_ptr, texture_index);
   2749          bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
   2750                                                tex_height,
   2751                                                LLVMConstInt(i32t, 1, 0), "");
   2752          if (dims >= 3) {
   2753             LLVMValueRef tex_depth =
   2754                dynamic_state->depth(dynamic_state, gallivm, context_ptr,
   2755                                     texture_index);
   2756             bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
   2757                                                   tex_depth,
   2758                                                   LLVMConstInt(i32t, 2, 0), "");
   2759          }
   2760       }
   2761    }
   2762 
   2763    for (i = 0; i < 5; i++) {
   2764       newcoords[i] = coords[i];
   2765    }
   2766 
   2767    if (0) {
   2768       /* For debug: no-op texture sampling */
   2769       lp_build_sample_nop(gallivm,
   2770                           bld.texel_type,
   2771                           newcoords,
   2772                           texel_out);
   2773    }
   2774 
   2775    else if (op_type == LP_SAMPLER_OP_FETCH) {
   2776       lp_build_fetch_texel(&bld, texture_index, newcoords,
   2777                            lod, offsets,
   2778                            texel_out);
   2779    }
   2780 
   2781    else {
   2782       LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
   2783       LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
   2784       boolean use_aos;
   2785 
   2786       if (util_format_is_pure_integer(static_texture_state->format) &&
   2787           !util_format_has_depth(bld.format_desc) &&
   2788           (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
   2789            static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
   2790            static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
   2791          /*
   2792           * Bail if impossible filtering is specified (the awkward additional
   2793           * depth check is because it is legal in gallium to have things like S8Z24
   2794           * here which would say it's pure int despite such formats should sample
   2795           * the depth component).
   2796           * In GL such filters make the texture incomplete, this makes it robust
   2797           * against state trackers which set this up regardless (we'd crash in the
   2798           * lerp later (except for gather)).
   2799           * Must do this after fetch_texel code since with GL state tracker we'll
   2800           * get some junk sampler for buffer textures.
   2801           */
   2802          unsigned chan;
   2803          LLVMValueRef zero = lp_build_zero(gallivm, type);
   2804          for (chan = 0; chan < 4; chan++) {
   2805             texel_out[chan] = zero;
   2806          }
   2807          return;
   2808       }
   2809 
   2810       use_aos = util_format_fits_8unorm(bld.format_desc) &&
   2811                 op_is_tex &&
   2812                 /* not sure this is strictly needed or simply impossible */
   2813                 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
   2814                 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
   2815 
   2816       use_aos &= bld.num_lods <= num_quads ||
   2817                  derived_sampler_state.min_img_filter ==
   2818                     derived_sampler_state.mag_img_filter;
   2819       if (dims > 1) {
   2820          use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
   2821          if (dims > 2) {
   2822             use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
   2823          }
   2824       }
   2825       if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
   2826            static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
   2827           derived_sampler_state.seamless_cube_map &&
   2828           (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
   2829            derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
   2830          /* theoretically possible with AoS filtering but not implemented (complex!) */
   2831          use_aos = 0;
   2832       }
   2833 
   2834       if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
   2835           !use_aos && util_format_fits_8unorm(bld.format_desc)) {
   2836          debug_printf("%s: using floating point linear filtering for %s\n",
   2837                       __FUNCTION__, bld.format_desc->short_name);
   2838          debug_printf("  min_img %d  mag_img %d  mip %d  target %d  seamless %d"
   2839                       "  wraps %d  wrapt %d  wrapr %d\n",
   2840                       derived_sampler_state.min_img_filter,
   2841                       derived_sampler_state.mag_img_filter,
   2842                       derived_sampler_state.min_mip_filter,
   2843                       static_texture_state->target,
   2844                       derived_sampler_state.seamless_cube_map,
   2845                       derived_sampler_state.wrap_s,
   2846                       derived_sampler_state.wrap_t,
   2847                       derived_sampler_state.wrap_r);
   2848       }
   2849 
   2850       lp_build_sample_common(&bld, texture_index, sampler_index,
   2851                              newcoords,
   2852                              derivs, lod_bias, explicit_lod,
   2853                              &lod_positive, &lod_fpart,
   2854                              &ilevel0, &ilevel1);
   2855 
   2856       if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
   2857          /* The aos path doesn't do seamless filtering so simply add cube layer
   2858           * to face now.
   2859           */
   2860          newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
   2861       }
   2862 
   2863       /*
   2864        * we only try 8-wide sampling with soa or if we have AVX2
   2865        * as it appears to be a loss with just AVX)
   2866        */
   2867       if (num_quads == 1 || !use_aos ||
   2868           (util_cpu_caps.has_avx2 &&
   2869            (bld.num_lods == 1 ||
   2870             derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
   2871          if (use_aos) {
   2872             /* do sampling/filtering with fixed pt arithmetic */
   2873             lp_build_sample_aos(&bld, sampler_index,
   2874                                 newcoords[0], newcoords[1],
   2875                                 newcoords[2],
   2876                                 offsets, lod_positive, lod_fpart,
   2877                                 ilevel0, ilevel1,
   2878                                 texel_out);
   2879          }
   2880 
   2881          else {
   2882             lp_build_sample_general(&bld, sampler_index,
   2883                                     op_type == LP_SAMPLER_OP_GATHER,
   2884                                     newcoords, offsets,
   2885                                     lod_positive, lod_fpart,
   2886                                     ilevel0, ilevel1,
   2887                                     texel_out);
   2888          }
   2889       }
   2890       else {
   2891          unsigned j;
   2892          struct lp_build_sample_context bld4;
   2893          struct lp_type type4 = type;
   2894          unsigned i;
   2895          LLVMValueRef texelout4[4];
   2896          LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
   2897 
   2898          type4.length = 4;
   2899 
   2900          /* Setup our build context */
   2901          memset(&bld4, 0, sizeof bld4);
   2902          bld4.gallivm = bld.gallivm;
   2903          bld4.context_ptr = bld.context_ptr;
   2904          bld4.static_texture_state = bld.static_texture_state;
   2905          bld4.static_sampler_state = bld.static_sampler_state;
   2906          bld4.dynamic_state = bld.dynamic_state;
   2907          bld4.format_desc = bld.format_desc;
   2908          bld4.dims = bld.dims;
   2909          bld4.row_stride_array = bld.row_stride_array;
   2910          bld4.img_stride_array = bld.img_stride_array;
   2911          bld4.base_ptr = bld.base_ptr;
   2912          bld4.mip_offsets = bld.mip_offsets;
   2913          bld4.int_size = bld.int_size;
   2914          bld4.cache = bld.cache;
   2915 
   2916          bld4.vector_width = lp_type_width(type4);
   2917 
   2918          bld4.float_type = lp_type_float(32);
   2919          bld4.int_type = lp_type_int(32);
   2920          bld4.coord_type = type4;
   2921          bld4.int_coord_type = lp_int_type(type4);
   2922          bld4.float_size_in_type = lp_type_float(32);
   2923          bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
   2924          bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
   2925          bld4.texel_type = bld.texel_type;
   2926          bld4.texel_type.length = 4;
   2927 
   2928          bld4.num_mips = bld4.num_lods = 1;
   2929          if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
   2930              (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
   2931              (static_texture_state->target == PIPE_TEXTURE_CUBE ||
   2932               static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
   2933              (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
   2934             bld4.num_mips = type4.length;
   2935             bld4.num_lods = type4.length;
   2936          }
   2937          if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
   2938              (explicit_lod || lod_bias || derivs)) {
   2939             if ((!op_is_tex && target != PIPE_BUFFER) ||
   2940                 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
   2941                bld4.num_mips = type4.length;
   2942                bld4.num_lods = type4.length;
   2943             }
   2944             else if (op_is_tex && min_img_filter != mag_img_filter) {
   2945                bld4.num_mips = 1;
   2946                bld4.num_lods = type4.length;
   2947             }
   2948          }
   2949 
   2950          /* we want native vector size to be able to use our intrinsics */
   2951          bld4.lodf_type = type4;
   2952          if (bld4.num_lods != type4.length) {
   2953             bld4.lodf_type.length = 1;
   2954          }
   2955          bld4.lodi_type = lp_int_type(bld4.lodf_type);
   2956          bld4.levelf_type = type4;
   2957          if (bld4.num_mips != type4.length) {
   2958             bld4.levelf_type.length = 1;
   2959          }
   2960          bld4.leveli_type = lp_int_type(bld4.levelf_type);
   2961          bld4.float_size_type = bld4.float_size_in_type;
   2962          if (bld4.num_mips > 1) {
   2963             bld4.float_size_type.length = bld4.num_mips == type4.length ?
   2964                                             bld4.num_mips * bld4.float_size_in_type.length :
   2965                                             type4.length;
   2966          }
   2967          bld4.int_size_type = lp_int_type(bld4.float_size_type);
   2968 
   2969          lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
   2970          lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
   2971          lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
   2972          lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
   2973          lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
   2974          lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
   2975          lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
   2976          lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
   2977          lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
   2978          lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
   2979          lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
   2980          lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
   2981          lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
   2982          lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
   2983 
   2984          for (i = 0; i < num_quads; i++) {
   2985             LLVMValueRef s4, t4, r4;
   2986             LLVMValueRef lod_positive4, lod_fpart4 = NULL;
   2987             LLVMValueRef ilevel04, ilevel14 = NULL;
   2988             LLVMValueRef offsets4[4] = { NULL };
   2989             unsigned num_lods = bld4.num_lods;
   2990 
   2991             s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
   2992             t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
   2993             r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
   2994 
   2995             if (offsets[0]) {
   2996                offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
   2997                if (dims > 1) {
   2998                   offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
   2999                   if (dims > 2) {
   3000                      offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
   3001                   }
   3002                }
   3003             }
   3004             lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
   3005             ilevel04 = bld.num_mips == 1 ? ilevel0 :
   3006                           lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
   3007             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
   3008                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
   3009                lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
   3010             }
   3011 
   3012             if (use_aos) {
   3013                /* do sampling/filtering with fixed pt arithmetic */
   3014                lp_build_sample_aos(&bld4, sampler_index,
   3015                                    s4, t4, r4, offsets4,
   3016                                    lod_positive4, lod_fpart4,
   3017                                    ilevel04, ilevel14,
   3018                                    texelout4);
   3019             }
   3020 
   3021             else {
   3022                /* this path is currently unreachable and hence might break easily... */
   3023                LLVMValueRef newcoords4[5];
   3024                newcoords4[0] = s4;
   3025                newcoords4[1] = t4;
   3026                newcoords4[2] = r4;
   3027                newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
   3028                newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
   3029 
   3030                lp_build_sample_general(&bld4, sampler_index,
   3031                                        op_type == LP_SAMPLER_OP_GATHER,
   3032                                        newcoords4, offsets4,
   3033                                        lod_positive4, lod_fpart4,
   3034                                        ilevel04, ilevel14,
   3035                                        texelout4);
   3036             }
   3037             for (j = 0; j < 4; j++) {
   3038                texelouttmp[j][i] = texelout4[j];
   3039             }
   3040          }
   3041 
   3042          for (j = 0; j < 4; j++) {
   3043             texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
   3044          }
   3045       }
   3046    }
   3047 
   3048    if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
   3049       apply_sampler_swizzle(&bld, texel_out);
   3050    }
   3051 
   3052    /*
   3053     * texel type can be a (32bit) int/uint (for pure int formats only),
   3054     * however we are expected to always return floats (storage is untyped).
   3055     */
   3056    if (!bld.texel_type.floating) {
   3057       unsigned chan;
   3058       for (chan = 0; chan < 4; chan++) {
   3059          texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
   3060                                             lp_build_vec_type(gallivm, type), "");
   3061       }
   3062    }
   3063 }
   3064 
   3065 /* NOTE(review): USE_TEX_FUNC_CALL is not referenced in the code next to it;
         * presumably it selects emitting texture sampling as calls to generated
         * functions - confirm at its use site. */
   3066 #define USE_TEX_FUNC_CALL 1
   3067 /* Capacity of the args[] array declared in lp_build_sample_soa_func(). */
   3068 #define LP_MAX_TEX_FUNC_ARGS 32
   3069 /**
         * Report how the sampling arguments of a texture target are laid out.
         *
         * Outputs (all four are always written):
         *   num_coords  - texture_dims(target), or 3 for PIPE_TEXTURE_CUBE_ARRAY
         *   num_offsets - texture_dims(target)
         *   num_derivs  - 3 for cube and cube array targets, otherwise
         *                 texture_dims(target)
         *   layer       - 3 for PIPE_TEXTURE_CUBE_ARRAY; otherwise 2 when
         *                 has_layer_coord(target) is true and 0 when it is false.
         *                 lp_build_sample_gen_func() treats a non-zero value as
         *                 the coords[] index that receives the layer parameter.
         */
   3070 static inline void
   3071 get_target_info(enum pipe_texture_target target,
   3072                 unsigned *num_coords, unsigned *num_derivs,
   3073                 unsigned *num_offsets, unsigned *layer)
   3074 {
   3075    unsigned dims = texture_dims(target);
   3076    *num_coords = dims;
   3077    *num_offsets = dims;
           /* cubes always get 3 derivative pairs, independent of dims */
   3078    *num_derivs = (target == PIPE_TEXTURE_CUBE ||
   3079                   target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
           /* 0 doubles as "no layer coordinate" */
   3080    *layer = has_layer_coord(target) ? 2: 0;
   3081    if (target == PIPE_TEXTURE_CUBE_ARRAY) {
   3082       /*
   3083        * dims doesn't include r coord for cubes - this is handled
   3084        * by layer instead, but need to fix up for cube arrays...
   3085        */
   3086       *layer = 3;
   3087       *num_coords = 3;
   3088    }
   3089 }
   3090 
   3091 
   3092 /**
   3093  * Generate the function body for a texture sampling function.
         *
         * The parameters of the LLVM function are consumed in this fixed order:
         * context pointer; thread data pointer (only when the dynamic state has a
         * cache_ptr hook and the texture format layout is S3TC); num_coords
         * coordinates; the layer coordinate (when get_target_info() reports a
         * non-zero layer index); the shadow coordinate (LP_SAMPLER_SHADOW);
         * num_offsets offsets (LP_SAMPLER_OFFSETS); then either one lod value
         * (bias or explicit lod control) or num_derivs ddx/ddy pairs (derivative
         * lod control). num_args is asserted to equal the number of parameters
         * consumed.
         *
         * The sampling code itself is emitted by lp_build_sample_soa_code() into
         * a new "entry" block through a temporary builder; gallivm->builder is
         * restored afterwards. The four texel_out values are returned as an
         * aggregate and the function is handed to gallivm_verify_function().
   3094  */
   3095 static void
   3096 lp_build_sample_gen_func(struct gallivm_state *gallivm,
   3097                          const struct lp_static_texture_state *static_texture_state,
   3098                          const struct lp_static_sampler_state *static_sampler_state,
   3099                          struct lp_sampler_dynamic_state *dynamic_state,
   3100                          struct lp_type type,
   3101                          unsigned texture_index,
   3102                          unsigned sampler_index,
   3103                          LLVMValueRef function,
   3104                          unsigned num_args,
   3105                          unsigned sample_key)
   3106 {
   3107    LLVMBuilderRef old_builder;
   3108    LLVMBasicBlockRef block;
   3109    LLVMValueRef coords[5];
   3110    LLVMValueRef offsets[3] = { NULL };
   3111    LLVMValueRef lod = NULL;
   3112    LLVMValueRef context_ptr;
   3113    LLVMValueRef thread_data_ptr = NULL;
   3114    LLVMValueRef texel_out[4];
   3115    struct lp_derivatives derivs;
   3116    struct lp_derivatives *deriv_ptr = NULL;
   3117    unsigned num_param = 0;
   3118    unsigned i, num_coords, num_derivs, num_offsets, layer;
   3119    enum lp_sampler_lod_control lod_control;
   3120    boolean need_cache = FALSE;
   3121 
           /* decode the lod control field of the sample key */
   3122    lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
   3123                     LP_SAMPLER_LOD_CONTROL_SHIFT;
   3124 
   3125    get_target_info(static_texture_state->target,
   3126                    &num_coords, &num_derivs, &num_offsets, &layer);
   3127 
           /*
            * A thread data pointer parameter is only read below when the dynamic
            * state provides a cache_ptr hook and the format layout is S3TC.
            */
   3128    if (dynamic_state->cache_ptr) {
   3129       const struct util_format_description *format_desc;
   3130       format_desc = util_format_description(static_texture_state->format);
   3131       if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
   3132          need_cache = TRUE;
   3133       }
   3134    }
   3135 
   3136    /* "unpack" arguments */
           /*
            * num_param counts the parameters consumed so far.
            * NOTE(review): the order presumably mirrors the packing order in
            * lp_build_sample_soa_func() - confirm against that function.
            */
   3137    context_ptr = LLVMGetParam(function, num_param++);
   3138    if (need_cache) {
   3139       thread_data_ptr = LLVMGetParam(function, num_param++);
   3140    }
   3141    for (i = 0; i < num_coords; i++) {
   3142       coords[i] = LLVMGetParam(function, num_param++);
   3143    }
           /*
            * Slots not backed by a coordinate parameter are set to undef values;
            * the layer and shadow slots are overwritten below when present.
            */
   3144    for (i = num_coords; i < 5; i++) {
   3145       /* This is rather unfortunate... */
   3146       coords[i] = lp_build_undef(gallivm, type);
   3147    }
   3148    if (layer) {
   3149       coords[layer] = LLVMGetParam(function, num_param++);
   3150    }
           /* the shadow coordinate always lives in coords[4] */
   3151    if (sample_key & LP_SAMPLER_SHADOW) {
   3152       coords[4] = LLVMGetParam(function, num_param++);
   3153    }
   3154    if (sample_key & LP_SAMPLER_OFFSETS) {
   3155       for (i = 0; i < num_offsets; i++) {
   3156          offsets[i] = LLVMGetParam(function, num_param++);
   3157       }
   3158    }
           /*
            * Either a single lod value or derivatives are passed, never both;
            * lod and deriv_ptr stay NULL when their lod control is not selected.
            */
   3159    if (lod_control == LP_SAMPLER_LOD_BIAS ||
   3160        lod_control == LP_SAMPLER_LOD_EXPLICIT) {
   3161       lod = LLVMGetParam(function, num_param++);
   3162    }
   3163    else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
              /* only the first num_derivs ddx/ddy entries are written */
   3164       for (i = 0; i < num_derivs; i++) {
   3165          derivs.ddx[i] = LLVMGetParam(function, num_param++);
   3166          derivs.ddy[i] = LLVMGetParam(function, num_param++);
   3167       }
   3168       deriv_ptr = &derivs;
   3169    }
   3170 
           /* every parameter of the function must have been consumed */
   3171    assert(num_args == num_param);
   3172 
   3173    /*
   3174     * Function body
            *
            * Swap in a fresh builder positioned at the end of a new "entry" block
            * so the sampling code is emitted into 'function'; the previous
            * builder is saved and restored further down.
   3175     */
   3176 
   3177    old_builder = gallivm->builder;
   3178    block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
   3179    gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
   3180    LLVMPositionBuilderAtEnd(gallivm->builder, block);
   3181 
   3182    lp_build_sample_soa_code(gallivm,
   3183                             static_texture_state,
   3184                             static_sampler_state,
   3185                             dynamic_state,
   3186                             type,
   3187                             sample_key,
   3188                             texture_index,
   3189                             sampler_index,
   3190                             context_ptr,
   3191                             thread_data_ptr,
   3192                             coords,
   3193                             offsets,
   3194                             deriv_ptr,
   3195                             lod,
   3196                             texel_out);
   3197 
           /* return the four texel channels as one aggregate value */
   3198    LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
   3199 
           /* drop the temporary builder and restore the saved one */
   3200    LLVMDisposeBuilder(gallivm->builder);
   3201    gallivm->builder = old_builder;
   3202 
   3203    gallivm_verify_function(gallivm, function);
   3204 }
   3205 
   3206 
   3207 /**
   3208  * Call the matching function for texture sampling.
   3209  * If there's no match, generate a new one.
   3210  */
   3211 static void
   3212 lp_build_sample_soa_func(struct gallivm_state *gallivm,
   3213                          const struct lp_static_texture_state *static_texture_state,
   3214                          const struct lp_static_sampler_state *static_sampler_state,
   3215                          struct lp_sampler_dynamic_state *dynamic_state,
   3216                          const struct lp_sampler_params *params)
   3217 {
   3218    LLVMBuilderRef builder = gallivm->builder;
   3219    LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
   3220                              LLVMGetInsertBlock(builder)));
   3221    LLVMValueRef function, inst;
   3222    LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
   3223    LLVMBasicBlockRef bb;
   3224    LLVMValueRef tex_ret;
   3225    unsigned num_args = 0;
   3226    char func_name[64];
   3227    unsigned i, num_coords, num_derivs, num_offsets, layer;
   3228    unsigned texture_index = params->texture_index;
   3229    unsigned sampler_index = params->sampler_index;
   3230    unsigned sample_key = params->sample_key;
   3231    const LLVMValueRef *coords = params->coords;
   3232    const LLVMValueRef *offsets = params->offsets;
   3233    const struct lp_derivatives *derivs = params->derivs;
   3234    enum lp_sampler_lod_control lod_control;
   3235    boolean need_cache = FALSE;
   3236 
   3237    lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
   3238                     LP_SAMPLER_LOD_CONTROL_SHIFT;
   3239 
   3240    get_target_info(static_texture_state->target,
   3241                    &num_coords, &num_derivs, &num_offsets, &layer);
   3242 
   3243    if (dynamic_state->cache_ptr) {
   3244       const struct util_format_description *format_desc;
   3245       format_desc = util_format_description(static_texture_state->format);
   3246       if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
   3247          /*
   3248           * This is not 100% correct, if we have cache but the
   3249           * util_format_s3tc_prefer is true the cache won't get used
   3250           * regardless (could hook up the block decode there...) */
   3251          need_cache = TRUE;
   3252       }
   3253    }
   3254    /*
   3255     * texture function matches are found by name.
   3256     * Thus the name has to include both the texture and sampler unit
   3257     * (which covers all static state) plus the actual texture function
   3258     * (including things like offsets, shadow coord, lod control).
   3259     * Additionally lod_property has to be included too.
   3260     */
   3261 
   3262    util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
   3263                  texture_index, sampler_index, sample_key);
   3264 
   3265    function = LLVMGetNamedFunction(module, func_name);
   3266 
   3267    if(!function) {
   3268       LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
   3269       LLVMTypeRef ret_type;
   3270       LLVMTypeRef function_type;
   3271       LLVMTypeRef val_type[4];
   3272       unsigned num_param = 0;
   3273 
   3274       /*
   3275        * Generate the function prototype.
   3276        */
   3277 
   3278       arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
   3279       if (need_cache) {
   3280          arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
   3281       }
   3282       for (i = 0; i < num_coords; i++) {
   3283          arg_types[num_param++] = LLVMTypeOf(coords[0]);
   3284          assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
   3285       }
   3286       if (layer) {
   3287          arg_types[num_param++] = LLVMTypeOf(coords[layer]);
   3288          assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
   3289       }
   3290       if (sample_key & LP_SAMPLER_SHADOW) {
   3291          arg_types[num_param++] = LLVMTypeOf(coords[0]);
   3292       }
   3293       if (sample_key & LP_SAMPLER_OFFSETS) {
   3294          for (i = 0; i < num_offsets; i++) {
   3295             arg_types[num_param++] = LLVMTypeOf(offsets[0]);
   3296             assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
   3297          }
   3298       }
   3299       if (lod_control == LP_SAMPLER_LOD_BIAS ||
   3300           lod_control == LP_SAMPLER_LOD_EXPLICIT) {
   3301          arg_types[num_param++] = LLVMTypeOf(params->lod);
   3302       }
   3303       else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
   3304          for (i = 0; i < num_derivs; i++) {
   3305             arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
   3306             arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
   3307             assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
   3308             assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
   3309          }
   3310       }
   3311 
   3312       val_type[0] = val_type[1] = val_type[2] = val_type[3] =
   3313          lp_build_vec_type(gallivm, params->type);
   3314       ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
   3315       function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
   3316       function = LLVMAddFunction(module, func_name, function_type);
   3317 
   3318       for (i = 0; i < num_param; ++i) {
   3319          if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
   3320 
   3321             lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
   3322          }
   3323       }
   3324 
   3325       LLVMSetFunctionCallConv(function, LLVMFastCallConv);
   3326       LLVMSetLinkage(function, LLVMInternalLinkage);
   3327 
   3328       lp_build_sample_gen_func(gallivm,
   3329                                static_texture_state,
   3330                                static_sampler_state,
   3331                                dynamic_state,
   3332                                params->type,
   3333                                texture_index,
   3334                                sampler_index,
   3335                                function,
   3336                                num_param,
   3337                                sample_key);
   3338    }
   3339 
   3340    num_args = 0;
   3341    args[num_args++] = params->context_ptr;
   3342    if (need_cache) {
   3343       args[num_args++] = params->thread_data_ptr;
   3344    }
   3345    for (i = 0; i < num_coords; i++) {
   3346       args[num_args++] = coords[i];
   3347    }
   3348    if (layer) {
   3349       args[num_args++] = coords[layer];
   3350    }
   3351    if (sample_key & LP_SAMPLER_SHADOW) {
   3352       args[num_args++] = coords[4];
   3353    }
   3354    if (sample_key & LP_SAMPLER_OFFSETS) {
   3355       for (i = 0; i < num_offsets; i++) {
   3356          args[num_args++] = offsets[i];
   3357       }
   3358    }
   3359    if (lod_control == LP_SAMPLER_LOD_BIAS ||
   3360        lod_control == LP_SAMPLER_LOD_EXPLICIT) {
   3361       args[num_args++] = params->lod;
   3362    }
   3363    else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
   3364       for (i = 0; i < num_derivs; i++) {
   3365          args[num_args++] = derivs->ddx[i];
   3366          args[num_args++] = derivs->ddy[i];
   3367       }
   3368    }
   3369 
   3370    assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
   3371 
   3372    tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
   3373    bb = LLVMGetInsertBlock(builder);
   3374    inst = LLVMGetLastInstruction(bb);
   3375    LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
   3376 
   3377    for (i = 0; i < 4; i++) {
   3378       params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
   3379    }
   3380 }
   3381 
   3382 
   3383 /**
   3384  * Build texture sampling code.
   3385  * Either via a function call or inline it directly.
   3386  */
   3387 void
   3388 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
   3389                     const struct lp_static_sampler_state *static_sampler_state,
   3390                     struct lp_sampler_dynamic_state *dynamic_state,
   3391                     struct gallivm_state *gallivm,
   3392                     const struct lp_sampler_params *params)
   3393 {
   3394    boolean use_tex_func = FALSE;
   3395 
   3396    /*
   3397     * Do not use a function call if the sampling is "simple enough".
   3398     * We define this by
   3399     * a) format
   3400     * b) no mips (either one level only or no mip filter)
   3401     * No mips will definitely make the code smaller, though
   3402     * the format requirement is a bit iffy - there's some (SoA) formats
   3403     * which definitely generate less code. This does happen to catch
   3404     * some important cases though which are hurt quite a bit by using
   3405     * a call (though not really because of the call overhead but because
   3406     * they are reusing the same texture unit with some of the same
   3407     * parameters).
   3408     * Ideally we'd let llvm recognize this stuff by doing IPO passes.
   3409     */
   3410 
   3411    if (USE_TEX_FUNC_CALL) {
   3412       const struct util_format_description *format_desc;
   3413       boolean simple_format;
   3414       boolean simple_tex;
   3415       enum lp_sampler_op_type op_type;
   3416       format_desc = util_format_description(static_texture_state->format);
   3417       simple_format = !format_desc ||
   3418                          (util_format_is_rgba8_variant(format_desc) &&
   3419                           format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
   3420 
   3421       op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
   3422                     LP_SAMPLER_OP_TYPE_SHIFT;
   3423       simple_tex =
   3424          op_type != LP_SAMPLER_OP_TEXTURE ||
   3425            ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
   3426              static_texture_state->level_zero_only == TRUE) &&
   3427             static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
   3428 
   3429       use_tex_func = format_desc && !(simple_format && simple_tex);
   3430    }
   3431 
   3432    if (use_tex_func) {
   3433       lp_build_sample_soa_func(gallivm,
   3434                                static_texture_state,
   3435                                static_sampler_state,
   3436                                dynamic_state,
   3437                                params);
   3438    }
   3439    else {
   3440       lp_build_sample_soa_code(gallivm,
   3441                                static_texture_state,
   3442                                static_sampler_state,
   3443                                dynamic_state,
   3444                                params->type,
   3445                                params->sample_key,
   3446                                params->texture_index,
   3447                                params->sampler_index,
   3448                                params->context_ptr,
   3449                                params->thread_data_ptr,
   3450                                params->coords,
   3451                                params->offsets,
   3452                                params->derivs,
   3453                                params->lod,
   3454                                params->texel);
   3455    }
   3456 }
   3457 
   3458 
   3459 void
   3460 lp_build_size_query_soa(struct gallivm_state *gallivm,
   3461                         const struct lp_static_texture_state *static_state,
   3462                         struct lp_sampler_dynamic_state *dynamic_state,
   3463                         const struct lp_sampler_size_query_params *params)
   3464 {
   3465    LLVMValueRef lod, level, size;
   3466    LLVMValueRef first_level = NULL;
   3467    int dims, i;
   3468    boolean has_array;
   3469    unsigned num_lods = 1;
   3470    struct lp_build_context bld_int_vec4;
   3471    LLVMValueRef context_ptr = params->context_ptr;
   3472    unsigned texture_unit = params->texture_unit;
   3473    unsigned target = params->target;
   3474 
   3475    if (static_state->format == PIPE_FORMAT_NONE) {
   3476       /*
   3477        * If there's nothing bound, format is NONE, and we must return
   3478        * all zero as mandated by d3d10 in this case.
   3479        */
   3480       unsigned chan;
   3481       LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F);
   3482       for (chan = 0; chan < 4; chan++) {
   3483          params->sizes_out[chan] = zero;
   3484       }
   3485       return;
   3486    }
   3487 
   3488    /*
   3489     * Do some sanity verification about bound texture and shader dcl target.
   3490     * Not entirely sure what's possible but assume array/non-array
   3491     * always compatible (probably not ok for OpenGL but d3d10 has no
   3492     * distinction of arrays at the resource level).
   3493     * Everything else looks bogus (though not entirely sure about rect/2d).
   3494     * Currently disabled because it causes assertion failures if there's
   3495     * nothing bound (or rather a dummy texture, not that this case would
   3496     * return the right values).
   3497     */
   3498    if (0 && static_state->target != target) {
   3499       if (static_state->target == PIPE_TEXTURE_1D)
   3500          assert(target == PIPE_TEXTURE_1D_ARRAY);
   3501       else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
   3502          assert(target == PIPE_TEXTURE_1D);
   3503       else if (static_state->target == PIPE_TEXTURE_2D)
   3504          assert(target == PIPE_TEXTURE_2D_ARRAY);
   3505       else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
   3506          assert(target == PIPE_TEXTURE_2D);
   3507       else if (static_state->target == PIPE_TEXTURE_CUBE)
   3508          assert(target == PIPE_TEXTURE_CUBE_ARRAY);
   3509       else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
   3510          assert(target == PIPE_TEXTURE_CUBE);
   3511       else
   3512          assert(0);
   3513    }
   3514 
   3515    dims = texture_dims(target);
   3516 
   3517    switch (target) {
   3518    case PIPE_TEXTURE_1D_ARRAY:
   3519    case PIPE_TEXTURE_2D_ARRAY:
   3520    case PIPE_TEXTURE_CUBE_ARRAY:
   3521       has_array = TRUE;
   3522       break;
   3523    default:
   3524       has_array = FALSE;
   3525       break;
   3526    }
   3527 
   3528    assert(!params->int_type.floating);
   3529 
   3530    lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
   3531 
   3532    if (params->explicit_lod) {
   3533       /* FIXME: this needs to honor per-element lod */
   3534       lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
   3535                                     lp_build_const_int32(gallivm, 0), "");
   3536       first_level = dynamic_state->first_level(dynamic_state, gallivm,
   3537                                                context_ptr, texture_unit);
   3538       level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
   3539       lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
   3540    } else {
   3541       lod = bld_int_vec4.zero;
   3542    }
   3543 
   3544    size = bld_int_vec4.undef;
   3545 
   3546    size = LLVMBuildInsertElement(gallivm->builder, size,
   3547                                  dynamic_state->width(dynamic_state, gallivm,
   3548                                                       context_ptr, texture_unit),
   3549                                  lp_build_const_int32(gallivm, 0), "");
   3550 
   3551    if (dims >= 2) {
   3552       size = LLVMBuildInsertElement(gallivm->builder, size,
   3553                                     dynamic_state->height(dynamic_state, gallivm,
   3554                                                           context_ptr, texture_unit),
   3555                                     lp_build_const_int32(gallivm, 1), "");
   3556    }
   3557 
   3558    if (dims >= 3) {
   3559       size = LLVMBuildInsertElement(gallivm->builder, size,
   3560                                     dynamic_state->depth(dynamic_state, gallivm,
   3561                                                          context_ptr, texture_unit),
   3562                                     lp_build_const_int32(gallivm, 2), "");
   3563    }
   3564 
   3565    size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);
   3566 
   3567    if (has_array) {
   3568       LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm,
   3569                                                  context_ptr, texture_unit);
   3570       if (target == PIPE_TEXTURE_CUBE_ARRAY) {
   3571          /*
   3572           * It looks like GL wants number of cubes, d3d10.1 has it undefined?
   3573           * Could avoid this by passing in number of cubes instead of total
   3574           * number of layers (might make things easier elsewhere too).
   3575           */
   3576          LLVMValueRef six = lp_build_const_int32(gallivm, 6);
   3577          layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
   3578       }
   3579       size = LLVMBuildInsertElement(gallivm->builder, size, layers,
   3580                                     lp_build_const_int32(gallivm, dims), "");
   3581    }
   3582 
   3583    /*
   3584     * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
   3585     * if level is out of bounds (note this can't cover unbound texture
   3586     * here, which also requires returning zero).
   3587     */
   3588    if (params->explicit_lod && params->is_sviewinfo) {
   3589       LLVMValueRef last_level, out, out1;
   3590       struct lp_build_context leveli_bld;
   3591 
   3592       /* everything is scalar for now */
   3593       lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
   3594       last_level = dynamic_state->last_level(dynamic_state, gallivm,
   3595                                              context_ptr, texture_unit);
   3596 
   3597       out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
   3598       out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
   3599       out = lp_build_or(&leveli_bld, out, out1);
   3600       if (num_lods == 1) {
   3601          out = lp_build_broadcast_scalar(&bld_int_vec4, out);
   3602       }
   3603       else {
   3604          /* TODO */
   3605          assert(0);
   3606       }
   3607       size = lp_build_andnot(&bld_int_vec4, size, out);
   3608    }
   3609    for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
   3610       params->sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, params->int_type,
   3611                                                 size,
   3612                                                 lp_build_const_int32(gallivm, i));
   3613    }
   3614    if (params->is_sviewinfo) {
   3615       for (; i < 4; i++) {
   3616          params->sizes_out[i] = lp_build_const_vec(gallivm, params->int_type, 0.0);
   3617       }
   3618    }
   3619 
   3620    /*
   3621     * if there's no explicit_lod (buffers, rects) queries requiring nr of
   3622     * mips would be illegal.
   3623     */
   3624    if (params->is_sviewinfo && params->explicit_lod) {
   3625       struct lp_build_context bld_int_scalar;
   3626       LLVMValueRef num_levels;
   3627       lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
   3628 
   3629       if (static_state->level_zero_only) {
   3630          num_levels = bld_int_scalar.one;
   3631       }
   3632       else {
   3633          LLVMValueRef last_level;
   3634 
   3635          last_level = dynamic_state->last_level(dynamic_state, gallivm,
   3636                                                 context_ptr, texture_unit);
   3637          num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
   3638          num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
   3639       }
   3640       params->sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, params->int_type),
   3641                                         num_levels);
   3642    }
   3643 }
   3644