Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * @file
     30  * Texture sampling -- AoS.
     31  *
     32  * @author Jose Fonseca <jfonseca (at) vmware.com>
     33  * @author Brian Paul <brianp (at) vmware.com>
     34  */
     35 
     36 #include "pipe/p_defines.h"
     37 #include "pipe/p_state.h"
     38 #include "util/u_debug.h"
     39 #include "util/u_dump.h"
     40 #include "util/u_memory.h"
     41 #include "util/u_math.h"
     42 #include "util/u_format.h"
     43 #include "util/u_cpu_detect.h"
     44 #include "lp_bld_debug.h"
     45 #include "lp_bld_type.h"
     46 #include "lp_bld_const.h"
     47 #include "lp_bld_conv.h"
     48 #include "lp_bld_arit.h"
     49 #include "lp_bld_bitarit.h"
     50 #include "lp_bld_logic.h"
     51 #include "lp_bld_swizzle.h"
     52 #include "lp_bld_pack.h"
     53 #include "lp_bld_flow.h"
     54 #include "lp_bld_gather.h"
     55 #include "lp_bld_format.h"
     56 #include "lp_bld_init.h"
     57 #include "lp_bld_sample.h"
     58 #include "lp_bld_sample_aos.h"
     59 #include "lp_bld_quad.h"
     60 
     61 
     62 /**
     63  * Build LLVM code for texture coord wrapping, for nearest filtering,
     64  * for scaled integer texcoords.
     65  * \param block_length  is the length of the pixel block along the
     66  *                      coordinate axis
     67  * \param coord  the incoming texcoord (s,t or r) scaled to the texture size
     68  * \param coord_f  the incoming texcoord (s,t or r) as float vec
     69  * \param length  the texture size along one dimension
     70  * \param stride  pixel stride along the coordinate axis (in bytes)
     71  * \param offset  the texel offset along the coord axis
     72  * \param is_pot  if TRUE, length is a power of two
     73  * \param wrap_mode  one of PIPE_TEX_WRAP_x
     74  * \param out_offset  byte offset for the wrapped coordinate
     75  * \param out_i  resulting sub-block pixel coordinate for coord0
     76  */
     77 static void
     78 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
     79                                  unsigned block_length,
     80                                  LLVMValueRef coord,
     81                                  LLVMValueRef coord_f,
     82                                  LLVMValueRef length,
     83                                  LLVMValueRef stride,
     84                                  LLVMValueRef offset,
     85                                  boolean is_pot,
     86                                  unsigned wrap_mode,
     87                                  LLVMValueRef *out_offset,
     88                                  LLVMValueRef *out_i)
     89 {
     90    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
     91    LLVMBuilderRef builder = bld->gallivm->builder;
     92    LLVMValueRef length_minus_one;
     93 
     94    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
     95 
     96    switch(wrap_mode) {
     97    case PIPE_TEX_WRAP_REPEAT:
     98       if(is_pot)
     99          coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
    100       else {
    101          struct lp_build_context *coord_bld = &bld->coord_bld;
    102          LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
    103          if (offset) {
    104             offset = lp_build_int_to_float(coord_bld, offset);
    105             offset = lp_build_div(coord_bld, offset, length_f);
    106             coord_f = lp_build_add(coord_bld, coord_f, offset);
    107          }
    108          coord = lp_build_fract_safe(coord_bld, coord_f);
    109          coord = lp_build_mul(coord_bld, coord, length_f);
    110          coord = lp_build_itrunc(coord_bld, coord);
    111       }
    112       break;
    113 
    114    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    115       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
    116       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
    117       break;
    118 
    119    case PIPE_TEX_WRAP_CLAMP:
    120    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    121    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    122    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    123    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    124    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    125    default:
    126       assert(0);
    127    }
    128 
    129    lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
    130                                   out_offset, out_i);
    131 }
    132 
    133 
    134 /**
    135  * Build LLVM code for texture coord wrapping, for nearest filtering,
    136  * for float texcoords.
    137  * \param coord  the incoming texcoord (s,t or r)
    138  * \param length  the texture size along one dimension
    139  * \param offset  the texel offset along the coord axis
    140  * \param is_pot  if TRUE, length is a power of two
    141  * \param wrap_mode  one of PIPE_TEX_WRAP_x
    142  * \param icoord  the texcoord after wrapping, as int
    143  */
    144 static void
    145 lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
    146                                    LLVMValueRef coord,
    147                                    LLVMValueRef length,
    148                                    LLVMValueRef offset,
    149                                    boolean is_pot,
    150                                    unsigned wrap_mode,
    151                                    LLVMValueRef *icoord)
    152 {
    153    struct lp_build_context *coord_bld = &bld->coord_bld;
    154    LLVMValueRef length_minus_one;
    155 
    156    switch(wrap_mode) {
    157    case PIPE_TEX_WRAP_REPEAT:
    158       if (offset) {
    159          /* this is definitely not ideal for POT case */
    160          offset = lp_build_int_to_float(coord_bld, offset);
    161          offset = lp_build_div(coord_bld, offset, length);
    162          coord = lp_build_add(coord_bld, coord, offset);
    163       }
    164       /* take fraction, unnormalize */
    165       coord = lp_build_fract_safe(coord_bld, coord);
    166       coord = lp_build_mul(coord_bld, coord, length);
    167       *icoord = lp_build_itrunc(coord_bld, coord);
    168       break;
    169    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    170       length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
    171       if (bld->static_sampler_state->normalized_coords) {
    172          /* scale coord to length */
    173          coord = lp_build_mul(coord_bld, coord, length);
    174       }
    175       if (offset) {
    176          offset = lp_build_int_to_float(coord_bld, offset);
    177          coord = lp_build_add(coord_bld, coord, offset);
    178       }
    179       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
    180                              length_minus_one);
    181       *icoord = lp_build_itrunc(coord_bld, coord);
    182       break;
    183 
    184    case PIPE_TEX_WRAP_CLAMP:
    185    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    186    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    187    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    188    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    189    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    190    default:
    191       assert(0);
    192    }
    193 }
    194 
    195 
    196 /**
    197  * Helper to compute the first coord and the weight for
    198  * linear wrap repeat npot textures
    199  */
    200 static void
    201 lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld,
    202                                       LLVMValueRef coord_f,
    203                                       LLVMValueRef length_i,
    204                                       LLVMValueRef length_f,
    205                                       LLVMValueRef *coord0_i,
    206                                       LLVMValueRef *weight_i)
    207 {
    208    struct lp_build_context *coord_bld = &bld->coord_bld;
    209    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    210    struct lp_build_context abs_coord_bld;
    211    struct lp_type abs_type;
    212    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
    213                                                 int_coord_bld->one);
    214    LLVMValueRef mask, i32_c8, i32_c128, i32_c255;
    215 
    216    /* wrap with normalized floats is just fract */
    217    coord_f = lp_build_fract(coord_bld, coord_f);
    218    /* mul by size */
    219    coord_f = lp_build_mul(coord_bld, coord_f, length_f);
    220    /* convert to int, compute lerp weight */
    221    coord_f = lp_build_mul_imm(&bld->coord_bld, coord_f, 256);
    222 
    223    /* At this point we don't have any negative numbers so use non-signed
    224     * build context which might help on some archs.
    225     */
    226    abs_type = coord_bld->type;
    227    abs_type.sign = 0;
    228    lp_build_context_init(&abs_coord_bld, bld->gallivm, abs_type);
    229    *coord0_i = lp_build_iround(&abs_coord_bld, coord_f);
    230 
    231    /* subtract 0.5 (add -128) */
    232    i32_c128 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, -128);
    233    *coord0_i = LLVMBuildAdd(bld->gallivm->builder, *coord0_i, i32_c128, "");
    234 
    235    /* compute fractional part (AND with 0xff) */
    236    i32_c255 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 255);
    237    *weight_i = LLVMBuildAnd(bld->gallivm->builder, *coord0_i, i32_c255, "");
    238 
    239    /* compute floor (shift right 8) */
    240    i32_c8 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 8);
    241    *coord0_i = LLVMBuildAShr(bld->gallivm->builder, *coord0_i, i32_c8, "");
    242    /*
    243     * we avoided the 0.5/length division before the repeat wrap,
    244     * now need to fix up edge cases with selects
    245     */
    246    mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
    247                            PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
    248    *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
    249    /*
    250     * We should never get values too large - except if coord was nan or inf,
    251     * in which case things go terribly wrong...
    252     * Alternatively, could use fract_safe above...
    253     */
    254    *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one);
    255 }
    256 
    257 
    258 /**
    259  * Build LLVM code for texture coord wrapping, for linear filtering,
    260  * for scaled integer texcoords.
    261  * \param block_length  is the length of the pixel block along the
    262  *                      coordinate axis
    263  * \param coord0  the incoming texcoord (s,t or r) scaled to the texture size
    264  * \param coord_f  the incoming texcoord (s,t or r) as float vec
    265  * \param length  the texture size along one dimension
    266  * \param stride  pixel stride along the coordinate axis (in bytes)
    267  * \param offset  the texel offset along the coord axis
    268  * \param is_pot  if TRUE, length is a power of two
    269  * \param wrap_mode  one of PIPE_TEX_WRAP_x
    270  * \param offset0  resulting relative offset for coord0
    271  * \param offset1  resulting relative offset for coord0 + 1
    272  * \param i0  resulting sub-block pixel coordinate for coord0
    273  * \param i1  resulting sub-block pixel coordinate for coord0 + 1
    274  */
    275 static void
    276 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
    277                                 unsigned block_length,
    278                                 LLVMValueRef coord0,
    279                                 LLVMValueRef *weight_i,
    280                                 LLVMValueRef coord_f,
    281                                 LLVMValueRef length,
    282                                 LLVMValueRef stride,
    283                                 LLVMValueRef offset,
    284                                 boolean is_pot,
    285                                 unsigned wrap_mode,
    286                                 LLVMValueRef *offset0,
    287                                 LLVMValueRef *offset1,
    288                                 LLVMValueRef *i0,
    289                                 LLVMValueRef *i1)
    290 {
    291    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    292    LLVMBuilderRef builder = bld->gallivm->builder;
    293    LLVMValueRef length_minus_one;
    294    LLVMValueRef lmask, umask, mask;
    295 
    296    /*
    297     * If the pixel block covers more than one pixel then there is no easy
    298     * way to calculate offset1 relative to offset0. Instead, compute them
    299     * independently. Otherwise, try to compute offset0 and offset1 with
    300     * a single stride multiplication.
    301     */
    302 
    303    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
    304 
    305    if (block_length != 1) {
    306       LLVMValueRef coord1;
    307       switch(wrap_mode) {
    308       case PIPE_TEX_WRAP_REPEAT:
    309          if (is_pot) {
    310             coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    311             coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
    312             coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
    313          }
    314          else {
    315             LLVMValueRef mask;
    316             LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
    317             if (offset) {
    318                offset = lp_build_int_to_float(&bld->coord_bld, offset);
    319                offset = lp_build_div(&bld->coord_bld, offset, length_f);
    320                coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
    321             }
    322             lp_build_coord_repeat_npot_linear_int(bld, coord_f,
    323                                                   length, length_f,
    324                                                   &coord0, weight_i);
    325             mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
    326                                     PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
    327             coord1 = LLVMBuildAnd(builder,
    328                                   lp_build_add(int_coord_bld, coord0,
    329                                                int_coord_bld->one),
    330                                   mask, "");
    331          }
    332          break;
    333 
    334       case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    335          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    336          coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
    337                                 length_minus_one);
    338          coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
    339                                 length_minus_one);
    340          break;
    341 
    342       case PIPE_TEX_WRAP_CLAMP:
    343       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    344       case PIPE_TEX_WRAP_MIRROR_REPEAT:
    345       case PIPE_TEX_WRAP_MIRROR_CLAMP:
    346       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    347       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    348       default:
    349          assert(0);
    350          coord0 = int_coord_bld->zero;
    351          coord1 = int_coord_bld->zero;
    352          break;
    353       }
    354       lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
    355                                      offset0, i0);
    356       lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
    357                                      offset1, i1);
    358       return;
    359    }
    360 
    361    *i0 = int_coord_bld->zero;
    362    *i1 = int_coord_bld->zero;
    363 
    364    switch(wrap_mode) {
    365    case PIPE_TEX_WRAP_REPEAT:
    366       if (is_pot) {
    367          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
    368       }
    369       else {
    370          LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
    371          if (offset) {
    372             offset = lp_build_int_to_float(&bld->coord_bld, offset);
    373             offset = lp_build_div(&bld->coord_bld, offset, length_f);
    374             coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
    375          }
    376          lp_build_coord_repeat_npot_linear_int(bld, coord_f,
    377                                                length, length_f,
    378                                                &coord0, weight_i);
    379       }
    380 
    381       mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
    382                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
    383 
    384       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
    385       *offset1 = LLVMBuildAnd(builder,
    386                               lp_build_add(int_coord_bld, *offset0, stride),
    387                               mask, "");
    388       break;
    389 
    390    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    391       /* XXX this might be slower than the separate path
    392        * on some newer cpus. With sse41 this is 8 instructions vs. 7
    393        * - at least on SNB this is almost certainly slower since
    394        * min/max are cheaper than selects, and the muls aren't bad.
    395        */
    396       lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
    397                                PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
    398       umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
    399                                PIPE_FUNC_LESS, coord0, length_minus_one);
    400 
    401       coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
    402       coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
    403 
    404       mask = LLVMBuildAnd(builder, lmask, umask, "");
    405 
    406       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
    407       *offset1 = lp_build_add(int_coord_bld,
    408                               *offset0,
    409                               LLVMBuildAnd(builder, stride, mask, ""));
    410       break;
    411 
    412    case PIPE_TEX_WRAP_CLAMP:
    413    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    414    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    415    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    416    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    417    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    418    default:
    419       assert(0);
    420       *offset0 = int_coord_bld->zero;
    421       *offset1 = int_coord_bld->zero;
    422       break;
    423    }
    424 }
    425 
    426 
    427 /**
    428  * Build LLVM code for texture coord wrapping, for linear filtering,
    429  * for float texcoords.
    430  * \param block_length  is the length of the pixel block along the
    431  *                      coordinate axis
    432  * \param coord  the incoming texcoord (s,t or r)
    433  * \param length  the texture size along one dimension
    434  * \param offset  the texel offset along the coord axis
    435  * \param is_pot  if TRUE, length is a power of two
    436  * \param wrap_mode  one of PIPE_TEX_WRAP_x
    437  * \param coord0  the first texcoord after wrapping, as int
    438  * \param coord1  the second texcoord after wrapping, as int
    439  * \param weight  the filter weight as int (0-255)
    440  * \param force_nearest  if this coord actually uses nearest filtering
    441  */
    442 static void
    443 lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
    444                                   unsigned block_length,
    445                                   LLVMValueRef coord,
    446                                   LLVMValueRef length,
    447                                   LLVMValueRef offset,
    448                                   boolean is_pot,
    449                                   unsigned wrap_mode,
    450                                   LLVMValueRef *coord0,
    451                                   LLVMValueRef *coord1,
    452                                   LLVMValueRef *weight,
    453                                   unsigned force_nearest)
    454 {
    455    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    456    struct lp_build_context *coord_bld = &bld->coord_bld;
    457    LLVMBuilderRef builder = bld->gallivm->builder;
    458    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
    459    LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
    460 
    461    switch(wrap_mode) {
    462    case PIPE_TEX_WRAP_REPEAT:
    463       if (is_pot) {
    464          /* mul by size and subtract 0.5 */
    465          coord = lp_build_mul(coord_bld, coord, length);
    466          if (offset) {
    467             offset = lp_build_int_to_float(coord_bld, offset);
    468             coord = lp_build_add(coord_bld, coord, offset);
    469          }
    470          if (!force_nearest)
    471             coord = lp_build_sub(coord_bld, coord, half);
    472          *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
    473          /* convert to int, compute lerp weight */
    474          lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
    475          *coord1 = lp_build_ifloor(coord_bld, *coord1);
    476          /* repeat wrap */
    477          length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
    478          *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
    479          *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
    480       }
    481       else {
    482          LLVMValueRef mask;
    483          if (offset) {
    484             offset = lp_build_int_to_float(coord_bld, offset);
    485             offset = lp_build_div(coord_bld, offset, length);
    486             coord = lp_build_add(coord_bld, coord, offset);
    487          }
    488          /* wrap with normalized floats is just fract */
    489          coord = lp_build_fract(coord_bld, coord);
    490          /* unnormalize */
    491          coord = lp_build_mul(coord_bld, coord, length);
    492          /*
    493           * we avoided the 0.5/length division, have to fix up wrong
    494           * edge cases with selects
    495           */
    496          *coord1 = lp_build_add(coord_bld, coord, half);
    497          coord = lp_build_sub(coord_bld, coord, half);
    498          *weight = lp_build_fract(coord_bld, coord);
    499          /*
    500           * It is important for this comparison to be unordered
    501           * (or need fract_safe above).
    502           */
    503          mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
    504                                  PIPE_FUNC_LESS, coord, coord_bld->zero);
    505          *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
    506          *coord0 = lp_build_itrunc(coord_bld, *coord0);
    507          mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
    508                                  PIPE_FUNC_LESS, *coord1, length);
    509          *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
    510          *coord1 = lp_build_itrunc(coord_bld, *coord1);
    511       }
    512       break;
    513    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    514       if (bld->static_sampler_state->normalized_coords) {
    515          /* mul by tex size */
    516          coord = lp_build_mul(coord_bld, coord, length);
    517       }
    518       if (offset) {
    519          offset = lp_build_int_to_float(coord_bld, offset);
    520          coord = lp_build_add(coord_bld, coord, offset);
    521       }
    522       /* subtract 0.5 */
    523       if (!force_nearest) {
    524          coord = lp_build_sub(coord_bld, coord, half);
    525       }
    526       /* clamp to [0, length - 1] */
    527       coord = lp_build_min_ext(coord_bld, coord, length_minus_one,
    528                                GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
    529       coord = lp_build_max(coord_bld, coord, coord_bld->zero);
    530       *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
    531       /* convert to int, compute lerp weight */
    532       lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
    533       /* coord1 = min(coord1, length-1) */
    534       *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
    535       *coord1 = lp_build_itrunc(coord_bld, *coord1);
    536       break;
    537    default:
    538       assert(0);
    539       *coord0 = int_coord_bld->zero;
    540       *coord1 = int_coord_bld->zero;
    541       *weight = coord_bld->zero;
    542       break;
    543    }
    544    *weight = lp_build_mul_imm(coord_bld, *weight, 256);
    545    *weight = lp_build_itrunc(coord_bld, *weight);
    546    return;
    547 }
    548 
    549 
    550 /**
    551  * Fetch texels for image with nearest sampling.
    552  * Return filtered color as two vectors of 16-bit fixed point values.
    553  */
    554 static void
    555 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
    556                                     LLVMValueRef data_ptr,
    557                                     LLVMValueRef offset,
    558                                     LLVMValueRef x_subcoord,
    559                                     LLVMValueRef y_subcoord,
    560                                     LLVMValueRef *colors)
    561 {
    562    /*
    563     * Fetch the pixels as 4 x 32bit (rgba order might differ):
    564     *
    565     *   rgba0 rgba1 rgba2 rgba3
    566     *
    567     * bit cast them into 16 x u8
    568     *
    569     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    570     *
    571     * unpack them into two 8 x i16:
    572     *
    573     *   r0 g0 b0 a0 r1 g1 b1 a1
    574     *   r2 g2 b2 a2 r3 g3 b3 a3
    575     *
    576     * The higher 8 bits of the resulting elements will be zero.
    577     */
    578    LLVMBuilderRef builder = bld->gallivm->builder;
    579    LLVMValueRef rgba8;
    580    struct lp_build_context u8n;
    581    LLVMTypeRef u8n_vec_type;
    582    struct lp_type fetch_type;
    583 
    584    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
    585    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
    586 
    587    fetch_type = lp_type_uint(bld->texel_type.width);
    588    if (util_format_is_rgba8_variant(bld->format_desc)) {
    589       /*
    590        * Given the format is a rgba8, just read the pixels as is,
    591        * without any swizzling. Swizzling will be done later.
    592        */
    593       rgba8 = lp_build_gather(bld->gallivm,
    594                               bld->texel_type.length,
    595                               bld->format_desc->block.bits,
    596                               fetch_type,
    597                               TRUE,
    598                               data_ptr, offset, TRUE);
    599 
    600       rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
    601    }
    602    else {
    603       rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
    604                                       bld->format_desc,
    605                                       u8n.type,
    606                                       TRUE,
    607                                       data_ptr, offset,
    608                                       x_subcoord,
    609                                       y_subcoord,
    610                                       bld->cache);
    611    }
    612 
    613    *colors = rgba8;
    614 }
    615 
    616 
    617 /**
    618  * Sample a single texture image with nearest sampling.
    619  * If sampling a cube texture, r = cube face in [0,5].
    620  * Return filtered color as two vectors of 16-bit fixed point values.
    621  */
    622 static void
    623 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
    624                               LLVMValueRef int_size,
    625                               LLVMValueRef row_stride_vec,
    626                               LLVMValueRef img_stride_vec,
    627                               LLVMValueRef data_ptr,
    628                               LLVMValueRef mipoffsets,
    629                               LLVMValueRef s,
    630                               LLVMValueRef t,
    631                               LLVMValueRef r,
    632                               const LLVMValueRef *offsets,
    633                               LLVMValueRef *colors)
    634 {
           /*
            * Builds the texel address for nearest filtering with integer
            * coordinate math and hands the resulting offset to
            * lp_build_sample_fetch_image_nearest().
            *
            * @param int_size        texture size; split into width/height/depth
            * @param row_stride_vec  stride applied to the t (row) axis
            * @param img_stride_vec  stride applied to the r axis and to the
            *                        layer/face index
            * @param data_ptr        base pointer forwarded to the fetch helper
            * @param mipoffsets      optional; added to the offset when non-NULL
            * @param s               float coordinate, always used
            * @param t               float coordinate, used when dims >= 2
            * @param r               float coordinate when dims >= 3; for layered
            *                        targets it is used as-is as the layer index
            * @param offsets         texel offsets; only offsets[0] is tested for
            *                        NULL before offsets[1]/offsets[2] are added
            * @param colors          output, written by the fetch helper
            */
    635    const unsigned dims = bld->dims;
    636    struct lp_build_context i32;
    637    LLVMValueRef width_vec, height_vec, depth_vec;
    638    LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
    639    LLVMValueRef s_float, t_float = NULL, r_float = NULL;
    640    LLVMValueRef x_stride;
    641    LLVMValueRef x_offset, offset;
    642    LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
    643 
    644    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
    645 
    646    lp_build_extract_image_sizes(bld,
    647                                 &bld->int_size_bld,
    648                                 bld->int_coord_type,
    649                                 int_size,
    650                                 &width_vec,
    651                                 &height_vec,
    652                                 &depth_vec);
    653 
           /* keep the incoming float coords; the wrap helper receives them too */
    654    s_float = s; t_float = t; r_float = r;
    655 
    656    if (bld->static_sampler_state->normalized_coords) {
    657       LLVMValueRef flt_size;
    658 
    659       flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
    660 
    661       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
    662    }
    663 
    664    /* convert float to int */
    665    /* For correct rounding, need floor, not truncation here.
    666     * Note that in some cases (clamp to edge, no texel offsets) we
    667     * could use a non-signed build context which would help archs
    668     * greatly which don't have arch rounding.
    669     */
    670    s_ipart = lp_build_ifloor(&bld->coord_bld, s);
    671    if (dims >= 2)
    672       t_ipart = lp_build_ifloor(&bld->coord_bld, t);
    673    if (dims >= 3)
    674       r_ipart = lp_build_ifloor(&bld->coord_bld, r);
    675 
    676    /* add texel offsets */
    677    if (offsets[0]) {
    678       s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
    679       if (dims >= 2) {
    680          t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
    681          if (dims >= 3) {
    682             r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
    683          }
    684       }
    685    }
    686 
    687    /* get pixel, row, image strides */
           /* x stride is the size of one format block in bytes (block.bits / 8) */
    688    x_stride = lp_build_const_vec(bld->gallivm,
    689                                  bld->int_coord_bld.type,
    690                                  bld->format_desc->block.bits/8);
    691 
    692    /* Do texcoord wrapping, compute texel offset */
    693    lp_build_sample_wrap_nearest_int(bld,
    694                                     bld->format_desc->block.width,
    695                                     s_ipart, s_float,
    696                                     width_vec, x_stride, offsets[0],
    697                                     bld->static_texture_state->pot_width,
    698                                     bld->static_sampler_state->wrap_s,
    699                                     &x_offset, &x_subcoord);
    700    offset = x_offset;
    701    if (dims >= 2) {
    702       LLVMValueRef y_offset;
    703       lp_build_sample_wrap_nearest_int(bld,
    704                                        bld->format_desc->block.height,
    705                                        t_ipart, t_float,
    706                                        height_vec, row_stride_vec, offsets[1],
    707                                        bld->static_texture_state->pot_height,
    708                                        bld->static_sampler_state->wrap_t,
    709                                        &y_offset, &y_subcoord);
    710       offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
    711       if (dims >= 3) {
    712          LLVMValueRef z_offset;
    713          lp_build_sample_wrap_nearest_int(bld,
    714                                           1, /* block length (depth) */
    715                                           r_ipart, r_float,
    716                                           depth_vec, img_stride_vec, offsets[2],
    717                                           bld->static_texture_state->pot_depth,
    718                                           bld->static_sampler_state->wrap_r,
    719                                           &z_offset, &z_subcoord);
    720          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
    721       }
    722    }
           else {
              /*
               * 1-D texture: there is no row coordinate, so give the fetch
               * helper a well-defined zero sub-coordinate instead of an
               * uninitialized value.
               */
              y_subcoord = bld->int_coord_bld.zero;
           }
    723    if (has_layer_coord(bld->static_texture_state->target)) {
    724       LLVMValueRef z_offset;
    725       /* The r coord is the cube face in [0,5] or array layer */
              /*
               * r is multiplied through the integer context here, so callers
               * presumably pass it already converted to int for layered
               * targets -- TODO confirm against the caller.
               */
    726       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
    727       offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
    728    }
    729    if (mipoffsets) {
    730       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
    731    }
    732 
           /* z_subcoord is only an out-parameter sink; it is not forwarded */
    733    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
    734                                        x_subcoord, y_subcoord,
    735                                        colors);
    736 }
    737 
    738 
    739 /**
    740  * Sample a single texture image with nearest sampling.
    741  * If sampling a cube texture, r = cube face in [0,5].
    742  * Return filtered color as two vectors of 16-bit fixed point values.
    743  * Does address calcs (except offsets) with floats.
    744  * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
         *
         * @param bld             sampling build context
         * @param int_size        texture size; converted to float before the
         *                        width/height/depth vectors are extracted
         * @param row_stride_vec  row stride, forwarded to lp_build_sample_offset()
         * @param img_stride_vec  image stride, forwarded to lp_build_sample_offset()
         * @param data_ptr        base pointer forwarded to the fetch helper
         * @param mipoffsets      optional; added to the offset when non-NULL
         * @param s               coordinate, always wrapped
         * @param t               coordinate, wrapped when dims >= 2
         * @param r               coordinate, wrapped when dims >= 3; for layered
         *                        targets it is used unmodified as the z coordinate
         * @param offsets         texel offsets; offsets[n] is passed to the wrap
         *                        helper of each dimension that is processed
         * @param colors          output, written by
         *                        lp_build_sample_fetch_image_nearest()
    745  */
    746 static void
    747 lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
    748                                      LLVMValueRef int_size,
    749                                      LLVMValueRef row_stride_vec,
    750                                      LLVMValueRef img_stride_vec,
    751                                      LLVMValueRef data_ptr,
    752                                      LLVMValueRef mipoffsets,
    753                                      LLVMValueRef s,
    754                                      LLVMValueRef t,
    755                                      LLVMValueRef r,
    756                                      const LLVMValueRef *offsets,
    757                                      LLVMValueRef *colors)
    758    {
    759    const unsigned dims = bld->dims;
    760    LLVMValueRef width_vec, height_vec, depth_vec;
    761    LLVMValueRef offset;
    762    LLVMValueRef x_subcoord, y_subcoord;
           /* y/z stay NULL for dimensions the texture does not have */
    763    LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
    764    LLVMValueRef flt_size;
    765 
           /* the size is needed as floats because wrapping is done in float */
    766    flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
    767 
    768    lp_build_extract_image_sizes(bld,
    769                                 &bld->float_size_bld,
    770                                 bld->coord_type,
    771                                 flt_size,
    772                                 &width_vec,
    773                                 &height_vec,
    774                                 &depth_vec);
    775 
    776    /* Do texcoord wrapping */
    777    lp_build_sample_wrap_nearest_float(bld,
    778                                       s, width_vec, offsets[0],
    779                                       bld->static_texture_state->pot_width,
    780                                       bld->static_sampler_state->wrap_s,
    781                                       &x_icoord);
    782 
    783    if (dims >= 2) {
    784       lp_build_sample_wrap_nearest_float(bld,
    785                                          t, height_vec, offsets[1],
    786                                          bld->static_texture_state->pot_height,
    787                                          bld->static_sampler_state->wrap_t,
    788                                          &y_icoord);
    789 
    790       if (dims >= 3) {
    791          lp_build_sample_wrap_nearest_float(bld,
    792                                             r, depth_vec, offsets[2],
    793                                             bld->static_texture_state->pot_depth,
    794                                             bld->static_sampler_state->wrap_r,
    795                                             &z_icoord);
    796       }
    797    }
    798    if (has_layer_coord(bld->static_texture_state->target)) {
              /*
               * Layer/face index is taken as-is, without wrapping; presumably
               * the caller already supplies it as an integer -- TODO confirm.
               */
    799       z_icoord = r;
    800    }
    801 
    802    /*
    803     * From here on we deal with ints, and we should split up the 256bit
    804     * vectors manually for better generated code.
    805     */
    806 
    807    /*
    808     * compute texel offsets -
    809     * cannot do offset calc with floats, difficult for block-based formats,
    810     * and not enough precision anyway.
    811     */
    812    lp_build_sample_offset(&bld->int_coord_bld,
    813                           bld->format_desc,
    814                           x_icoord, y_icoord,
    815                           z_icoord,
    816                           row_stride_vec, img_stride_vec,
    817                           &offset,
    818                           &x_subcoord, &y_subcoord);
    819    if (mipoffsets) {
    820       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
    821    }
    822 
    823    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
    824                                        x_subcoord, y_subcoord,
    825                                        colors);
    826 }
    827 
    828 
    829 /**
    830  * Fetch texels for image with linear sampling.
    831  * Return filtered color as two vectors of 16-bit fixed point values.
         *
         * NOTE(review): the code below stores a single packed u8n vector through
         * colors; the "two vectors of 16-bit" wording above looks stale.
         *
         * @param bld         sampling build context
         * @param data_ptr    base pointer the texels are gathered/fetched from
         * @param offset      per-neighbor offsets, indexed [z][y][x]; only the
         *                    entries for the dimensions in use are read
         * @param x_subcoord  per-x-neighbor sub-coordinate, only read on the
         *                    non-rgba8 fetch path
         * @param y_subcoord  per-y-neighbor sub-coordinate, only read on the
         *                    non-rgba8 fetch path
         * @param s_fpart     lerp weight along s, one 32-bit lane per pixel
         * @param t_fpart     lerp weight along t, converted when dims >= 2
         * @param r_fpart     lerp weight along r, converted when dims >= 3
         * @param colors      output; receives the interpolated packed vector
    832  */
    833 static void
    834 lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
    835                                    LLVMValueRef data_ptr,
    836                                    LLVMValueRef offset[2][2][2],
    837                                    LLVMValueRef x_subcoord[2],
    838                                    LLVMValueRef y_subcoord[2],
    839                                    LLVMValueRef s_fpart,
    840                                    LLVMValueRef t_fpart,
    841                                    LLVMValueRef r_fpart,
    842                                    LLVMValueRef *colors)
    843 {
    844    const unsigned dims = bld->dims;
    845    LLVMBuilderRef builder = bld->gallivm->builder;
    846    struct lp_build_context u8n;
    847    LLVMTypeRef u8n_vec_type;
    848    LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
    849    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
    850    LLVMValueRef shuffle;
    851    LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
    852    LLVMValueRef packed;
    853    unsigned i, j, k;
    854    unsigned numj, numk;
    855 
           /* all interpolation below is done on 8-bit unorm vectors */
    856    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
    857    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
    858 
    859    /*
    860     * Transform 4 x i32 in
    861     *
    862     *   s_fpart = {s0, s1, s2, s3}
    863     *
    864     * where each value is between 0 and 0xff,
    865     *
    866     * into one 16 x i8
    867     *
    868     *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
    869     *
    870     * and likewise for t_fpart. There is no risk of losing precision here
    871     * since the fractional parts only use the lower 8bits.
    872     */
    873    s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
    874    if (dims >= 2)
    875       t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
    876    if (dims >= 3)
    877       r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");
    878 
           /*
            * Build the shuffle mask: every group of four bytes selects the one
            * byte of its 32-bit lane that holds the weight (byte 0 on little
            * endian, byte 3 otherwise) and repeats it four times.
            */
    879    for (j = 0; j < u8n.type.length; j += 4) {
    880 #ifdef PIPE_ARCH_LITTLE_ENDIAN
    881       unsigned subindex = 0;
    882 #else
    883       unsigned subindex = 3;
    884 #endif
    885       LLVMValueRef index;
    886 
    887       index = LLVMConstInt(elem_type, j + subindex, 0);
    888       for (i = 0; i < 4; ++i)
    889          shuffles[j + i] = index;
    890    }
    891 
    892    shuffle = LLVMConstVector(shuffles, u8n.type.length);
    893 
    894    s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
    895                                     shuffle, "");
    896    if (dims >= 2) {
    897       t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
    898                                        shuffle, "");
    899    }
    900    if (dims >= 3) {
    901       r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
    902                                        shuffle, "");
    903    }
    904 
    905    /*
    906     * Fetch the pixels as 4 x 32bit (rgba order might differ):
    907     *
    908     *   rgba0 rgba1 rgba2 rgba3
    909     *
    910     * bit cast them into 16 x u8
    911     *
    912     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    913     *
    914     * unpack them into two 8 x i16:
    915     *
    916     *   r0 g0 b0 a0 r1 g1 b1 a1
    917     *   r2 g2 b2 a2 r3 g3 b3 a3
    918     *
    919     * The higher 8 bits of the resulting elements will be zero.
            *
            * NOTE(review): no unpack step is visible in this function; the
            * fetched neighbors are kept as u8n vectors and lerped directly.
    920     */
           /* number of neighbors along y and z: 2 when that dimension exists */
    921    numj = 1 + (dims >= 2);
    922    numk = 1 + (dims >= 3);
    923 
    924    for (k = 0; k < numk; k++) {
    925       for (j = 0; j < numj; j++) {
    926          for (i = 0; i < 2; i++) {
    927             LLVMValueRef rgba8;
    928 
    929             if (util_format_is_rgba8_variant(bld->format_desc)) {
    930                struct lp_type fetch_type;
    931                /*
    932                 * Given the format is a rgba8, just read the pixels as is,
    933                 * without any swizzling. Swizzling will be done later.
    934                 */
    935                fetch_type = lp_type_uint(bld->texel_type.width);
    936                rgba8 = lp_build_gather(bld->gallivm,
    937                                        bld->texel_type.length,
    938                                        bld->format_desc->block.bits,
    939                                        fetch_type,
    940                                        TRUE,
    941                                        data_ptr, offset[k][j][i], TRUE);
    942 
    943                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
    944             }
    945             else {
                      /* generic path: needs the sub-coordinates as well */
    946                rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
    947                                                bld->format_desc,
    948                                                u8n.type,
    949                                                TRUE,
    950                                                data_ptr, offset[k][j][i],
    951                                                x_subcoord[i],
    952                                                y_subcoord[j],
    953                                                bld->cache);
    954             }
    955 
    956             neighbors[k][j][i] = rgba8;
    957          }
    958       }
    959    }
    960 
    961    /*
    962     * Linear interpolation with 8.8 fixed point.
    963     */
           /*
            * NOTE(review): both force_nearest special cases blend the two x
            * neighbors ([0][0][0] and [0][0][1]); with force_nearest_s the
            * weight is t_fpart, which is only converted above when dims >= 2.
            * Confirm this is intended rather than blending along y.
            */
    964    if (bld->static_sampler_state->force_nearest_s) {
    965       /* special case 1-D lerp */
    966       packed = lp_build_lerp(&u8n,
    967                              t_fpart,
    968                              neighbors[0][0][0],
    969                              neighbors[0][0][1],
    970                              LP_BLD_LERP_PRESCALED_WEIGHTS);
    971    }
    972    else if (bld->static_sampler_state->force_nearest_t) {
    973       /* special case 1-D lerp */
    974       packed = lp_build_lerp(&u8n,
    975                              s_fpart,
    976                              neighbors[0][0][0],
    977                              neighbors[0][0][1],
    978                              LP_BLD_LERP_PRESCALED_WEIGHTS);
    979    }
    980    else {
    981       /* general 1/2/3-D lerping */
    982       if (dims == 1) {
    983          packed = lp_build_lerp(&u8n,
    984                                 s_fpart,
    985                                 neighbors[0][0][0],
    986                                 neighbors[0][0][1],
    987                                 LP_BLD_LERP_PRESCALED_WEIGHTS);
    988       } else if (dims == 2) {
    989          /* 2-D lerp */
    990          packed = lp_build_lerp_2d(&u8n,
    991                                    s_fpart, t_fpart,
    992                                    neighbors[0][0][0],
    993                                    neighbors[0][0][1],
    994                                    neighbors[0][1][0],
    995                                    neighbors[0][1][1],
    996                                    LP_BLD_LERP_PRESCALED_WEIGHTS);
    997       } else {
    998          /* 3-D lerp */
   999          assert(dims == 3);
   1000          packed = lp_build_lerp_3d(&u8n,
   1001                                    s_fpart, t_fpart, r_fpart,
   1002                                    neighbors[0][0][0],
   1003                                    neighbors[0][0][1],
   1004                                    neighbors[0][1][0],
   1005                                    neighbors[0][1][1],
   1006                                    neighbors[1][0][0],
   1007                                    neighbors[1][0][1],
   1008                                    neighbors[1][1][0],
   1009                                    neighbors[1][1][1],
   1010                                    LP_BLD_LERP_PRESCALED_WEIGHTS);
   1011       }
   1012    }
   1013 
           /* single packed result vector */
   1014    *colors = packed;
   1015 }
   1016 
   1017 /**
   1018  * Sample a single texture image with (bi-)(tri-)linear sampling.
   1019  * Return filtered color as two vectors of 16-bit fixed point values.
         *
         * NOTE(review): colors is handed to lp_build_sample_fetch_image_linear(),
         * which stores one packed vector through it; the "two vectors" wording
         * above looks stale.
         *
         * Coordinates are converted to fixed point with 8 fractional bits; the
         * integer part selects the texel and the low 8 bits become the lerp
         * weight.
         *
         * @param bld             sampling build context
         * @param int_size        texture size; split into width/height/depth
         * @param row_stride_vec  stride applied along t
         * @param img_stride_vec  stride applied along r and to the layer/face
         * @param data_ptr        base pointer forwarded to the fetch helper
         * @param mipoffsets      optional; added to the offsets when non-NULL
         * @param s               float coordinate, always used
         * @param t               float coordinate, used when dims >= 2
         * @param r               float coordinate when dims >= 3; for layered
         *                        targets it is used as the layer index
         * @param offsets         texel offsets; only offsets[0] is tested for
         *                        NULL before offsets[1]/offsets[2] are added
         * @param colors          output, written by the fetch helper
   1020  */
   1021 static void
   1022 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
   1023                              LLVMValueRef int_size,
   1024                              LLVMValueRef row_stride_vec,
   1025                              LLVMValueRef img_stride_vec,
   1026                              LLVMValueRef data_ptr,
   1027                              LLVMValueRef mipoffsets,
   1028                              LLVMValueRef s,
   1029                              LLVMValueRef t,
   1030                              LLVMValueRef r,
   1031                              const LLVMValueRef *offsets,
   1032                              LLVMValueRef *colors)
   1033 {
   1034    const unsigned dims = bld->dims;
   1035    LLVMBuilderRef builder = bld->gallivm->builder;
   1036    struct lp_build_context i32;
   1037    LLVMValueRef i32_c8, i32_c128, i32_c255;
   1038    LLVMValueRef width_vec, height_vec, depth_vec;
   1039    LLVMValueRef s_ipart, s_fpart, s_float;
   1040    LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
   1041    LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
   1042    LLVMValueRef x_stride, y_stride, z_stride;
   1043    LLVMValueRef x_offset0, x_offset1;
   1044    LLVMValueRef y_offset0, y_offset1;
   1045    LLVMValueRef z_offset0, z_offset1;
   1046    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   1047    LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   1048    unsigned x, y, z;
   1049 
   1050    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
   1051 
   1052    lp_build_extract_image_sizes(bld,
   1053                                 &bld->int_size_bld,
   1054                                 bld->int_coord_type,
   1055                                 int_size,
   1056                                 &width_vec,
   1057                                 &height_vec,
   1058                                 &depth_vec);
   1059 
           /* keep the incoming float coords; the wrap helper receives them too */
   1060    s_float = s; t_float = t; r_float = r;
   1061 
   1062    if (bld->static_sampler_state->normalized_coords) {
   1063       LLVMValueRef scaled_size;
   1064       LLVMValueRef flt_size;
   1065 
   1066       /* scale size by 256 (8 fractional bits) */
   1067       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
   1068 
   1069       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
   1070 
   1071       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   1072    }
   1073    else {
   1074       /* scale coords by 256 (8 fractional bits) */
   1075       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
   1076       if (dims >= 2)
   1077          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
   1078       if (dims >= 3)
   1079          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   1080    }
   1081 
   1082    /* convert float to int */
   1083    /* For correct rounding, need round to nearest, not truncation here.
   1084     * Note that in some cases (clamp to edge, no texel offsets) we
   1085     * could use a non-signed build context which would help archs which
   1086     * don't have fptosi intrinsic with nearest rounding implemented.
   1087     */
   1088    s = lp_build_iround(&bld->coord_bld, s);
   1089    if (dims >= 2)
   1090       t = lp_build_iround(&bld->coord_bld, t);
   1091    if (dims >= 3)
   1092       r = lp_build_iround(&bld->coord_bld, r);
   1093 
   1094    /* subtract 0.5 (add -128) */
           /* skipped for s/t when the matching force_nearest flag is set */
   1095    i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   1096    if (!bld->static_sampler_state->force_nearest_s) {
   1097       s = LLVMBuildAdd(builder, s, i32_c128, "");
   1098    }
   1099    if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
   1100       t = LLVMBuildAdd(builder, t, i32_c128, "");
   1101    }
   1102    if (dims >= 3) {
   1103       r = LLVMBuildAdd(builder, r, i32_c128, "");
   1104    }
   1105 
   1106    /* compute floor (shift right 8) */
   1107    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   1108    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   1109    if (dims >= 2)
   1110       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   1111    if (dims >= 3)
   1112       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
   1113 
   1114    /* add texel offsets */
   1115    if (offsets[0]) {
   1116       s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
   1117       if (dims >= 2) {
   1118          t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
   1119          if (dims >= 3) {
   1120             r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
   1121          }
   1122       }
   1123    }
   1124 
   1125    /* compute fractional part (AND with 0xff) */
   1126    i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   1127    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   1128    if (dims >= 2)
   1129       t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   1130    if (dims >= 3)
   1131       r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
   1132 
   1133    /* get pixel, row and image strides */
   1134    x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
   1135                                  bld->format_desc->block.bits/8);
   1136    y_stride = row_stride_vec;
   1137    z_stride = img_stride_vec;
   1138 
   1139    /* do texcoord wrapping and compute texel offsets */
   1140    lp_build_sample_wrap_linear_int(bld,
   1141                                    bld->format_desc->block.width,
   1142                                    s_ipart, &s_fpart, s_float,
   1143                                    width_vec, x_stride, offsets[0],
   1144                                    bld->static_texture_state->pot_width,
   1145                                    bld->static_sampler_state->wrap_s,
   1146                                    &x_offset0, &x_offset1,
   1147                                    &x_subcoord[0], &x_subcoord[1]);
   1148 
   1149    /* add potential cube/array/mip offsets now as they are constant per pixel */
   1150    if (has_layer_coord(bld->static_texture_state->target)) {
   1151       LLVMValueRef z_offset;
              /*
               * r is multiplied through the integer context here, so callers
               * presumably pass it already converted to int for layered
               * targets -- TODO confirm against the caller.
               */
   1152       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
   1153       /* The r coord is the cube face in [0,5] or array layer */
   1154       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
   1155       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   1156    }
   1157    if (mipoffsets) {
   1158       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
   1159       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   1160    }
   1161 
           /* seed all eight neighbors with the x part; y/z parts are added below */
   1162    for (z = 0; z < 2; z++) {
   1163       for (y = 0; y < 2; y++) {
   1164          offset[z][y][0] = x_offset0;
   1165          offset[z][y][1] = x_offset1;
   1166       }
   1167    }
   1168 
   1169    if (dims >= 2) {
   1170       lp_build_sample_wrap_linear_int(bld,
   1171                                       bld->format_desc->block.height,
   1172                                       t_ipart, &t_fpart, t_float,
   1173                                       height_vec, y_stride, offsets[1],
   1174                                       bld->static_texture_state->pot_height,
   1175                                       bld->static_sampler_state->wrap_t,
   1176                                       &y_offset0, &y_offset1,
   1177                                       &y_subcoord[0], &y_subcoord[1]);
   1178 
   1179       for (z = 0; z < 2; z++) {
   1180          for (x = 0; x < 2; x++) {
   1181             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
   1182                                            offset[z][0][x], y_offset0);
   1183             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
   1184                                            offset[z][1][x], y_offset1);
   1185          }
   1186       }
   1187    }
           else {
              /*
               * 1-D texture: the fetch helper still reads y_subcoord[0] on its
               * non-rgba8 path, so give it a well-defined zero instead of an
               * uninitialized value.
               */
              y_subcoord[0] = bld->int_coord_bld.zero;
              y_subcoord[1] = bld->int_coord_bld.zero;
           }
   1188 
   1189    if (dims >= 3) {
   1190       lp_build_sample_wrap_linear_int(bld,
   1191                                       1, /* block length (depth) */
   1192                                       r_ipart, &r_fpart, r_float,
   1193                                       depth_vec, z_stride, offsets[2],
   1194                                       bld->static_texture_state->pot_depth,
   1195                                       bld->static_sampler_state->wrap_r,
   1196                                       &z_offset0, &z_offset1,
   1197                                       &z_subcoord[0], &z_subcoord[1]);
   1198       for (y = 0; y < 2; y++) {
   1199          for (x = 0; x < 2; x++) {
   1200             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
   1201                                            offset[0][y][x], z_offset0);
   1202             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
   1203                                            offset[1][y][x], z_offset1);
   1204          }
   1205       }
   1206    }
   1207 
           /* z_subcoord is only an out-parameter sink; it is not forwarded */
   1208    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
   1209                                       x_subcoord, y_subcoord,
   1210                                       s_fpart, t_fpart, r_fpart,
   1211                                       colors);
   1212 }
   1213 
   1214 
   1215 /**
   1216  * Sample a single texture image with (bi-)(tri-)linear sampling.
   1217  * Return filtered color as two vectors of 16-bit fixed point values.
   1218  * Does address calcs (except offsets) with floats.
   1219  * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
   1220  */
   1221 static void
   1222 lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
   1223                                     LLVMValueRef int_size,
   1224                                     LLVMValueRef row_stride_vec,
   1225                                     LLVMValueRef img_stride_vec,
   1226                                     LLVMValueRef data_ptr,
   1227                                     LLVMValueRef mipoffsets,
   1228                                     LLVMValueRef s,
   1229                                     LLVMValueRef t,
   1230                                     LLVMValueRef r,
   1231                                     const LLVMValueRef *offsets,
   1232                                     LLVMValueRef *colors)
   1233 {
   1234    const unsigned dims = bld->dims;
   1235    LLVMValueRef width_vec, height_vec, depth_vec;
   1236    LLVMValueRef s_fpart;
   1237    LLVMValueRef t_fpart = NULL;
   1238    LLVMValueRef r_fpart = NULL;
   1239    LLVMValueRef x_stride, y_stride, z_stride;
   1240    LLVMValueRef x_offset0, x_offset1;
   1241    LLVMValueRef y_offset0, y_offset1;
   1242    LLVMValueRef z_offset0, z_offset1;
   1243    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   1244    LLVMValueRef x_subcoord[2], y_subcoord[2];
   1245    LLVMValueRef flt_size;
   1246    LLVMValueRef x_icoord0, x_icoord1;
   1247    LLVMValueRef y_icoord0, y_icoord1;
   1248    LLVMValueRef z_icoord0, z_icoord1;
   1249    unsigned x, y, z;
   1250 
   1251    flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
   1252 
   1253    lp_build_extract_image_sizes(bld,
   1254                                 &bld->float_size_bld,
   1255                                 bld->coord_type,
   1256                                 flt_size,
   1257                                 &width_vec,
   1258                                 &height_vec,
   1259                                 &depth_vec);
   1260 
   1261    /* do texcoord wrapping and compute texel offsets */
   1262    lp_build_sample_wrap_linear_float(bld,
   1263                                      bld->format_desc->block.width,
   1264                                      s, width_vec, offsets[0],
   1265                                      bld->static_texture_state->pot_width,
   1266                                      bld->static_sampler_state->wrap_s,
   1267                                      &x_icoord0, &x_icoord1,
   1268                                      &s_fpart,
   1269                                      bld->static_sampler_state->force_nearest_s);
   1270 
   1271    if (dims >= 2) {
   1272       lp_build_sample_wrap_linear_float(bld,
   1273                                         bld->format_desc->block.height,
   1274                                         t, height_vec, offsets[1],
   1275                                         bld->static_texture_state->pot_height,
   1276                                         bld->static_sampler_state->wrap_t,
   1277                                         &y_icoord0, &y_icoord1,
   1278                                         &t_fpart,
   1279                                         bld->static_sampler_state->force_nearest_t);
   1280 
   1281       if (dims >= 3) {
   1282          lp_build_sample_wrap_linear_float(bld,
   1283                                            1, /* block length (depth) */
   1284                                            r, depth_vec, offsets[2],
   1285                                            bld->static_texture_state->pot_depth,
   1286                                            bld->static_sampler_state->wrap_r,
   1287                                            &z_icoord0, &z_icoord1,
   1288                                            &r_fpart, 0);
   1289       }
   1290    }
   1291 
   1292    /*
   1293     * From here on we deal with ints, and we should split up the 256bit
   1294     * vectors manually for better generated code.
   1295     */
   1296 
   1297    /* get pixel, row and image strides */
   1298    x_stride = lp_build_const_vec(bld->gallivm,
   1299                                  bld->int_coord_bld.type,
   1300                                  bld->format_desc->block.bits/8);
   1301    y_stride = row_stride_vec;
   1302    z_stride = img_stride_vec;
   1303 
   1304    /*
   1305     * compute texel offset -
   1306     * cannot do offset calc with floats, difficult for block-based formats,
   1307     * and not enough precision anyway.
   1308     */
   1309    lp_build_sample_partial_offset(&bld->int_coord_bld,
   1310                                   bld->format_desc->block.width,
   1311                                   x_icoord0, x_stride,
   1312                                   &x_offset0, &x_subcoord[0]);
   1313    lp_build_sample_partial_offset(&bld->int_coord_bld,
   1314                                   bld->format_desc->block.width,
   1315                                   x_icoord1, x_stride,
   1316                                   &x_offset1, &x_subcoord[1]);
   1317 
   1318    /* add potential cube/array/mip offsets now as they are constant per pixel */
   1319    if (has_layer_coord(bld->static_texture_state->target)) {
   1320       LLVMValueRef z_offset;
   1321       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
   1322       /* The r coord is the cube face in [0,5] or array layer */
   1323       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
   1324       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   1325    }
   1326    if (mipoffsets) {
   1327       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
   1328       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   1329    }
   1330 
   1331    for (z = 0; z < 2; z++) {
   1332       for (y = 0; y < 2; y++) {
   1333          offset[z][y][0] = x_offset0;
   1334          offset[z][y][1] = x_offset1;
   1335       }
   1336    }
   1337 
   1338    if (dims >= 2) {
   1339       lp_build_sample_partial_offset(&bld->int_coord_bld,
   1340                                      bld->format_desc->block.height,
   1341                                      y_icoord0, y_stride,
   1342                                      &y_offset0, &y_subcoord[0]);
   1343       lp_build_sample_partial_offset(&bld->int_coord_bld,
   1344                                      bld->format_desc->block.height,
   1345                                      y_icoord1, y_stride,
   1346                                      &y_offset1, &y_subcoord[1]);
   1347       for (z = 0; z < 2; z++) {
   1348          for (x = 0; x < 2; x++) {
   1349             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
   1350                                            offset[z][0][x], y_offset0);
   1351             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
   1352                                            offset[z][1][x], y_offset1);
   1353          }
   1354       }
   1355    }
   1356 
   1357    if (dims >= 3) {
   1358       LLVMValueRef z_subcoord[2];
   1359       lp_build_sample_partial_offset(&bld->int_coord_bld,
   1360                                      1,
   1361                                      z_icoord0, z_stride,
   1362                                      &z_offset0, &z_subcoord[0]);
   1363       lp_build_sample_partial_offset(&bld->int_coord_bld,
   1364                                      1,
   1365                                      z_icoord1, z_stride,
   1366                                      &z_offset1, &z_subcoord[1]);
   1367       for (y = 0; y < 2; y++) {
   1368          for (x = 0; x < 2; x++) {
   1369             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
   1370                                            offset[0][y][x], z_offset0);
   1371             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
   1372                                            offset[1][y][x], z_offset1);
   1373          }
   1374       }
   1375    }
   1376 
   1377    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
   1378                                       x_subcoord, y_subcoord,
   1379                                       s_fpart, t_fpart, r_fpart,
   1380                                       colors);
   1381 }
   1382 
   1383 
   1384 /**
   1385  * Sample the texture/mipmap using given image filter and mip filter.
   1386  * data0_ptr and data1_ptr point to the two mipmap levels to sample
   1387  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
   1388  * If we're using nearest miplevel sampling the '1' values will be null/unused.
   1389  */
   1390 static void
   1391 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
   1392                        unsigned img_filter,
   1393                        unsigned mip_filter,
   1394                        LLVMValueRef s,
   1395                        LLVMValueRef t,
   1396                        LLVMValueRef r,
   1397                        const LLVMValueRef *offsets,
   1398                        LLVMValueRef ilevel0,
   1399                        LLVMValueRef ilevel1,
   1400                        LLVMValueRef lod_fpart,
   1401                        LLVMValueRef colors_var)
   1402 {
   1403    LLVMBuilderRef builder = bld->gallivm->builder;
   1404    LLVMValueRef size0;
   1405    LLVMValueRef size1;
   1406    LLVMValueRef row_stride0_vec = NULL;
   1407    LLVMValueRef row_stride1_vec = NULL;
   1408    LLVMValueRef img_stride0_vec = NULL;
   1409    LLVMValueRef img_stride1_vec = NULL;
   1410    LLVMValueRef data_ptr0;
   1411    LLVMValueRef data_ptr1;
   1412    LLVMValueRef mipoff0 = NULL;
   1413    LLVMValueRef mipoff1 = NULL;
   1414    LLVMValueRef colors0;
   1415    LLVMValueRef colors1;
   1416    boolean use_floats = util_cpu_caps.has_avx &&
   1417                         !util_cpu_caps.has_avx2 &&
   1418                         bld->coord_type.length > 4;
   1419 
   1420    /* sample the first mipmap level */
   1421    lp_build_mipmap_level_sizes(bld, ilevel0,
   1422                                &size0,
   1423                                &row_stride0_vec, &img_stride0_vec);
   1424    if (bld->num_mips == 1) {
   1425       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   1426    }
   1427    else {
   1428       /* This path should work for num_lods 1 too but slightly less efficient */
   1429       data_ptr0 = bld->base_ptr;
   1430       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   1431    }
   1432 
   1433    if (use_floats) {
   1434       if (img_filter == PIPE_TEX_FILTER_NEAREST) {
   1435          lp_build_sample_image_nearest_afloat(bld,
   1436                                               size0,
   1437                                               row_stride0_vec, img_stride0_vec,
   1438                                               data_ptr0, mipoff0, s, t, r, offsets,
   1439                                               &colors0);
   1440       }
   1441       else {
   1442          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
   1443          lp_build_sample_image_linear_afloat(bld,
   1444                                              size0,
   1445                                              row_stride0_vec, img_stride0_vec,
   1446                                              data_ptr0, mipoff0, s, t, r, offsets,
   1447                                              &colors0);
   1448       }
   1449    }
   1450    else {
   1451       if (img_filter == PIPE_TEX_FILTER_NEAREST) {
   1452          lp_build_sample_image_nearest(bld,
   1453                                        size0,
   1454                                        row_stride0_vec, img_stride0_vec,
   1455                                        data_ptr0, mipoff0, s, t, r, offsets,
   1456                                        &colors0);
   1457       }
   1458       else {
   1459          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
   1460          lp_build_sample_image_linear(bld,
   1461                                       size0,
   1462                                       row_stride0_vec, img_stride0_vec,
   1463                                       data_ptr0, mipoff0, s, t, r, offsets,
   1464                                       &colors0);
   1465       }
   1466    }
   1467 
   1468    /* Store the first level's colors in the output variables */
   1469    LLVMBuildStore(builder, colors0, colors_var);
   1470 
   1471    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
   1472       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
   1473                                                      bld->lodf_bld.type, 256.0);
   1474       LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
   1475       struct lp_build_if_state if_ctx;
   1476       LLVMValueRef need_lerp;
   1477       unsigned num_quads = bld->coord_bld.type.length / 4;
   1478       unsigned i;
   1479 
   1480       lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
   1481       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
   1482 
   1483       /* need_lerp = lod_fpart > 0 */
   1484       if (bld->num_lods == 1) {
   1485          need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
   1486                                    lod_fpart, bld->lodi_bld.zero,
   1487                                    "need_lerp");
   1488       }
   1489       else {
   1490          /*
   1491           * We'll do mip filtering if any of the quads need it.
   1492           * It might be better to split the vectors here and only fetch/filter
   1493           * quads which need it.
   1494           */
   1495          /*
   1496           * We need to clamp lod_fpart here since we can get negative
   1497           * values which would screw up filtering if not all
   1498           * lod_fpart values have same sign.
   1499           * We can however then skip the greater than comparison.
   1500           */
   1501          lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
   1502                                   bld->lodi_bld.zero);
   1503          need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
   1504       }
   1505 
   1506       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
   1507       {
   1508          struct lp_build_context u8n_bld;
   1509 
   1510          lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   1511 
   1512          /* sample the second mipmap level */
   1513          lp_build_mipmap_level_sizes(bld, ilevel1,
   1514                                      &size1,
   1515                                      &row_stride1_vec, &img_stride1_vec);
   1516          if (bld->num_mips == 1) {
   1517             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
   1518          }
   1519          else {
   1520             data_ptr1 = bld->base_ptr;
   1521             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
   1522          }
   1523 
   1524          if (use_floats) {
   1525             if (img_filter == PIPE_TEX_FILTER_NEAREST) {
   1526                lp_build_sample_image_nearest_afloat(bld,
   1527                                                     size1,
   1528                                                     row_stride1_vec, img_stride1_vec,
   1529                                                     data_ptr1, mipoff1, s, t, r, offsets,
   1530                                                     &colors1);
   1531             }
   1532             else {
   1533                lp_build_sample_image_linear_afloat(bld,
   1534                                                    size1,
   1535                                                    row_stride1_vec, img_stride1_vec,
   1536                                                    data_ptr1, mipoff1, s, t, r, offsets,
   1537                                                    &colors1);
   1538             }
   1539          }
   1540          else {
   1541             if (img_filter == PIPE_TEX_FILTER_NEAREST) {
   1542                lp_build_sample_image_nearest(bld,
   1543                                              size1,
   1544                                              row_stride1_vec, img_stride1_vec,
   1545                                              data_ptr1, mipoff1, s, t, r, offsets,
   1546                                              &colors1);
   1547             }
   1548             else {
   1549                lp_build_sample_image_linear(bld,
   1550                                             size1,
   1551                                             row_stride1_vec, img_stride1_vec,
   1552                                             data_ptr1, mipoff1, s, t, r, offsets,
   1553                                             &colors1);
   1554             }
   1555          }
   1556 
   1557          /* interpolate samples from the two mipmap levels */
   1558 
   1559          if (num_quads == 1 && bld->num_lods == 1) {
   1560             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
   1561             lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
   1562          }
   1563          else {
   1564             unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
   1565             LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
   1566             LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
   1567 
   1568             /* Take the LSB of lod_fpart */
   1569             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");
   1570 
   1571             /* Broadcast each lod weight into their respective channels */
   1572             for (i = 0; i < u8n_bld.type.length; ++i) {
   1573                shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
   1574             }
   1575             lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
   1576                                                LLVMConstVector(shuffle, u8n_bld.type.length), "");
   1577          }
   1578 
   1579          colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
   1580                                  colors0, colors1,
   1581                                  LP_BLD_LERP_PRESCALED_WEIGHTS);
   1582 
   1583          LLVMBuildStore(builder, colors0, colors_var);
   1584       }
   1585       lp_build_endif(&if_ctx);
   1586    }
   1587 }
   1588 
   1589 
   1590 
   1591 /**
   1592  * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
   1593  * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
   1594  * but only limited texture coord wrap modes.
   1595  */
   1596 void
   1597 lp_build_sample_aos(struct lp_build_sample_context *bld,
   1598                     unsigned sampler_unit,
   1599                     LLVMValueRef s,
   1600                     LLVMValueRef t,
   1601                     LLVMValueRef r,
   1602                     const LLVMValueRef *offsets,
   1603                     LLVMValueRef lod_positive,
   1604                     LLVMValueRef lod_fpart,
   1605                     LLVMValueRef ilevel0,
   1606                     LLVMValueRef ilevel1,
   1607                     LLVMValueRef texel_out[4])
   1608 {
   1609    LLVMBuilderRef builder = bld->gallivm->builder;
   1610    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
   1611    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
   1612    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
   1613    const unsigned dims = bld->dims;
   1614    LLVMValueRef packed_var, packed;
   1615    LLVMValueRef unswizzled[4];
   1616    struct lp_build_context u8n_bld;
   1617 
   1618    /* we only support the common/simple wrap modes at this time */
   1619    assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
   1620    if (dims >= 2)
   1621       assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
   1622    if (dims >= 3)
   1623       assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));
   1624 
   1625 
   1626    /* make 8-bit unorm builder context */
   1627    lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   1628 
   1629    /*
   1630     * Get/interpolate texture colors.
   1631     */
   1632 
   1633    packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");
   1634 
   1635    if (min_filter == mag_filter) {
   1636       /* no need to distinguish between minification and magnification */
   1637       lp_build_sample_mipmap(bld,
   1638                              min_filter, mip_filter,
   1639                              s, t, r, offsets,
   1640                              ilevel0, ilevel1, lod_fpart,
   1641                              packed_var);
   1642    }
   1643    else {
   1644       /* Emit conditional to choose min image filter or mag image filter
   1645        * depending on the lod being > 0 or <= 0, respectively.
   1646        */
   1647       struct lp_build_if_state if_ctx;
   1648 
   1649       /*
   1650        * FIXME this should take all lods into account, if some are min
   1651        * some max probably could hack up the weights in the linear
   1652        * path with selects to work for nearest.
   1653        */
   1654       if (bld->num_lods > 1)
   1655          lod_positive = LLVMBuildExtractElement(builder, lod_positive,
   1656                                                 lp_build_const_int32(bld->gallivm, 0), "");
   1657 
   1658       lod_positive = LLVMBuildTrunc(builder, lod_positive,
   1659                                     LLVMInt1TypeInContext(bld->gallivm->context), "");
   1660 
   1661       lp_build_if(&if_ctx, bld->gallivm, lod_positive);
   1662       {
   1663          /* Use the minification filter */
   1664          lp_build_sample_mipmap(bld,
   1665                                 min_filter, mip_filter,
   1666                                 s, t, r, offsets,
   1667                                 ilevel0, ilevel1, lod_fpart,
   1668                                 packed_var);
   1669       }
   1670       lp_build_else(&if_ctx);
   1671       {
   1672          /* Use the magnification filter */
   1673          lp_build_sample_mipmap(bld,
   1674                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
   1675                                 s, t, r, offsets,
   1676                                 ilevel0, NULL, NULL,
   1677                                 packed_var);
   1678       }
   1679       lp_build_endif(&if_ctx);
   1680    }
   1681 
   1682    packed = LLVMBuildLoad(builder, packed_var, "");
   1683 
   1684    /*
   1685     * Convert to SoA and swizzle.
   1686     */
   1687    lp_build_rgba8_to_fi32_soa(bld->gallivm,
   1688                              bld->texel_type,
   1689                              packed, unswizzled);
   1690 
   1691    if (util_format_is_rgba8_variant(bld->format_desc)) {
   1692       lp_build_format_swizzle_soa(bld->format_desc,
   1693                                   &bld->texel_bld,
   1694                                   unswizzled, texel_out);
   1695    }
   1696    else {
   1697       texel_out[0] = unswizzled[0];
   1698       texel_out[1] = unswizzled[1];
   1699       texel_out[2] = unswizzled[2];
   1700       texel_out[3] = unswizzled[3];
   1701    }
   1702 }
   1703