Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2010 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * @file
     30  * Texture sampling -- AoS.
     31  *
     32  * @author Jose Fonseca <jfonseca (at) vmware.com>
     33  * @author Brian Paul <brianp (at) vmware.com>
     34  */
     35 
     36 #include "pipe/p_defines.h"
     37 #include "pipe/p_state.h"
     38 #include "util/u_debug.h"
     39 #include "util/u_dump.h"
     40 #include "util/u_memory.h"
     41 #include "util/u_math.h"
     42 #include "util/u_format.h"
     43 #include "util/u_cpu_detect.h"
     44 #include "lp_bld_debug.h"
     45 #include "lp_bld_type.h"
     46 #include "lp_bld_const.h"
     47 #include "lp_bld_conv.h"
     48 #include "lp_bld_arit.h"
     49 #include "lp_bld_bitarit.h"
     50 #include "lp_bld_logic.h"
     51 #include "lp_bld_swizzle.h"
     52 #include "lp_bld_pack.h"
     53 #include "lp_bld_flow.h"
     54 #include "lp_bld_gather.h"
     55 #include "lp_bld_format.h"
     56 #include "lp_bld_init.h"
     57 #include "lp_bld_sample.h"
     58 #include "lp_bld_sample_aos.h"
     59 #include "lp_bld_quad.h"
     60 
     61 
     62 /**
     63  * Build LLVM code for texture coord wrapping, for nearest filtering,
     64  * for scaled integer texcoords.
     65  * \param block_length  is the length of the pixel block along the
     66  *                      coordinate axis
     67  * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
     68  * \param length  the texture size along one dimension
     69  * \param stride  pixel stride along the coordinate axis (in bytes)
     70  * \param is_pot  if TRUE, length is a power of two
     71  * \param wrap_mode  one of PIPE_TEX_WRAP_x
     72  * \param out_offset  byte offset for the wrapped coordinate
     73  * \param out_i  resulting sub-block pixel coordinate for coord0
     74  */
     75 static void
     76 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
     77                                  unsigned block_length,
     78                                  LLVMValueRef coord,
     79                                  LLVMValueRef coord_f,
     80                                  LLVMValueRef length,
     81                                  LLVMValueRef stride,
     82                                  boolean is_pot,
     83                                  unsigned wrap_mode,
     84                                  LLVMValueRef *out_offset,
     85                                  LLVMValueRef *out_i)
     86 {
     87    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
     88    LLVMBuilderRef builder = bld->gallivm->builder;
     89    LLVMValueRef length_minus_one;
     90 
     91    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
     92 
     93    switch(wrap_mode) {
     94    case PIPE_TEX_WRAP_REPEAT:
     95       if(is_pot)
     96          coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
     97       else {
     98          struct lp_build_context *coord_bld = &bld->coord_bld;
     99          LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
    100          coord = lp_build_fract_safe(coord_bld, coord_f);
    101          coord = lp_build_mul(coord_bld, coord, length_f);
    102          coord = lp_build_itrunc(coord_bld, coord);
    103       }
    104       break;
    105 
    106    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    107       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
    108       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
    109       break;
    110 
    111    case PIPE_TEX_WRAP_CLAMP:
    112    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    113    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    114    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    115    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    116    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    117    default:
    118       assert(0);
    119    }
    120 
    121    lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
    122                                   out_offset, out_i);
    123 }
    124 
    125 
    126 /**
    127  * Build LLVM code for texture coord wrapping, for nearest filtering,
    128  * for float texcoords.
    129  * \param coord  the incoming texcoord (s,t,r or q)
    130  * \param length  the texture size along one dimension
    131  * \param is_pot  if TRUE, length is a power of two
    132  * \param wrap_mode  one of PIPE_TEX_WRAP_x
    133  * \param icoord  the texcoord after wrapping, as int
    134  */
    135 static void
    136 lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
    137                                    LLVMValueRef coord,
    138                                    LLVMValueRef length,
    139                                    boolean is_pot,
    140                                    unsigned wrap_mode,
    141                                    LLVMValueRef *icoord)
    142 {
    143    struct lp_build_context *coord_bld = &bld->coord_bld;
    144    LLVMValueRef length_minus_one;
    145 
    146    switch(wrap_mode) {
    147    case PIPE_TEX_WRAP_REPEAT:
    148       /* take fraction, unnormalize */
    149       coord = lp_build_fract_safe(coord_bld, coord);
    150       coord = lp_build_mul(coord_bld, coord, length);
    151       *icoord = lp_build_itrunc(coord_bld, coord);
    152       break;
    153    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    154       length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
    155       if (bld->static_state->normalized_coords) {
    156          /* scale coord to length */
    157          coord = lp_build_mul(coord_bld, coord, length);
    158       }
    159       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
    160                              length_minus_one);
    161       *icoord = lp_build_itrunc(coord_bld, coord);
    162       break;
    163 
    164    case PIPE_TEX_WRAP_CLAMP:
    165    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    166    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    167    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    168    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    169    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    170    default:
    171       assert(0);
    172    }
    173 }
    174 
    175 
    176 /**
    177  * Build LLVM code for texture coord wrapping, for linear filtering,
    178  * for scaled integer texcoords.
    179  * \param block_length  is the length of the pixel block along the
    180  *                      coordinate axis
    181  * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
    182  * \param length  the texture size along one dimension
    183  * \param stride  pixel stride along the coordinate axis (in bytes)
    184  * \param is_pot  if TRUE, length is a power of two
    185  * \param wrap_mode  one of PIPE_TEX_WRAP_x
    186  * \param offset0  resulting relative offset for coord0
    187  * \param offset1  resulting relative offset for coord0 + 1
    188  * \param i0  resulting sub-block pixel coordinate for coord0
    189  * \param i1  resulting sub-block pixel coordinate for coord0 + 1
    190  */
    191 static void
    192 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
    193                                 unsigned block_length,
    194                                 LLVMValueRef coord0,
    195                                 LLVMValueRef *weight_i,
    196                                 LLVMValueRef coord_f,
    197                                 LLVMValueRef length,
    198                                 LLVMValueRef stride,
    199                                 boolean is_pot,
    200                                 unsigned wrap_mode,
    201                                 LLVMValueRef *offset0,
    202                                 LLVMValueRef *offset1,
    203                                 LLVMValueRef *i0,
    204                                 LLVMValueRef *i1)
    205 {
    206    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    207    LLVMBuilderRef builder = bld->gallivm->builder;
    208    LLVMValueRef length_minus_one;
    209    LLVMValueRef lmask, umask, mask;
    210 
    211    /*
    212     * If the pixel block covers more than one pixel then there is no easy
    213     * way to calculate offset1 relative to offset0. Instead, compute them
    214     * independently. Otherwise, try to compute offset0 and offset1 with
    215     * a single stride multiplication.
    216     */
    217 
    218    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
    219 
    220    if (block_length != 1) {
    221       LLVMValueRef coord1;
    222       switch(wrap_mode) {
    223       case PIPE_TEX_WRAP_REPEAT:
    224          if (is_pot) {
    225             coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    226             coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
    227             coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
    228          }
    229          else {
    230             LLVMValueRef mask;
    231             LLVMValueRef weight;
    232             LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
    233             lp_build_coord_repeat_npot_linear(bld, coord_f,
    234                                               length, length_f,
    235                                               &coord0, &weight);
    236             mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
    237                                     PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
    238             coord1 = LLVMBuildAnd(builder,
    239                                   lp_build_add(int_coord_bld, coord0,
    240                                                int_coord_bld->one),
    241                                   mask, "");
    242             weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
    243             *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
    244          }
    245          break;
    246 
    247       case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    248          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
    249          coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
    250                                 length_minus_one);
    251          coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
    252                                 length_minus_one);
    253          break;
    254 
    255       case PIPE_TEX_WRAP_CLAMP:
    256       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    257       case PIPE_TEX_WRAP_MIRROR_REPEAT:
    258       case PIPE_TEX_WRAP_MIRROR_CLAMP:
    259       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    260       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    261       default:
    262          assert(0);
    263          coord0 = int_coord_bld->zero;
    264          coord1 = int_coord_bld->zero;
    265          break;
    266       }
    267       lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
    268                                      offset0, i0);
    269       lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
    270                                      offset1, i1);
    271       return;
    272    }
    273 
    274    *i0 = int_coord_bld->zero;
    275    *i1 = int_coord_bld->zero;
    276 
    277    switch(wrap_mode) {
    278    case PIPE_TEX_WRAP_REPEAT:
    279       if (is_pot) {
    280          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
    281       }
    282       else {
    283          LLVMValueRef weight;
    284          LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
    285          lp_build_coord_repeat_npot_linear(bld, coord_f,
    286                                            length, length_f,
    287                                            &coord0, &weight);
    288          weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
    289          *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
    290       }
    291 
    292       mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
    293                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
    294 
    295       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
    296       *offset1 = LLVMBuildAnd(builder,
    297                               lp_build_add(int_coord_bld, *offset0, stride),
    298                               mask, "");
    299       break;
    300 
    301    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    302       /* XXX this might be slower than the separate path
    303        * on some newer cpus. With sse41 this is 8 instructions vs. 7
    304        * - at least on SNB this is almost certainly slower since
    305        * min/max are cheaper than selects, and the muls aren't bad.
    306        */
    307       lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
    308                                PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
    309       umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
    310                                PIPE_FUNC_LESS, coord0, length_minus_one);
    311 
    312       coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
    313       coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
    314 
    315       mask = LLVMBuildAnd(builder, lmask, umask, "");
    316 
    317       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
    318       *offset1 = lp_build_add(int_coord_bld,
    319                               *offset0,
    320                               LLVMBuildAnd(builder, stride, mask, ""));
    321       break;
    322 
    323    case PIPE_TEX_WRAP_CLAMP:
    324    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
    325    case PIPE_TEX_WRAP_MIRROR_REPEAT:
    326    case PIPE_TEX_WRAP_MIRROR_CLAMP:
    327    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    328    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    329    default:
    330       assert(0);
    331       *offset0 = int_coord_bld->zero;
    332       *offset1 = int_coord_bld->zero;
    333       break;
    334    }
    335 }
    336 
    337 
    338 /**
    339  * Build LLVM code for texture coord wrapping, for linear filtering,
    340  * for float texcoords.
    341  * \param block_length  is the length of the pixel block along the
    342  *                      coordinate axis
    343  * \param coord  the incoming texcoord (s,t,r or q)
    344  * \param length  the texture size along one dimension
    345  * \param is_pot  if TRUE, length is a power of two
    346  * \param wrap_mode  one of PIPE_TEX_WRAP_x
    347  * \param coord0  the first texcoord after wrapping, as int
    348  * \param coord1  the second texcoord after wrapping, as int
    349  * \param weight  the filter weight as int (0-255)
    350  * \param force_nearest  if this coord actually uses nearest filtering
    351  */
    352 static void
    353 lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
    354                                   unsigned block_length,
    355                                   LLVMValueRef coord,
    356                                   LLVMValueRef length,
    357                                   boolean is_pot,
    358                                   unsigned wrap_mode,
    359                                   LLVMValueRef *coord0,
    360                                   LLVMValueRef *coord1,
    361                                   LLVMValueRef *weight,
    362                                   unsigned force_nearest)
    363 {
    364    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    365    struct lp_build_context *coord_bld = &bld->coord_bld;
    366    LLVMBuilderRef builder = bld->gallivm->builder;
    367    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
    368    LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
    369 
    370    switch(wrap_mode) {
    371    case PIPE_TEX_WRAP_REPEAT:
    372       if (is_pot) {
    373          /* mul by size and subtract 0.5 */
    374          coord = lp_build_mul(coord_bld, coord, length);
    375          if (!force_nearest)
    376             coord = lp_build_sub(coord_bld, coord, half);
    377          *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
    378          /* convert to int, compute lerp weight */
    379          lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
    380          *coord1 = lp_build_ifloor(coord_bld, *coord1);
    381          /* repeat wrap */
    382          length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
    383          *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
    384          *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
    385       }
    386       else {
    387          LLVMValueRef mask;
    388          /* wrap with normalized floats is just fract */
    389          coord = lp_build_fract(coord_bld, coord);
    390          /* unnormalize */
    391          coord = lp_build_mul(coord_bld, coord, length);
    392          /*
    393           * we avoided the 0.5/length division, have to fix up wrong
    394           * edge cases with selects
    395           */
    396          *coord1 = lp_build_add(coord_bld, coord, half);
    397          coord = lp_build_sub(coord_bld, coord, half);
    398          *weight = lp_build_fract(coord_bld, coord);
    399          mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
    400                                  PIPE_FUNC_LESS, coord, coord_bld->zero);
    401          *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
    402          *coord0 = lp_build_itrunc(coord_bld, *coord0);
    403          mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
    404                                  PIPE_FUNC_LESS, *coord1, length);
    405          *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
    406          *coord1 = lp_build_itrunc(coord_bld, *coord1);
    407       }
    408       break;
    409    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
    410       if (bld->static_state->normalized_coords) {
    411          /* mul by tex size */
    412          coord = lp_build_mul(coord_bld, coord, length);
    413       }
    414       /* subtract 0.5 */
    415       if (!force_nearest) {
    416          coord = lp_build_sub(coord_bld, coord, half);
    417       }
    418       /* clamp to [0, length - 1] */
    419       coord = lp_build_min(coord_bld, coord, length_minus_one);
    420       coord = lp_build_max(coord_bld, coord, coord_bld->zero);
    421       *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
    422       /* convert to int, compute lerp weight */
    423       lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
    424       /* coord1 = min(coord1, length-1) */
    425       *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
    426       *coord1 = lp_build_itrunc(coord_bld, *coord1);
    427       break;
    428    default:
    429       assert(0);
    430       *coord0 = int_coord_bld->zero;
    431       *coord1 = int_coord_bld->zero;
    432       *weight = coord_bld->zero;
    433       break;
    434    }
    435    *weight = lp_build_mul_imm(coord_bld, *weight, 256);
    436    *weight = lp_build_itrunc(coord_bld, *weight);
    437    return;
    438 }
    439 
    440 
    441 /**
    442  * Fetch texels for image with nearest sampling.
    443  * Return filtered color as two vectors of 16-bit fixed point values.
    444  */
    445 static void
    446 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
    447                                     LLVMValueRef data_ptr,
    448                                     LLVMValueRef offset,
    449                                     LLVMValueRef x_subcoord,
    450                                     LLVMValueRef y_subcoord,
    451                                     LLVMValueRef *colors_lo,
    452                                     LLVMValueRef *colors_hi)
    453 {
    454    /*
    455     * Fetch the pixels as 4 x 32bit (rgba order might differ):
    456     *
    457     *   rgba0 rgba1 rgba2 rgba3
    458     *
    459     * bit cast them into 16 x u8
    460     *
    461     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    462     *
    463     * unpack them into two 8 x i16:
    464     *
    465     *   r0 g0 b0 a0 r1 g1 b1 a1
    466     *   r2 g2 b2 a2 r3 g3 b3 a3
    467     *
    468     * The higher 8 bits of the resulting elements will be zero.
    469     */
    470    LLVMBuilderRef builder = bld->gallivm->builder;
    471    LLVMValueRef rgba8;
    472    struct lp_build_context h16, u8n;
    473    LLVMTypeRef u8n_vec_type;
    474 
    475    lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
    476    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
    477    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
    478 
    479    if (util_format_is_rgba8_variant(bld->format_desc)) {
    480       /*
    481        * Given the format is a rgba8, just read the pixels as is,
    482        * without any swizzling. Swizzling will be done later.
    483        */
    484       rgba8 = lp_build_gather(bld->gallivm,
    485                               bld->texel_type.length,
    486                               bld->format_desc->block.bits,
    487                               bld->texel_type.width,
    488                               data_ptr, offset);
    489 
    490       rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
    491    }
    492    else {
    493       rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
    494                                       bld->format_desc,
    495                                       u8n.type,
    496                                       data_ptr, offset,
    497                                       x_subcoord,
    498                                       y_subcoord);
    499    }
    500 
    501    /* Expand one 4*rgba8 to two 2*rgba16 */
    502    lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
    503                     rgba8,
    504                     colors_lo, colors_hi);
    505 }
    506 
    507 
    508 /**
    509  * Sample a single texture image with nearest sampling.
    510  * If sampling a cube texture, r = cube face in [0,5].
    511  * Return filtered color as two vectors of 16-bit fixed point values.
    512  */
    513 static void
    514 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
    515                               LLVMValueRef int_size,
    516                               LLVMValueRef row_stride_vec,
    517                               LLVMValueRef img_stride_vec,
    518                               LLVMValueRef data_ptr,
    519                               LLVMValueRef s,
    520                               LLVMValueRef t,
    521                               LLVMValueRef r,
    522                               LLVMValueRef *colors_lo,
    523                               LLVMValueRef *colors_hi)
    524 {
    525    const unsigned dims = bld->dims;
    526    LLVMBuilderRef builder = bld->gallivm->builder;
    527    struct lp_build_context i32;
    528    LLVMTypeRef i32_vec_type;
    529    LLVMValueRef i32_c8;
    530    LLVMValueRef width_vec, height_vec, depth_vec;
    531    LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
    532    LLVMValueRef s_float, t_float = NULL, r_float = NULL;
    533    LLVMValueRef x_stride;
    534    LLVMValueRef x_offset, offset;
    535    LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
    536 
    537    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
    538 
    539    i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
    540 
    541    lp_build_extract_image_sizes(bld,
    542                                 bld->int_size_type,
    543                                 bld->int_coord_type,
    544                                 int_size,
    545                                 &width_vec,
    546                                 &height_vec,
    547                                 &depth_vec);
    548 
    549    s_float = s; t_float = t; r_float = r;
    550 
    551    if (bld->static_state->normalized_coords) {
    552       LLVMValueRef scaled_size;
    553       LLVMValueRef flt_size;
    554 
    555       /* scale size by 256 (8 fractional bits) */
    556       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
    557 
    558       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
    559 
    560       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
    561    }
    562    else {
    563       /* scale coords by 256 (8 fractional bits) */
    564       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
    565       if (dims >= 2)
    566          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
    567       if (dims >= 3)
    568          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
    569    }
    570 
    571    /* convert float to int */
    572    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
    573    if (dims >= 2)
    574       t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
    575    if (dims >= 3)
    576       r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
    577 
    578    /* compute floor (shift right 8) */
    579    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
    580    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
    581    if (dims >= 2)
    582       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
    583    if (dims >= 3)
    584       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
    585 
    586    /* get pixel, row, image strides */
    587    x_stride = lp_build_const_vec(bld->gallivm,
    588                                  bld->int_coord_bld.type,
    589                                  bld->format_desc->block.bits/8);
    590 
    591    /* Do texcoord wrapping, compute texel offset */
    592    lp_build_sample_wrap_nearest_int(bld,
    593                                     bld->format_desc->block.width,
    594                                     s_ipart, s_float,
    595                                     width_vec, x_stride,
    596                                     bld->static_state->pot_width,
    597                                     bld->static_state->wrap_s,
    598                                     &x_offset, &x_subcoord);
    599    offset = x_offset;
    600    if (dims >= 2) {
    601       LLVMValueRef y_offset;
    602       lp_build_sample_wrap_nearest_int(bld,
    603                                        bld->format_desc->block.height,
    604                                        t_ipart, t_float,
    605                                        height_vec, row_stride_vec,
    606                                        bld->static_state->pot_height,
    607                                        bld->static_state->wrap_t,
    608                                        &y_offset, &y_subcoord);
    609       offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
    610       if (dims >= 3) {
    611          LLVMValueRef z_offset;
    612          lp_build_sample_wrap_nearest_int(bld,
    613                                           1, /* block length (depth) */
    614                                           r_ipart, r_float,
    615                                           depth_vec, img_stride_vec,
    616                                           bld->static_state->pot_depth,
    617                                           bld->static_state->wrap_r,
    618                                           &z_offset, &z_subcoord);
    619          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
    620       }
    621       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
    622          LLVMValueRef z_offset;
    623          /* The r coord is the cube face in [0,5] */
    624          z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
    625          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
    626       }
    627    }
    628 
    629    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
    630                                        x_subcoord, y_subcoord,
    631                                        colors_lo, colors_hi);
    632 }
    633 
    634 
    635 /**
    636  * Sample a single texture image with nearest sampling.
    637  * If sampling a cube texture, r = cube face in [0,5].
    638  * Return filtered color as two vectors of 16-bit fixed point values.
    639  * Does address calcs (except offsets) with floats.
    640  * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
    641  */
    642 static void
    643 lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
    644                                      LLVMValueRef int_size,
    645                                      LLVMValueRef row_stride_vec,
    646                                      LLVMValueRef img_stride_vec,
    647                                      LLVMValueRef data_ptr,
    648                                      LLVMValueRef s,
    649                                      LLVMValueRef t,
    650                                      LLVMValueRef r,
    651                                      LLVMValueRef *colors_lo,
    652                                      LLVMValueRef *colors_hi)
    653    {
    654    const unsigned dims = bld->dims;
    655    LLVMValueRef width_vec, height_vec, depth_vec;
    656    LLVMValueRef offset;
    657    LLVMValueRef x_subcoord, y_subcoord;
    658    LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
    659    LLVMValueRef flt_size;
    660 
    661    flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
    662 
    663    lp_build_extract_image_sizes(bld,
    664                                 bld->float_size_type,
    665                                 bld->coord_type,
    666                                 flt_size,
    667                                 &width_vec,
    668                                 &height_vec,
    669                                 &depth_vec);
    670 
    671    /* Do texcoord wrapping */
    672    lp_build_sample_wrap_nearest_float(bld,
    673                                       s, width_vec,
    674                                       bld->static_state->pot_width,
    675                                       bld->static_state->wrap_s,
    676                                       &x_icoord);
    677 
    678    if (dims >= 2) {
    679       lp_build_sample_wrap_nearest_float(bld,
    680                                          t, height_vec,
    681                                          bld->static_state->pot_height,
    682                                          bld->static_state->wrap_t,
    683                                          &y_icoord);
    684 
    685       if (dims >= 3) {
    686          lp_build_sample_wrap_nearest_float(bld,
    687                                             r, depth_vec,
    688                                             bld->static_state->pot_depth,
    689                                             bld->static_state->wrap_r,
    690                                             &z_icoord);
    691       }
    692       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
    693          z_icoord = r;
    694       }
    695    }
    696 
    697    /*
    698     * From here on we deal with ints, and we should split up the 256bit
    699     * vectors manually for better generated code.
    700     */
    701 
    702    /*
    703     * compute texel offsets -
    704     * cannot do offset calc with floats, difficult for block-based formats,
    705     * and not enough precision anyway.
    706     */
    707    lp_build_sample_offset(&bld->int_coord_bld,
    708                           bld->format_desc,
    709                           x_icoord, y_icoord,
    710                           z_icoord,
    711                           row_stride_vec, img_stride_vec,
    712                           &offset,
    713                           &x_subcoord, &y_subcoord);
    714 
    715    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
    716                                        x_subcoord, y_subcoord,
    717                                        colors_lo, colors_hi);
    718 }
    719 
    720 
    721 /**
    722  * Fetch texels for image with linear sampling.
    723  * Return filtered color as two vectors of 16-bit fixed point values.
    724  */
    725 static void
    726 lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
    727                                    LLVMValueRef data_ptr,
    728                                    LLVMValueRef offset[2][2][2],
    729                                    LLVMValueRef x_subcoord[2],
    730                                    LLVMValueRef y_subcoord[2],
    731                                    LLVMValueRef s_fpart,
    732                                    LLVMValueRef t_fpart,
    733                                    LLVMValueRef r_fpart,
    734                                    LLVMValueRef *colors_lo,
    735                                    LLVMValueRef *colors_hi)
    736 {
    737    const unsigned dims = bld->dims;
    738    LLVMBuilderRef builder = bld->gallivm->builder;
    739    struct lp_build_context h16, u8n;
    740    LLVMTypeRef h16_vec_type, u8n_vec_type;
    741    LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
    742    LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
    743    LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
    744    LLVMValueRef shuffle_lo, shuffle_hi;
    745    LLVMValueRef s_fpart_lo, s_fpart_hi;
    746    LLVMValueRef t_fpart_lo = NULL, t_fpart_hi = NULL;
    747    LLVMValueRef r_fpart_lo = NULL, r_fpart_hi = NULL;
    748    LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
    749    LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
    750    LLVMValueRef packed_lo, packed_hi;
    751    unsigned i, j, k;
    752    unsigned numj, numk;
    753 
    754    lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
    755    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
    756    h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
    757    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
    758 
    759    /*
    760     * Transform 4 x i32 in
    761     *
    762     *   s_fpart = {s0, s1, s2, s3}
    763     *
    764     * into 8 x i16
    765     *
    766     *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
    767     *
    768     * into two 8 x i16
    769     *
    770     *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
    771     *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
    772     *
    773     * and likewise for t_fpart. There is no risk of loosing precision here
    774     * since the fractional parts only use the lower 8bits.
    775     */
    776    s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
    777    if (dims >= 2)
    778       t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
    779    if (dims >= 3)
    780       r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
    781 
    782    for (j = 0; j < h16.type.length; j += 4) {
    783 #ifdef PIPE_ARCH_LITTLE_ENDIAN
    784       unsigned subindex = 0;
    785 #else
    786       unsigned subindex = 1;
    787 #endif
    788       LLVMValueRef index;
    789 
    790       index = LLVMConstInt(elem_type, j/2 + subindex, 0);
    791       for (i = 0; i < 4; ++i)
    792          shuffles_lo[j + i] = index;
    793 
    794       index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
    795       for (i = 0; i < 4; ++i)
    796          shuffles_hi[j + i] = index;
    797    }
    798 
    799    shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
    800    shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
    801 
    802    s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
    803                                        shuffle_lo, "");
    804    s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
    805                                        shuffle_hi, "");
    806    if (dims >= 2) {
    807       t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
    808                                           shuffle_lo, "");
    809       t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
    810                                           shuffle_hi, "");
    811    }
    812    if (dims >= 3) {
    813       r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
    814                                           shuffle_lo, "");
    815       r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
    816                                           shuffle_hi, "");
    817    }
    818 
    819    /*
    820     * Fetch the pixels as 4 x 32bit (rgba order might differ):
    821     *
    822     *   rgba0 rgba1 rgba2 rgba3
    823     *
    824     * bit cast them into 16 x u8
    825     *
    826     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    827     *
    828     * unpack them into two 8 x i16:
    829     *
    830     *   r0 g0 b0 a0 r1 g1 b1 a1
    831     *   r2 g2 b2 a2 r3 g3 b3 a3
    832     *
    833     * The higher 8 bits of the resulting elements will be zero.
    834     */
    835    numj = 1 + (dims >= 2);
    836    numk = 1 + (dims >= 3);
    837 
    838    for (k = 0; k < numk; k++) {
    839       for (j = 0; j < numj; j++) {
    840          for (i = 0; i < 2; i++) {
    841             LLVMValueRef rgba8;
    842 
    843             if (util_format_is_rgba8_variant(bld->format_desc)) {
    844                /*
    845                 * Given the format is a rgba8, just read the pixels as is,
    846                 * without any swizzling. Swizzling will be done later.
    847                 */
    848                rgba8 = lp_build_gather(bld->gallivm,
    849                                        bld->texel_type.length,
    850                                        bld->format_desc->block.bits,
    851                                        bld->texel_type.width,
    852                                        data_ptr, offset[k][j][i]);
    853 
    854                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
    855             }
    856             else {
    857                rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
    858                                                bld->format_desc,
    859                                                u8n.type,
    860                                                data_ptr, offset[k][j][i],
    861                                                x_subcoord[i],
    862                                                y_subcoord[j]);
    863             }
    864 
    865             /* Expand one 4*rgba8 to two 2*rgba16 */
    866             lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
    867                              rgba8,
    868                              &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
    869          }
    870       }
    871    }
    872 
    873    /*
    874     * Linear interpolation with 8.8 fixed point.
    875     */
    876    if (bld->static_state->force_nearest_s) {
    877       /* special case 1-D lerp */
    878       packed_lo = lp_build_lerp(&h16,
    879                                 t_fpart_lo,
    880                                 neighbors_lo[0][0][0],
    881                                 neighbors_lo[0][0][1]);
    882 
    883       packed_hi = lp_build_lerp(&h16,
    884                                 t_fpart_hi,
    885                                 neighbors_hi[0][1][0],
    886                                 neighbors_hi[0][1][0]);
    887    }
    888    else if (bld->static_state->force_nearest_t) {
    889       /* special case 1-D lerp */
    890       packed_lo = lp_build_lerp(&h16,
    891                                 s_fpart_lo,
    892                                 neighbors_lo[0][0][0],
    893                                 neighbors_lo[0][0][1]);
    894 
    895       packed_hi = lp_build_lerp(&h16,
    896                                 s_fpart_hi,
    897                                 neighbors_hi[0][0][0],
    898                                 neighbors_hi[0][0][1]);
    899    }
    900    else {
    901       /* general 1/2/3-D lerping */
    902       if (dims == 1) {
    903          packed_lo = lp_build_lerp(&h16,
    904                                    s_fpart_lo,
    905                                    neighbors_lo[0][0][0],
    906                                    neighbors_lo[0][0][1]);
    907 
    908          packed_hi = lp_build_lerp(&h16,
    909                                    s_fpart_hi,
    910                                    neighbors_hi[0][0][0],
    911                                    neighbors_hi[0][0][1]);
    912       }
    913       else {
    914          /* 2-D lerp */
    915          packed_lo = lp_build_lerp_2d(&h16,
    916                                       s_fpart_lo, t_fpart_lo,
    917                                       neighbors_lo[0][0][0],
    918                                       neighbors_lo[0][0][1],
    919                                       neighbors_lo[0][1][0],
    920                                       neighbors_lo[0][1][1]);
    921 
    922          packed_hi = lp_build_lerp_2d(&h16,
    923                                       s_fpart_hi, t_fpart_hi,
    924                                       neighbors_hi[0][0][0],
    925                                       neighbors_hi[0][0][1],
    926                                       neighbors_hi[0][1][0],
    927                                       neighbors_hi[0][1][1]);
    928 
    929          if (dims >= 3) {
    930             LLVMValueRef packed_lo2, packed_hi2;
    931 
    932             /* lerp in the second z slice */
    933             packed_lo2 = lp_build_lerp_2d(&h16,
    934                                           s_fpart_lo, t_fpart_lo,
    935                                           neighbors_lo[1][0][0],
    936                                           neighbors_lo[1][0][1],
    937                                           neighbors_lo[1][1][0],
    938                                           neighbors_lo[1][1][1]);
    939 
    940             packed_hi2 = lp_build_lerp_2d(&h16,
    941                                           s_fpart_hi, t_fpart_hi,
    942                                           neighbors_hi[1][0][0],
    943                                           neighbors_hi[1][0][1],
    944                                           neighbors_hi[1][1][0],
    945                                           neighbors_hi[1][1][1]);
    946             /* interp between two z slices */
    947             packed_lo = lp_build_lerp(&h16, r_fpart_lo,
    948                                       packed_lo, packed_lo2);
    949             packed_hi = lp_build_lerp(&h16, r_fpart_hi,
    950                                       packed_hi, packed_hi2);
    951          }
    952       }
    953    }
    954 
    955    *colors_lo = packed_lo;
    956    *colors_hi = packed_hi;
    957 }
    958 
    959 /**
    960  * Sample a single texture image with (bi-)(tri-)linear sampling.
    961  * Return filtered color as two vectors of 16-bit fixed point values.
    962  */
    963 static void
    964 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
    965                              LLVMValueRef int_size,
    966                              LLVMValueRef row_stride_vec,
    967                              LLVMValueRef img_stride_vec,
    968                              LLVMValueRef data_ptr,
    969                              LLVMValueRef s,
    970                              LLVMValueRef t,
    971                              LLVMValueRef r,
    972                              LLVMValueRef *colors_lo,
    973                              LLVMValueRef *colors_hi)
    974 {
    975    const unsigned dims = bld->dims;
    976    LLVMBuilderRef builder = bld->gallivm->builder;
    977    struct lp_build_context i32;
    978    LLVMTypeRef i32_vec_type;
    979    LLVMValueRef i32_c8, i32_c128, i32_c255;
    980    LLVMValueRef width_vec, height_vec, depth_vec;
    981    LLVMValueRef s_ipart, s_fpart, s_float;
    982    LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
    983    LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
    984    LLVMValueRef x_stride, y_stride, z_stride;
    985    LLVMValueRef x_offset0, x_offset1;
    986    LLVMValueRef y_offset0, y_offset1;
    987    LLVMValueRef z_offset0, z_offset1;
    988    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
    989    LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
    990    unsigned x, y, z;
    991 
    992    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
    993 
    994    i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
    995 
    996    lp_build_extract_image_sizes(bld,
    997                                 bld->int_size_type,
    998                                 bld->int_coord_type,
    999                                 int_size,
   1000                                 &width_vec,
   1001                                 &height_vec,
   1002                                 &depth_vec);
   1003 
   1004    s_float = s; t_float = t; r_float = r;
   1005 
   1006    if (bld->static_state->normalized_coords) {
   1007       LLVMValueRef scaled_size;
   1008       LLVMValueRef flt_size;
   1009 
   1010       /* scale size by 256 (8 fractional bits) */
   1011       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
   1012 
   1013       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
   1014 
   1015       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   1016    }
   1017    else {
   1018       /* scale coords by 256 (8 fractional bits) */
   1019       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
   1020       if (dims >= 2)
   1021          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
   1022       if (dims >= 3)
   1023          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   1024    }
   1025 
   1026    /* convert float to int */
   1027    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   1028    if (dims >= 2)
   1029       t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   1030    if (dims >= 3)
   1031       r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
   1032 
   1033    /* subtract 0.5 (add -128) */
   1034    i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   1035    if (!bld->static_state->force_nearest_s) {
   1036       s = LLVMBuildAdd(builder, s, i32_c128, "");
   1037    }
   1038    if (dims >= 2 && !bld->static_state->force_nearest_t) {
   1039       t = LLVMBuildAdd(builder, t, i32_c128, "");
   1040    }
   1041    if (dims >= 3) {
   1042       r = LLVMBuildAdd(builder, r, i32_c128, "");
   1043    }
   1044 
   1045    /* compute floor (shift right 8) */
   1046    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   1047    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   1048    if (dims >= 2)
   1049       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   1050    if (dims >= 3)
   1051       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
   1052 
   1053    /* compute fractional part (AND with 0xff) */
   1054    i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   1055    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   1056    if (dims >= 2)
   1057       t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   1058    if (dims >= 3)
   1059       r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
   1060 
   1061    /* get pixel, row and image strides */
   1062    x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
   1063                                  bld->format_desc->block.bits/8);
   1064    y_stride = row_stride_vec;
   1065    z_stride = img_stride_vec;
   1066 
   1067    /* do texcoord wrapping and compute texel offsets */
   1068    lp_build_sample_wrap_linear_int(bld,
   1069                                    bld->format_desc->block.width,
   1070                                    s_ipart, &s_fpart, s_float,
   1071                                    width_vec, x_stride,
   1072                                    bld->static_state->pot_width,
   1073                                    bld->static_state->wrap_s,
   1074                                    &x_offset0, &x_offset1,
   1075                                    &x_subcoord[0], &x_subcoord[1]);
   1076    for (z = 0; z < 2; z++) {
   1077       for (y = 0; y < 2; y++) {
   1078          offset[z][y][0] = x_offset0;
   1079          offset[z][y][1] = x_offset1;
   1080       }
   1081    }
   1082 
   1083    if (dims >= 2) {
   1084       lp_build_sample_wrap_linear_int(bld,
   1085                                       bld->format_desc->block.height,
   1086                                       t_ipart, &t_fpart, t_float,
   1087                                       height_vec, y_stride,
   1088                                       bld->static_state->pot_height,
   1089                                       bld->static_state->wrap_t,
   1090                                       &y_offset0, &y_offset1,
   1091                                       &y_subcoord[0], &y_subcoord[1]);
   1092 
   1093       for (z = 0; z < 2; z++) {
   1094          for (x = 0; x < 2; x++) {
   1095             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
   1096                                            offset[z][0][x], y_offset0);
   1097             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
   1098                                            offset[z][1][x], y_offset1);
   1099          }
   1100       }
   1101    }
   1102 
   1103    if (dims >= 3) {
   1104       lp_build_sample_wrap_linear_int(bld,
   1105                                       bld->format_desc->block.height,
   1106                                       r_ipart, &r_fpart, r_float,
   1107                                       depth_vec, z_stride,
   1108                                       bld->static_state->pot_depth,
   1109                                       bld->static_state->wrap_r,
   1110                                       &z_offset0, &z_offset1,
   1111                                       &z_subcoord[0], &z_subcoord[1]);
   1112       for (y = 0; y < 2; y++) {
   1113          for (x = 0; x < 2; x++) {
   1114             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
   1115                                            offset[0][y][x], z_offset0);
   1116             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
   1117                                            offset[1][y][x], z_offset1);
   1118          }
   1119       }
   1120    }
   1121    else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
   1122       LLVMValueRef z_offset;
   1123       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
   1124       for (y = 0; y < 2; y++) {
   1125          for (x = 0; x < 2; x++) {
   1126             /* The r coord is the cube face in [0,5] */
   1127             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
   1128                                            offset[0][y][x], z_offset);
   1129          }
   1130       }
   1131    }
   1132 
   1133    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
   1134                                       x_subcoord, y_subcoord,
   1135                                       s_fpart, t_fpart, r_fpart,
   1136                                       colors_lo, colors_hi);
   1137 }
   1138 
   1139 
   1140 /**
   1141  * Sample a single texture image with (bi-)(tri-)linear sampling.
   1142  * Return filtered color as two vectors of 16-bit fixed point values.
   1143  * Does address calcs (except offsets) with floats.
   1144  * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
   1145  */
   1146 static void
   1147 lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
   1148                                     LLVMValueRef int_size,
   1149                                     LLVMValueRef row_stride_vec,
   1150                                     LLVMValueRef img_stride_vec,
   1151                                     LLVMValueRef data_ptr,
   1152                                     LLVMValueRef s,
   1153                                     LLVMValueRef t,
   1154                                     LLVMValueRef r,
   1155                                     LLVMValueRef *colors_lo,
   1156                                     LLVMValueRef *colors_hi)
   1157 {
   1158    const unsigned dims = bld->dims;
   1159    LLVMValueRef width_vec, height_vec, depth_vec;
   1160    LLVMValueRef s_fpart;
   1161    LLVMValueRef t_fpart = NULL;
   1162    LLVMValueRef r_fpart = NULL;
   1163    LLVMValueRef x_stride, y_stride, z_stride;
   1164    LLVMValueRef x_offset0, x_offset1;
   1165    LLVMValueRef y_offset0, y_offset1;
   1166    LLVMValueRef z_offset0, z_offset1;
   1167    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   1168    LLVMValueRef x_subcoord[2], y_subcoord[2];
   1169    LLVMValueRef flt_size;
   1170    LLVMValueRef x_icoord0, x_icoord1;
   1171    LLVMValueRef y_icoord0, y_icoord1;
   1172    LLVMValueRef z_icoord0, z_icoord1;
   1173    unsigned x, y, z;
   1174 
   1175    flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
   1176 
   1177    lp_build_extract_image_sizes(bld,
   1178                                 bld->float_size_type,
   1179                                 bld->coord_type,
   1180                                 flt_size,
   1181                                 &width_vec,
   1182                                 &height_vec,
   1183                                 &depth_vec);
   1184 
   1185    /* do texcoord wrapping and compute texel offsets */
   1186    lp_build_sample_wrap_linear_float(bld,
   1187                                      bld->format_desc->block.width,
   1188                                      s, width_vec,
   1189                                      bld->static_state->pot_width,
   1190                                      bld->static_state->wrap_s,
   1191                                      &x_icoord0, &x_icoord1,
   1192                                      &s_fpart,
   1193                                      bld->static_state->force_nearest_s);
   1194 
   1195    if (dims >= 2) {
   1196       lp_build_sample_wrap_linear_float(bld,
   1197                                         bld->format_desc->block.height,
   1198                                         t, height_vec,
   1199                                         bld->static_state->pot_height,
   1200                                         bld->static_state->wrap_t,
   1201                                         &y_icoord0, &y_icoord1,
   1202                                         &t_fpart,
   1203                                         bld->static_state->force_nearest_t);
   1204 
   1205       if (dims >= 3) {
   1206          lp_build_sample_wrap_linear_float(bld,
   1207                                            bld->format_desc->block.height,
   1208                                            r, depth_vec,
   1209                                            bld->static_state->pot_depth,
   1210                                            bld->static_state->wrap_r,
   1211                                            &z_icoord0, &z_icoord1,
   1212                                            &r_fpart, 0);
   1213       }
   1214    }
   1215 
   1216    /*
   1217     * From here on we deal with ints, and we should split up the 256bit
   1218     * vectors manually for better generated code.
   1219     */
   1220 
   1221    /* get pixel, row and image strides */
   1222    x_stride = lp_build_const_vec(bld->gallivm,
   1223                                  bld->int_coord_bld.type,
   1224                                  bld->format_desc->block.bits/8);
   1225    y_stride = row_stride_vec;
   1226    z_stride = img_stride_vec;
   1227 
   1228    /*
   1229     * compute texel offset -
   1230     * cannot do offset calc with floats, difficult for block-based formats,
   1231     * and not enough precision anyway.
   1232     */
   1233    lp_build_sample_partial_offset(&bld->int_coord_bld,
   1234                                   bld->format_desc->block.width,
   1235                                   x_icoord0, x_stride,
   1236                                   &x_offset0, &x_subcoord[0]);
   1237    lp_build_sample_partial_offset(&bld->int_coord_bld,
   1238                                   bld->format_desc->block.width,
   1239                                   x_icoord1, x_stride,
   1240                                   &x_offset1, &x_subcoord[1]);
   1241    for (z = 0; z < 2; z++) {
   1242       for (y = 0; y < 2; y++) {
   1243          offset[z][y][0] = x_offset0;
   1244          offset[z][y][1] = x_offset1;
   1245       }
   1246    }
   1247 
   1248    if (dims >= 2) {
   1249       lp_build_sample_partial_offset(&bld->int_coord_bld,
   1250                                      bld->format_desc->block.height,
   1251                                      y_icoord0, y_stride,
   1252                                      &y_offset0, &y_subcoord[0]);
   1253       lp_build_sample_partial_offset(&bld->int_coord_bld,
   1254                                      bld->format_desc->block.height,
   1255                                      y_icoord1, y_stride,
   1256                                      &y_offset1, &y_subcoord[1]);
   1257       for (z = 0; z < 2; z++) {
   1258          for (x = 0; x < 2; x++) {
   1259             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
   1260                                            offset[z][0][x], y_offset0);
   1261             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
   1262                                            offset[z][1][x], y_offset1);
   1263          }
   1264       }
   1265    }
   1266 
   1267    if (dims >= 3) {
   1268       LLVMValueRef z_subcoord[2];
   1269       lp_build_sample_partial_offset(&bld->int_coord_bld,
   1270                                      1,
   1271                                      z_icoord0, z_stride,
   1272                                      &z_offset0, &z_subcoord[0]);
   1273       lp_build_sample_partial_offset(&bld->int_coord_bld,
   1274                                      1,
   1275                                      z_icoord1, z_stride,
   1276                                      &z_offset1, &z_subcoord[1]);
   1277       for (y = 0; y < 2; y++) {
   1278          for (x = 0; x < 2; x++) {
   1279             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
   1280                                            offset[0][y][x], z_offset0);
   1281             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
   1282                                            offset[1][y][x], z_offset1);
   1283          }
   1284       }
   1285    }
   1286    else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
   1287       LLVMValueRef z_offset;
   1288       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
   1289       for (y = 0; y < 2; y++) {
   1290          for (x = 0; x < 2; x++) {
   1291             /* The r coord is the cube face in [0,5] */
   1292             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
   1293                                            offset[0][y][x], z_offset);
   1294          }
   1295       }
   1296    }
   1297 
   1298    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
   1299                                       x_subcoord, y_subcoord,
   1300                                       s_fpart, t_fpart, r_fpart,
   1301                                       colors_lo, colors_hi);
   1302 }
   1303 
   1304 
   1305 /**
   1306  * Sample the texture/mipmap using given image filter and mip filter.
   1307  * data0_ptr and data1_ptr point to the two mipmap levels to sample
   1308  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
   1309  * If we're using nearest miplevel sampling the '1' values will be null/unused.
   1310  */
   1311 static void
   1312 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
   1313                        unsigned img_filter,
   1314                        unsigned mip_filter,
   1315                        LLVMValueRef s,
   1316                        LLVMValueRef t,
   1317                        LLVMValueRef r,
   1318                        LLVMValueRef ilevel0,
   1319                        LLVMValueRef ilevel1,
   1320                        LLVMValueRef lod_fpart,
   1321                        LLVMValueRef colors_lo_var,
   1322                        LLVMValueRef colors_hi_var)
   1323 {
   1324    LLVMBuilderRef builder = bld->gallivm->builder;
   1325    LLVMValueRef size0;
   1326    LLVMValueRef size1;
   1327    LLVMValueRef row_stride0_vec = NULL;
   1328    LLVMValueRef row_stride1_vec = NULL;
   1329    LLVMValueRef img_stride0_vec = NULL;
   1330    LLVMValueRef img_stride1_vec = NULL;
   1331    LLVMValueRef data_ptr0;
   1332    LLVMValueRef data_ptr1;
   1333    LLVMValueRef colors0_lo, colors0_hi;
   1334    LLVMValueRef colors1_lo, colors1_hi;
   1335 
   1336    /* sample the first mipmap level */
   1337    lp_build_mipmap_level_sizes(bld, ilevel0,
   1338                                &size0,
   1339                                &row_stride0_vec, &img_stride0_vec);
   1340    data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   1341    if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
   1342       if (img_filter == PIPE_TEX_FILTER_NEAREST) {
   1343          lp_build_sample_image_nearest_afloat(bld,
   1344                                               size0,
   1345                                               row_stride0_vec, img_stride0_vec,
   1346                                               data_ptr0, s, t, r,
   1347                                               &colors0_lo, &colors0_hi);
   1348       }
   1349       else {
   1350          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
   1351          lp_build_sample_image_linear_afloat(bld,
   1352                                              size0,
   1353                                              row_stride0_vec, img_stride0_vec,
   1354                                              data_ptr0, s, t, r,
   1355                                              &colors0_lo, &colors0_hi);
   1356       }
   1357    }
   1358    else {
   1359       if (img_filter == PIPE_TEX_FILTER_NEAREST) {
   1360          lp_build_sample_image_nearest(bld,
   1361                                        size0,
   1362                                        row_stride0_vec, img_stride0_vec,
   1363                                        data_ptr0, s, t, r,
   1364                                        &colors0_lo, &colors0_hi);
   1365       }
   1366       else {
   1367          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
   1368          lp_build_sample_image_linear(bld,
   1369                                       size0,
   1370                                       row_stride0_vec, img_stride0_vec,
   1371                                       data_ptr0, s, t, r,
   1372                                       &colors0_lo, &colors0_hi);
   1373       }
   1374    }
   1375 
   1376    /* Store the first level's colors in the output variables */
   1377    LLVMBuildStore(builder, colors0_lo, colors_lo_var);
   1378    LLVMBuildStore(builder, colors0_hi, colors_hi_var);
   1379 
   1380    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
   1381       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
   1382                                                      bld->perquadf_bld.type, 256.0);
   1383       LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm, bld->perquadi_bld.type);
   1384       struct lp_build_if_state if_ctx;
   1385       LLVMValueRef need_lerp;
   1386       unsigned num_quads = bld->coord_bld.type.length / 4;
   1387       unsigned i;
   1388 
   1389       lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
   1390       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
   1391 
   1392       /* need_lerp = lod_fpart > 0 */
   1393       if (num_quads == 1) {
   1394          need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
   1395                                    lod_fpart, bld->perquadi_bld.zero,
   1396                                    "need_lerp");
   1397       }
   1398       else {
   1399          /*
   1400           * We'll do mip filtering if any of the quads need it.
   1401           * It might be better to split the vectors here and only fetch/filter
   1402           * quads which need it.
   1403           */
   1404          /*
   1405           * We need to clamp lod_fpart here since we can get negative
   1406           * values which would screw up filtering if not all
   1407           * lod_fpart values have same sign.
   1408           * We can however then skip the greater than comparison.
   1409           */
   1410          lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart,
   1411                                   bld->perquadi_bld.zero);
   1412          need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, lod_fpart);
   1413       }
   1414 
   1415       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
   1416       {
   1417          struct lp_build_context h16_bld;
   1418 
   1419          lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
   1420 
   1421          /* sample the second mipmap level */
   1422          lp_build_mipmap_level_sizes(bld, ilevel1,
   1423                                      &size1,
   1424                                      &row_stride1_vec, &img_stride1_vec);
   1425          data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
   1426 
   1427          if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
   1428             if (img_filter == PIPE_TEX_FILTER_NEAREST) {
   1429                lp_build_sample_image_nearest_afloat(bld,
   1430                                                     size1,
   1431                                                     row_stride1_vec, img_stride1_vec,
   1432                                                     data_ptr1, s, t, r,
   1433                                                     &colors1_lo, &colors1_hi);
   1434             }
   1435             else {
   1436                lp_build_sample_image_linear_afloat(bld,
   1437                                                    size1,
   1438                                                    row_stride1_vec, img_stride1_vec,
   1439                                                    data_ptr1, s, t, r,
   1440                                                    &colors1_lo, &colors1_hi);
   1441             }
   1442          }
   1443          else {
   1444             if (img_filter == PIPE_TEX_FILTER_NEAREST) {
   1445                lp_build_sample_image_nearest(bld,
   1446                                              size1,
   1447                                              row_stride1_vec, img_stride1_vec,
   1448                                              data_ptr1, s, t, r,
   1449                                              &colors1_lo, &colors1_hi);
   1450             }
   1451             else {
   1452                lp_build_sample_image_linear(bld,
   1453                                             size1,
   1454                                             row_stride1_vec, img_stride1_vec,
   1455                                             data_ptr1, s, t, r,
   1456                                             &colors1_lo, &colors1_hi);
   1457             }
   1458          }
   1459 
   1460          /* interpolate samples from the two mipmap levels */
   1461 
   1462          if (num_quads == 1) {
   1463             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
   1464             lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
   1465 
   1466 #if HAVE_LLVM == 0x208
   1467             /* This is a work-around for a bug in LLVM 2.8.
   1468              * Evidently, something goes wrong in the construction of the
   1469              * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
   1470              * to force the vector to be properly constructed.
   1471              * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
   1472              */
   1473             {
   1474                LLVMValueRef shuffles[8], shuffle;
   1475                assert(h16_bld.type.length <= Elements(shuffles));
   1476                for (i = 0; i < h16_bld.type.length; i++)
   1477                   shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
   1478                shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
   1479                lod_fpart = LLVMBuildShuffleVector(builder,
   1480                                                   lod_fpart, lod_fpart,
   1481                                                   shuffle, "");
   1482             }
   1483 #endif
   1484 
   1485             colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
   1486                                        colors0_lo, colors1_lo);
   1487             colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
   1488                                        colors0_hi, colors1_hi);
   1489          }
   1490          else {
   1491             LLVMValueRef lod_parts[LP_MAX_VECTOR_LENGTH/16];
   1492             struct lp_type perquadi16_type = bld->perquadi_bld.type;
   1493             perquadi16_type.width /= 2;
   1494             perquadi16_type.length *= 2;
   1495             lod_fpart = LLVMBuildBitCast(builder, lod_fpart,
   1496                                          lp_build_vec_type(bld->gallivm,
   1497                                                            perquadi16_type), "");
   1498             /* XXX this only works for exactly 2 quads. More quads need shuffle */
   1499             assert(num_quads == 2);
   1500             for (i = 0; i < num_quads; i++) {
   1501                LLVMValueRef indexi2 = lp_build_const_int32(bld->gallivm, i*2);
   1502                lod_parts[i] = lp_build_extract_broadcast(bld->gallivm,
   1503                                                          perquadi16_type,
   1504                                                          h16_bld.type,
   1505                                                          lod_fpart,
   1506                                                          indexi2);
   1507             }
   1508             colors0_lo = lp_build_lerp(&h16_bld, lod_parts[0],
   1509                                        colors0_lo, colors1_lo);
   1510             colors0_hi = lp_build_lerp(&h16_bld, lod_parts[1],
   1511                                        colors0_hi, colors1_hi);
   1512          }
   1513 
   1514          LLVMBuildStore(builder, colors0_lo, colors_lo_var);
   1515          LLVMBuildStore(builder, colors0_hi, colors_hi_var);
   1516       }
   1517       lp_build_endif(&if_ctx);
   1518    }
   1519 }
   1520 
   1521 
   1522 
   1523 /**
   1524  * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
   1525  * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
   1526  * but only limited texture coord wrap modes.
   1527  */
   1528 void
   1529 lp_build_sample_aos(struct lp_build_sample_context *bld,
   1530                     unsigned unit,
   1531                     LLVMValueRef s,
   1532                     LLVMValueRef t,
   1533                     LLVMValueRef r,
   1534                     LLVMValueRef lod_ipart,
   1535                     LLVMValueRef lod_fpart,
   1536                     LLVMValueRef ilevel0,
   1537                     LLVMValueRef ilevel1,
   1538                     LLVMValueRef texel_out[4])
   1539 {
   1540    struct lp_build_context *int_bld = &bld->int_bld;
   1541    LLVMBuilderRef builder = bld->gallivm->builder;
   1542    const unsigned mip_filter = bld->static_state->min_mip_filter;
   1543    const unsigned min_filter = bld->static_state->min_img_filter;
   1544    const unsigned mag_filter = bld->static_state->mag_img_filter;
   1545    const unsigned dims = bld->dims;
   1546    LLVMValueRef packed, packed_lo, packed_hi;
   1547    LLVMValueRef unswizzled[4];
   1548    struct lp_build_context h16_bld;
   1549 
   1550    /* we only support the common/simple wrap modes at this time */
   1551    assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
   1552    if (dims >= 2)
   1553       assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
   1554    if (dims >= 3)
   1555       assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
   1556 
   1557 
   1558    /* make 16-bit fixed-pt builder context */
   1559    lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));
   1560 
   1561    /*
   1562     * Get/interpolate texture colors.
   1563     */
   1564 
   1565    packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
   1566    packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");
   1567 
   1568    if (min_filter == mag_filter) {
   1569       /* no need to distinguish between minification and magnification */
   1570       lp_build_sample_mipmap(bld,
   1571                              min_filter, mip_filter,
   1572                              s, t, r,
   1573                              ilevel0, ilevel1, lod_fpart,
   1574                              packed_lo, packed_hi);
   1575    }
   1576    else {
   1577       /* Emit conditional to choose min image filter or mag image filter
   1578        * depending on the lod being > 0 or <= 0, respectively.
   1579        */
   1580       struct lp_build_if_state if_ctx;
   1581       LLVMValueRef minify;
   1582 
   1583       /* minify = lod >= 0.0 */
   1584       minify = LLVMBuildICmp(builder, LLVMIntSGE,
   1585                              lod_ipart, int_bld->zero, "");
   1586 
   1587       lp_build_if(&if_ctx, bld->gallivm, minify);
   1588       {
   1589          /* Use the minification filter */
   1590          lp_build_sample_mipmap(bld,
   1591                                 min_filter, mip_filter,
   1592                                 s, t, r,
   1593                                 ilevel0, ilevel1, lod_fpart,
   1594                                 packed_lo, packed_hi);
   1595       }
   1596       lp_build_else(&if_ctx);
   1597       {
   1598          /* Use the magnification filter */
   1599          lp_build_sample_mipmap(bld,
   1600                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
   1601                                 s, t, r,
   1602                                 ilevel0, NULL, NULL,
   1603                                 packed_lo, packed_hi);
   1604       }
   1605       lp_build_endif(&if_ctx);
   1606    }
   1607 
   1608    /*
   1609     * combine the values stored in 'packed_lo' and 'packed_hi' variables
   1610     * into 'packed'
   1611     */
   1612    packed = lp_build_pack2(bld->gallivm,
   1613                            h16_bld.type, lp_type_unorm(8, bld->vector_width),
   1614                            LLVMBuildLoad(builder, packed_lo, ""),
   1615                            LLVMBuildLoad(builder, packed_hi, ""));
   1616 
   1617    /*
   1618     * Convert to SoA and swizzle.
   1619     */
   1620    lp_build_rgba8_to_f32_soa(bld->gallivm,
   1621                              bld->texel_type,
   1622                              packed, unswizzled);
   1623 
   1624    if (util_format_is_rgba8_variant(bld->format_desc)) {
   1625       lp_build_format_swizzle_soa(bld->format_desc,
   1626                                   &bld->texel_bld,
   1627                                   unswizzled, texel_out);
   1628    }
   1629    else {
   1630       texel_out[0] = unswizzled[0];
   1631       texel_out[1] = unswizzled[1];
   1632       texel_out[2] = unswizzled[2];
   1633       texel_out[3] = unswizzled[3];
   1634    }
   1635 }
   1636