Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * @file
     30  * AoS pixel format manipulation.
     31  *
     32  * @author Jose Fonseca <jfonseca (at) vmware.com>
     33  */
     34 
     35 
     36 #include "util/u_format.h"
     37 #include "util/u_memory.h"
     38 #include "util/u_math.h"
     39 #include "util/u_pointer.h"
     40 #include "util/u_string.h"
     41 
     42 #include "lp_bld_arit.h"
     43 #include "lp_bld_init.h"
     44 #include "lp_bld_type.h"
     45 #include "lp_bld_flow.h"
     46 #include "lp_bld_const.h"
     47 #include "lp_bld_conv.h"
     48 #include "lp_bld_swizzle.h"
     49 #include "lp_bld_gather.h"
     50 #include "lp_bld_debug.h"
     51 #include "lp_bld_format.h"
     52 
     53 
     54 /**
     55  * Basic swizzling.  Rearrange the order of the unswizzled array elements
     56  * according to the format description.  PIPE_SWIZZLE_ZERO/ONE are supported
     57  * too.
     58  * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
     59  */
     60 LLVMValueRef
     61 lp_build_format_swizzle_aos(const struct util_format_description *desc,
     62                             struct lp_build_context *bld,
     63                             LLVMValueRef unswizzled)
     64 {
     65    unsigned char swizzles[4];
     66    unsigned chan;
     67 
     68    assert(bld->type.length % 4 == 0);
     69 
     70    for (chan = 0; chan < 4; ++chan) {
     71       enum util_format_swizzle swizzle;
     72 
     73       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
     74          /*
     75           * For ZS formats do RGBA = ZZZ1
     76           */
     77          if (chan == 3) {
     78             swizzle = UTIL_FORMAT_SWIZZLE_1;
     79          } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
     80             swizzle = UTIL_FORMAT_SWIZZLE_0;
     81          } else {
     82             swizzle = desc->swizzle[0];
     83          }
     84       } else {
     85          swizzle = desc->swizzle[chan];
     86       }
     87       swizzles[chan] = swizzle;
     88    }
     89 
     90    return lp_build_swizzle_aos(bld, unswizzled, swizzles);
     91 }
     92 
     93 
     94 /**
     95  * Whether the format matches the vector type, apart of swizzles.
     96  */
     97 static INLINE boolean
     98 format_matches_type(const struct util_format_description *desc,
     99                     struct lp_type type)
    100 {
    101    enum util_format_type chan_type;
    102    unsigned chan;
    103 
    104    assert(type.length % 4 == 0);
    105 
    106    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
    107        desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
    108        desc->block.width != 1 ||
    109        desc->block.height != 1) {
    110       return FALSE;
    111    }
    112 
    113    if (type.floating) {
    114       chan_type = UTIL_FORMAT_TYPE_FLOAT;
    115    } else if (type.fixed) {
    116       chan_type = UTIL_FORMAT_TYPE_FIXED;
    117    } else if (type.sign) {
    118       chan_type = UTIL_FORMAT_TYPE_SIGNED;
    119    } else {
    120       chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
    121    }
    122 
    123    for (chan = 0; chan < desc->nr_channels; ++chan) {
    124       if (desc->channel[chan].size != type.width) {
    125          return FALSE;
    126       }
    127 
    128       if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
    129          if (desc->channel[chan].type != chan_type ||
    130              desc->channel[chan].normalized != type.norm) {
    131             return FALSE;
    132          }
    133       }
    134    }
    135 
    136    return TRUE;
    137 }
    138 
    139 
    140 /**
    141  * Unpack a single pixel into its RGBA components.
    142  *
    143  * @param desc  the pixel format for the packed pixel value
    144  * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
    145  *
    146  * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
    147  */
    148 static INLINE LLVMValueRef
    149 lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
    150                                const struct util_format_description *desc,
    151                                LLVMValueRef packed)
    152 {
    153    LLVMBuilderRef builder = gallivm->builder;
    154    LLVMValueRef shifted, casted, scaled, masked;
    155    LLVMValueRef shifts[4];
    156    LLVMValueRef masks[4];
    157    LLVMValueRef scales[4];
    158 
    159    boolean normalized;
    160    boolean needs_uitofp;
    161    unsigned shift;
    162    unsigned i;
    163 
    164    /* TODO: Support more formats */
    165    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
    166    assert(desc->block.width == 1);
    167    assert(desc->block.height == 1);
    168    assert(desc->block.bits <= 32);
    169 
    170    /* Do the intermediate integer computations with 32bit integers since it
    171     * matches floating point size */
    172    assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));
    173 
    174    /* Broadcast the packed value to all four channels
    175     * before: packed = BGRA
    176     * after: packed = {BGRA, BGRA, BGRA, BGRA}
    177     */
    178    packed = LLVMBuildInsertElement(builder,
    179                                    LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
    180                                    packed,
    181                                    LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
    182                                    "");
    183    packed = LLVMBuildShuffleVector(builder,
    184                                    packed,
    185                                    LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
    186                                    LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
    187                                    "");
    188 
    189    /* Initialize vector constants */
    190    normalized = FALSE;
    191    needs_uitofp = FALSE;
    192    shift = 0;
    193 
    194    /* Loop over 4 color components */
    195    for (i = 0; i < 4; ++i) {
    196       unsigned bits = desc->channel[i].size;
    197 
    198       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
    199          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
    200          masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
    201          scales[i] =  LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
    202       }
    203       else {
    204          unsigned long long mask = (1ULL << bits) - 1;
    205 
    206          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
    207 
    208          if (bits == 32) {
    209             needs_uitofp = TRUE;
    210          }
    211 
    212          shifts[i] = lp_build_const_int32(gallivm, shift);
    213          masks[i] = lp_build_const_int32(gallivm, mask);
    214 
    215          if (desc->channel[i].normalized) {
    216             scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
    217             normalized = TRUE;
    218          }
    219          else
    220             scales[i] =  lp_build_const_float(gallivm, 1.0);
    221       }
    222 
    223       shift += bits;
    224    }
    225 
    226    /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
    227     * into masked = {B, G, R, A}
    228     */
    229    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
    230    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
    231 
    232 
    233    if (!needs_uitofp) {
    234       /* UIToFP can't be expressed in SSE2 */
    235       casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
    236    } else {
    237       casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
    238    }
    239 
    240    /* At this point 'casted' may be a vector of floats such as
    241     * {255.0, 255.0, 255.0, 255.0}.  Next, if the pixel values are normalized
    242     * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
    243     */
    244 
    245    if (normalized)
    246       scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
    247    else
    248       scaled = casted;
    249 
    250    return scaled;
    251 }
    252 
    253 
    254 /**
    255  * Pack a single pixel.
    256  *
    257  * @param rgba 4 float vector with the unpacked components.
    258  *
    259  * XXX: This is mostly for reference and testing -- operating a single pixel at
    260  * a time is rarely if ever needed.
    261  */
    262 LLVMValueRef
    263 lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
    264                        const struct util_format_description *desc,
    265                        LLVMValueRef rgba)
    266 {
    267    LLVMBuilderRef builder = gallivm->builder;
    268    LLVMTypeRef type;
    269    LLVMValueRef packed = NULL;
    270    LLVMValueRef swizzles[4];
    271    LLVMValueRef shifted, casted, scaled, unswizzled;
    272    LLVMValueRef shifts[4];
    273    LLVMValueRef scales[4];
    274    boolean normalized;
    275    unsigned shift;
    276    unsigned i, j;
    277 
    278    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
    279    assert(desc->block.width == 1);
    280    assert(desc->block.height == 1);
    281 
    282    type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);
    283 
    284    /* Unswizzle the color components into the source vector. */
    285    for (i = 0; i < 4; ++i) {
    286       for (j = 0; j < 4; ++j) {
    287          if (desc->swizzle[j] == i)
    288             break;
    289       }
    290       if (j < 4)
    291          swizzles[i] = lp_build_const_int32(gallivm, j);
    292       else
    293          swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
    294    }
    295 
    296    unswizzled = LLVMBuildShuffleVector(builder, rgba,
    297                                        LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),
    298                                        LLVMConstVector(swizzles, 4), "");
    299 
    300    normalized = FALSE;
    301    shift = 0;
    302    for (i = 0; i < 4; ++i) {
    303       unsigned bits = desc->channel[i].size;
    304 
    305       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
    306          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
    307          scales[i] =  LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));
    308       }
    309       else {
    310          unsigned mask = (1 << bits) - 1;
    311 
    312          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
    313          assert(bits < 32);
    314 
    315          shifts[i] = lp_build_const_int32(gallivm, shift);
    316 
    317          if (desc->channel[i].normalized) {
    318             scales[i] = lp_build_const_float(gallivm, mask);
    319             normalized = TRUE;
    320          }
    321          else
    322             scales[i] = lp_build_const_float(gallivm, 1.0);
    323       }
    324 
    325       shift += bits;
    326    }
    327 
    328    if (normalized)
    329       scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
    330    else
    331       scaled = unswizzled;
    332 
    333    casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");
    334 
    335    shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
    336 
    337    /* Bitwise or all components */
    338    for (i = 0; i < 4; ++i) {
    339       if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
    340          LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
    341                                                lp_build_const_int32(gallivm, i), "");
    342          if (packed)
    343             packed = LLVMBuildOr(builder, packed, component, "");
    344          else
    345             packed = component;
    346       }
    347    }
    348 
    349    if (!packed)
    350       packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
    351 
    352    if (desc->block.bits < 32)
    353       packed = LLVMBuildTrunc(builder, packed, type, "");
    354 
    355    return packed;
    356 }
    357 
    358 
    359 
    360 
    361 /**
    362  * Fetch a pixel into a 4 float AoS.
    363  *
    364  * \param format_desc  describes format of the image we're fetching from
    365  * \param ptr  address of the pixel block (or the texel if uncompressed)
    366  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
    367  *              these will always be (0, 0).
    368  * \return  a 4 element vector with the pixel's RGBA values.
    369  */
    370 LLVMValueRef
    371 lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
    372                         const struct util_format_description *format_desc,
    373                         struct lp_type type,
    374                         LLVMValueRef base_ptr,
    375                         LLVMValueRef offset,
    376                         LLVMValueRef i,
    377                         LLVMValueRef j)
    378 {
    379    LLVMBuilderRef builder = gallivm->builder;
    380    unsigned num_pixels = type.length / 4;
    381    struct lp_build_context bld;
    382 
    383    assert(type.length <= LP_MAX_VECTOR_LENGTH);
    384    assert(type.length % 4 == 0);
    385 
    386    lp_build_context_init(&bld, gallivm, type);
    387 
    388    /*
    389     * Trivial case
    390     *
    391     * The format matches the type (apart of a swizzle) so no need for
    392     * scaling or converting.
    393     */
    394 
    395    if (format_matches_type(format_desc, type) &&
    396        format_desc->block.bits <= type.width * 4 &&
    397        util_is_power_of_two(format_desc->block.bits)) {
    398       LLVMValueRef packed;
    399 
    400       /*
    401        * The format matches the type (apart of a swizzle) so no need for
    402        * scaling or converting.
    403        */
    404 
    405       packed = lp_build_gather(gallivm, type.length/4,
    406                                format_desc->block.bits, type.width*4,
    407                                base_ptr, offset);
    408 
    409       assert(format_desc->block.bits <= type.width * type.length);
    410 
    411       packed = LLVMBuildBitCast(gallivm->builder, packed,
    412                                 lp_build_vec_type(gallivm, type), "");
    413 
    414       return lp_build_format_swizzle_aos(format_desc, &bld, packed);
    415    }
    416 
    417    /*
    418     * Bit arithmetic
    419     */
    420 
    421    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
    422        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
    423         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
    424        format_desc->block.width == 1 &&
    425        format_desc->block.height == 1 &&
    426        util_is_power_of_two(format_desc->block.bits) &&
    427        format_desc->block.bits <= 32 &&
    428        format_desc->is_bitmask &&
    429        !format_desc->is_mixed &&
    430        (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
    431         format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {
    432 
    433       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
    434       LLVMValueRef res;
    435       unsigned k;
    436 
    437       /*
    438        * Unpack a pixel at a time into a <4 x float> RGBA vector
    439        */
    440 
    441       for (k = 0; k < num_pixels; ++k) {
    442          LLVMValueRef packed;
    443 
    444          packed = lp_build_gather_elem(gallivm, num_pixels,
    445                                        format_desc->block.bits, 32,
    446                                        base_ptr, offset, k);
    447 
    448          tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
    449                                                   format_desc,
    450                                                   packed);
    451       }
    452 
    453       /*
    454        * Type conversion.
    455        *
    456        * TODO: We could avoid floating conversion for integer to
    457        * integer conversions.
    458        */
    459 
    460       if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
    461          debug_printf("%s: unpacking %s with floating point\n",
    462                       __FUNCTION__, format_desc->short_name);
    463       }
    464 
    465       lp_build_conv(gallivm,
    466                     lp_float32_vec4_type(),
    467                     type,
    468                     tmps, num_pixels, &res, 1);
    469 
    470       return lp_build_format_swizzle_aos(format_desc, &bld, res);
    471    }
    472 
    473    /* If all channels are of same type and we are not using half-floats */
    474    if (util_format_is_array(format_desc)) {
    475       return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset);
    476    }
    477 
    478    /*
    479     * YUV / subsampled formats
    480     */
    481 
    482    if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
    483       struct lp_type tmp_type;
    484       LLVMValueRef tmp;
    485 
    486       memset(&tmp_type, 0, sizeof tmp_type);
    487       tmp_type.width = 8;
    488       tmp_type.length = num_pixels * 4;
    489       tmp_type.norm = TRUE;
    490 
    491       tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
    492                                                format_desc,
    493                                                num_pixels,
    494                                                base_ptr,
    495                                                offset,
    496                                                i, j);
    497 
    498       lp_build_conv(gallivm,
    499                     tmp_type, type,
    500                     &tmp, 1, &tmp, 1);
    501 
    502       return tmp;
    503    }
    504 
    505    /*
    506     * Fallback to util_format_description::fetch_rgba_8unorm().
    507     */
    508 
    509    if (format_desc->fetch_rgba_8unorm &&
    510        !type.floating && type.width == 8 && !type.sign && type.norm) {
    511       /*
    512        * Fallback to calling util_format_description::fetch_rgba_8unorm.
    513        *
    514        * This is definitely not the most efficient way of fetching pixels, as
    515        * we miss the opportunity to do vectorization, but this it is a
    516        * convenient for formats or scenarios for which there was no opportunity
    517        * or incentive to optimize.
    518        */
    519 
    520       LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
    521       LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
    522       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
    523       LLVMValueRef function;
    524       LLVMValueRef tmp_ptr;
    525       LLVMValueRef tmp;
    526       LLVMValueRef res;
    527       unsigned k;
    528 
    529       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
    530          debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",
    531                       __FUNCTION__, format_desc->short_name);
    532       }
    533 
    534       /*
    535        * Declare and bind format_desc->fetch_rgba_8unorm().
    536        */
    537 
    538       {
    539          /*
    540           * Function to call looks like:
    541           *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
    542           */
    543          LLVMTypeRef ret_type;
    544          LLVMTypeRef arg_types[4];
    545          LLVMTypeRef function_type;
    546 
    547          ret_type = LLVMVoidTypeInContext(gallivm->context);
    548          arg_types[0] = pi8t;
    549          arg_types[1] = pi8t;
    550          arg_types[2] = i32t;
    551          arg_types[3] = i32t;
    552          function_type = LLVMFunctionType(ret_type, arg_types,
    553                                           Elements(arg_types), 0);
    554 
    555          /* make const pointer for the C fetch_rgba_8unorm function */
    556          function = lp_build_const_int_pointer(gallivm,
    557             func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
    558 
    559          /* cast the callee pointer to the function's type */
    560          function = LLVMBuildBitCast(builder, function,
    561                                      LLVMPointerType(function_type, 0),
    562                                      "cast callee");
    563       }
    564 
    565       tmp_ptr = lp_build_alloca(gallivm, i32t, "");
    566 
    567       res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
    568 
    569       /*
    570        * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
    571        * in the SoA vectors.
    572        */
    573 
    574       for (k = 0; k < num_pixels; ++k) {
    575          LLVMValueRef index = lp_build_const_int32(gallivm, k);
    576          LLVMValueRef args[4];
    577 
    578          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
    579          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
    580                                             base_ptr, offset, k);
    581 
    582          if (num_pixels == 1) {
    583             args[2] = i;
    584             args[3] = j;
    585          }
    586          else {
    587             args[2] = LLVMBuildExtractElement(builder, i, index, "");
    588             args[3] = LLVMBuildExtractElement(builder, j, index, "");
    589          }
    590 
    591          LLVMBuildCall(builder, function, args, Elements(args), "");
    592 
    593          tmp = LLVMBuildLoad(builder, tmp_ptr, "");
    594 
    595          if (num_pixels == 1) {
    596             res = tmp;
    597          }
    598          else {
    599             res = LLVMBuildInsertElement(builder, res, tmp, index, "");
    600          }
    601       }
    602 
    603       /* Bitcast from <n x i32> to <4n x i8> */
    604       res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
    605 
    606       return res;
    607    }
    608 
    609    /*
    610     * Fallback to util_format_description::fetch_rgba_float().
    611     */
    612 
    613    if (format_desc->fetch_rgba_float) {
    614       /*
    615        * Fallback to calling util_format_description::fetch_rgba_float.
    616        *
    617        * This is definitely not the most efficient way of fetching pixels, as
    618        * we miss the opportunity to do vectorization, but this it is a
    619        * convenient for formats or scenarios for which there was no opportunity
    620        * or incentive to optimize.
    621        */
    622 
    623       LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
    624       LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
    625       LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
    626       LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
    627       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
    628       LLVMValueRef function;
    629       LLVMValueRef tmp_ptr;
    630       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
    631       LLVMValueRef res;
    632       unsigned k;
    633 
    634       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
    635          debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",
    636                       __FUNCTION__, format_desc->short_name);
    637       }
    638 
    639       /*
    640        * Declare and bind format_desc->fetch_rgba_float().
    641        */
    642 
    643       {
    644          /*
    645           * Function to call looks like:
    646           *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
    647           */
    648          LLVMTypeRef ret_type;
    649          LLVMTypeRef arg_types[4];
    650 
    651          ret_type = LLVMVoidTypeInContext(gallivm->context);
    652          arg_types[0] = pf32t;
    653          arg_types[1] = pi8t;
    654          arg_types[2] = i32t;
    655          arg_types[3] = i32t;
    656 
    657          function = lp_build_const_func_pointer(gallivm,
    658                                                 func_to_pointer((func_pointer) format_desc->fetch_rgba_float),
    659                                                 ret_type,
    660                                                 arg_types, Elements(arg_types),
    661                                                 format_desc->short_name);
    662       }
    663 
    664       tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
    665 
    666       /*
    667        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
    668        * in the SoA vectors.
    669        */
    670 
    671       for (k = 0; k < num_pixels; ++k) {
    672          LLVMValueRef args[4];
    673 
    674          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
    675          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
    676                                             base_ptr, offset, k);
    677 
    678          if (num_pixels == 1) {
    679             args[2] = i;
    680             args[3] = j;
    681          }
    682          else {
    683             LLVMValueRef index = lp_build_const_int32(gallivm, k);
    684             args[2] = LLVMBuildExtractElement(builder, i, index, "");
    685             args[3] = LLVMBuildExtractElement(builder, j, index, "");
    686          }
    687 
    688          LLVMBuildCall(builder, function, args, Elements(args), "");
    689 
    690          tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
    691       }
    692 
    693       lp_build_conv(gallivm,
    694                     lp_float32_vec4_type(),
    695                     type,
    696                     tmps, num_pixels, &res, 1);
    697 
    698       return res;
    699    }
    700 
    701    assert(0);
    702    return lp_build_undef(gallivm, type);
    703 }
    704