Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2013 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 /**
     30  * @file
     31  * Format conversion code for "special" float formats.
     32  *
     33  * @author Roland Scheidegger <sroland (at) vmware.com>
     34  */
     35 
     36 
     37 #include "util/u_debug.h"
     38 
     39 #include "lp_bld_type.h"
     40 #include "lp_bld_const.h"
     41 #include "lp_bld_arit.h"
     42 #include "lp_bld_bitarit.h"
     43 #include "lp_bld_logic.h"
     44 #include "lp_bld_format.h"
     45 
     46 
     47 /**
     48  * Convert float32 to a float-like value with less exponent and mantissa
     49  * bits. The mantissa is still biased, and the mantissa still has an implied 1,
     50  * and there may be a sign bit.
     51  *
     52  * @param src             (vector) float value to convert
     53  * @param mantissa_bits   the number of mantissa bits
     54  * @param exponent_bits   the number of exponent bits
     55  * @param mantissa_start  the start position of the small float in result value
     56  * @param has_sign        if the small float has a sign bit
     57  *
     58  * This implements round-towards-zero (trunc) hence too large numbers get
     59  * converted to largest representable number, not infinity.
     60  * Small numbers may get converted to denorms, depending on normal
     61  * float denorm handling of the cpu.
     62  * Note that compared to the references, below, we skip any rounding bias
     63  * since we do rounding towards zero - OpenGL allows rounding towards zero
     64  * (though not preferred) and DX10 even seems to require it.
     65  * Note that this will pack mantissa, exponent and sign bit (if any) together,
     66  * and shift the result to mantissa_start.
     67  *
     68  * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
     69  * ref https://gist.github.com/rygorous/2156668
     70  */
     71 LLVMValueRef
     72 lp_build_float_to_smallfloat(struct gallivm_state *gallivm,
     73                              struct lp_type i32_type,
     74                              LLVMValueRef src,
     75                              unsigned mantissa_bits,
     76                              unsigned exponent_bits,
     77                              unsigned mantissa_start,
     78                              boolean has_sign)
     79 {
     80    LLVMBuilderRef builder = gallivm->builder;
     81    LLVMValueRef i32_floatexpmask, i32_smallexpmask, magic, normal;
     82    LLVMValueRef rescale_src, i32_roundmask, small_max;
     83    LLVMValueRef i32_qnanbit, shift, res;
     84    LLVMValueRef is_nan_or_inf, nan_or_inf, mask, i32_src;
     85    struct lp_type f32_type = lp_type_float_vec(32, 32 * i32_type.length);
     86    struct lp_build_context f32_bld, i32_bld;
     87    LLVMValueRef zero = lp_build_const_vec(gallivm, f32_type, 0.0f);
     88    unsigned exponent_start = mantissa_start + mantissa_bits;
     89    boolean always_preserve_nans = true;
     90    boolean maybe_correct_denorm_rounding = true;
     91 
     92    lp_build_context_init(&f32_bld, gallivm, f32_type);
     93    lp_build_context_init(&i32_bld, gallivm, i32_type);
     94 
     95    i32_smallexpmask = lp_build_const_int_vec(gallivm, i32_type,
     96                                              ((1 << exponent_bits) - 1) << 23);
     97    i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
     98 
     99    i32_src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, "");
    100 
    101    if (has_sign) {
    102       rescale_src = src;
    103    }
    104    else {
    105       /* clamp to pos range (can still have sign bit if NaN or negative zero) */
    106       rescale_src = lp_build_max(&f32_bld, zero, src);
    107    }
    108    rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
    109 
    110    /* "ordinary" number */
    111    /*
    112     * get rid of excess mantissa bits and sign bit
    113     * This is only really needed for correct rounding of denorms I think
    114     * but only if we use the preserve NaN path does using
    115     * src_abs instead save us any instruction.
    116     */
    117    if (maybe_correct_denorm_rounding || !always_preserve_nans) {
    118       i32_roundmask = lp_build_const_int_vec(gallivm, i32_type,
    119                                              ~((1 << (23 - mantissa_bits)) - 1) &
    120                                              0x7fffffff);
    121       rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
    122       rescale_src = lp_build_and(&i32_bld, rescale_src, i32_roundmask);
    123       rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, "");
    124    }
    125    else {
    126       rescale_src = lp_build_abs(&f32_bld, src);
    127    }
    128 
    129    /* bias exponent (and denormalize if necessary) */
    130    magic = lp_build_const_int_vec(gallivm, i32_type,
    131                                   ((1 << (exponent_bits - 1)) - 1) << 23);
    132    magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "");
    133    normal = lp_build_mul(&f32_bld, rescale_src, magic);
    134 
    135    /* clamp to max value - largest non-infinity number */
    136    small_max = lp_build_const_int_vec(gallivm, i32_type,
    137                                       (((1 << exponent_bits) - 2) << 23) |
    138                                       (((1 << mantissa_bits) - 1) << (23 - mantissa_bits)));
    139    small_max = LLVMBuildBitCast(builder, small_max, f32_bld.vec_type, "");
    140    normal = lp_build_min(&f32_bld, normal, small_max);
    141    normal = LLVMBuildBitCast(builder, normal, i32_bld.vec_type, "");
    142 
    143    /*
    144     * handle nan/inf cases
    145     * a little bit tricky since -Inf -> 0, +Inf -> +Inf, +-Nan -> +Nan
    146     * (for no sign) else ->Inf -> ->Inf too.
    147     * could use explicit "unordered" comparison checking for NaNs
    148     * which might save us from calculating src_abs too.
    149     * (Cannot actually save the comparison since we need to distinguish
    150     * Inf and NaN cases anyway, but it would be better for AVX.)
    151     */
    152    if (always_preserve_nans) {
    153       LLVMValueRef infcheck_src, is_inf, is_nan;
    154       LLVMValueRef src_abs = lp_build_abs(&f32_bld, src);
    155       src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, "");
    156 
    157       if (has_sign) {
    158          infcheck_src = src_abs;
    159       }
    160       else {
    161          infcheck_src = i32_src;
    162       }
    163       is_nan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER,
    164                                 src_abs, i32_floatexpmask);
    165       is_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL,
    166                                 infcheck_src, i32_floatexpmask);
    167       is_nan_or_inf = lp_build_or(&i32_bld, is_nan, is_inf);
    168       /* could also set more mantissa bits but need at least the highest mantissa bit */
    169       i32_qnanbit = lp_build_const_vec(gallivm, i32_type, 1 << 22);
    170       /* combine maxexp with qnanbit */
    171       nan_or_inf = lp_build_or(&i32_bld, i32_smallexpmask,
    172                                lp_build_and(&i32_bld, is_nan, i32_qnanbit));
    173    }
    174    else {
    175       /*
    176        * A couple simplifications, with mostly 2 drawbacks (so disabled):
    177        * - it will promote some SNaNs (those which only had bits set
    178        * in the mantissa part which got chopped off) to +-Infinity.
    179        * (Those bits get chopped off anyway later so can as well use
    180        * rescale_src instead of src_abs here saving the calculation of that.)
    181        * - for no sign case, it relies on the max() being used for rescale_src
    182        * to give back the NaN (which is NOT ieee754r behavior, but should work
    183        * with sse2 on a full moon (rather if I got the operand order right) -
    184        * we _don't_ have well-defined behavior specified with min/max wrt NaNs,
    185        * however, and if it gets converted to cmp/select it may not work (we
    186        * don't really have specified behavior for cmp wrt NaNs neither).
    187        */
    188       rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
    189       is_nan_or_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GEQUAL,
    190                                        rescale_src, i32_floatexpmask);
    191       /* note this will introduce excess exponent bits */
    192       nan_or_inf = rescale_src;
    193    }
    194    res = lp_build_select(&i32_bld, is_nan_or_inf, nan_or_inf, normal);
    195 
    196    if (mantissa_start > 0 || !always_preserve_nans) {
    197       /* mask off excess bits */
    198       unsigned maskbits = (1 << (mantissa_bits + exponent_bits)) - 1;
    199       mask = lp_build_const_int_vec(gallivm, i32_type,
    200                                     maskbits << (23 - mantissa_bits));
    201       res = lp_build_and(&i32_bld, res, mask);
    202    }
    203 
    204    /* add back sign bit at right position */
    205    if (has_sign) {
    206       LLVMValueRef sign;
    207       struct lp_type u32_type = lp_type_uint_vec(32, 32 * i32_type.length);
    208       struct lp_build_context u32_bld;
    209       lp_build_context_init(&u32_bld, gallivm, u32_type);
    210 
    211       mask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000);
    212       shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits);
    213       sign = lp_build_and(&i32_bld, mask, i32_src);
    214       sign = lp_build_shr(&u32_bld, sign, shift);
    215       res = lp_build_or(&i32_bld, sign, res);
    216    }
    217 
    218    /* shift to final position */
    219    if (exponent_start < 23) {
    220       shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start);
    221       res = lp_build_shr(&i32_bld, res, shift);
    222    }
    223    else {
    224       shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23);
    225       res = lp_build_shl(&i32_bld, res, shift);
    226    }
    227    return res;
    228 }
    229 
    230 
    231 /**
    232  * Convert rgba float SoA values to packed r11g11b10 values.
    233  *
    234  * @param src   SoA float (vector) values to convert.
    235  */
    236 LLVMValueRef
    237 lp_build_float_to_r11g11b10(struct gallivm_state *gallivm,
    238                             LLVMValueRef *src)
    239 {
    240    LLVMValueRef dst, rcomp, bcomp, gcomp;
    241    struct lp_build_context i32_bld;
    242    LLVMTypeRef src_type = LLVMTypeOf(*src);
    243    unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
    244                             LLVMGetVectorSize(src_type) : 1;
    245    struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
    246 
    247    lp_build_context_init(&i32_bld, gallivm, i32_type);
    248 
    249    /* "rescale" and put in right position */
    250    rcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[0], 6, 5, 0, false);
    251    gcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[1], 6, 5, 11, false);
    252    bcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[2], 5, 5, 22, false);
    253 
    254    /* combine the values */
    255    dst = lp_build_or(&i32_bld, rcomp, gcomp);
    256    return lp_build_or(&i32_bld, dst, bcomp);
    257 }
    258 
    259 
    260 /**
    261  * Convert a float-like value with less exponent and mantissa
    262  * bits than a normal float32 to a float32. The mantissa of
    263  * the source value is assumed to have an implied 1, and the exponent
    264  * is biased. There may be a sign bit.
    265  * The source value to extract must be in a 32bit int (bits not part of
    266  * the value to convert will be masked off).
    267  * This works for things like 11-bit floats or half-floats,
    268  * mantissa, exponent (and sign if present) must be packed
    269  * the same as they are in a ordinary float.
    270  *
    271  * @param src             (vector) value to convert
    272  * @param mantissa_bits   the number of mantissa bits
    273  * @param exponent_bits   the number of exponent bits
    274  * @param mantissa_start  the bit start position of the packed component
    275  * @param has_sign        if the small float has a sign bit
    276  *
    277  * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
    278  * ref https://gist.github.com/rygorous/2156668
    279  */
    280 LLVMValueRef
    281 lp_build_smallfloat_to_float(struct gallivm_state *gallivm,
    282                              struct lp_type f32_type,
    283                              LLVMValueRef src,
    284                              unsigned mantissa_bits,
    285                              unsigned exponent_bits,
    286                              unsigned mantissa_start,
    287                              boolean has_sign)
    288 {
    289    LLVMBuilderRef builder = gallivm->builder;
    290    LLVMValueRef smallexpmask, i32_floatexpmask, magic;
    291    LLVMValueRef wasinfnan, tmp, res, shift, maskabs, srcabs, sign;
    292    unsigned exponent_start = mantissa_start + mantissa_bits;
    293    struct lp_type i32_type = lp_type_int_vec(32, 32 * f32_type.length);
    294    struct lp_build_context f32_bld, i32_bld;
    295 
    296    lp_build_context_init(&f32_bld, gallivm, f32_type);
    297    lp_build_context_init(&i32_bld, gallivm, i32_type);
    298 
    299    /* extract the component to "float position" */
    300    if (exponent_start < 23) {
    301       shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start);
    302       src = lp_build_shl(&i32_bld, src, shift);
    303    }
    304    else {
    305       shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23);
    306       src = lp_build_shr(&i32_bld, src, shift);
    307    }
    308    maskabs = lp_build_const_int_vec(gallivm, i32_type,
    309                                     ((1 << (mantissa_bits + exponent_bits)) - 1)
    310                                     << (23 - mantissa_bits));
    311    srcabs = lp_build_and(&i32_bld, src, maskabs);
    312 
    313    /* now do the actual scaling */
    314    smallexpmask = lp_build_const_int_vec(gallivm, i32_type,
    315                                          ((1 << exponent_bits) - 1) << 23);
    316    i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
    317 
    318    if (0) {
    319      /*
    320       * Note that this code path, while simpler, will convert small
    321       * float denorms to floats according to current cpu denorm mode, if
    322       * denorms are disabled it will flush them to zero!
    323       * If cpu denorms are enabled, it should be faster though as long as
    324       * there's no denorms in the inputs, but if there are actually denorms
    325       * it's likely to be an order of magnitude slower (on x86 cpus).
    326       */
    327 
    328       srcabs = LLVMBuildBitCast(builder, srcabs, f32_bld.vec_type, "");
    329 
    330       /*
    331        * magic number has exponent new exp bias + (new exp bias - old exp bias),
    332        * mantissa is 0.
    333        */
    334       magic = lp_build_const_int_vec(gallivm, i32_type,
    335                                      (255 - (1 << (exponent_bits - 1))) << 23);
    336       magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "");
    337 
    338       /* adjust exponent and fix denorms */
    339       res = lp_build_mul(&f32_bld, srcabs, magic);
    340 
    341       /*
    342        * if exp was max (== NaN or Inf) set new exp to max (keep mantissa),
    343        * so a simple "or" will do (because exp adjust will leave mantissa intact)
    344        */
    345       /* use float compare (better for AVX 8-wide / no AVX2 but else should use int) */
    346       smallexpmask = LLVMBuildBitCast(builder, smallexpmask, f32_bld.vec_type, "");
    347       wasinfnan = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GEQUAL, srcabs, smallexpmask);
    348       res = LLVMBuildBitCast(builder, res, i32_bld.vec_type, "");
    349       tmp = lp_build_and(&i32_bld, i32_floatexpmask, wasinfnan);
    350       res = lp_build_or(&i32_bld, tmp, res);
    351    }
    352 
    353    else {
    354       LLVMValueRef exp_one, isdenorm, denorm, normal, exp_adj;
    355 
    356       /* denorm (or zero) if exponent is zero */
    357       exp_one = lp_build_const_int_vec(gallivm, i32_type, 1 << 23);
    358       isdenorm = lp_build_cmp(&i32_bld, PIPE_FUNC_LESS, srcabs, exp_one);
    359 
    360       /* inf or nan if exponent is max */
    361       wasinfnan = lp_build_cmp(&i32_bld, PIPE_FUNC_GEQUAL, srcabs, smallexpmask);
    362 
    363       /* for denormal (or zero), add (== or) magic exp to mantissa (== srcabs) (as int)
    364        * then subtract it (as float).
    365        * Another option would be to just do inttofp then do a rescale mul.
    366        */
    367       magic = lp_build_const_int_vec(gallivm, i32_type,
    368                                      (127 - ((1 << (exponent_bits - 1)) - 2)) << 23);
    369       denorm = lp_build_or(&i32_bld, srcabs, magic);
    370       denorm = LLVMBuildBitCast(builder, denorm, f32_bld.vec_type, "");
    371       denorm = lp_build_sub(&f32_bld, denorm,
    372                             LLVMBuildBitCast(builder, magic, f32_bld.vec_type, ""));
    373       denorm = LLVMBuildBitCast(builder, denorm, i32_bld.vec_type, "");
    374 
    375       /* for normals, Infs, Nans fix up exponent */
    376       exp_adj = lp_build_const_int_vec(gallivm, i32_type,
    377                                       (127 - ((1 << (exponent_bits - 1)) - 1)) << 23);
    378       normal = lp_build_add(&i32_bld, srcabs, exp_adj);
    379       tmp = lp_build_and(&i32_bld, wasinfnan, i32_floatexpmask);
    380       normal = lp_build_or(&i32_bld, tmp, normal);
    381 
    382       res = lp_build_select(&i32_bld, isdenorm, denorm, normal);
    383    }
    384 
    385    if (has_sign) {
    386       LLVMValueRef signmask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000);
    387       shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits);
    388       sign = lp_build_shl(&i32_bld, src, shift);
    389       sign = lp_build_and(&i32_bld, signmask, sign);
    390       res = lp_build_or(&i32_bld, res, sign);
    391    }
    392 
    393    return LLVMBuildBitCast(builder, res, f32_bld.vec_type, "");
    394 }
    395 
    396 
    397 /**
    398  * Convert packed float format (r11g11b10) value(s) to rgba float SoA values.
    399  *
    400  * @param src   packed AoS r11g11b10 values (as (vector) int32)
    401  * @param dst   pointer to the SoA result values
    402  */
    403 void
    404 lp_build_r11g11b10_to_float(struct gallivm_state *gallivm,
    405                             LLVMValueRef src,
    406                             LLVMValueRef *dst)
    407 {
    408    LLVMTypeRef src_type = LLVMTypeOf(src);
    409    unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
    410                             LLVMGetVectorSize(src_type) : 1;
    411    struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length);
    412 
    413    dst[0] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 6, 5, 0, false);
    414    dst[1] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 6, 5, 11, false);
    415    dst[2] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 5, 5, 22, false);
    416 
    417    /* Just set alpha to one */
    418    dst[3] = lp_build_one(gallivm, f32_type);
    419 }
    420 
    421 
    422 static LLVMValueRef
    423 lp_build_rgb9_to_float_helper(struct gallivm_state *gallivm,
    424                               struct lp_type f32_type,
    425                               LLVMValueRef src,
    426                               LLVMValueRef scale,
    427                               unsigned mantissa_start)
    428 {
    429    LLVMValueRef shift, mask;
    430 
    431    struct lp_type i32_type = lp_type_int_vec(32, 32 * f32_type.length);
    432    struct lp_build_context i32_bld, f32_bld;
    433 
    434    lp_build_context_init(&i32_bld, gallivm, i32_type);
    435    lp_build_context_init(&f32_bld, gallivm, f32_type);
    436 
    437    /*
    438     * This is much easier as other weirdo float formats, since
    439     * there's no sign, no Inf/NaN, and there's nothing special
    440     * required for normals/denormals neither (as without the implied one
    441     * for the mantissa for other formats, everything looks like a denormal).
    442     * So just do (float)comp_bits * scale
    443     */
    444    shift = lp_build_const_int_vec(gallivm, i32_type, mantissa_start);
    445    mask = lp_build_const_int_vec(gallivm, i32_type, 0x1ff);
    446    src = lp_build_shr(&i32_bld, src, shift);
    447    src = lp_build_and(&i32_bld, src, mask);
    448    src = lp_build_int_to_float(&f32_bld, src);
    449    return lp_build_mul(&f32_bld, src, scale);
    450 }
    451 
    452 
    453 /**
    454  * Convert shared exponent format (rgb9e5) value(s) to rgba float SoA values.
    455  *
    456  * @param src   packed AoS rgb9e5 values (as (vector) int32)
    457  * @param dst   pointer to the SoA result values
    458  */
    459 void
    460 lp_build_rgb9e5_to_float(struct gallivm_state *gallivm,
    461                          LLVMValueRef src,
    462                          LLVMValueRef *dst)
    463 {
    464    LLVMBuilderRef builder = gallivm->builder;
    465    LLVMTypeRef src_type = LLVMTypeOf(src);
    466    LLVMValueRef shift, scale, bias, exp;
    467    unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
    468                             LLVMGetVectorSize(src_type) : 1;
    469    struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
    470    struct lp_type u32_type = lp_type_uint_vec(32, 32 * src_length);
    471    struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length);
    472    struct lp_build_context i32_bld, u32_bld, f32_bld;
    473 
    474    lp_build_context_init(&i32_bld, gallivm, i32_type);
    475    lp_build_context_init(&u32_bld, gallivm, u32_type);
    476    lp_build_context_init(&f32_bld, gallivm, f32_type);
    477 
    478    /* extract exponent */
    479    shift = lp_build_const_int_vec(gallivm, i32_type, 27);
    480    /* this shift needs to be unsigned otherwise need mask */
    481    exp = lp_build_shr(&u32_bld, src, shift);
    482 
    483    /*
    484     * scale factor is 2 ^ (exp - bias)
    485     * (and additionally corrected here for the mantissa bits)
    486     * not using shift because
    487     * a) don't have vector shift in a lot of cases
    488     * b) shift direction changes hence need 2 shifts + conditional
    489     *    (or rotate instruction which is even more rare (for instance XOP))
    490     * so use whacky float 2 ^ function instead manipulating exponent
    491     * (saves us the float conversion at the end too)
    492     */
    493    bias = lp_build_const_int_vec(gallivm, i32_type, 127 - (15 + 9));
    494    scale = lp_build_add(&i32_bld, exp, bias);
    495    shift = lp_build_const_int_vec(gallivm, i32_type, 23);
    496    scale = lp_build_shl(&i32_bld, scale, shift);
    497    scale = LLVMBuildBitCast(builder, scale, f32_bld.vec_type, "");
    498 
    499    dst[0] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 0);
    500    dst[1] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 9);
    501    dst[2] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 18);
    502 
    503    /* Just set alpha to one */
    504    dst[3] = f32_bld.one;
    505 }
    506