Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * @file
     30  * Helper functions for swizzling/shuffling.
     31  *
     32  * @author Jose Fonseca <jfonseca (at) vmware.com>
     33  */
     34 
     35 
     36 #include "util/u_debug.h"
     37 
     38 #include "lp_bld_type.h"
     39 #include "lp_bld_const.h"
     40 #include "lp_bld_init.h"
     41 #include "lp_bld_logic.h"
     42 #include "lp_bld_swizzle.h"
     43 #include "lp_bld_pack.h"
     44 
     45 
     46 LLVMValueRef
     47 lp_build_broadcast(struct gallivm_state *gallivm,
     48                    LLVMTypeRef vec_type,
     49                    LLVMValueRef scalar)
     50 {
     51    LLVMValueRef res;
     52 
     53    if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
     54       /* scalar */
     55       assert(vec_type == LLVMTypeOf(scalar));
     56       res = scalar;
     57    } else {
     58       LLVMBuilderRef builder = gallivm->builder;
     59       const unsigned length = LLVMGetVectorSize(vec_type);
     60       LLVMValueRef undef = LLVMGetUndef(vec_type);
     61       LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
     62 
     63       assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
     64 
     65       if (HAVE_LLVM >= 0x207) {
     66          /* The shuffle vector is always made of int32 elements */
     67          LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
     68          res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
     69          res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
     70       } else {
     71          /* XXX: The above path provokes a bug in LLVM 2.6 */
     72          unsigned i;
     73          res = undef;
     74          for(i = 0; i < length; ++i) {
     75             LLVMValueRef index = lp_build_const_int32(gallivm, i);
     76             res = LLVMBuildInsertElement(builder, res, scalar, index, "");
     77          }
     78       }
     79    }
     80 
     81    return res;
     82 }
     83 
     84 
     85 /**
     86  * Broadcast
     87  */
     88 LLVMValueRef
     89 lp_build_broadcast_scalar(struct lp_build_context *bld,
     90                           LLVMValueRef scalar)
     91 {
     92    assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
     93 
     94    return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
     95 }
     96 
     97 
     98 /**
     99  * Combined extract and broadcast (mere shuffle in most cases)
    100  */
    101 LLVMValueRef
    102 lp_build_extract_broadcast(struct gallivm_state *gallivm,
    103                            struct lp_type src_type,
    104                            struct lp_type dst_type,
    105                            LLVMValueRef vector,
    106                            LLVMValueRef index)
    107 {
    108    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
    109    LLVMValueRef res;
    110 
    111    assert(src_type.floating == dst_type.floating);
    112    assert(src_type.width    == dst_type.width);
    113 
    114    assert(lp_check_value(src_type, vector));
    115    assert(LLVMTypeOf(index) == i32t);
    116 
    117    if (src_type.length == 1) {
    118       if (dst_type.length == 1) {
    119          /*
    120           * Trivial scalar -> scalar.
    121           */
    122 
    123          res = vector;
    124       }
    125       else {
    126          /*
    127           * Broadcast scalar -> vector.
    128           */
    129 
    130          res = lp_build_broadcast(gallivm,
    131                                   lp_build_vec_type(gallivm, dst_type),
    132                                   vector);
    133       }
    134    }
    135    else {
    136       if (dst_type.length > 1) {
    137          /*
    138           * shuffle - result can be of different length.
    139           */
    140 
    141          LLVMValueRef shuffle;
    142          shuffle = lp_build_broadcast(gallivm,
    143                                       LLVMVectorType(i32t, dst_type.length),
    144                                       index);
    145          res = LLVMBuildShuffleVector(gallivm->builder, vector,
    146                                       LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
    147                                       shuffle, "");
    148       }
    149       else {
    150          /*
    151           * Trivial extract scalar from vector.
    152           */
    153           res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
    154       }
    155    }
    156 
    157    return res;
    158 }
    159 
    160 
    161 /**
    162  * Swizzle one channel into all other three channels.
    163  */
    164 LLVMValueRef
    165 lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
    166                             LLVMValueRef a,
    167                             unsigned channel)
    168 {
    169    LLVMBuilderRef builder = bld->gallivm->builder;
    170    const struct lp_type type = bld->type;
    171    const unsigned n = type.length;
    172    unsigned i, j;
    173 
    174    if(a == bld->undef || a == bld->zero || a == bld->one)
    175       return a;
    176 
    177    /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
    178     * using shuffles here actually causes worst results. More investigation is
    179     * needed. */
    180    if (type.width >= 16) {
    181       /*
    182        * Shuffle.
    183        */
    184       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
    185       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
    186 
    187       for(j = 0; j < n; j += 4)
    188          for(i = 0; i < 4; ++i)
    189             shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
    190 
    191       return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
    192    }
    193    else {
    194       /*
    195        * Bit mask and recursive shifts
    196        *
    197        *   XYZW XYZW .... XYZW  <= input
    198        *   0Y00 0Y00 .... 0Y00
    199        *   YY00 YY00 .... YY00
    200        *   YYYY YYYY .... YYYY  <= output
    201        */
    202       struct lp_type type4;
    203       const char shifts[4][2] = {
    204          { 1,  2},
    205          {-1,  2},
    206          { 1, -2},
    207          {-1, -2}
    208       };
    209       unsigned i;
    210 
    211       a = LLVMBuildAnd(builder, a,
    212                        lp_build_const_mask_aos(bld->gallivm,
    213                                                type, 1 << channel), "");
    214 
    215       /*
    216        * Build a type where each element is an integer that cover the four
    217        * channels.
    218        */
    219 
    220       type4 = type;
    221       type4.floating = FALSE;
    222       type4.width *= 4;
    223       type4.length /= 4;
    224 
    225       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
    226 
    227       for(i = 0; i < 2; ++i) {
    228          LLVMValueRef tmp = NULL;
    229          int shift = shifts[channel][i];
    230 
    231 #ifdef PIPE_ARCH_LITTLE_ENDIAN
    232          shift = -shift;
    233 #endif
    234 
    235          if(shift > 0)
    236             tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
    237          if(shift < 0)
    238             tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
    239 
    240          assert(tmp);
    241          if(tmp)
    242             a = LLVMBuildOr(builder, a, tmp, "");
    243       }
    244 
    245       return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
    246    }
    247 }
    248 
    249 
    250 LLVMValueRef
    251 lp_build_swizzle_aos(struct lp_build_context *bld,
    252                      LLVMValueRef a,
    253                      const unsigned char swizzles[4])
    254 {
    255    LLVMBuilderRef builder = bld->gallivm->builder;
    256    const struct lp_type type = bld->type;
    257    const unsigned n = type.length;
    258    unsigned i, j;
    259 
    260    if (swizzles[0] == PIPE_SWIZZLE_RED &&
    261        swizzles[1] == PIPE_SWIZZLE_GREEN &&
    262        swizzles[2] == PIPE_SWIZZLE_BLUE &&
    263        swizzles[3] == PIPE_SWIZZLE_ALPHA) {
    264       return a;
    265    }
    266 
    267    if (swizzles[0] == swizzles[1] &&
    268        swizzles[1] == swizzles[2] &&
    269        swizzles[2] == swizzles[3]) {
    270       switch (swizzles[0]) {
    271       case PIPE_SWIZZLE_RED:
    272       case PIPE_SWIZZLE_GREEN:
    273       case PIPE_SWIZZLE_BLUE:
    274       case PIPE_SWIZZLE_ALPHA:
    275          return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]);
    276       case PIPE_SWIZZLE_ZERO:
    277          return bld->zero;
    278       case PIPE_SWIZZLE_ONE:
    279          return bld->one;
    280       case LP_BLD_SWIZZLE_DONTCARE:
    281          return bld->undef;
    282       default:
    283          assert(0);
    284          return bld->undef;
    285       }
    286    }
    287 
    288    if (type.width >= 16) {
    289       /*
    290        * Shuffle.
    291        */
    292       LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
    293       LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
    294       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
    295       LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
    296 
    297       memset(aux, 0, sizeof aux);
    298 
    299       for(j = 0; j < n; j += 4) {
    300          for(i = 0; i < 4; ++i) {
    301             unsigned shuffle;
    302             switch (swizzles[i]) {
    303             default:
    304                assert(0);
    305                /* fall through */
    306             case PIPE_SWIZZLE_RED:
    307             case PIPE_SWIZZLE_GREEN:
    308             case PIPE_SWIZZLE_BLUE:
    309             case PIPE_SWIZZLE_ALPHA:
    310                shuffle = j + swizzles[i];
    311                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
    312                break;
    313             case PIPE_SWIZZLE_ZERO:
    314                shuffle = type.length + 0;
    315                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
    316                if (!aux[0]) {
    317                   aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
    318                }
    319                break;
    320             case PIPE_SWIZZLE_ONE:
    321                shuffle = type.length + 1;
    322                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
    323                if (!aux[1]) {
    324                   aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
    325                }
    326                break;
    327             case LP_BLD_SWIZZLE_DONTCARE:
    328                shuffles[j + i] = LLVMGetUndef(i32t);
    329                break;
    330             }
    331          }
    332       }
    333 
    334       for (i = 0; i < n; ++i) {
    335          if (!aux[i]) {
    336             aux[i] = undef;
    337          }
    338       }
    339 
    340       return LLVMBuildShuffleVector(builder, a,
    341                                     LLVMConstVector(aux, n),
    342                                     LLVMConstVector(shuffles, n), "");
    343    } else {
    344       /*
    345        * Bit mask and shifts.
    346        *
    347        * For example, this will convert BGRA to RGBA by doing
    348        *
    349        *   rgba = (bgra & 0x00ff0000) >> 16
    350        *        | (bgra & 0xff00ff00)
    351        *        | (bgra & 0x000000ff) << 16
    352        *
    353        * This is necessary not only for faster cause, but because X86 backend
    354        * will refuse shuffles of <4 x i8> vectors
    355        */
    356       LLVMValueRef res;
    357       struct lp_type type4;
    358       unsigned cond = 0;
    359       unsigned chan;
    360       int shift;
    361 
    362       /*
    363        * Start with a mixture of 1 and 0.
    364        */
    365       for (chan = 0; chan < 4; ++chan) {
    366          if (swizzles[chan] == PIPE_SWIZZLE_ONE) {
    367             cond |= 1 << chan;
    368          }
    369       }
    370       res = lp_build_select_aos(bld, cond, bld->one, bld->zero);
    371 
    372       /*
    373        * Build a type where each element is an integer that cover the four
    374        * channels.
    375        */
    376       type4 = type;
    377       type4.floating = FALSE;
    378       type4.width *= 4;
    379       type4.length /= 4;
    380 
    381       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
    382       res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
    383 
    384       /*
    385        * Mask and shift the channels, trying to group as many channels in the
    386        * same shift as possible
    387        */
    388       for (shift = -3; shift <= 3; ++shift) {
    389          unsigned long long mask = 0;
    390 
    391          assert(type4.width <= sizeof(mask)*8);
    392 
    393          for (chan = 0; chan < 4; ++chan) {
    394             /* FIXME: big endian */
    395             if (swizzles[chan] < 4 &&
    396                 chan - swizzles[chan] == shift) {
    397                mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
    398             }
    399          }
    400 
    401          if (mask) {
    402             LLVMValueRef masked;
    403             LLVMValueRef shifted;
    404 
    405             if (0)
    406                debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
    407 
    408             masked = LLVMBuildAnd(builder, a,
    409                                   lp_build_const_int_vec(bld->gallivm, type4, mask), "");
    410             if (shift > 0) {
    411                shifted = LLVMBuildShl(builder, masked,
    412                                       lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
    413             } else if (shift < 0) {
    414                shifted = LLVMBuildLShr(builder, masked,
    415                                        lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
    416             } else {
    417                shifted = masked;
    418             }
    419 
    420             res = LLVMBuildOr(builder, res, shifted, "");
    421          }
    422       }
    423 
    424       return LLVMBuildBitCast(builder, res,
    425                               lp_build_vec_type(bld->gallivm, type), "");
    426    }
    427 }
    428 
    429 
    430 /**
    431  * Extended swizzle of a single channel of a SoA vector.
    432  *
    433  * @param bld         building context
    434  * @param unswizzled  array with the 4 unswizzled values
    435  * @param swizzle     one of the PIPE_SWIZZLE_*
    436  *
    437  * @return  the swizzled value.
    438  */
    439 LLVMValueRef
    440 lp_build_swizzle_soa_channel(struct lp_build_context *bld,
    441                              const LLVMValueRef *unswizzled,
    442                              unsigned swizzle)
    443 {
    444    switch (swizzle) {
    445    case PIPE_SWIZZLE_RED:
    446    case PIPE_SWIZZLE_GREEN:
    447    case PIPE_SWIZZLE_BLUE:
    448    case PIPE_SWIZZLE_ALPHA:
    449       return unswizzled[swizzle];
    450    case PIPE_SWIZZLE_ZERO:
    451       return bld->zero;
    452    case PIPE_SWIZZLE_ONE:
    453       return bld->one;
    454    default:
    455       assert(0);
    456       return bld->undef;
    457    }
    458 }
    459 
    460 
    461 /**
    462  * Extended swizzle of a SoA vector.
    463  *
    464  * @param bld         building context
    465  * @param unswizzled  array with the 4 unswizzled values
    466  * @param swizzles    array of PIPE_SWIZZLE_*
    467  * @param swizzled    output swizzled values
    468  */
    469 void
    470 lp_build_swizzle_soa(struct lp_build_context *bld,
    471                      const LLVMValueRef *unswizzled,
    472                      const unsigned char swizzles[4],
    473                      LLVMValueRef *swizzled)
    474 {
    475    unsigned chan;
    476 
    477    for (chan = 0; chan < 4; ++chan) {
    478       swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
    479                                                     swizzles[chan]);
    480    }
    481 }
    482 
    483 
    484 /**
    485  * Do an extended swizzle of a SoA vector inplace.
    486  *
    487  * @param bld         building context
    488  * @param values      intput/output array with the 4 values
    489  * @param swizzles    array of PIPE_SWIZZLE_*
    490  */
    491 void
    492 lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
    493                              LLVMValueRef *values,
    494                              const unsigned char swizzles[4])
    495 {
    496    LLVMValueRef unswizzled[4];
    497    unsigned chan;
    498 
    499    for (chan = 0; chan < 4; ++chan) {
    500       unswizzled[chan] = values[chan];
    501    }
    502 
    503    lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
    504 }
    505 
    506 
    507 /**
    508  * Transpose from AOS <-> SOA
    509  *
    510  * @param single_type_lp   type of pixels
    511  * @param src              the 4 * n pixel input
    512  * @param dst              the 4 * n pixel output
    513  */
    514 void
    515 lp_build_transpose_aos(struct gallivm_state *gallivm,
    516                        struct lp_type single_type_lp,
    517                        const LLVMValueRef src[4],
    518                        LLVMValueRef dst[4])
    519 {
    520    struct lp_type double_type_lp = single_type_lp;
    521    LLVMTypeRef single_type;
    522    LLVMTypeRef double_type;
    523    LLVMValueRef t0, t1, t2, t3;
    524 
    525    double_type_lp.length >>= 1;
    526    double_type_lp.width  <<= 1;
    527 
    528    double_type = lp_build_vec_type(gallivm, double_type_lp);
    529    single_type = lp_build_vec_type(gallivm, single_type_lp);
    530 
    531    /* Interleave x, y, z, w -> xy and zw */
    532    t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0);
    533    t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0);
    534    t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1);
    535    t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1);
    536 
    537    /* Cast to double width type for second interleave */
    538    t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
    539    t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
    540    t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
    541    t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
    542 
    543    /* Interleave xy, zw -> xyzw */
    544    dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
    545    dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
    546    dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
    547    dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
    548 
    549    /* Cast back to original single width type */
    550    dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
    551    dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
    552    dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
    553    dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
    554 }
    555 
    556 
    557 /**
    558  * Pack first element of aos values,
    559  * pad out to destination size.
    560  * i.e. x1 _ _ _ x2 _ _ _ will become x1 x2 _ _
    561  */
    562 LLVMValueRef
    563 lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
    564                           struct lp_type src_type,
    565                           struct lp_type dst_type,
    566                           const LLVMValueRef src)
    567 {
    568    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
    569    LLVMValueRef undef = LLVMGetUndef(i32t);
    570    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
    571    unsigned num_src = src_type.length / 4;
    572    unsigned num_dst = dst_type.length;
    573    unsigned i;
    574 
    575    assert(num_src <= num_dst);
    576 
    577    for (i = 0; i < num_src; i++) {
    578       shuffles[i] = LLVMConstInt(i32t, i * 4, 0);
    579    }
    580    for (i = num_src; i < num_dst; i++) {
    581       shuffles[i] = undef;
    582    }
    583 
    584    if (num_dst == 1) {
    585       return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
    586    }
    587    else {
    588       return LLVMBuildShuffleVector(gallivm->builder, src, src,
    589                                     LLVMConstVector(shuffles, num_dst), "");
    590    }
    591 }
    592 
    593 
    594 /**
    595  * Unpack and broadcast packed aos values consisting of only the
    596  * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
    597  */
    598 LLVMValueRef
    599 lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
    600                                       struct lp_type src_type,
    601                                       struct lp_type dst_type,
    602                                       const LLVMValueRef src)
    603 {
    604    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
    605    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
    606    unsigned num_dst = dst_type.length;
    607    unsigned num_src = dst_type.length / 4;
    608    unsigned i;
    609 
    610    assert(num_dst / 4 <= src_type.length);
    611 
    612    for (i = 0; i < num_src; i++) {
    613       shuffles[i*4] = LLVMConstInt(i32t, i, 0);
    614       shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
    615       shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
    616       shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
    617    }
    618 
    619    if (num_src == 1) {
    620       return lp_build_extract_broadcast(gallivm, src_type, dst_type,
    621                                         src, shuffles[0]);
    622    }
    623    else {
    624       return LLVMBuildShuffleVector(gallivm->builder, src, src,
    625                                     LLVMConstVector(shuffles, num_dst), "");
    626    }
    627 }
    628 
    629