Home | History | Annotate | Download | only in gallivm
      1 /**************************************************************************
      2  *
      3  * Copyright 2009 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * @file
     30  * Helper functions for logical operations.
     31  *
     32  * @author Jose Fonseca <jfonseca (at) vmware.com>
     33  */
     34 
     35 
     36 #include "util/u_cpu_detect.h"
     37 #include "util/u_memory.h"
     38 #include "util/u_debug.h"
     39 
     40 #include "lp_bld_type.h"
     41 #include "lp_bld_const.h"
     42 #include "lp_bld_swizzle.h"
     43 #include "lp_bld_init.h"
     44 #include "lp_bld_intr.h"
     45 #include "lp_bld_debug.h"
     46 #include "lp_bld_logic.h"
     47 
     48 
     49 /*
     50  * XXX
     51  *
     52  * Selection with vector conditional like
     53  *
     54  *    select <4 x i1> %C, %A, %B
     55  *
     56  * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
     57  * supported on some backends (x86) starting with llvm 3.1.
     58  *
     59  * Expanding the boolean vector to full SIMD register width, as in
     60  *
     61  *    sext <4 x i1> %C to <4 x i32>
     62  *
     63  * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
     64  * it causes assertion failures in LLVM 2.6. It appears to work correctly on
     65  * LLVM 2.7.
     66  */
     67 
     68 
     69 /**
     70  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
     71  * \param func  one of PIPE_FUNC_x
     72  * If the ordered argument is true the function will use LLVM's ordered
     73  * comparisons, otherwise unordered comparisons will be used.
     74  * The result values will be 0 for false or ~0 for true.
     75  */
     76 static LLVMValueRef
     77 lp_build_compare_ext(struct gallivm_state *gallivm,
     78                      const struct lp_type type,
     79                      unsigned func,
     80                      LLVMValueRef a,
     81                      LLVMValueRef b,
     82                      boolean ordered)
     83 {
     84    LLVMBuilderRef builder = gallivm->builder;
     85    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
     86    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
     87    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
     88    LLVMValueRef cond;
     89    LLVMValueRef res;
     90 
     91    assert(lp_check_value(type, a));
     92    assert(lp_check_value(type, b));
     93 
     94    if(func == PIPE_FUNC_NEVER)
     95       return zeros;
     96    if(func == PIPE_FUNC_ALWAYS)
     97       return ones;
     98 
     99    assert(func > PIPE_FUNC_NEVER);
    100    assert(func < PIPE_FUNC_ALWAYS);
    101 
    102    if(type.floating) {
    103       LLVMRealPredicate op;
    104       switch(func) {
    105       case PIPE_FUNC_EQUAL:
    106          op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
    107          break;
    108       case PIPE_FUNC_NOTEQUAL:
    109          op = ordered ? LLVMRealONE : LLVMRealUNE;
    110          break;
    111       case PIPE_FUNC_LESS:
    112          op = ordered ? LLVMRealOLT : LLVMRealULT;
    113          break;
    114       case PIPE_FUNC_LEQUAL:
    115          op = ordered ? LLVMRealOLE : LLVMRealULE;
    116          break;
    117       case PIPE_FUNC_GREATER:
    118          op = ordered ? LLVMRealOGT : LLVMRealUGT;
    119          break;
    120       case PIPE_FUNC_GEQUAL:
    121          op = ordered ? LLVMRealOGE : LLVMRealUGE;
    122          break;
    123       default:
    124          assert(0);
    125          return lp_build_undef(gallivm, type);
    126       }
    127 
    128       cond = LLVMBuildFCmp(builder, op, a, b, "");
    129       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
    130    }
    131    else {
    132       LLVMIntPredicate op;
    133       switch(func) {
    134       case PIPE_FUNC_EQUAL:
    135          op = LLVMIntEQ;
    136          break;
    137       case PIPE_FUNC_NOTEQUAL:
    138          op = LLVMIntNE;
    139          break;
    140       case PIPE_FUNC_LESS:
    141          op = type.sign ? LLVMIntSLT : LLVMIntULT;
    142          break;
    143       case PIPE_FUNC_LEQUAL:
    144          op = type.sign ? LLVMIntSLE : LLVMIntULE;
    145          break;
    146       case PIPE_FUNC_GREATER:
    147          op = type.sign ? LLVMIntSGT : LLVMIntUGT;
    148          break;
    149       case PIPE_FUNC_GEQUAL:
    150          op = type.sign ? LLVMIntSGE : LLVMIntUGE;
    151          break;
    152       default:
    153          assert(0);
    154          return lp_build_undef(gallivm, type);
    155       }
    156 
    157       cond = LLVMBuildICmp(builder, op, a, b, "");
    158       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
    159    }
    160 
    161    return res;
    162 }
    163 
    164 /**
    165  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
    166  * \param func  one of PIPE_FUNC_x
    167  * The result values will be 0 for false or ~0 for true.
    168  */
    169 LLVMValueRef
    170 lp_build_compare(struct gallivm_state *gallivm,
    171                  const struct lp_type type,
    172                  unsigned func,
    173                  LLVMValueRef a,
    174                  LLVMValueRef b)
    175 {
    176    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
    177    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
    178    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
    179 
    180    assert(lp_check_value(type, a));
    181    assert(lp_check_value(type, b));
    182 
    183    if(func == PIPE_FUNC_NEVER)
    184       return zeros;
    185    if(func == PIPE_FUNC_ALWAYS)
    186       return ones;
    187 
    188    assert(func > PIPE_FUNC_NEVER);
    189    assert(func < PIPE_FUNC_ALWAYS);
    190 
    191 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
    192    /*
    193     * There are no unsigned integer comparison instructions in SSE.
    194     */
    195 
    196    if (!type.floating && !type.sign &&
    197        type.width * type.length == 128 &&
    198        util_cpu_caps.has_sse2 &&
    199        (func == PIPE_FUNC_LESS ||
    200         func == PIPE_FUNC_LEQUAL ||
    201         func == PIPE_FUNC_GREATER ||
    202         func == PIPE_FUNC_GEQUAL) &&
    203        (gallivm_debug & GALLIVM_DEBUG_PERF)) {
    204          debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
    205                       __FUNCTION__, type.length, type.width);
    206    }
    207 #endif
    208 
    209    return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
    210 }
    211 
    212 /**
    213  * Build code to compare two values 'a' and 'b' using the given func.
    214  * \param func  one of PIPE_FUNC_x
    215  * If the operands are floating point numbers, the function will use
    216  * ordered comparison which means that it will return true if both
    217  * operands are not a NaN and the specified condition evaluates to true.
    218  * The result values will be 0 for false or ~0 for true.
    219  */
    220 LLVMValueRef
    221 lp_build_cmp_ordered(struct lp_build_context *bld,
    222                      unsigned func,
    223                      LLVMValueRef a,
    224                      LLVMValueRef b)
    225 {
    226    return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
    227 }
    228 
    229 /**
    230  * Build code to compare two values 'a' and 'b' using the given func.
    231  * \param func  one of PIPE_FUNC_x
    232  * If the operands are floating point numbers, the function will use
    233  * unordered comparison which means that it will return true if either
    234  * operand is a NaN or the specified condition evaluates to true.
    235  * The result values will be 0 for false or ~0 for true.
    236  */
    237 LLVMValueRef
    238 lp_build_cmp(struct lp_build_context *bld,
    239              unsigned func,
    240              LLVMValueRef a,
    241              LLVMValueRef b)
    242 {
    243    return lp_build_compare(bld->gallivm, bld->type, func, a, b);
    244 }
    245 
    246 
    247 /**
    248  * Return (mask & a) | (~mask & b);
    249  */
    250 LLVMValueRef
    251 lp_build_select_bitwise(struct lp_build_context *bld,
    252                         LLVMValueRef mask,
    253                         LLVMValueRef a,
    254                         LLVMValueRef b)
    255 {
    256    LLVMBuilderRef builder = bld->gallivm->builder;
    257    struct lp_type type = bld->type;
    258    LLVMValueRef res;
    259 
    260    assert(lp_check_value(type, a));
    261    assert(lp_check_value(type, b));
    262 
    263    if (a == b) {
    264       return a;
    265    }
    266 
    267    if(type.floating) {
    268       LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
    269       a = LLVMBuildBitCast(builder, a, int_vec_type, "");
    270       b = LLVMBuildBitCast(builder, b, int_vec_type, "");
    271    }
    272 
    273    a = LLVMBuildAnd(builder, a, mask, "");
    274 
    275    /* This often gets translated to PANDN, but sometimes the NOT is
    276     * pre-computed and stored in another constant. The best strategy depends
    277     * on available registers, so it is not a big deal -- hopefully LLVM does
    278     * the right decision attending the rest of the program.
    279     */
    280    b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
    281 
    282    res = LLVMBuildOr(builder, a, b, "");
    283 
    284    if(type.floating) {
    285       LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
    286       res = LLVMBuildBitCast(builder, res, vec_type, "");
    287    }
    288 
    289    return res;
    290 }
    291 
    292 
/**
 * Return mask ? a : b;
 *
 * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
 * will yield unpredictable results.
 *
 * Picks the best available lowering: a native LLVM select (scalar, or vector
 * when the mask provably comes from a comparison), an x86 blend intrinsic
 * (SSE4.1/AVX/AVX2), or a generic and/andnot/or fallback.
 */
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
                LLVMValueRef mask,
                LLVMValueRef a,
                LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMContextRef lc = bld->gallivm->context;
   struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   /* Both choices identical -- nothing to select. */
   if(a == b)
      return a;

   if (type.length == 1) {
      /* Scalar case: reduce the 0/~0 mask to an i1 and emit a plain select. */
      mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (!(HAVE_LLVM == 0x0307) &&
            (LLVMIsConstant(mask) ||
             LLVMGetInstructionOpcode(mask) == LLVMSExt)) {
      /* NOTE(review): LLVMGetInstructionOpcode is reached whenever mask is
       * not a constant; presumably mask is then always an instruction here
       * (not e.g. a bare function argument) -- confirm against callers.
       */
      /* Generate a vector select.
       *
       * Using vector selects should avoid emitting intrinsics hence avoid
       * hindering optimization passes, but vector selects weren't properly
       * supported yet for a long time, and LLVM will generate poor code when
       * the mask is not the result of a comparison.
       * Also, llvm 3.7 may miscompile them (bug 94972).
       * XXX: Even if the instruction was an SExt, this may still produce
       * terrible code. Try piglit stencil-twoside.
       */

      /* Convert the mask to a vector of booleans.
       *
       * XXX: In x86 the mask is controlled by the MSB, so if we shifted the
       * mask by `type.width - 1`, LLVM should realize the mask is ready.  Alas
       * what really happens is that LLVM will emit two shifts back to back.
       */
      if (0) {
         /* Disabled alternative: expose the MSB via a logical right shift. */
         LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
         shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
         mask = LLVMBuildLShr(builder, mask, shift, "");
      }
      LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
      mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");

      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (((util_cpu_caps.has_sse4_1 &&
              type.width * type.length == 128) ||
             (util_cpu_caps.has_avx &&
              type.width * type.length == 256 && type.width >= 32) ||
             (util_cpu_caps.has_avx2 &&
              type.width * type.length == 256)) &&
            !LLVMIsConstant(a) &&
            !LLVMIsConstant(b) &&
            !LLVMIsConstant(mask)) {
      /* x86 blend intrinsic path.  Skipped for constant operands, where a
       * plain select/bitwise form gives LLVM more room to fold. */
      const char *intrinsic;
      LLVMTypeRef arg_type;
      LLVMValueRef args[3];

      /*
       *  There's only float blend in AVX but can just cast i32/i64
       *  to float.
       */
      if (type.width * type.length == 256) {
         if (type.width == 64) {
           intrinsic = "llvm.x86.avx.blendv.pd.256";
           arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
         }
         else if (type.width == 32) {
            intrinsic = "llvm.x86.avx.blendv.ps.256";
            arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
         } else {
            /* 256-bit with elements narrower than 32 bits needs AVX2's
             * byte blend. */
            assert(util_cpu_caps.has_avx2);
            intrinsic = "llvm.x86.avx2.pblendvb";
            arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
         }
      }
      else if (type.floating &&
               type.width == 64) {
         intrinsic = "llvm.x86.sse41.blendvpd";
         arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
      } else if (type.floating &&
                 type.width == 32) {
         intrinsic = "llvm.x86.sse41.blendvps";
         arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
      } else {
         /* Any other 128-bit layout: blend byte-wise. */
         intrinsic = "llvm.x86.sse41.pblendvb";
         arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
      }

      /* Bitcast operands to the intrinsic's expected vector type. */
      if (arg_type != bld->int_vec_type) {
         mask = LLVMBuildBitCast(builder, mask, arg_type, "");
      }

      if (arg_type != bld->vec_type) {
         a = LLVMBuildBitCast(builder, a, arg_type, "");
         b = LLVMBuildBitCast(builder, b, arg_type, "");
      }

      /* blendv semantics: picks args[1] where the mask is set, args[0]
       * elsewhere -- hence b goes first. */
      args[0] = b;
      args[1] = a;
      args[2] = mask;

      res = lp_build_intrinsic(builder, intrinsic,
                               arg_type, args, ARRAY_SIZE(args), 0);

      if (arg_type != bld->vec_type) {
         res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
      }
   }
   else {
      /* Generic fallback: (mask & a) | (~mask & b). */
      res = lp_build_select_bitwise(bld, mask, a, b);
   }

   return res;
}
    420 
    421 
    422 /**
    423  * Return mask ? a : b;
    424  *
    425  * mask is a TGSI_WRITEMASK_xxx.
    426  */
    427 LLVMValueRef
    428 lp_build_select_aos(struct lp_build_context *bld,
    429                     unsigned mask,
    430                     LLVMValueRef a,
    431                     LLVMValueRef b,
    432                     unsigned num_channels)
    433 {
    434    LLVMBuilderRef builder = bld->gallivm->builder;
    435    const struct lp_type type = bld->type;
    436    const unsigned n = type.length;
    437    unsigned i, j;
    438 
    439    assert((mask & ~0xf) == 0);
    440    assert(lp_check_value(type, a));
    441    assert(lp_check_value(type, b));
    442 
    443    if(a == b)
    444       return a;
    445    if((mask & 0xf) == 0xf)
    446       return a;
    447    if((mask & 0xf) == 0x0)
    448       return b;
    449    if(a == bld->undef || b == bld->undef)
    450       return bld->undef;
    451 
    452    /*
    453     * There are two major ways of accomplishing this:
    454     * - with a shuffle
    455     * - with a select
    456     *
    457     * The flip between these is empirical and might need to be adjusted.
    458     */
    459    if (n <= 4) {
    460       /*
    461        * Shuffle.
    462        */
    463       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
    464       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
    465 
    466       for(j = 0; j < n; j += num_channels)
    467          for(i = 0; i < num_channels; ++i)
    468             shuffles[j + i] = LLVMConstInt(elem_type,
    469                                            (mask & (1 << i) ? 0 : n) + j + i,
    470                                            0);
    471 
    472       return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
    473    }
    474    else {
    475       LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
    476       return lp_build_select(bld, mask_vec, a, b);
    477    }
    478 }
    479 
    480 
    481 /**
    482  * Return (scalar-cast)val ? true : false;
    483  */
    484 LLVMValueRef
    485 lp_build_any_true_range(struct lp_build_context *bld,
    486                         unsigned real_length,
    487                         LLVMValueRef val)
    488 {
    489    LLVMBuilderRef builder = bld->gallivm->builder;
    490    LLVMTypeRef scalar_type;
    491    LLVMTypeRef true_type;
    492 
    493    assert(real_length <= bld->type.length);
    494 
    495    true_type = LLVMIntTypeInContext(bld->gallivm->context,
    496                                     bld->type.width * real_length);
    497    scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
    498                                       bld->type.width * bld->type.length);
    499    val = LLVMBuildBitCast(builder, val, scalar_type, "");
    500    /*
    501     * We're using always native types so we can use intrinsics.
    502     * However, if we don't do per-element calculations, we must ensure
    503     * the excess elements aren't used since they may contain garbage.
    504     */
    505    if (real_length < bld->type.length) {
    506       val = LLVMBuildTrunc(builder, val, true_type, "");
    507    }
    508    return LLVMBuildICmp(builder, LLVMIntNE,
    509                         val, LLVMConstNull(true_type), "");
    510 }
    511