Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
      2 
      3 ;CHECK-LABEL: vsel_float:
      4 ;CHECK: blendps
      5 ;CHECK: ret
        ; Select between two <4 x float> vectors using a compile-time constant mask:
        ; lanes 0, 2 and 3 are taken from %v1 and lane 1 from %v2. The directives
        ; above expect the select to be emitted as a blendps followed by the return.
      6 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
      7   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %v1, <4 x float> %v2
      8   ret <4 x float> %vsel
      9 }
     10 
     11 
     12 ;CHECK-LABEL: vsel_4xi8:
     13 ;CHECK: blendps
     14 ;CHECK: ret
        ; Constant-mask select on <4 x i8>: lanes 0, 1 and 3 come from %v1, lane 2
        ; from %v2. The expected instruction is the same blendps used for the
        ; 32-bit-lane tests in this file.
        ; NOTE(review): presumably the 4 x i8 value is widened to 32-bit lanes
        ; before the blend is chosen - confirm against the llc output.
     15 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
     16   %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
     17   ret <4 x i8> %vsel
     18 }
     19 
     20 ;CHECK-LABEL: vsel_4xi16:
     21 ;CHECK: blendps
     22 ;CHECK: ret
        ; Constant-mask select on <4 x i16>: lanes 0, 2 and 3 come from %v1, lane 1
        ; from %v2. The expected instruction is blendps, as in the 32-bit-lane tests.
        ; NOTE(review): presumably the 4 x i16 value is widened to 32-bit lanes
        ; before the blend is chosen - confirm against the llc output.
     23 define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
     24   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
     25   ret <4 x i16> %vsel
     26 }
     27 
     28 
     29 ;CHECK-LABEL: vsel_i32:
     30 ;CHECK: blendps
     31 ;CHECK: ret
        ; Constant-mask select on <4 x i32>: lanes 0, 1 and 3 come from %v1, lane 2
        ; from %v2. Although the element type is integer, the directives above
        ; expect the floating-point blend instruction blendps.
     32 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
     33   %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %v1, <4 x i32> %v2
     34   ret <4 x i32> %vsel
     35 }
     36 
     37 
     38 ;CHECK-LABEL: vsel_double:
     39 ;CHECK: movsd
     40 ;CHECK: ret
        ; Constant-mask select on <4 x double>: only lane 0 comes from %v1, lanes
        ; 1-3 come from %v2. The directives above expect a movsd rather than a
        ; blend instruction.
        ; NOTE(review): <4 x double> is 256 bits wide while the RUN line enables
        ; only SSE4.1, so the value is presumably split into 128-bit halves and
        ; only the half holding lane 0 needs the movsd - confirm.
     41 define <4 x double> @vsel_double(<4 x double> %v1, <4 x double> %v2) {
     42   %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %v1, <4 x double> %v2
     43   ret <4 x double> %vsel
     44 }
     45 
     46 
     47 ;CHECK-LABEL: vsel_i64:
     48 ;CHECK: movsd
     49 ;CHECK: ret
        ; Integer counterpart of vsel_double: constant-mask select on <4 x i64>
        ; where only lane 0 comes from %v1 and lanes 1-3 come from %v2. The
        ; directives above expect the same movsd as the double-precision case.
        ; NOTE(review): presumably the 256-bit value is split into 128-bit halves
        ; on this SSE4.1-only target - confirm.
     50 define <4 x i64> @vsel_i64(<4 x i64> %v1, <4 x i64> %v2) {
     51   %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v1, <4 x i64> %v2
     52   ret <4 x i64> %vsel
     53 }
     54 
     55 
     56 ;CHECK-LABEL: vsel_i8:
     57 ;CHECK: pblendvb
     58 ;CHECK: ret
        ; Constant-mask select on <16 x i8>: every fourth byte (lanes 0, 4, 8 and
        ; 12) comes from %v1, all other lanes from %v2. The directives above expect
        ; the byte-granular variable blend pblendvb.
     59 define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
     60   %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
     61   ret <16 x i8> %vsel
     62 }
     63 
     64 ;; TEST blend + compares
     65 ; CHECK-LABEL: A:
        ; Compare-then-select test: each result lane is %x where %x >= %y (ordered
        ; compare), otherwise %y. The mask is computed at run time, so the expected
        ; code is a packed compare followed by the variable blend blendvpd.
        ; The function anchor is a label directive with a trailing colon so it
        ; matches this function's label instead of any stray letter in the output.
     66 define <2 x double> @A(<2 x double> %x, <2 x double> %y) {
     67   ; CHECK: cmplepd
     68   ; CHECK: blendvpd
          ; NOTE(review): a less-or-equal compare for an 'oge' predicate presumably
          ; reflects swapped operands (y <= x) - confirm against the llc output.
     69   %max_is_x = fcmp oge <2 x double> %x, %y
     70   %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
     71   ret <2 x double> %max
     72 }
     73 
     74 ; CHECK-LABEL: B:
        ; Compare-then-select test: each result lane is %x where %x < %y or the
        ; lanes are unordered ('ult'), otherwise %y. The mask is computed at run
        ; time, so the expected code is a packed compare followed by blendvpd.
        ; The function anchor is a label directive with a trailing colon so it
        ; matches this function's label instead of any stray letter in the output.
     75 define <2 x double> @B(<2 x double> %x, <2 x double> %y) {
     76   ; CHECK: cmpnlepd
     77   ; CHECK: blendvpd
          ; NOTE(review): 'not less-or-equal' for an 'ult' predicate presumably
          ; reflects swapped operands (not (y <= x)) - confirm against the llc output.
     78   %min_is_x = fcmp ult <2 x double> %x, %y
     79   %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
     80   ret <2 x double> %min
     81 }
     82 
     83 ; CHECK-LABEL: float_crash:
        ; Selects between undef <4 x double> operands with an undef mask, extracts
        ; lane 0 and stores it through an undef pointer in address space 1. Only
        ; the function label is checked; no particular instruction is expected.
        ; NOTE(review): judging by the name this is presumably a regression test
        ; that only requires llc to compile the function without crashing - confirm.
     84 define void @float_crash() nounwind {
     85 entry:
     86   %merge205vector_func.i = select <4 x i1> undef, <4 x double> undef, <4 x double> undef
     87   %extract214vector_func.i = extractelement <4 x double> %merge205vector_func.i, i32 0
     88   store double %extract214vector_func.i, double addrspace(1)* undef, align 8
     89   ret void
     90 }
     91 
     92 ; If we can figure out a blend has a constant mask, we should emit the
     93 ; blend instruction with an immediate mask.
     94 define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
     95 ; In this case, we emit a simple movsd
     96 ; CHECK-LABEL: constant_blendvpd
     97 ; CHECK: movsd
     98 ; CHECK: ret
          ; Constant mask <true, false>: lane 0 comes from %xy, lane 1 from %ab.
     99   %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %xy, <2 x double> %ab
    100   ret <2 x double> %1
    101 }
    102 
    103 define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
          ; Constant-mask select on <4 x float>: lanes 0-2 come from %abcd and lane 3
          ; from %xyzw. The directives below require that no instruction containing
          ; "mov" appears between the function label and the blend, and that the
          ; blend carries the immediate 7 (0b0111), i.e. bits 0-2 set - the same
          ; lanes for which the select mask is false.
    104 ; CHECK-LABEL: constant_blendvps
    105 ; CHECK-NOT: mov
    106 ; CHECK: blendps $7
    107 ; CHECK: ret
    108   %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %xyzw, <4 x float> %abcd
    109   ret <4 x float> %1
    110 }
    111 
    112 define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
          ; Constant-mask select on <16 x i8>. The mask repeats the 8-lane pattern
          ; <F, F, T, F, T, T, T, F> twice, so lanes 2, 4, 5, 6, 10, 12, 13 and 14
          ; come from %xyzw and the remaining lanes from %abcd. The directives below
          ; expect a movaps followed by the byte-granular blend pblendvb.
          ; NOTE(review): the movaps is presumably the mask constant being brought
          ; into a register for pblendvb - confirm against the llc output.
    113 ; CHECK-LABEL: constant_pblendvb:
    114 ; CHECK: movaps
    115 ; CHECK: pblendvb
    116 ; CHECK: ret
    117   %1 = select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %xyzw, <16 x i8> %abcd
    118   ret <16 x i8> %1
    119 }
    120 
    121 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
    122 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
    123 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
        ; NOTE(review): none of the three SSE4.1 variable-blend intrinsics declared
        ; here is called by any function visible in this file; the constant_*
        ; tests use plain 'select' instead. Confirm whether they are still needed.
    124 
    125 ;; 2 tests for shufflevectors that optimize to blend + immediate
    126 ; CHECK-LABEL: @blend_shufflevector_4xfloat
    127 ; CHECK: blendps $6, %xmm1, %xmm0
    128 ; CHECK: ret
        ; Shuffle whose mask keeps every element in its own lane: indices 0 and 3
        ; pick lanes 0 and 3 of %a, indices 5 and 6 pick lanes 1 and 2 of %b. The
        ; expected immediate 6 (0b0110) has exactly bits 1 and 2 set, matching the
        ; lanes taken from %b.
    129 define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
    130   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
    131   ret <4 x float> %1
    132 }
    133 
    134 ; CHECK-LABEL: @blend_shufflevector_8xi16
    135 ; CHECK: pblendw $134, %xmm1, %xmm0
    136 ; CHECK: ret
        ; Shuffle whose mask keeps every element in its own lane: indices 9, 10 and
        ; 15 pick lanes 1, 2 and 7 of %b, all other lanes come from %a. The
        ; expected immediate 134 (0b10000110) has exactly bits 1, 2 and 7 set,
        ; matching the lanes taken from %b.
    137 define <8 x i16> @blend_shufflevector_8xi16(<8 x i16> %a, <8 x i16> %b) {
    138   %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
    139   ret <8 x i16> %1
    140 }
    141